-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathtest_frontiers.py
149 lines (110 loc) · 4.53 KB
/
test_frontiers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import time
from types import GeneratorType
from collections import Iterable
from six import string_types
from scrapinghub.client.frontiers import Frontiers, Frontier, FrontierSlot
from ..conftest import TEST_FRONTIER_SLOT
def _add_test_requests_to_frontier(frontier):
    """Queue two fixed fingerprints in the test slot and flush the slot.

    The requests go to the ``TEST_FRONTIER_SLOT`` slot of ``frontier``.
    """
    test_slot = frontier.get(TEST_FRONTIER_SLOT)
    fingerprints = ('/some/path.html', '/other/path.html')
    test_slot.q.add([{'fp': fingerprint} for fingerprint in fingerprints])
    test_slot.flush()
def _clean_project_frontiers(project):
    """Delete every slot of every frontier that belongs to a project.

    The frontier fixture only resets the test slot before each test; some
    tests are easier to write when all frontiers are emptied first, so that
    they start from zero counters.
    """
    for name in project.frontiers.iter():
        current = project.frontiers.get(name)
        for slot_name in current.iter():
            slot = current.get(slot_name)
            slot.delete()
def test_frontiers(project, frontier, frontier_name):
    """Check the project-level frontiers object: iter/list/get/flush/newcount."""
    # start from a re-created test slot that already holds some requests
    test_slot = frontier.get(TEST_FRONTIER_SLOT)
    test_slot.delete()
    _add_test_requests_to_frontier(frontier)

    assert isinstance(project.frontiers, Frontiers)
    collection = project.frontiers

    # iter(): an iterable that includes the test frontier name
    names_iter = collection.iter()
    assert isinstance(names_iter, Iterable)
    assert frontier_name in list(names_iter)

    # list(): the test frontier name must be listed as well
    names = collection.list()
    assert frontier_name in names

    # get(): returns a Frontier wrapper for the given name
    fetched = collection.get(frontier_name)
    assert isinstance(fetched, Frontier)

    # remaining checks: flushing works and newcount is an integer
    collection.flush()
    assert isinstance(collection.newcount, int)
def test_frontier(project, frontier):
    """Check a single frontier: slot iteration, listing, lookup and flush."""
    # re-create the test slot by dropping it and queueing fresh requests
    stale_slot = frontier.get(TEST_FRONTIER_SLOT)
    stale_slot.delete()
    _add_test_requests_to_frontier(frontier)

    # iter(): an iterable that includes the test slot name
    slot_names_iter = frontier.iter()
    assert isinstance(slot_names_iter, Iterable)
    assert TEST_FRONTIER_SLOT in list(slot_names_iter)

    # list(): the test slot must be listed as well
    slot_names = frontier.list()
    assert TEST_FRONTIER_SLOT in slot_names

    # get(): returns a FrontierSlot wrapper for the slot name
    test_slot = frontier.get(TEST_FRONTIER_SLOT)
    assert isinstance(test_slot, FrontierSlot)

    frontier.flush()
def test_frontier_slot(project, frontier):
    """Check a frontier slot queue: reading, adding and deleting batches.

    The test slot is re-created with two requests, its batches are read
    through both ``q.iter()`` and ``q.list()``, one more request with extra
    parameters is added, and finally all batches and the slot itself are
    removed.
    """
    # add some requests to test frontier to init a test slot
    frontier.get(TEST_FRONTIER_SLOT).delete()
    _add_test_requests_to_frontier(frontier)
    slot = frontier.get(TEST_FRONTIER_SLOT)

    # get all batches from slot and validate its content
    batches_iter = slot.q.iter()
    assert isinstance(batches_iter, GeneratorType)
    batches = list(batches_iter)
    assert len(batches) == 1
    assert isinstance(batches[0], dict)
    assert sorted(batches[0].keys()) == ['id', 'requests']
    assert isinstance(batches[0]['id'], string_types)
    requests = batches[0]['requests']
    assert len(requests) == 2
    assert requests == [['/some/path.html', None],
                        ['/other/path.html', None]]

    # validate that slot.q.list() returns same data as slot.q.iter();
    # the type check targets the list() result, not the local list built
    # from the generator above (which would be trivially true)
    batches_list = slot.q.list()
    assert isinstance(batches_list, list)
    assert batches_list == batches

    # add a request with additional parameters
    slot.q.add([{'fp': 'page1.html', 'p': 1, 'qdata': {'depth': 1}}])
    slot.flush()
    batches = slot.q.list()
    assert len(batches) == 2
    assert batches[1]['requests'] == [['page1.html', {'depth': 1}]]

    # drop all batches and validate that slot is empty
    slot.q.delete([batch['id'] for batch in batches])
    assert slot.q.list() == []

    # removing the slot must make it disappear from the frontier listing
    slot.delete()
    assert TEST_FRONTIER_SLOT not in frontier.list()
def test_frontier_newcount(project, frontier):
    """Check ``newcount`` at the frontiers, frontier and slot levels.

    All project frontiers are emptied first so every counter starts at 0.
    Two requests are added to the test slot, then one more to a second slot
    (``'test2.com'``), and the counters are checked after each step.

    The frontiers object is closed in a ``finally`` clause so that it is
    closed even when one of the assertions fails.
    """
    _clean_project_frontiers(project)
    first_slot = frontier.get(TEST_FRONTIER_SLOT)
    try:
        assert frontier._frontiers.newcount == 0
        assert frontier.newcount == 0
        assert first_slot.newcount == 0

        # shorter batch interval for faster tests
        frontier._frontiers._origin.batch_interval = 0.1
        _add_test_requests_to_frontier(frontier)
        # NOTE(review): presumably waits for the background batch writer to
        # report the new requests -- confirm against the client internals
        time.sleep(0.5)
        assert frontier._frontiers.newcount == 2
        assert frontier.newcount == 2
        assert first_slot.newcount == 2

        # a second slot contributes to the frontier-wide counters but keeps
        # its own per-slot counter
        second_slot = frontier.get('test2.com')
        second_slot.delete()
        second_slot.q.add([{'fp': '/different_path.html'}])
        second_slot.flush()
        assert frontier._frontiers.newcount == 3
        assert frontier.newcount == 3
        assert second_slot.newcount == 1
        assert first_slot.newcount == 2
    finally:
        # always close the frontiers object, even after a failed assertion
        frontier._frontiers.close()
def test_slot_count(project, frontier):
    """Check that ``q.count()`` matches the number of items from ``q.iter()``.

    All project frontiers are emptied before the comparison is made.
    """
    _clean_project_frontiers(project)
    test_slot = frontier.get(TEST_FRONTIER_SLOT)
    # count() is queried before iterating, same as the items are read after
    reported = test_slot.q.count()
    items = list(test_slot.q.iter())
    assert reported == len(items)