Skip to content

Commit 891ba7c

Browse files
Added Response.search_after() method (#1829)
* Added Response.search_after() method
* Add match clause to pytest.raises
1 parent 8e7b138 commit 891ba7c

File tree

4 files changed

+170
-0
lines changed

4 files changed

+170
-0
lines changed

elasticsearch_dsl/response/__init__.py

+32
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,38 @@ def aggs(self):
9090
super(AttrDict, self).__setattr__("_aggs", aggs)
9191
return self._aggs
9292

93+
def search_after(self):
    """
    Return a ``Search`` instance that retrieves the next page of results.

    This method provides an easy way to paginate a long list of results using
    the ``search_after`` option. For example::

        page_size = 20
        s = Search()[:page_size].sort("date")

        while True:
            # get a page of results (with the synchronous ``Search`` call
            # ``s.execute()`` without ``await``)
            r = await s.execute()

            # do something with this page of results

            # exit the loop if we reached the end
            if len(r.hits) < page_size:
                break

            # get a search object with the next page of results
            s = r.search_after()

    Note that the ``search_after`` option requires the search to have an
    explicit ``sort`` order.

    :return: the object returned by the originating search's ``extra()``
        call, with ``search_after`` set to the sort values of the last hit
        of this response.
    :raises ValueError: if this response contains no hits, or if the last
        hit carries no ``sort`` metadata (the search was not sorted).
    """
    if len(self.hits) == 0:
        raise ValueError("Cannot use search_after when there are no search results")
    # The sort values of the final hit are the cursor for the next page.
    last_hit = self.hits[-1]
    if not hasattr(last_hit.meta, "sort"):
        raise ValueError("Cannot use search_after when results are not sorted")
    return self._search.extra(search_after=last_hit.meta.sort)
124+
93125

94126
class AggResponse(AttrDict):
95127
def __init__(self, aggs, search, data):

elasticsearch_dsl/search_base.py

+30
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,36 @@ def suggest(self, name, text, **kwargs):
760760
s._suggest[name].update(kwargs)
761761
return s
762762

763+
def search_after(self):
    """
    Return a ``Search`` instance that retrieves the next page of results.

    This method provides an easy way to paginate a long list of results using
    the ``search_after`` option. For example::

        page_size = 20
        s = Search()[:page_size].sort("date")

        while True:
            # get a page of results (with the synchronous ``Search`` call
            # ``s.execute()`` without ``await``)
            r = await s.execute()

            # do something with this page of results

            # exit the loop if we reached the end
            if len(r.hits) < page_size:
                break

            # get a search object with the next page of results
            s = s.search_after()

    Note that the ``search_after`` option requires the search to have an
    explicit ``sort`` order, and that this search must already have been
    executed so that a response is available to paginate from.

    :return: whatever the stored response's ``search_after()`` returns.
    :raises ValueError: if this search has no stored response yet; errors
        raised by the response's ``search_after()`` propagate unchanged.
    """
    # Pagination continues from the stored response, so one must exist.
    if not hasattr(self, "_response"):
        raise ValueError("A search must be executed before using search_after")
    return self._response.search_after()
792+
763793
def to_dict(self, count=False, **kwargs):
764794
"""
765795
Serialize the search into the dictionary that will be sent over as the

tests/test_integration/_async/test_search.py

+54
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,60 @@ async def test_scan_iterates_through_all_docs(async_data_client):
125125
assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
126126

127127

128+
@pytest.mark.asyncio
async def test_search_after(async_data_client):
    """Page through ``flat-git`` with search_after and collect every commit."""
    page_size = 7
    search = AsyncSearch(index="flat-git")[:page_size].sort("authored_date")
    collected = []
    while True:
        response = await search.execute()
        collected.extend(response.hits)
        # A short page means there is nothing left to fetch.
        if len(response.hits) < page_size:
            break
        search = response.search_after()

    assert len(collected) == 52
    expected_ids = {d["_id"] for d in FLAT_DATA}
    assert expected_ids == {c.meta.id for c in collected}
142+
143+
144+
@pytest.mark.asyncio
async def test_search_after_no_search(async_data_client):
    """search_after() must fail until the search has been executed.

    ``search_after()`` is a plain (non-coroutine) method, so it is called
    without ``await``: the ``ValueError`` is raised by the call itself.
    """
    s = AsyncSearch(index="flat-git")
    with raises(
        ValueError, match="A search must be executed before using search_after"
    ):
        s.search_after()
    # count() alone does not make search_after() usable.
    await s.count()
    with raises(
        ValueError, match="A search must be executed before using search_after"
    ):
        s.search_after()
156+
157+
158+
@pytest.mark.asyncio
async def test_search_after_no_sort(async_data_client):
    """search_after() must reject a response produced by an unsorted search.

    ``search_after()`` is a plain (non-coroutine) method, so it is called
    without ``await``: the ``ValueError`` is raised by the call itself.
    """
    s = AsyncSearch(index="flat-git")
    r = await s.execute()
    with raises(
        ValueError, match="Cannot use search_after when results are not sorted"
    ):
        r.search_after()
166+
167+
168+
@pytest.mark.asyncio
async def test_search_after_no_results(async_data_client):
    """search_after() must fail on a response that contains no hits.

    ``search_after()`` is a plain (non-coroutine) method, so it is called
    without ``await``: the ``ValueError`` is raised by the call itself.
    """
    s = AsyncSearch(index="flat-git")[:100].sort("authored_date")
    r = await s.execute()
    assert 52 == len(r.hits)
    # The first page already holds every document, so the next page is empty.
    s = r.search_after()
    r = await s.execute()
    assert 0 == len(r.hits)
    with raises(
        ValueError, match="Cannot use search_after when there are no search results"
    ):
        r.search_after()
180+
181+
128182
@pytest.mark.asyncio
129183
async def test_response_is_cached(async_data_client):
130184
s = Repository.search()

tests/test_integration/_sync/test_search.py

+54
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,60 @@ def test_scan_iterates_through_all_docs(data_client):
117117
assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
118118

119119

120+
@pytest.mark.sync
def test_search_after(data_client):
    """Page through ``flat-git`` with search_after and collect every commit."""
    page_size = 7
    search = Search(index="flat-git")[:page_size].sort("authored_date")
    collected = []
    while True:
        response = search.execute()
        collected.extend(response.hits)
        # A short page means there is nothing left to fetch.
        if len(response.hits) < page_size:
            break
        search = response.search_after()

    assert len(collected) == 52
    expected_ids = {d["_id"] for d in FLAT_DATA}
    assert expected_ids == {c.meta.id for c in collected}
134+
135+
136+
@pytest.mark.sync
def test_search_after_no_search(data_client):
    """search_after() must fail until the search has been executed."""
    expected_error = "A search must be executed before using search_after"
    search = Search(index="flat-git")
    with raises(ValueError, match=expected_error):
        search.search_after()
    # count() alone does not make search_after() usable.
    search.count()
    with raises(ValueError, match=expected_error):
        search.search_after()
148+
149+
150+
@pytest.mark.sync
def test_search_after_no_sort(data_client):
    """search_after() must reject a response produced by an unsorted search."""
    expected_error = "Cannot use search_after when results are not sorted"
    response = Search(index="flat-git").execute()
    with raises(ValueError, match=expected_error):
        response.search_after()
158+
159+
160+
@pytest.mark.sync
def test_search_after_no_results(data_client):
    """search_after() must fail on a response that contains no hits."""
    expected_error = "Cannot use search_after when there are no search results"
    first_page = Search(index="flat-git")[:100].sort("authored_date").execute()
    assert len(first_page.hits) == 52
    # The first page already holds every document, so the next page is empty.
    second_page = first_page.search_after().execute()
    assert len(second_page.hits) == 0
    with raises(ValueError, match=expected_error):
        second_page.search_after()
172+
173+
120174
@pytest.mark.sync
121175
def test_response_is_cached(data_client):
122176
s = Repository.search()

0 commit comments

Comments
 (0)