Skip to content

Commit 8b12370

Browse files
authored
Merge pull request #164 from stac-utils/collection-pagination
Collection pagination
2 parents e10ee6b + 97814f4 commit 8b12370

File tree

13 files changed

+218
-28
lines changed

13 files changed

+218
-28
lines changed

.github/workflows/cicd.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
services:
1717

1818
elasticsearch_8_svc:
19-
image: docker.elastic.co/elasticsearch/elasticsearch:8.1.3
19+
image: docker.elastic.co/elasticsearch/elasticsearch:8.10.4
2020
env:
2121
cluster.name: stac-cluster
2222
node.name: es01

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1010
### Added
1111

1212
- Collection-level Assets to the CollectionSerializer [#148](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/148)
13+
- Pagination for /collections - GET all collections - route [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164)
1314
- Examples folder with example docker setup for running sfes from pip [#147](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/147)
1415
- GET /search filter extension queries [#163](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/163)
1516
- Added support for GET /search intersection queries [#158](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/158)
1617

1718
### Changed
1819

20+
- Update elasticsearch version from 8.1.3 to 8.10.4 in cicd, gh actions [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164)
1921
- Updated core stac-fastapi libraries to 2.4.8 from 2.4.3 [#151](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/151)
2022
- Use aliases on Elasticsearch indices, add number suffix in index name. [#152](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/152)
2123

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,17 @@ curl -X "POST" "http://localhost:8080/collections" \
6161
```
6262

6363
Note: this "Collections Transaction" behavior is not part of the STAC API, but may be soon.
64+
65+
66+
## Collection pagination
67+
68+
The collections route handles optional `limit` and `token` parameters. The `links` field that is
69+
returned from the `/collections` route contains a `next` link with the token that can be used to
70+
get the next page of results.
6471

72+
```shell
73+
curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token"
74+
```
6575

6676
## Testing
6777

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ services:
3131

3232
elasticsearch:
3333
container_name: es-container
34-
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.1.3}
34+
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.10.4}
3535
environment:
3636
ES_JAVA_OPTS: -Xms512m -Xmx1g
3737
volumes:

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Item crud client."""
22
import logging
33
import re
4+
from base64 import urlsafe_b64encode
45
from datetime import datetime as datetime_type
56
from datetime import timezone
67
from typing import Any, Dict, List, Optional, Set, Type, Union
@@ -82,30 +83,58 @@ async def all_collections(self, **kwargs) -> Collections:
8283
Raises:
8384
Exception: If any error occurs while reading the collections from the database.
8485
"""
86+
request: Request = kwargs["request"]
8587
base_url = str(kwargs["request"].base_url)
8688

89+
limit = (
90+
int(request.query_params["limit"])
91+
if "limit" in request.query_params
92+
else 10
93+
)
94+
token = (
95+
request.query_params["token"] if "token" in request.query_params else None
96+
)
97+
98+
hits = await self.database.get_all_collections(limit=limit, token=token)
99+
100+
next_search_after = None
101+
next_link = None
102+
if len(hits) == limit:
103+
last_hit = hits[-1]
104+
next_search_after = last_hit["sort"]
105+
next_token = urlsafe_b64encode(
106+
",".join(map(str, next_search_after)).encode()
107+
).decode()
108+
paging_links = PagingLinks(next=next_token, request=request)
109+
next_link = paging_links.link_next()
110+
111+
links = [
112+
{
113+
"rel": Relations.root.value,
114+
"type": MimeTypes.json,
115+
"href": base_url,
116+
},
117+
{
118+
"rel": Relations.parent.value,
119+
"type": MimeTypes.json,
120+
"href": base_url,
121+
},
122+
{
123+
"rel": Relations.self.value,
124+
"type": MimeTypes.json,
125+
"href": urljoin(base_url, "collections"),
126+
},
127+
]
128+
129+
if next_link:
130+
links.append(next_link)
131+
87132
return Collections(
88133
collections=[
89-
self.collection_serializer.db_to_stac(c, base_url=base_url)
90-
for c in await self.database.get_all_collections()
91-
],
92-
links=[
93-
{
94-
"rel": Relations.root.value,
95-
"type": MimeTypes.json,
96-
"href": base_url,
97-
},
98-
{
99-
"rel": Relations.parent.value,
100-
"type": MimeTypes.json,
101-
"href": base_url,
102-
},
103-
{
104-
"rel": Relations.self.value,
105-
"type": MimeTypes.json,
106-
"href": urljoin(base_url, "collections"),
107-
},
134+
self.collection_serializer.db_to_stac(c["_source"], base_url=base_url)
135+
for c in hits
108136
],
137+
links=links,
109138
)
110139

111140
@overrides

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -305,21 +305,34 @@ class DatabaseLogic:
305305

306306
"""CORE LOGIC"""
307307

308-
async def get_all_collections(self) -> Iterable[Dict[str, Any]]:
308+
async def get_all_collections(
309+
self, token: Optional[str], limit: int
310+
) -> Iterable[Dict[str, Any]]:
309311
"""Retrieve a list of all collections from the database.
310312
313+
Args:
314+
token (Optional[str]): The token used to return the next set of results.
315+
limit (int): Number of results to return
316+
311317
Returns:
312318
collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection.
313319
314320
Notes:
315321
The collections are retrieved from the Elasticsearch database using the `client.search` method,
316-
with the `COLLECTIONS_INDEX` as the target index and `size=1000` to retrieve up to 1000 records.
322+
with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records.
317323
The result is a generator of dictionaries containing the source data for each collection.
318324
"""
319-
# https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/65
320-
# collections should be paginated, but at least return more than the default 10 for now
321-
collections = await self.client.search(index=COLLECTIONS_INDEX, size=1000)
322-
return (c["_source"] for c in collections["hits"]["hits"])
325+
search_after = None
326+
if token:
327+
search_after = urlsafe_b64decode(token.encode()).decode().split(",")
328+
collections = await self.client.search(
329+
index=COLLECTIONS_INDEX,
330+
search_after=search_after,
331+
size=limit,
332+
sort={"id": {"order": "asc"}},
333+
)
334+
hits = collections["hits"]["hits"]
335+
return hits
323336

324337
async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
325338
"""Retrieve a single item from the database.

stac_fastapi/elasticsearch/tests/api/test_api.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,20 @@
3333
}
3434

3535

36+
@pytest.mark.asyncio
3637
async def test_post_search_content_type(app_client, ctx):
3738
params = {"limit": 1}
3839
resp = await app_client.post("/search", json=params)
3940
assert resp.headers["content-type"] == "application/geo+json"
4041

4142

43+
@pytest.mark.asyncio
4244
async def test_get_search_content_type(app_client, ctx):
4345
resp = await app_client.get("/search")
4446
assert resp.headers["content-type"] == "application/geo+json"
4547

4648

49+
@pytest.mark.asyncio
4750
async def test_api_headers(app_client):
4851
resp = await app_client.get("/api")
4952
assert (
@@ -52,11 +55,13 @@ async def test_api_headers(app_client):
5255
assert resp.status_code == 200
5356

5457

58+
@pytest.mark.asyncio
5559
async def test_router(app):
5660
api_routes = set([f"{list(route.methods)[0]} {route.path}" for route in app.routes])
5761
assert len(api_routes - ROUTES) == 0
5862

5963

64+
@pytest.mark.asyncio
6065
async def test_app_transaction_extension(app_client, ctx):
6166
item = copy.deepcopy(ctx.item)
6267
item["id"] = str(uuid.uuid4())
@@ -66,6 +71,7 @@ async def test_app_transaction_extension(app_client, ctx):
6671
await app_client.delete(f"/collections/{item['collection']}/items/{item['id']}")
6772

6873

74+
@pytest.mark.asyncio
6975
async def test_app_search_response(app_client, ctx):
7076
resp = await app_client.get("/search", params={"ids": ["test-item"]})
7177
assert resp.status_code == 200
@@ -77,6 +83,7 @@ async def test_app_search_response(app_client, ctx):
7783
assert resp_json.get("stac_extensions") is None
7884

7985

86+
@pytest.mark.asyncio
8087
async def test_app_context_extension(app_client, ctx, txn_client):
8188
test_item = ctx.item
8289
test_item["id"] = "test-item-2"
@@ -110,13 +117,15 @@ async def test_app_context_extension(app_client, ctx, txn_client):
110117
assert matched == 1
111118

112119

120+
@pytest.mark.asyncio
113121
async def test_app_fields_extension(app_client, ctx, txn_client):
114122
resp = await app_client.get("/search", params={"collections": ["test-collection"]})
115123
assert resp.status_code == 200
116124
resp_json = resp.json()
117125
assert list(resp_json["features"][0]["properties"]) == ["datetime"]
118126

119127

128+
@pytest.mark.asyncio
120129
async def test_app_fields_extension_query(app_client, ctx, txn_client):
121130
resp = await app_client.post(
122131
"/search",
@@ -130,6 +139,7 @@ async def test_app_fields_extension_query(app_client, ctx, txn_client):
130139
assert list(resp_json["features"][0]["properties"]) == ["datetime", "proj:epsg"]
131140

132141

142+
@pytest.mark.asyncio
133143
async def test_app_fields_extension_no_properties_get(app_client, ctx, txn_client):
134144
resp = await app_client.get(
135145
"/search", params={"collections": ["test-collection"], "fields": "-properties"}
@@ -139,6 +149,7 @@ async def test_app_fields_extension_no_properties_get(app_client, ctx, txn_clien
139149
assert "properties" not in resp_json["features"][0]
140150

141151

152+
@pytest.mark.asyncio
142153
async def test_app_fields_extension_no_properties_post(app_client, ctx, txn_client):
143154
resp = await app_client.post(
144155
"/search",
@@ -152,6 +163,7 @@ async def test_app_fields_extension_no_properties_post(app_client, ctx, txn_clie
152163
assert "properties" not in resp_json["features"][0]
153164

154165

166+
@pytest.mark.asyncio
155167
async def test_app_fields_extension_return_all_properties(app_client, ctx, txn_client):
156168
item = ctx.item
157169
resp = await app_client.get(
@@ -168,6 +180,7 @@ async def test_app_fields_extension_return_all_properties(app_client, ctx, txn_c
168180
assert feature["properties"][expected_prop] == expected_value
169181

170182

183+
@pytest.mark.asyncio
171184
async def test_app_query_extension_gt(app_client, ctx):
172185
params = {"query": {"proj:epsg": {"gt": ctx.item["properties"]["proj:epsg"]}}}
173186
resp = await app_client.post("/search", json=params)
@@ -176,6 +189,7 @@ async def test_app_query_extension_gt(app_client, ctx):
176189
assert len(resp_json["features"]) == 0
177190

178191

192+
@pytest.mark.asyncio
179193
async def test_app_query_extension_gte(app_client, ctx):
180194
params = {"query": {"proj:epsg": {"gte": ctx.item["properties"]["proj:epsg"]}}}
181195
resp = await app_client.post("/search", json=params)
@@ -184,22 +198,26 @@ async def test_app_query_extension_gte(app_client, ctx):
184198
assert len(resp.json()["features"]) == 1
185199

186200

201+
@pytest.mark.asyncio
187202
async def test_app_query_extension_limit_lt0(app_client):
188203
assert (await app_client.post("/search", json={"limit": -1})).status_code == 400
189204

190205

206+
@pytest.mark.asyncio
191207
async def test_app_query_extension_limit_gt10000(app_client):
192208
resp = await app_client.post("/search", json={"limit": 10001})
193209
assert resp.status_code == 200
194210
assert resp.json()["context"]["limit"] == 10000
195211

196212

213+
@pytest.mark.asyncio
197214
async def test_app_query_extension_limit_10000(app_client):
198215
params = {"limit": 10000}
199216
resp = await app_client.post("/search", json=params)
200217
assert resp.status_code == 200
201218

202219

220+
@pytest.mark.asyncio
203221
async def test_app_sort_extension(app_client, txn_client, ctx):
204222
first_item = ctx.item
205223
item_date = datetime.strptime(
@@ -225,6 +243,7 @@ async def test_app_sort_extension(app_client, txn_client, ctx):
225243
assert resp_json["features"][1]["id"] == second_item["id"]
226244

227245

246+
@pytest.mark.asyncio
228247
async def test_search_invalid_date(app_client, ctx):
229248
params = {
230249
"datetime": "2020-XX-01/2020-10-30",
@@ -272,6 +291,7 @@ async def test_search_point_intersects_post(app_client, ctx):
272291
assert len(resp_json["features"]) == 1
273292

274293

294+
@pytest.mark.asyncio
275295
async def test_search_point_does_not_intersect(app_client, ctx):
276296
point = [15.04, -3.14]
277297
intersects = {"type": "Point", "coordinates": point}
@@ -287,6 +307,7 @@ async def test_search_point_does_not_intersect(app_client, ctx):
287307
assert len(resp_json["features"]) == 0
288308

289309

310+
@pytest.mark.asyncio
290311
async def test_datetime_non_interval(app_client, ctx):
291312
dt_formats = [
292313
"2020-02-12T12:30:22+00:00",
@@ -308,6 +329,7 @@ async def test_datetime_non_interval(app_client, ctx):
308329
assert resp_json["features"][0]["properties"]["datetime"][0:19] == dt[0:19]
309330

310331

332+
@pytest.mark.asyncio
311333
async def test_bbox_3d(app_client, ctx):
312334
australia_bbox = [106.343365, -47.199523, 0.1, 168.218365, -19.437288, 0.1]
313335
params = {
@@ -320,6 +342,7 @@ async def test_bbox_3d(app_client, ctx):
320342
assert len(resp_json["features"]) == 1
321343

322344

345+
@pytest.mark.asyncio
323346
async def test_search_line_string_intersects(app_client, ctx):
324347
line = [[150.04, -33.14], [150.22, -33.89]]
325348
intersects = {"type": "LineString", "coordinates": line}

0 commit comments

Comments
 (0)