Skip to content

Commit 82e4e8e

Browse files
Switch to the DuckDB Expression API to mitigate sql injection risk (#777)
## What I am changing <!-- What were the high-level goals of the change? --> Using string concatenation in [_geoarrow/_duckdb.py](https://github.com/developmentseed/lonboard/blob/main/lonboard/_geoarrow/_duckdb.py) triggered the warning [hardcoded-sql-expression (S608)](https://docs.astral.sh/ruff/rules/hardcoded-sql-expression/), and rightfully so, e.g. surfacing in the earlier issue #768 . Luckily, the [DuckDB Expression API](https://duckdb.org/docs/stable/clients/python/expression.html) allows us to rewrite these. A nice side effect of this is that the Expression API does not need the connection object to process a DuckDBPyRelation. ## How I did it <!-- How did you go about achieving these goals? Any considerations made along the way? --> The core is to [rewrite](6d41d02) `SELECT ST_AsWKB( {geom_col_name} ) as {geom_col_name} FROM rel;` into: ```python rel.select( FunctionExpression("st_aswkb", ColumnExpression(geom_col_name)).alias( geom_col_name, ) ``` , the rest just cascaded from there. ## How you can test it <!-- How might a reviewer test your changes to verify that they work as expected? --> - I edited the related tests too. ## Related Issues <!-- Reference any issues that inspired this PR. Use a keyword to auto-close any issues when this PR is merged (eg "closes #1") --> - n/a
1 parent 25334bc commit 82e4e8e

File tree

4 files changed

+62
-94
lines changed

4 files changed

+62
-94
lines changed

lonboard/_geoarrow/_duckdb.py

+9-43
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
# ruff: noqa: S608
2-
# Possible SQL injection vector through string-based query construction
3-
41
from __future__ import annotations
52

63
import json
@@ -17,6 +14,7 @@
1714
list_array,
1815
struct_field,
1916
)
17+
from duckdb import ColumnExpression, FunctionExpression
2018

2119
from lonboard._constants import EXTENSION_NAME
2220

@@ -37,7 +35,6 @@
3735
def from_duckdb(
3836
rel: duckdb.DuckDBPyRelation,
3937
*,
40-
con: duckdb.DuckDBPyConnection | None = None,
4138
crs: str | pyproj.CRS | None = None,
4239
) -> Table:
4340
geom_col_idxs = [
@@ -67,7 +64,7 @@ def from_duckdb(
6764
crs=crs,
6865
)
6966
if geom_type == "GEOMETRY":
70-
return _from_geometry(rel, con=con, geom_col_idx=geom_col_idx, crs=crs)
67+
return _from_geometry(rel, geom_col_idx=geom_col_idx, crs=crs)
7168
if geom_type == "POINT_2D":
7269
return _from_geoarrow(
7370
rel,
@@ -101,7 +98,6 @@ def from_duckdb(
10198
def _from_geometry(
10299
rel: duckdb.DuckDBPyRelation,
103100
*,
104-
con: duckdb.DuckDBPyConnection | None = None,
105101
geom_col_idx: int,
106102
crs: str | pyproj.CRS | None = None,
107103
) -> Table:
@@ -120,43 +116,13 @@ def _from_geometry(
120116
geom_col_name,
121117
), f"Expected geometry column name to match regex: {re_match}"
122118

123-
if con is not None:
124-
geom_table = Table.from_arrow(
125-
con.sql(f"""
126-
SELECT ST_AsWKB( {geom_col_name} ) as {geom_col_name} FROM rel;
127-
""").arrow(),
128-
)
129-
else:
130-
import duckdb
131-
132-
# We need to re-import the spatial extension because this is a different context
133-
# as the user's context.
134-
# It would be nice to re-use the user's context, but in the case of `viz` where
135-
# we want to visualize a single input object, we want to accept a
136-
# DuckDBPyRelation as input.
137-
sql = f"""
138-
INSTALL spatial;
139-
LOAD spatial;
140-
SELECT ST_AsWKB( {geom_col_name} ) as {geom_col_name} FROM rel;
141-
"""
142-
try:
143-
# duckdb.default_connection changed from attribute to callable in duckdb 1.2
144-
default_con = (
145-
duckdb.default_connection()
146-
if callable(duckdb.default_connection)
147-
else duckdb.default_connection
148-
)
149-
geom_table = Table.from_arrow(
150-
duckdb.execute(sql, connection=default_con).arrow(),
151-
)
152-
except duckdb.CatalogException as err:
153-
msg = (
154-
"Could not coerce type GEOMETRY to WKB.\n"
155-
"This often happens from using a custom DuckDB connection object.\n"
156-
"Either pass in a `con` object containing the DuckDB connection or "
157-
"cast to WKB manually with `ST_AsWKB`."
158-
)
159-
raise ValueError(msg) from err
119+
geom_table = Table.from_arrow(
120+
rel.select(
121+
FunctionExpression("st_aswkb", ColumnExpression(geom_col_name)).alias(
122+
geom_col_name,
123+
),
124+
).arrow(),
125+
)
160126

161127
metadata = _make_geoarrow_field_metadata(EXTENSION_NAME.WKB, crs)
162128
geom_field = geom_table.schema.field(0).with_metadata(metadata)

lonboard/_layer.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,7 @@ def from_duckdb(
420420
sql: The SQL input to visualize. This can either be a string containing a
421421
SQL query or the output of the duckdb `sql` function.
422422
con: The current DuckDB connection. This is required when passing a `str` to
423-
the `sql` parameter or when using a non-global DuckDB connection.
424-
Defaults to None.
423+
the `sql` parameter.
425424
426425
Keyword Args:
427426
crs: The CRS of the input data. This can either be a string passed to
@@ -436,9 +435,9 @@ def from_duckdb(
436435
assert con is not None, "con must be provided when sql is a str"
437436

438437
rel = con.sql(sql)
439-
table = _from_duckdb(rel, con=con, crs=crs)
438+
table = _from_duckdb(rel, crs=crs)
440439
else:
441-
table = _from_duckdb(sql, con=con, crs=crs)
440+
table = _from_duckdb(sql, crs=crs)
442441

443442
return cls(table=table, **kwargs)
444443

lonboard/_viz.py

+16-37
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
import json
8+
import warnings
89
from textwrap import dedent
910
from typing import (
1011
TYPE_CHECKING,
@@ -126,18 +127,6 @@ def viz(
126127
viz(query)
127128
```
128129
129-
If you're using a custom connection, ensure you pass in the `con` parameter:
130-
131-
```py
132-
import duckdb
133-
from lonboard import viz
134-
135-
con = duckdb.connect()
136-
sql = "SELECT * FROM spatial_table;"
137-
query = con.sql(sql)
138-
viz(query, con=con)
139-
```
140-
141130
You can also render an entire table by using the `table()` method:
142131
143132
```py
@@ -146,7 +135,7 @@ def viz(
146135
147136
con = duckdb.connect()
148137
con.execute("CREATE TABLE spatial_table AS ...;")
149-
viz(con.table(), con=con)
138+
viz(con.table())
150139
```
151140
152141
!!! warning
@@ -179,9 +168,8 @@ def viz(
179168
[`PolygonLayer`][lonboard.PolygonLayer]s.
180169
map_kwargs: a `dict` of parameters to pass down to the generated
181170
[`Map`][lonboard.Map].
182-
con: the active DuckDB connection. This is necessary in some cases when passing
183-
in a DuckDB query. In particular, if you're using a non-global DuckDB
184-
connection and if your SQL query outputs the default `GEOMETRY` type.
171+
con: Deprecated: the active DuckDB connection. This argument has no effect and
172+
might be removed in the future.
185173
186174
For more control over rendering, construct [`Map`][lonboard.Map] and `Layer` objects
187175
directly.
@@ -192,6 +180,14 @@ def viz(
192180
"""
193181
global COLOR_COUNTER # noqa: PLW0603 Using the global statement to update `COLOR_COUNTER` is discouraged
194182

183+
if con is not None:
184+
warnings.warn(
185+
"The 'con' argument is deprecated and may be removed in a future version. "
186+
"It has no effect.",
187+
category=DeprecationWarning,
188+
stacklevel=2,
189+
)
190+
195191
if isinstance(data, (list, tuple)):
196192
layers: list[ScatterplotLayer | PathLayer | PolygonLayer] = []
197193
for i, item in enumerate(data):
@@ -201,7 +197,6 @@ def viz(
201197
scatterplot_kwargs=scatterplot_kwargs,
202198
path_kwargs=path_kwargs,
203199
polygon_kwargs=polygon_kwargs,
204-
con=con,
205200
)
206201
layers.extend(ls)
207202

@@ -213,7 +208,6 @@ def viz(
213208
scatterplot_kwargs=scatterplot_kwargs,
214209
path_kwargs=path_kwargs,
215210
polygon_kwargs=polygon_kwargs,
216-
con=con,
217211
)
218212
COLOR_COUNTER += 1
219213

@@ -228,27 +222,13 @@ def viz(
228222
DUCKDB_PY_CONN_ERROR = dedent("""\
229223
Must pass in DuckDBPyRelation object, not DuckDBPyConnection.
230224
231-
Instead of using `duckdb.execute()` or `con.execute()`, use `duckdb.sql()` or
232-
`con.sql()`.
233-
234-
If using `con.sql()`, ensure you pass the `con` into the `viz()` function:
235-
236-
```
237-
viz(con.sql("SELECT * FROM table;", con=con))
238-
```
239-
240-
Alternatively, you can call the `table()` method of `con`:
241-
242-
```
243-
viz(con.table("table_name", con=con))
244-
```
225+
Instead of using `duckdb.execute()` or `con.execute()`, use `duckdb.sql()`,
226+
`con.sql()` or `con.table()`.
245227
""")
246228

247229

248230
def create_layers_from_data_input(
249231
data: VizDataInput,
250-
*,
251-
con: duckdb.DuckDBPyConnection | None = None,
252232
**kwargs: Any,
253233
) -> list[ScatterplotLayer | PathLayer | PolygonLayer]:
254234
"""Create one or more renderable layers from data input.
@@ -274,7 +254,7 @@ def create_layers_from_data_input(
274254
data.__class__.__module__.startswith("duckdb")
275255
and data.__class__.__name__ == "DuckDBPyRelation"
276256
):
277-
return _viz_duckdb_relation(data, con=con, **kwargs) # type: ignore
257+
return _viz_duckdb_relation(data, **kwargs) # type: ignore
278258

279259
if (
280260
data.__class__.__module__.startswith("duckdb")
@@ -354,12 +334,11 @@ def _viz_geopandas_geoseries(
354334

355335
def _viz_duckdb_relation(
356336
data: duckdb.DuckDBPyRelation,
357-
con: duckdb.DuckDBPyConnection | None = None,
358337
**kwargs: Any,
359338
) -> list[ScatterplotLayer | PathLayer | PolygonLayer]:
360339
from lonboard._geoarrow._duckdb import from_duckdb
361340

362-
table = from_duckdb(data, con=con)
341+
table = from_duckdb(data)
363342
return _viz_geoarrow_table(table, **kwargs)
364343

365344

tests/test_duckdb.py

+34-10
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,22 @@
1818
cities_gdal_path = f"/vsizip/{cities_path}"
1919

2020

21+
def test_viz_geometry_default_con():
22+
# For WKB parsing
23+
pytest.importorskip("shapely")
24+
25+
sql = f"""
26+
INSTALL spatial;
27+
LOAD spatial;
28+
SELECT * FROM ST_Read("{cities_gdal_path}");
29+
"""
30+
rel = duckdb.sql(sql)
31+
assert rel.types[-1] == "GEOMETRY"
32+
33+
m = viz(rel)
34+
assert isinstance(m.layers[0], ScatterplotLayer)
35+
36+
2137
def test_viz_geometry():
2238
# For WKB parsing
2339
pytest.importorskip("shapely")
@@ -30,7 +46,12 @@ def test_viz_geometry():
3046
"""
3147
rel = con.sql(sql)
3248
assert rel.types[-1] == "GEOMETRY"
33-
m = viz(rel, con=con)
49+
m = viz(rel)
50+
assert isinstance(m.layers[0], ScatterplotLayer)
51+
52+
# Tolerates legacy con parameter
53+
with pytest.warns(DeprecationWarning, match="The 'con' argument is deprecated"):
54+
m = viz(rel, con=con)
3455
assert isinstance(m.layers[0], ScatterplotLayer)
3556

3657

@@ -198,14 +219,12 @@ def test_create_table_as_custom_con():
198219
con = duckdb.connect()
199220
con.execute(sql)
200221

201-
with pytest.raises(
202-
duckdb.InvalidInputException,
203-
match="object was created by another Connection",
204-
):
205-
m = viz(con.table("test"))
222+
m = viz(con.table("test"))
223+
assert isinstance(m.layers[0], ScatterplotLayer)
206224

207-
# Succeeds when passing in con object
208-
m = viz(con.table("test"), con=con)
225+
# Tolerates legacy con parameter
226+
with pytest.warns(DeprecationWarning, match="The 'con' argument is deprecated"):
227+
m = viz(con.table("test"), con=con)
209228
assert isinstance(m.layers[0], ScatterplotLayer)
210229

211230

@@ -221,7 +240,12 @@ def test_geometry_only_column():
221240

222241
_layer = ScatterplotLayer.from_duckdb(con.table("data"), con)
223242

224-
m = viz(con.table("data"), con=con)
243+
m = viz(con.table("data"))
244+
assert isinstance(m.layers[0], ScatterplotLayer)
245+
246+
# Tolerates legacy con parameter
247+
with pytest.warns(DeprecationWarning, match="The 'con' argument is deprecated"):
248+
m = viz(con.table("data"), con=con)
225249
assert isinstance(m.layers[0], ScatterplotLayer)
226250

227251

@@ -261,4 +285,4 @@ def test_sanitize_column_name():
261285
AssertionError,
262286
match="Expected geometry column name to match regex:",
263287
):
264-
viz(rel, con=con)
288+
viz(rel)

0 commit comments

Comments
 (0)