refactor(expr/api): solidify positional versus keyword-only arguments
cpcloud committed Jan 29, 2025
1 parent 6603c6c commit af56ef3
Showing 65 changed files with 691 additions and 558 deletions.
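The change applies Python's parameter-kind markers throughout the expression API: parameters before `/` become positional-only, and parameters after `*` become keyword-only. A minimal sketch of the convention, using a hypothetical function rather than code from this commit:

```python
def to_pandas_like(expr, /, *, params=None, limit=None):
    # `expr` may only be passed positionally; `params` and `limit` only by keyword.
    return expr, params, limit

to_pandas_like("expr", limit=10)    # OK
# to_pandas_like(expr="expr")       # TypeError: positional-only argument passed as a keyword
# to_pandas_like("expr", None, 10)  # TypeError: too many positional arguments
```

This is why call sites in the diff, such as the `d_within(..., distance=10)` call in the geospatial docs below, switch options to keywords while the primary operand stays positional.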
6 changes: 4 additions & 2 deletions docs/posts/ibis-duckdb-geospatial/index.qmd
@@ -140,7 +140,7 @@ streets
Using the deferred API, we can check which streets are within `d=10` meters of distance.

```{python}
sts_near_broad = streets.filter(_.geom.d_within(broad_station_subquery, 10))
sts_near_broad = streets.filter(_.geom.d_within(broad_station_subquery, distance=10))
sts_near_broad
```

@@ -227,7 +227,9 @@ data we can't tell the street near which it happened. However, we can check if t
distance of a street.

```{python}
h_street = streets.filter(_.geom.d_within(h_near_broad.select(_.geom).as_scalar(), 2))
h_street = streets.filter(
_.geom.d_within(h_near_broad.select(_.geom).as_scalar(), distance=2)
)
h_street
```

24 changes: 16 additions & 8 deletions ibis/backends/__init__.py
@@ -106,6 +106,7 @@ def _import_pyarrow():
def to_pandas(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -135,6 +136,7 @@ def to_pandas(
def to_pandas_batches(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -182,6 +184,7 @@ def to_pandas_batches(
def to_pyarrow(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -229,6 +232,7 @@ def to_pyarrow(
def to_polars(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -266,6 +270,7 @@ def to_polars(
def to_pyarrow_batches(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -303,6 +308,7 @@ def to_pyarrow_batches(
def to_torch(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -339,7 +345,7 @@ def to_torch(
}

def read_parquet(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Register a parquet file as a table in the current backend.
@@ -364,7 +370,7 @@ def read_parquet(
)

def read_csv(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Register a CSV file as a table in the current backend.
@@ -382,14 +388,13 @@ def read_csv(
-------
ir.Table
The just-registered table
"""
raise NotImplementedError(
f"{self.name} does not support direct registration of CSV data."
)

def read_json(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Register a JSON file as a table in the current backend.
@@ -414,15 +419,14 @@ def read_json(
)

def read_delta(
self, source: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
):
"""Register a Delta Lake table in the current database.
Parameters
----------
source
The data source. Must be a directory
containing a Delta Lake table.
path
The data source. Must be a directory containing a Delta Lake table.
table_name
An optional name to use for the created table. This defaults to
a sequentially generated name.
@@ -443,6 +447,7 @@ def read_delta(
def to_parquet(
self,
expr: ir.Table,
/,
path: str | Path,
*,
params: Mapping[ir.Scalar, Any] | None = None,
@@ -516,6 +521,7 @@ def to_parquet_dir(
def to_csv(
self,
expr: ir.Table,
/,
path: str | Path,
*,
params: Mapping[ir.Scalar, Any] | None = None,
@@ -552,6 +558,7 @@ def to_csv(
def to_delta(
self,
expr: ir.Table,
/,
path: str | Path,
*,
params: Mapping[ir.Scalar, Any] | None = None,
@@ -590,6 +597,7 @@ def to_delta(
def to_json(
self,
expr: ir.Table,
/,
path: str | Path,
**kwargs: Any,
) -> None:
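With the new base signatures, the source path of the `read_*` methods is positional-only and `table_name` is keyword-only, while the `to_*` export methods take the expression positionally. A hedged sketch of how call sites look after the change (backend choice and file names are illustrative, not taken from the diff):

```python
import ibis

con = ibis.duckdb.connect()  # any backend implementing the base API

# table_name must now be passed as a keyword argument:
t = con.read_parquet("data.parquet", table_name="my_table")

# the expression is positional-only in exports such as to_parquet:
con.to_parquet(t, "out.parquet")
# con.to_parquet(expr=t, path="out.parquet")  # TypeError: `expr` is positional-only
```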
2 changes: 2 additions & 0 deletions ibis/backends/athena/__init__.py
@@ -530,6 +530,7 @@ def list_tables(
def to_pyarrow_batches(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -556,6 +557,7 @@ def to_pyarrow_batches(
def to_pyarrow(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
8 changes: 5 additions & 3 deletions ibis/backends/bigquery/__init__.py
@@ -243,7 +243,7 @@ def load(file: str) -> None:
return self.table(table_name, database=(catalog, database))

def read_parquet(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
):
"""Read Parquet data into a BigQuery table.
@@ -271,7 +271,7 @@ def read_parquet(
)

def read_csv(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Read CSV data into a BigQuery table.
@@ -300,7 +300,7 @@ def read_csv(
return self._read_file(path, table_name=table_name, job_config=job_config)

def read_json(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Read newline-delimited JSON data into a BigQuery table.
@@ -766,6 +766,7 @@ def _to_query(
def to_pyarrow(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -786,6 +787,7 @@ def to_pyarrow_batches(
def to_pyarrow_batches(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -0,0 +1,3 @@
SELECT
st_area(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
@@ -0,0 +1,3 @@
SELECT
st_numpoints(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
@@ -0,0 +1,3 @@
SELECT
st_buffer(`t0`.`geog`, 5.2) AS `tmp`
FROM `t` AS `t0`
@@ -0,0 +1,3 @@
SELECT
st_pointn(`t0`.`geog`, 3) AS `tmp`
FROM `t` AS `t0`
45 changes: 27 additions & 18 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
@@ -97,7 +97,7 @@ def test_hash(case, dtype, snapshot):
@pytest.mark.parametrize("how", ["md5", "sha1", "sha256", "sha512"])
def test_hashbytes(case, how, dtype, snapshot):
var = ibis.literal(case, type=dtype)
expr = var.hashbytes(how=how).name("tmp")
expr = var.hashbytes(how).name("tmp")
snapshot.assert_match(to_sql(expr), "out.sql")


@@ -319,25 +319,34 @@ def test_geospatial_unary_union(snapshot):


@pytest.mark.parametrize(
("operation", "keywords"),
"operation",
[
param("area", {}, id="aread"),
param("as_binary", {}, id="as_binary"),
param("as_text", {}, id="as_text"),
param("buffer", {"radius": 5.2}, id="buffer"),
param("centroid", {}, id="centroid"),
param("end_point", {}, id="end_point"),
param("geometry_type", {}, id="geometry_type"),
param("length", {}, id="length"),
param("n_points", {}, id="npoints"),
param("perimeter", {}, id="perimeter"),
param("point_n", {"n": 3}, id="point_n"),
param("start_point", {}, id="start_point"),
"area",
"as_binary",
"as_text",
"centroid",
"end_point",
"geometry_type",
"length",
"n_points",
"perimeter",
"start_point",
],
)
def test_geospatial_unary(operation, keywords, snapshot):
def test_geospatial_unary(operation, snapshot):
t = ibis.table([("geog", "geography")], name="t")
method = methodcaller(operation)
expr = method(t.geog).name("tmp")
snapshot.assert_match(to_sql(expr), "out.sql")


@pytest.mark.parametrize(
("operation", "arg"), [("buffer", 5.2), ("point_n", 3)], ids=["buffer", "point_n"]
)
def test_geospatial_unary_positional_only(operation, arg, snapshot):
t = ibis.table([("geog", "geography")], name="t")
expr = getattr(t.geog, operation)(**keywords).name("tmp")
method = methodcaller(operation, arg)
expr = method(t.geog).name("tmp")
snapshot.assert_match(to_sql(expr), "out.sql")


@@ -382,13 +391,13 @@ def test_geospatial_xy(dimension_name, snapshot):

def test_geospatial_simplify(snapshot):
t = ibis.table([("geog", "geography")], name="t")
expr = t.geog.simplify(5.2, preserve_collapsed=False).name("tmp")
expr = t.geog.simplify(tolerance=5.2, preserve_collapsed=False).name("tmp")
snapshot.assert_match(to_sql(expr), "out.sql")


def test_geospatial_simplify_error():
t = ibis.table([("geog", "geography")], name="t")
expr = t.geog.simplify(5.2, preserve_collapsed=True).name("tmp")
expr = t.geog.simplify(tolerance=5.2, preserve_collapsed=True).name("tmp")
with pytest.raises(
Exception, match="simplify does not support preserving collapsed geometries"
):
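The compiler tests above move from `getattr(t.geog, operation)(**keywords)` to `operator.methodcaller` because options such as `buffer`'s radius and `point_n`'s index are now positional-only and can no longer be supplied as keywords. A small standalone illustration of why `methodcaller` fits (the `Geo` class is a stand-in, not ibis code):

```python
from operator import methodcaller

class Geo:
    def buffer(self, radius, /):
        return f"buffer({radius})"

g = Geo()
call = methodcaller("buffer", 5.2)  # extra arguments are forwarded positionally
print(call(g))                      # buffer(5.2)
# g.buffer(radius=5.2)              # TypeError: `radius` is positional-only
```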
14 changes: 9 additions & 5 deletions ibis/backends/clickhouse/__init__.py
@@ -286,6 +286,7 @@ def _collect_in_memory_tables(
def to_pyarrow(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -305,11 +306,7 @@ def to_pyarrow(
# the extra code to make this dance work without first converting to
# record batches isn't worth it without some benchmarking
with self.to_pyarrow_batches(
expr=expr,
params=params,
limit=limit,
external_tables=external_tables,
**kwargs,
expr, params=params, limit=limit, external_tables=external_tables, **kwargs
) as reader:
table = reader.read_all()

@@ -318,6 +315,7 @@ def to_pyarrow_batches(
def to_pyarrow_batches(
self,
expr: ir.Expr,
/,
*,
limit: int | str | None = None,
params: Mapping[ir.Scalar, Any] | None = None,
@@ -399,6 +397,8 @@ def batcher(
def execute(
self,
expr: ir.Expr,
/,
*,
limit: str | None = "default",
params: Mapping[ir.Scalar, Any] | None = None,
external_tables: Mapping[str, pd.DataFrame] | None = None,
@@ -575,6 +575,8 @@ def truncate_table(self, name: str, database: str | None = None) -> None:
def read_parquet(
self,
path: str | Path,
/,
*,
table_name: str | None = None,
engine: str = "MergeTree",
**kwargs: Any,
@@ -603,6 +605,8 @@ def read_csv(
def read_csv(
self,
path: str | Path,
/,
*,
table_name: str | None = None,
engine: str = "MergeTree",
**kwargs: Any,
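The ClickHouse backend follows the same pattern: `execute` and the `read_*` methods keep only the expression or path positional, with everything else (including the ClickHouse-specific `engine` option) keyword-only; the internal `to_pyarrow_batches` call above likewise now passes `expr` positionally. Illustrative call sites (connection details are placeholders):

```python
import ibis

con = ibis.clickhouse.connect()  # placeholder connection

t = con.read_csv("data.csv", table_name="events", engine="MergeTree")  # keyword-only options
df = con.execute(t.limit(5))
# con.execute(t.limit(5), 10)  # TypeError: `limit` must be passed as a keyword
```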
2 changes: 2 additions & 0 deletions ibis/backends/databricks/__init__.py
@@ -513,6 +513,7 @@ def list_tables(
def to_pyarrow_batches(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,
@@ -554,6 +555,7 @@ def batch_producer(con, sql):
def to_pyarrow(
self,
expr: ir.Expr,
/,
*,
params: Mapping[ir.Scalar, Any] | None = None,
limit: int | str | None = None,