Skip to content

Commit 0fef72e

Browse files
authored
Merge branch 'main' into main_chelsealin_sqlrefactor
2 parents 5f40ccf + 265376f commit 0fef72e

File tree

3 files changed, with 57 additions and 5 deletions.

bigframes/core/compile/sqlglot/expressions/datetime_ops.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,12 @@ def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression:
371371
)
372372
return sge.Cast(this=result, to="DATETIME")
373373

374-
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.TIMESTAMP_DTYPE):
374+
if expr.dtype in (
375+
dtypes.STRING_DTYPE,
376+
dtypes.TIMESTAMP_DTYPE,
377+
dtypes.DATETIME_DTYPE,
378+
dtypes.DATE_DTYPE,
379+
):
375380
return sge.TryCast(this=expr.expr, to="DATETIME")
376381

377382
value = expr.expr
@@ -396,7 +401,12 @@ def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression:
396401
"PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC")
397402
)
398403

399-
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.DATETIME_DTYPE):
404+
if expr.dtype in (
405+
dtypes.STRING_DTYPE,
406+
dtypes.DATETIME_DTYPE,
407+
dtypes.TIMESTAMP_DTYPE,
408+
dtypes.DATE_DTYPE,
409+
):
400410
return sge.func("TIMESTAMP", expr.expr)
401411

402412
value = expr.expr

bigframes/pandas/core/methods/describe.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,9 +56,10 @@ def describe(
5656
"max",
5757
]
5858
).intersection(describe_block.column_labels.get_level_values(-1))
59-
describe_block = describe_block.stack(override_labels=stack_cols)
60-
61-
return dataframe.DataFrame(describe_block).droplevel(level=0)
59+
if not stack_cols.empty:
60+
describe_block = describe_block.stack(override_labels=stack_cols)
61+
return dataframe.DataFrame(describe_block).droplevel(level=0)
62+
return dataframe.DataFrame(describe_block)
6263

6364

6465
def _describe(
@@ -120,5 +121,7 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
120121
dtypes.TIME_DTYPE,
121122
]:
122123
return [aggregations.count_op, aggregations.nunique_op]
124+
elif dtypes.is_json_like(dtype) or dtype == dtypes.OBJ_REF_DTYPE:
125+
return [aggregations.count_op]
123126
else:
124127
return []

tests/system/small/pandas/test_describe.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@
1515
import pandas.testing
1616
import pytest
1717

18+
import bigframes.pandas as bpd
19+
1820

1921
def test_df_describe_non_temporal(scalars_dfs):
2022
# TODO: supply a reason why this isn't compatible with pandas 1.x
@@ -352,3 +354,40 @@ def test_series_groupby_describe(scalars_dfs):
352354
check_dtype=False,
353355
check_index_type=False,
354356
)
357+
358+
359+
def test_describe_json_and_obj_ref_returns_count(session):
360+
# Test describe() works on JSON and OBJ_REF types (without nunique, which fails)
361+
sql = """
362+
SELECT
363+
PARSE_JSON('{"a": 1}') AS json_col,
364+
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
365+
"""
366+
df = session.read_gbq(sql)
367+
368+
df["obj_ref_col"] = df["uri_col"].str.to_blob()
369+
df = df.drop(columns=["uri_col"])
370+
371+
res = df.describe(include="all").to_pandas()
372+
373+
assert "count" in res.index
374+
assert res.loc["count", "json_col"] == 1.0
375+
assert res.loc["count", "obj_ref_col"] == 1.0
376+
377+
378+
def test_describe_with_unsupported_type_returns_empty_dataframe(session):
379+
df = session.read_gbq("SELECT ST_GEOGPOINT(1.0, 2.0) AS geo_col")
380+
381+
res = df.describe().to_pandas()
382+
383+
assert len(res.columns) == 0
384+
assert len(res.index) == 1
385+
386+
387+
def test_describe_empty_dataframe_returns_empty_dataframe(session):
388+
df = bpd.DataFrame()
389+
390+
res = df.describe().to_pandas()
391+
392+
assert len(res.columns) == 0
393+
assert len(res.index) == 1

0 commit comments

Comments
 (0)