diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql index 1391a6c01c51..14c575abc2d5 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql @@ -1,3 +1,3 @@ SELECT - FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `First(double_col, ())` + FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `First(double_col, ())` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql index 904f923e17e7..cbfa788b8797 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql @@ -1,3 +1,3 @@ SELECT - LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `Last(double_col, ())` + LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `Last(double_col, ())` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql index 4705e8524638..5cdacbb05033 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql @@ -1 +1 @@ -SELECT * FROM (SELECT `t1`.`col`, COUNT(*) OVER (ORDER BY NULL ASC) AS `analytic` FROM (SELECT `t0`.`col`, NULL AS `filter` FROM `x` AS `t0` WHERE NULL IS NULL) AS `t1`) AS `t2` \ No newline at end of file +SELECT * FROM (SELECT `t1`.`col`, COUNT(*) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `analytic` FROM (SELECT `t0`.`col`, NULL AS `filter` FROM `x` AS `t0` WHERE NULL IS NULL) AS `t1`) AS `t2` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql index 79ea467e969b..63aacc7c6eb9 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql @@ -1 +1 @@ -SELECT `t0`.`one`, `t0`.`two`, `t0`.`three`, SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC) AS `four` FROM `my_data` AS `t0` \ No newline at end of file +SELECT `t0`.`one`, `t0`.`two`, `t0`.`three`, SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `four` FROM `my_data` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql index 9a50bddd2c69..dffd16692851 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql @@ -12,7 +12,7 @@ SELECT `t0`.`k`, LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `lag`, LEAD(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) - `t0`.`f` AS `fwd_diff`, - FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `first`, - LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `last`, + FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `first`, + LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `last`, LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`d` ASC) AS `lag2` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql index 3004bcd8535f..8a730a7c6f9e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql @@ -10,5 +10,5 @@ SELECT `t0`.`i`, `t0`.`j`, `t0`.`k`, - `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC) AS `normed_f` + `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `normed_f` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql index 1c00fd99c792..211f7becce07 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql @@ -1,3 +1,3 @@ SELECT - MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql index 1c00fd99c792..211f7becce07 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql @@ -1,3 +1,3 @@ SELECT - MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql index 8bfdfda64125..a623a3cfe066 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql @@ -1,3 +1,3 @@ SELECT - AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql index 8bfdfda64125..a623a3cfe066 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql @@ -1,3 +1,3 @@ SELECT - AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql index fa763e794811..bc120a638006 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql @@ -1,3 +1,3 @@ SELECT - MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql index fa763e794811..bc120a638006 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql @@ -1,3 +1,3 @@ SELECT - MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql index 4a0222bfac7c..bcd6a2ba703e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql index 4a0222bfac7c..bcd6a2ba703e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql index 7f161f658529..69e25f294384 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql @@ -1,4 +1,4 @@ SELECT `t0`.`g`, - SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC) AS `result` + SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `result` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql index 148babbf968d..a5c1c098080e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql @@ -1,4 +1,4 @@ SELECT LAG(`t0`.`d`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST) AS `foo`, - MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST) AS `Max(a)` + MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `Max(a)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql index 59c90d433f04..560362ff0a45 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql index 59c90d433f04..560362ff0a45 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index a86e3013e359..ad3db779d84f 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -1185,12 +1185,12 @@ def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by) ) order = sge.Order(expressions=order_by) if order_by else None - spec = self._minimize_spec(op.start, op.end, spec) + spec = self._minimize_spec(op, spec) return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) @staticmethod - def _minimize_spec(start, end, spec): + def _minimize_spec(op, spec): return spec def visit_LagLead(self, op, *, arg, offset, default): diff --git a/ibis/backends/sql/compilers/bigquery/__init__.py b/ibis/backends/sql/compilers/bigquery/__init__.py index 2b62903d0009..804ba5c2c58b 100644 --- a/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/ibis/backends/sql/compilers/bigquery/__init__.py @@ -19,6 +19,8 @@ from ibis.backends.sql.compilers.bigquery.udf.core import PythonToJavaScriptTranslator from ibis.backends.sql.datatypes import BigQueryType, BigQueryUDFType from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -323,12 +325,10 @@ def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> sge.Create: return func @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + # bigquery doesn't allow certain window functions to specify a window frame + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index fe25f73bc943..1793ebb3b94e 100644 --- a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -120,13 +120,8 @@ class ClickHouseCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following - ): + def _minimize_spec(op, spec): + if isinstance(op.func, ops.NTile): return None return spec diff --git a/ibis/backends/sql/compilers/exasol.py b/ibis/backends/sql/compilers/exasol.py index f1f7ee803c1f..34f45919eaa1 100644 --- a/ibis/backends/sql/compilers/exasol.py +++ b/ibis/backends/sql/compilers/exasol.py @@ -10,6 +10,8 @@ from ibis.backends.sql.datatypes import ExasolType from ibis.backends.sql.dialects import Exasol from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -85,12 +87,9 @@ class ExasolCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/sql/compilers/flink.py b/ibis/backends/sql/compilers/flink.py index da8b27d94cd2..0bd140ba761e 100644 --- a/ibis/backends/sql/compilers/flink.py +++ b/ibis/backends/sql/compilers/flink.py @@ -128,16 +128,16 @@ def _generate_groups(groups): return groups @staticmethod - def _minimize_spec(start, end, spec): + def _minimize_spec(op, spec): if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) + op.start is None + and isinstance(getattr(end := op.end, "value", None), ops.Literal) and end.value.value == 0 and end.following ): return None elif ( - isinstance(getattr(end, "value", None), ops.Cast) + isinstance(getattr(end := op.end, "value", None), ops.Cast) and end.value.arg.value == 0 and end.following ): diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 239e7a5f6940..8159e48ccf95 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -10,7 +10,12 @@ from ibis.backends.sql.compilers.base import NULL, STAR, SQLGlotCompiler from ibis.backends.sql.datatypes import ImpalaType from ibis.backends.sql.dialects import Impala -from ibis.backends.sql.rewrites import lower_sample, rewrite_empty_order_by_window +from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, + lower_sample, + rewrite_empty_order_by_window, +) class ImpalaCompiler(SQLGlotCompiler): @@ -73,28 +78,11 @@ class ImpalaCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - # start is None means unbounded preceding - if start is None: - # end is None: unbounded following - # end == 0 => current row - # these are treated the same because for the functions where these - # are not allowed they end up behaving the same - # - # I think we're not covering some cases here: - # These will be treated the same, even though they're not - # - window(order_by=x, rows=(None, None)) # should be equivalent to `over ()` - # - window(order_by=x, rows=(None, 0)) # equivalent to a cumulative aggregation - # - # TODO(cpcloud): we need to clean up the semantics of unbounded - # following vs current row at the API level. - # - if end is None or ( - isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following - ): - return None + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) + ): + return None return spec def visit_Log2(self, op, *, arg): diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index 429dd2503af0..786d37c9d385 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -20,6 +20,8 @@ from ibis.backends.sql.datatypes import MSSQLType from ibis.backends.sql.dialects import MSSQL from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -147,12 +149,9 @@ def _generate_groups(groups): return groups @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/sql/compilers/mysql.py b/ibis/backends/sql/compilers/mysql.py index 3971c89b4f61..d8f0c3262d13 100644 --- a/ibis/backends/sql/compilers/mysql.py +++ b/ibis/backends/sql/compilers/mysql.py @@ -101,12 +101,9 @@ def POS_INF(self): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance( + op.func, (ops.RankBase, ops.CumeDist, ops.NTile, ops.PercentRank) ): return None return spec diff --git a/ibis/backends/sql/compilers/oracle.py b/ibis/backends/sql/compilers/oracle.py index 062e9074ac83..7548e494b7c1 100644 --- a/ibis/backends/sql/compilers/oracle.py +++ b/ibis/backends/sql/compilers/oracle.py @@ -454,7 +454,7 @@ def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by) order = sge.Order(expressions=order_by) if order_by else None - spec = self._minimize_spec(op.start, op.end, spec) + spec = self._minimize_spec(op, spec) return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) diff --git a/ibis/backends/sql/compilers/snowflake.py b/ibis/backends/sql/compilers/snowflake.py index 7cdc980752fe..060cc789f8de 100644 --- a/ibis/backends/sql/compilers/snowflake.py +++ b/ibis/backends/sql/compilers/snowflake.py @@ -25,6 +25,8 @@ from ibis.backends.sql.datatypes import SnowflakeType from ibis.backends.sql.dialects import Snowflake from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_row_number, lower_log2, @@ -237,17 +239,6 @@ def _compile_udf(self, udf_node: ops.ScalarUDF): _compile_pandas_udf = _compile_udf _compile_python_udf = _compile_udf - @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following - ): - return None - return spec - def visit_Literal(self, op, *, value, dtype): if value is None: return super().visit_Literal(op, value=value, dtype=dtype) @@ -668,47 +659,19 @@ def visit_Xor(self, op, *, left, right): # boolxor accepts numerics ... and returns a boolean? wtf? return self.f.boolxor(self.cast(left, dt.int8), self.cast(right, dt.int8)) - def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by): - if start is None: - start = {} - if end is None: - end = {} - - start_value = start.get("value", "UNBOUNDED") - start_side = start.get("side", "PRECEDING") - end_value = end.get("value", "UNBOUNDED") - end_side = end.get("side", "FOLLOWING") - - if getattr(start_value, "this", None) == "0": - start_value = "CURRENT ROW" - start_side = None - - if getattr(end_value, "this", None) == "0": - end_value = "CURRENT ROW" - end_side = None - - spec = sge.WindowSpec( - kind=how.upper(), - start=start_value, - start_side=start_side, - end=end_value, - end_side=end_side, - over="OVER", - ) - order = sge.Order(expressions=order_by) if order_by else None - - orig_spec = spec - spec = self._minimize_spec(op.start, op.end, orig_spec) - - # due to https://docs.snowflake.com/en/sql-reference/functions-analytic#window-frame-usage-notes - # we need to make the default window rows (since range isn't supported) - # and we need to make the default frame unbounded preceding to current - # row - if spec is None and isinstance(op.func, (ops.First, ops.Last, ops.NthValue)): - spec = orig_spec + @staticmethod + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) + ): + return None + elif isinstance(op.func, (ops.First, ops.Last, ops.NthValue)): + # due to + # https://docs.snowflake.com/en/sql-reference/functions-analytic#window-frame-usage-notes + # we need to make the default window rows (since range isn't supported) and we need + # to make the default frame unbounded preceding to current row spec.args["kind"] = "ROWS" - - return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) + return spec def visit_WindowBoundary(self, op, *, value, preceding): if not isinstance(op.value, ops.Literal): diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index 2019e1d36cd4..8cdbd201ecee 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -22,6 +22,8 @@ from ibis.backends.sql.datatypes import TrinoType from ibis.backends.sql.dialects import Trino from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -104,12 +106,9 @@ class TrinoCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql index 787083fbab6a..642b68f972ff 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql @@ -5,11 +5,11 @@ FROM ( SELECT `t1`.`x`, `t1`.`y`, - AVG(`t1`.`x`) OVER (ORDER BY NULL ASC) AS _w + AVG(`t1`.`x`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w FROM ( SELECT `t0`.`x`, - SUM(`t0`.`x`) OVER (ORDER BY NULL ASC) AS `y` + SUM(`t0`.`x`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` FROM `t` AS `t0` ) AS `t1` WHERE diff --git a/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql index 1224d283f40e..b70218337a59 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql @@ -1,4 +1,4 @@ SELECT - ntile(2) OVER (ORDER BY randCanonical() ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "new_col" + ntile(2) OVER (ORDER BY randCanonical() ASC) - 1 AS "new_col" FROM "test" AS "t0" LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index dff9331a1d26..3cf98273bc30 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -664,11 +664,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-mean", marks=[ - pytest.mark.notimpl( - ["impala"], - reason="default window semantics are different", - raises=AssertionError, - ), pytest.mark.notimpl( ["risingwave"], raises=PsycoPg2InternalError, @@ -1242,3 +1237,52 @@ def test_windowed_order_by_sequence_is_preserved(con): result = con.execute(expr) value = result.bool_col.loc[result["rank"] == 4].item() assert pd.isna(value) + + +@pytest.mark.notimpl( + ["polars"], + raises=com.OperationNotDefinedError, + reason="window functions aren't yet implemented for the polars backend", +) +@pytest.mark.notimpl( + ["flink"], + raises=AssertionError, + reason="default behavior is the same as bigquery and hasn't been addressed", +) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) +@pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="Window function with empty PARTITION BY is not supported due to performance issues", +) +def test_duplicate_ordered_sum(con): + expr = ( + ibis.memtable( + {"id": range(4), "ranking": [1, 2, 3, 3], "rewards": [10, 20, 30, 40]} + ) + .mutate(csum=lambda t: t.rewards.cumsum(order_by="ranking")) + .order_by("id") + ) + arrow_table = con.to_pyarrow(expr) + + result = arrow_table["csum"].to_pylist() + + assert len(result) == 4 + + assert result[0] == 10 + assert result[1] == 30 + # why? because the order_by column is not unique, so both + # + # 10 -> 10 + 20 -> 10 + 20 + 30 => 10 -> 30 -> 60 + # + # *AND* + # + # 10 -> 10 + 20 -> 10 + 20 + 40 => 10 -> 30 -> 70 + # + # are valid, and it *may* depend on how the computation is distributed in + # the query engine + # + # this also means the final cumulative sum can be in the penultimate or + # final position, since the *output* order doesn't depend on ORDER BY + # provided by the user + assert result[2:] in ([60, 100], [70, 100], [100, 60], [100, 70])