Skip to content

Commit 32d8ddd

Browse files
committed
Update unit test to make it easier to debug
1 parent 62f3d2c commit 32d8ddd

File tree

1 file changed

+33
-20
lines changed

1 file changed

+33
-20
lines changed

python/datafusion/tests/test_aggregation.py

Lines changed: 33 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -149,34 +149,47 @@ def test_aggregation(df, agg_expr, expected, array_sort):
149149
assert result.column(0) == expected
150150

151151

152-
def test_aggregate_100(df_aggregate_100):
152+
@pytest.mark.parametrize(
153+
"name,expr,expected",
154+
[
155+
(
156+
"approx_percentile_cont",
157+
f.approx_percentile_cont(column("c3"), 0.95, num_centroids=200),
158+
[73, 68, 122, 124, 115],
159+
),
160+
(
161+
"approx_perc_cont_few_centroids",
162+
f.approx_percentile_cont(column("c3"), 0.95, num_centroids=5),
163+
[72, 68, 119, 124, 115],
164+
),
165+
(
166+
"approx_perc_cont_filtered",
167+
f.approx_percentile_cont(
168+
column("c3"), 0.95, num_centroids=200, filter=column("c3") > lit(0)
169+
),
170+
[83, 68, 122, 124, 117],
171+
),
172+
],
173+
)
174+
def test_aggregate_100(df_aggregate_100, name, expr, expected):
153175
# https://github.com/apache/datafusion/blob/bddb6415a50746d2803dd908d19c3758952d74f9/datafusion/sqllogictest/test_files/aggregate.slt#L1490-L1498
154176

155-
result = (
177+
df = (
156178
df_aggregate_100.aggregate(
157179
[column("c1")],
158-
[
159-
f.approx_percentile_cont(column("c3"), 0.95, num_centroids=200).alias(
160-
"c3"
161-
),
162-
f.approx_percentile_cont(column("c3"), 0.95, num_centroids=5).alias(
163-
"c4"
164-
),
165-
f.approx_percentile_cont(
166-
column("c3"), 0.95, num_centroids=200, filter=column("c3") > lit(0)
167-
).alias("c5"),
168-
],
180+
[expr.alias(name)],
169181
)
182+
.select("c1", f.round(column(name), lit(4)).alias(name))
170183
.sort(column("c1").sort(ascending=True))
171-
.collect()
172184
)
185+
df.show()
186+
187+
expected_dict = {
188+
"c1": ["a", "b", "c", "d", "e"],
189+
name: expected,
190+
}
173191

174-
assert len(result) == 1
175-
result = result[0]
176-
assert result.column("c1") == pa.array(["a", "b", "c", "d", "e"])
177-
assert result.column("c3") == pa.array([73, 68, 122, 124, 115])
178-
assert result.column("c4") == pa.array([72, 68, 119, 124, 115])
179-
assert result.column("c5") == pa.array([83, 68, 122, 124, 117])
192+
assert df.collect()[0].to_pydict() == expected_dict
180193

181194

182195
data_test_bitwise_and_boolean_functions = [

0 commit comments

Comments (0)