@@ -149,34 +149,47 @@ def test_aggregation(df, agg_expr, expected, array_sort):
149
149
assert result .column (0 ) == expected
150
150
151
151
152
@pytest.mark.parametrize(
    "name,expr,expected",
    [
        (
            "approx_percentile_cont",
            f.approx_percentile_cont(column("c3"), 0.95, num_centroids=200),
            [73, 68, 122, 124, 115],
        ),
        (
            "approx_perc_cont_few_centroids",
            f.approx_percentile_cont(column("c3"), 0.95, num_centroids=5),
            [72, 68, 119, 124, 115],
        ),
        (
            "approx_perc_cont_filtered",
            f.approx_percentile_cont(
                column("c3"), 0.95, num_centroids=200, filter=column("c3") > lit(0)
            ),
            [83, 68, 122, 124, 117],
        ),
    ],
)
def test_aggregate_100(df_aggregate_100, name, expr, expected):
    """Check one aggregate expression, grouped by ``c1``, against known values.

    Each parametrized case aggregates ``df_aggregate_100`` by ``c1``, rounds
    the aggregated column to 4 decimal places, sorts by ``c1`` ascending and
    compares the collected result with the expected dictionary.

    Args:
        df_aggregate_100: DataFrame fixture supplying the rows to aggregate.
        name: Alias given to the aggregated column; also the key under which
            ``expected`` is compared.
        expr: Aggregate expression under test.
        expected: Expected aggregated values for the groups "a" through "e",
            in that order.
    """
    # https://github.com/apache/datafusion/blob/bddb6415a50746d2803dd908d19c3758952d74f9/datafusion/sqllogictest/test_files/aggregate.slt#L1490-L1498

    df = (
        df_aggregate_100.aggregate(
            [column("c1")],
            [expr.alias(name)],
        )
        .select("c1", f.round(column(name), lit(4)).alias(name))
        .sort(column("c1").sort(ascending=True))
    )

    expected_dict = {
        "c1": ["a", "b", "c", "d", "e"],
        name: expected,
    }

    # Collect exactly once: no debugging ``show()`` call, so the plan is not
    # executed a second time just to print the table.
    batches = df.collect()

    # Only the first batch is compared below, so make sure nothing else was
    # returned; otherwise extra rows would go unnoticed or be misreported.
    assert len(batches) == 1
    assert batches[0].to_pydict() == expected_dict
180
193
181
194
182
195
data_test_bitwise_and_boolean_functions = [
0 commit comments