@@ -752,7 +752,7 @@ where
752
752
753
753
fn size ( & self ) -> usize {
754
754
self . vals . capacity ( ) * size_of :: < T :: Native > ( )
755
- + self . null_builder . capacity ( ) / 8 // capacity is in bits, so convert to bytes
755
+ + self . null_builder . capacity ( ) / 8 // capacity is in bits, so convert to bytes
756
756
+ self . is_sets . capacity ( ) / 8
757
757
+ self . size_of_orderings
758
758
+ self . min_of_each_group_buf . 0 . capacity ( ) * size_of :: < usize > ( )
@@ -827,9 +827,14 @@ impl FirstValueAccumulator {
827
827
}
828
828
829
829
// Replaces the accumulator state with the values in the given row.
//
// The row is taken by value: every scalar in it is compacted in place, the
// element at index 0 becomes `self.first`, the remaining elements become
// `self.orderings`, and `self.is_set` is switched on.
//
// Panics if `row` is empty, because `Vec::remove(0)` panics on an empty vector.
830
- fn update_with_new_row ( & mut self , row : & [ ScalarValue ] ) {
831
- self . first = row[ 0 ] . clone ( ) ;
832
- self . orderings = row[ 1 ..] . to_vec ( ) ;
830
+ fn update_with_new_row ( & mut self , mut row : Vec < ScalarValue > ) {
831
+ // Ensure any array-based scalars hold only a single value, to reduce memory pressure
832
+ row. iter_mut ( ) . for_each ( |s| {
833
+ s. compact ( ) ;
834
+ } ) ;
835
+
836
+ self . first = row. remove ( 0 ) ;
837
+ self . orderings = row;
833
838
self . is_set = true ;
834
839
}
835
840
@@ -888,7 +893,7 @@ impl Accumulator for FirstValueAccumulator {
888
893
if !self . is_set {
889
894
if let Some ( first_idx) = self . get_first_idx ( values) ? {
890
895
let row = get_row_at_idx ( values, first_idx) ?;
891
- self . update_with_new_row ( & row) ;
896
+ self . update_with_new_row ( row) ;
892
897
}
893
898
} else if !self . requirement_satisfied {
894
899
if let Some ( first_idx) = self . get_first_idx ( values) ? {
@@ -901,7 +906,7 @@ impl Accumulator for FirstValueAccumulator {
901
906
) ?
902
907
. is_gt ( )
903
908
{
904
- self . update_with_new_row ( & row) ;
909
+ self . update_with_new_row ( row) ;
905
910
}
906
911
}
907
912
}
@@ -925,7 +930,7 @@ impl Accumulator for FirstValueAccumulator {
925
930
let min = ( 0 ..filtered_states[ 0 ] . len ( ) ) . min_by ( |& a, & b| comparator. compare ( a, b) ) ;
926
931
927
932
if let Some ( first_idx) = min {
928
- let first_row = get_row_at_idx ( & filtered_states, first_idx) ?;
933
+ let mut first_row = get_row_at_idx ( & filtered_states, first_idx) ?;
929
934
// When collecting orderings, we exclude the is_set flag from the state.
930
935
let first_ordering = & first_row[ 1 ..is_set_idx] ;
931
936
let sort_options = get_sort_options ( self . ordering_req . as_ref ( ) ) ;
@@ -936,7 +941,9 @@ impl Accumulator for FirstValueAccumulator {
936
941
// Update with first value in the state. Note that we should exclude the
937
942
// is_set flag from the state. Otherwise, we will end up with a state
938
943
// containing two is_set flags.
939
- self . update_with_new_row ( & first_row[ 0 ..is_set_idx] ) ;
944
+ assert ! ( is_set_idx <= first_row. len( ) ) ;
945
+ first_row. resize ( is_set_idx, ScalarValue :: Null ) ;
946
+ self . update_with_new_row ( first_row) ;
940
947
}
941
948
}
942
949
Ok ( ( ) )
@@ -1226,9 +1233,14 @@ impl LastValueAccumulator {
1226
1233
}
1227
1234
1228
1235
// Replaces the accumulator state with the values in the given row.
//
// The row is taken by value: every scalar in it is compacted in place, the
// element at index 0 becomes `self.last`, the remaining elements become
// `self.orderings`, and `self.is_set` is switched on.
//
// Panics if `row` is empty, because `Vec::remove(0)` panics on an empty vector.
1229
- fn update_with_new_row ( & mut self , row : & [ ScalarValue ] ) {
1230
- self . last = row[ 0 ] . clone ( ) ;
1231
- self . orderings = row[ 1 ..] . to_vec ( ) ;
1236
+ fn update_with_new_row ( & mut self , mut row : Vec < ScalarValue > ) {
1237
+ // Ensure any array-based scalars hold only a single value, to reduce memory pressure
1238
+ row. iter_mut ( ) . for_each ( |s| {
1239
+ s. compact ( ) ;
1240
+ } ) ;
1241
+
1242
+ self . last = row. remove ( 0 ) ;
1243
+ self . orderings = row;
1232
1244
self . is_set = true ;
1233
1245
}
1234
1246
@@ -1289,7 +1301,7 @@ impl Accumulator for LastValueAccumulator {
1289
1301
if !self . is_set || self . requirement_satisfied {
1290
1302
if let Some ( last_idx) = self . get_last_idx ( values) ? {
1291
1303
let row = get_row_at_idx ( values, last_idx) ?;
1292
- self . update_with_new_row ( & row) ;
1304
+ self . update_with_new_row ( row) ;
1293
1305
}
1294
1306
} else if let Some ( last_idx) = self . get_last_idx ( values) ? {
1295
1307
let row = get_row_at_idx ( values, last_idx) ?;
@@ -1302,7 +1314,7 @@ impl Accumulator for LastValueAccumulator {
1302
1314
) ?
1303
1315
. is_lt ( )
1304
1316
{
1305
- self . update_with_new_row ( & row) ;
1317
+ self . update_with_new_row ( row) ;
1306
1318
}
1307
1319
}
1308
1320
@@ -1326,7 +1338,7 @@ impl Accumulator for LastValueAccumulator {
1326
1338
let max = ( 0 ..filtered_states[ 0 ] . len ( ) ) . max_by ( |& a, & b| comparator. compare ( a, b) ) ;
1327
1339
1328
1340
if let Some ( last_idx) = max {
1329
- let last_row = get_row_at_idx ( & filtered_states, last_idx) ?;
1341
+ let mut last_row = get_row_at_idx ( & filtered_states, last_idx) ?;
1330
1342
// When collecting orderings, we exclude the is_set flag from the state.
1331
1343
let last_ordering = & last_row[ 1 ..is_set_idx] ;
1332
1344
let sort_options = get_sort_options ( self . ordering_req . as_ref ( ) ) ;
@@ -1339,7 +1351,9 @@ impl Accumulator for LastValueAccumulator {
1339
1351
// Update with last value in the state. Note that we should exclude the
1340
1352
// is_set flag from the state. Otherwise, we will end up with a state
1341
1353
// containing two is_set flags.
1342
- self . update_with_new_row ( & last_row[ 0 ..is_set_idx] ) ;
1354
+ assert ! ( is_set_idx <= last_row. len( ) ) ;
1355
+ last_row. resize ( is_set_idx, ScalarValue :: Null ) ;
1356
+ self . update_with_new_row ( last_row) ;
1343
1357
}
1344
1358
}
1345
1359
Ok ( ( ) )
@@ -1382,7 +1396,13 @@ fn convert_to_sort_cols(arrs: &[ArrayRef], sort_exprs: &LexOrdering) -> Vec<Sort
1382
1396
1383
1397
#[ cfg( test) ]
1384
1398
mod tests {
1385
- use arrow:: { array:: Int64Array , compute:: SortOptions , datatypes:: Schema } ;
1399
+ use std:: iter:: repeat_with;
1400
+
1401
+ use arrow:: {
1402
+ array:: { Int64Array , ListArray } ,
1403
+ compute:: SortOptions ,
1404
+ datatypes:: Schema ,
1405
+ } ;
1386
1406
use datafusion_physical_expr:: { expressions:: col, PhysicalSortExpr } ;
1387
1407
1388
1408
use super :: * ;
@@ -1772,4 +1792,60 @@ mod tests {
1772
1792
1773
1793
Ok ( ( ) )
1774
1794
}
1795
+
1796
// Checks that the size reported by a `FirstValueAccumulator` after a single
// `update_batch` call is the same whether the input batch holds 10000
// single-element lists or just one, i.e. the reported size must not grow with
// the size of the batch the retained value came from.
+ #[ test]
1797
+ fn test_first_list_acc_size ( ) -> Result < ( ) > {
1798
// Builds a fresh accumulator, feeds it `values` once, and returns its reported size.
+ fn size_after_batch ( values : & [ ArrayRef ] ) -> Result < usize > {
1799
+ let mut first_accumulator = FirstValueAccumulator :: try_new (
1800
// Element type matches the `Int32Type` batches built below.
// NOTE(review): the item-field nullability declared here may still differ
// from what `from_iter_primitive` produces; confirm if it matters.
+ & DataType :: List ( Arc :: new ( Field :: new_list_field ( DataType :: Int32 , false ) ) ) ,
1801
+ & [ ] ,
1802
+ LexOrdering :: default ( ) ,
1803
+ false ,
1804
+ ) ?;
1805
+
1806
+ first_accumulator. update_batch ( values) ?;
1807
+
1808
+ Ok ( first_accumulator. size ( ) )
1809
+ }
1810
+
1811
// Large batch: 10000 identical single-element lists.
+ let batch1 = ListArray :: from_iter_primitive :: < Int32Type , _ , _ > (
1812
+ repeat_with ( || Some ( vec ! [ Some ( 1 ) ] ) ) . take ( 10000 ) ,
1813
+ ) ;
1814
// Minimal batch: exactly one single-element list.
+ let batch2 =
1815
+ ListArray :: from_iter_primitive :: < Int32Type , _ , _ > ( [ Some ( vec ! [ Some ( 1 ) ] ) ] ) ;
1816
+
1817
+ let size1 = size_after_batch ( & [ Arc :: new ( batch1) ] ) ?;
1818
+ let size2 = size_after_batch ( & [ Arc :: new ( batch2) ] ) ?;
1819
+ assert_eq ! ( size1, size2) ;
1820
+
1821
+ Ok ( ( ) )
1822
+ }
1823
+
1824
// Checks that the size reported by a `LastValueAccumulator` after a single
// `update_batch` call is the same whether the input batch holds 10000
// single-element lists or just one, i.e. the reported size must not grow with
// the size of the batch the retained value came from.
+ #[ test]
1825
+ fn test_last_list_acc_size ( ) -> Result < ( ) > {
1826
// Builds a fresh accumulator, feeds it `values` once, and returns its reported size.
+ fn size_after_batch ( values : & [ ArrayRef ] ) -> Result < usize > {
1827
+ let mut last_accumulator = LastValueAccumulator :: try_new (
1828
// Element type matches the `Int32Type` batches built below.
// NOTE(review): the item-field nullability declared here may still differ
// from what `from_iter_primitive` produces; confirm if it matters.
+ & DataType :: List ( Arc :: new ( Field :: new_list_field ( DataType :: Int32 , false ) ) ) ,
1829
+ & [ ] ,
1830
+ LexOrdering :: default ( ) ,
1831
+ false ,
1832
+ ) ?;
1833
+
1834
+ last_accumulator. update_batch ( values) ?;
1835
+
1836
+ Ok ( last_accumulator. size ( ) )
1837
+ }
1838
+
1839
// Large batch: 10000 identical single-element lists.
+ let batch1 = ListArray :: from_iter_primitive :: < Int32Type , _ , _ > (
1840
+ repeat_with ( || Some ( vec ! [ Some ( 1 ) ] ) ) . take ( 10000 ) ,
1841
+ ) ;
1842
// Minimal batch: exactly one single-element list.
+ let batch2 =
1843
+ ListArray :: from_iter_primitive :: < Int32Type , _ , _ > ( [ Some ( vec ! [ Some ( 1 ) ] ) ] ) ;
1844
+
1845
+ let size1 = size_after_batch ( & [ Arc :: new ( batch1) ] ) ?;
1846
+ let size2 = size_after_batch ( & [ Arc :: new ( batch2) ] ) ?;
1847
+ assert_eq ! ( size1, size2) ;
1848
+
1849
+ Ok ( ( ) )
1850
+ }
1775
1851
}
0 commit comments