Skip to content

Commit 2144cf2

Browse files
committed
Replace calls to the deprecated `statistics()` API with `partition_statistics(None)`
1 parent 9f28472 commit 2144cf2

37 files changed

+343
-183
lines changed

datafusion/core/src/datasource/file_format/csv.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,11 @@ mod tests {
217217
assert_eq!(tt_batches, 50 /* 100/2 */);
218218

219219
// test metadata
220-
assert_eq!(exec.statistics()?.num_rows, Precision::Absent);
221-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Absent);
220+
assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent);
221+
assert_eq!(
222+
exec.partition_statistics(None)?.total_byte_size,
223+
Precision::Absent
224+
);
222225

223226
Ok(())
224227
}

datafusion/core/src/datasource/file_format/json.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,11 @@ mod tests {
7575
assert_eq!(tt_batches, 6 /* 12/2 */);
7676

7777
// test metadata
78-
assert_eq!(exec.statistics()?.num_rows, Precision::Absent);
79-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Absent);
78+
assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent);
79+
assert_eq!(
80+
exec.partition_statistics(None)?.total_byte_size,
81+
Precision::Absent
82+
);
8083

8184
Ok(())
8285
}

datafusion/core/src/datasource/file_format/parquet.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -616,9 +616,15 @@ mod tests {
616616
assert_eq!(tt_batches, 4 /* 8/2 */);
617617

618618
// test metadata
619-
assert_eq!(exec.statistics()?.num_rows, Precision::Exact(8));
619+
assert_eq!(
620+
exec.partition_statistics(None)?.num_rows,
621+
Precision::Exact(8)
622+
);
620623
// TODO correct byte size: https://github.com/apache/datafusion/issues/14936
621-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671));
624+
assert_eq!(
625+
exec.partition_statistics(None)?.total_byte_size,
626+
Precision::Exact(671)
627+
);
622628

623629
Ok(())
624630
}
@@ -659,9 +665,15 @@ mod tests {
659665
get_exec(&state, "alltypes_plain.parquet", projection, Some(1)).await?;
660666

661667
// note: even if the limit is set, the executor rounds up to the batch size
662-
assert_eq!(exec.statistics()?.num_rows, Precision::Exact(8));
668+
assert_eq!(
669+
exec.partition_statistics(None)?.num_rows,
670+
Precision::Exact(8)
671+
);
663672
// TODO correct byte size: https://github.com/apache/datafusion/issues/14936
664-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671));
673+
assert_eq!(
674+
exec.partition_statistics(None)?.total_byte_size,
675+
Precision::Exact(671)
676+
);
665677
let batches = collect(exec, task_ctx).await?;
666678
assert_eq!(1, batches.len());
667679
assert_eq!(11, batches[0].num_columns());

datafusion/core/src/datasource/listing/table.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,8 +1294,14 @@ mod tests {
12941294
assert_eq!(exec.output_partitioning().partition_count(), 1);
12951295

12961296
// test metadata
1297-
assert_eq!(exec.statistics()?.num_rows, Precision::Exact(8));
1298-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671));
1297+
assert_eq!(
1298+
exec.partition_statistics(None)?.num_rows,
1299+
Precision::Exact(8)
1300+
);
1301+
assert_eq!(
1302+
exec.partition_statistics(None)?.total_byte_size,
1303+
Precision::Exact(671)
1304+
);
12991305

13001306
Ok(())
13011307
}
@@ -1320,9 +1326,15 @@ mod tests {
13201326
let table = ListingTable::try_new(config)?;
13211327

13221328
let exec = table.scan(&state, None, &[], None).await?;
1323-
assert_eq!(exec.statistics()?.num_rows, Precision::Exact(8));
1329+
assert_eq!(
1330+
exec.partition_statistics(None)?.num_rows,
1331+
Precision::Exact(8)
1332+
);
13241333
// TODO correct byte size: https://github.com/apache/datafusion/issues/14936
1325-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671));
1334+
assert_eq!(
1335+
exec.partition_statistics(None)?.total_byte_size,
1336+
Precision::Exact(671)
1337+
);
13261338

13271339
Ok(())
13281340
}
@@ -1348,8 +1360,11 @@ mod tests {
13481360
let table = ListingTable::try_new(config)?;
13491361

13501362
let exec = table.scan(&state, None, &[], None).await?;
1351-
assert_eq!(exec.statistics()?.num_rows, Precision::Absent);
1352-
assert_eq!(exec.statistics()?.total_byte_size, Precision::Absent);
1363+
assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent);
1364+
assert_eq!(
1365+
exec.partition_statistics(None)?.total_byte_size,
1366+
Precision::Absent
1367+
);
13531368

13541369
Ok(())
13551370
}

datafusion/core/tests/custom_sources_cases/statistics.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ async fn sql_basic() -> Result<()> {
232232
let physical_plan = df.create_physical_plan().await.unwrap();
233233

234234
// the statistics should be those of the source
235-
assert_eq!(stats, physical_plan.statistics()?);
235+
assert_eq!(stats, physical_plan.partition_statistics(None)?);
236236

237237
Ok(())
238238
}
@@ -248,7 +248,7 @@ async fn sql_filter() -> Result<()> {
248248
.unwrap();
249249

250250
let physical_plan = df.create_physical_plan().await.unwrap();
251-
let stats = physical_plan.statistics()?;
251+
let stats = physical_plan.partition_statistics(None)?;
252252
assert_eq!(stats.num_rows, Precision::Inexact(1));
253253

254254
Ok(())
@@ -270,7 +270,7 @@ async fn sql_limit() -> Result<()> {
270270
column_statistics: col_stats,
271271
total_byte_size: Precision::Absent
272272
},
273-
physical_plan.statistics()?
273+
physical_plan.partition_statistics(None)?
274274
);
275275

276276
let df = ctx
@@ -279,7 +279,7 @@ async fn sql_limit() -> Result<()> {
279279
.unwrap();
280280
let physical_plan = df.create_physical_plan().await.unwrap();
281281
// when the limit is larger than the original number of lines, statistics remain unchanged
282-
assert_eq!(stats, physical_plan.statistics()?);
282+
assert_eq!(stats, physical_plan.partition_statistics(None)?);
283283

284284
Ok(())
285285
}
@@ -296,7 +296,7 @@ async fn sql_window() -> Result<()> {
296296

297297
let physical_plan = df.create_physical_plan().await.unwrap();
298298

299-
let result = physical_plan.statistics()?;
299+
let result = physical_plan.partition_statistics(None)?;
300300

301301
assert_eq!(stats.num_rows, result.num_rows);
302302
let col_stats = result.column_statistics;

datafusion/core/tests/parquet/file_statistics.rs

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,19 @@ async fn check_stats_precision_with_filter_pushdown() {
5050
let (_, _, state) = get_cache_runtime_state();
5151
// Scan without filter, stats are exact
5252
let exec = table.scan(&state, None, &[], None).await.unwrap();
53-
assert_eq!(exec.statistics().unwrap().num_rows, Precision::Exact(8));
53+
assert_eq!(
54+
exec.partition_statistics(None).unwrap().num_rows,
55+
Precision::Exact(8)
56+
);
5457

5558
// Scan with filter pushdown, stats are inexact
5659
let filter = Expr::gt(col("id"), lit(1));
5760

5861
let exec = table.scan(&state, None, &[filter], None).await.unwrap();
59-
assert_eq!(exec.statistics().unwrap().num_rows, Precision::Inexact(8));
62+
assert_eq!(
63+
exec.partition_statistics(None).unwrap().num_rows,
64+
Precision::Inexact(8)
65+
);
6066
}
6167

6268
#[tokio::test]
@@ -79,9 +85,12 @@ async fn load_table_stats_with_session_level_cache() {
7985
assert_eq!(get_static_cache_size(&state1), 0);
8086
let exec1 = table1.scan(&state1, None, &[], None).await.unwrap();
8187

82-
assert_eq!(exec1.statistics().unwrap().num_rows, Precision::Exact(8));
8388
assert_eq!(
84-
exec1.statistics().unwrap().total_byte_size,
89+
exec1.partition_statistics(None).unwrap().num_rows,
90+
Precision::Exact(8)
91+
);
92+
assert_eq!(
93+
exec1.partition_statistics(None).unwrap().total_byte_size,
8594
// TODO correct byte size: https://github.com/apache/datafusion/issues/14936
8695
Precision::Exact(671),
8796
);
@@ -91,9 +100,12 @@ async fn load_table_stats_with_session_level_cache() {
91100
//check session 1 cache result not show in session 2
92101
assert_eq!(get_static_cache_size(&state2), 0);
93102
let exec2 = table2.scan(&state2, None, &[], None).await.unwrap();
94-
assert_eq!(exec2.statistics().unwrap().num_rows, Precision::Exact(8));
95103
assert_eq!(
96-
exec2.statistics().unwrap().total_byte_size,
104+
exec2.partition_statistics(None).unwrap().num_rows,
105+
Precision::Exact(8)
106+
);
107+
assert_eq!(
108+
exec2.partition_statistics(None).unwrap().total_byte_size,
97109
// TODO correct byte size: https://github.com/apache/datafusion/issues/14936
98110
Precision::Exact(671),
99111
);
@@ -103,9 +115,12 @@ async fn load_table_stats_with_session_level_cache() {
103115
//check session 1 cache result not show in session 2
104116
assert_eq!(get_static_cache_size(&state1), 1);
105117
let exec3 = table1.scan(&state1, None, &[], None).await.unwrap();
106-
assert_eq!(exec3.statistics().unwrap().num_rows, Precision::Exact(8));
107118
assert_eq!(
108-
exec3.statistics().unwrap().total_byte_size,
119+
exec3.partition_statistics(None).unwrap().num_rows,
120+
Precision::Exact(8)
121+
);
122+
assert_eq!(
123+
exec3.partition_statistics(None).unwrap().total_byte_size,
109124
// TODO correct byte size: https://github.com/apache/datafusion/issues/14936
110125
Precision::Exact(671),
111126
);

datafusion/core/tests/physical_optimizer/enforce_distribution.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ impl ExecutionPlan for SortRequiredExec {
170170
}
171171

172172
fn statistics(&self) -> Result<Statistics> {
173-
self.input.statistics()
173+
self.input.partition_statistics(None)
174174
}
175175
}
176176

datafusion/core/tests/physical_optimizer/join_selection.rs

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,19 @@ async fn test_join_with_swap() {
251251
.expect("The type of the plan should not be changed");
252252

253253
assert_eq!(
254-
swapped_join.left().statistics().unwrap().total_byte_size,
254+
swapped_join
255+
.left()
256+
.partition_statistics(None)
257+
.unwrap()
258+
.total_byte_size,
255259
Precision::Inexact(8192)
256260
);
257261
assert_eq!(
258-
swapped_join.right().statistics().unwrap().total_byte_size,
262+
swapped_join
263+
.right()
264+
.partition_statistics(None)
265+
.unwrap()
266+
.total_byte_size,
259267
Precision::Inexact(2097152)
260268
);
261269
}
@@ -291,11 +299,19 @@ async fn test_left_join_no_swap() {
291299
.expect("The type of the plan should not be changed");
292300

293301
assert_eq!(
294-
swapped_join.left().statistics().unwrap().total_byte_size,
302+
swapped_join
303+
.left()
304+
.partition_statistics(None)
305+
.unwrap()
306+
.total_byte_size,
295307
Precision::Inexact(8192)
296308
);
297309
assert_eq!(
298-
swapped_join.right().statistics().unwrap().total_byte_size,
310+
swapped_join
311+
.right()
312+
.partition_statistics(None)
313+
.unwrap()
314+
.total_byte_size,
299315
Precision::Inexact(2097152)
300316
);
301317
}
@@ -336,11 +352,19 @@ async fn test_join_with_swap_semi() {
336352

337353
assert_eq!(swapped_join.schema().fields().len(), 1);
338354
assert_eq!(
339-
swapped_join.left().statistics().unwrap().total_byte_size,
355+
swapped_join
356+
.left()
357+
.partition_statistics(None)
358+
.unwrap()
359+
.total_byte_size,
340360
Precision::Inexact(8192)
341361
);
342362
assert_eq!(
343-
swapped_join.right().statistics().unwrap().total_byte_size,
363+
swapped_join
364+
.right()
365+
.partition_statistics(None)
366+
.unwrap()
367+
.total_byte_size,
344368
Precision::Inexact(2097152)
345369
);
346370
assert_eq!(original_schema, swapped_join.schema());
@@ -455,11 +479,19 @@ async fn test_join_no_swap() {
455479
.expect("The type of the plan should not be changed");
456480

457481
assert_eq!(
458-
swapped_join.left().statistics().unwrap().total_byte_size,
482+
swapped_join
483+
.left()
484+
.partition_statistics(None)
485+
.unwrap()
486+
.total_byte_size,
459487
Precision::Inexact(8192)
460488
);
461489
assert_eq!(
462-
swapped_join.right().statistics().unwrap().total_byte_size,
490+
swapped_join
491+
.right()
492+
.partition_statistics(None)
493+
.unwrap()
494+
.total_byte_size,
463495
Precision::Inexact(2097152)
464496
);
465497
}
@@ -524,11 +556,19 @@ async fn test_nl_join_with_swap(join_type: JoinType) {
524556
);
525557

526558
assert_eq!(
527-
swapped_join.left().statistics().unwrap().total_byte_size,
559+
swapped_join
560+
.left()
561+
.partition_statistics(None)
562+
.unwrap()
563+
.total_byte_size,
528564
Precision::Inexact(8192)
529565
);
530566
assert_eq!(
531-
swapped_join.right().statistics().unwrap().total_byte_size,
567+
swapped_join
568+
.right()
569+
.partition_statistics(None)
570+
.unwrap()
571+
.total_byte_size,
532572
Precision::Inexact(2097152)
533573
);
534574
}
@@ -589,11 +629,19 @@ async fn test_nl_join_with_swap_no_proj(join_type: JoinType) {
589629
);
590630

591631
assert_eq!(
592-
swapped_join.left().statistics().unwrap().total_byte_size,
632+
swapped_join
633+
.left()
634+
.partition_statistics(None)
635+
.unwrap()
636+
.total_byte_size,
593637
Precision::Inexact(8192)
594638
);
595639
assert_eq!(
596-
swapped_join.right().statistics().unwrap().total_byte_size,
640+
swapped_join
641+
.right()
642+
.partition_statistics(None)
643+
.unwrap()
644+
.total_byte_size,
597645
Precision::Inexact(2097152)
598646
);
599647
}

datafusion/core/tests/sql/path_partition.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ async fn parquet_statistics() -> Result<()> {
511511
let schema = physical_plan.schema();
512512
assert_eq!(schema.fields().len(), 4);
513513

514-
let stat_cols = physical_plan.statistics()?.column_statistics;
514+
let stat_cols = physical_plan.partition_statistics(None)?.column_statistics;
515515
assert_eq!(stat_cols.len(), 4);
516516
// stats for the first col are read from the parquet file
517517
assert_eq!(stat_cols[0].null_count, Precision::Exact(3));
@@ -526,7 +526,7 @@ async fn parquet_statistics() -> Result<()> {
526526
let schema = physical_plan.schema();
527527
assert_eq!(schema.fields().len(), 2);
528528

529-
let stat_cols = physical_plan.statistics()?.column_statistics;
529+
let stat_cols = physical_plan.partition_statistics(None)?.column_statistics;
530530
assert_eq!(stat_cols.len(), 2);
531531
// stats for the first col are read from the parquet file
532532
assert_eq!(stat_cols[0].null_count, Precision::Exact(1));

datafusion/datasource/src/memory.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,7 @@ mod tests {
976976
)?;
977977

978978
assert_eq!(
979-
values.statistics()?,
979+
values.partition_statistics(None)?,
980980
Statistics {
981981
num_rows: Precision::Exact(rows),
982982
total_byte_size: Precision::Exact(8), // not important

0 commit comments

Comments
 (0)