Skip to content

Commit 3af31ca

Browse files
shruti2522, alamb, blaginin
authored
Migrate datasource tests to insta (#15258)
* insta migrate
* Update datafusion/core/src/datasource/physical_plan/parquet.rs
  Co-authored-by: Dmitrii Blaginin <[email protected]>
* More suggestions from code review
* Update snapshots
* fmt
---------
Co-authored-by: Andrew Lamb <[email protected]>
Co-authored-by: Dmitrii Blaginin <[email protected]>
1 parent a05514c commit 3af31ca

File tree

11 files changed

+629
-665
lines changed

11 files changed

+629
-665
lines changed

datafusion/core/src/datasource/file_format/avro.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ mod tests {
2828
};
2929
use arrow::array::{as_string_array, Array};
3030
use datafusion_catalog::Session;
31+
use datafusion_common::test_util::batches_to_string;
3132
use datafusion_common::{
32-
assert_batches_eq,
3333
cast::{
3434
as_binary_array, as_boolean_array, as_float32_array, as_float64_array,
3535
as_int32_array, as_timestamp_microsecond_array,
@@ -41,6 +41,7 @@ mod tests {
4141
use datafusion_execution::config::SessionConfig;
4242
use datafusion_physical_plan::{collect, ExecutionPlan};
4343
use futures::StreamExt;
44+
use insta::assert_snapshot;
4445

4546
#[tokio::test]
4647
async fn read_small_batches() -> Result<()> {
@@ -115,20 +116,20 @@ mod tests {
115116
let batches = collect(exec, task_ctx).await?;
116117
assert_eq!(batches.len(), 1);
117118

118-
let expected = ["+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+",
119-
"| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |",
120-
"+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+",
121-
"| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |",
122-
"| 5 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30332f30312f3039 | 31 | 2009-03-01T00:01:00 |",
123-
"| 6 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30342f30312f3039 | 30 | 2009-04-01T00:00:00 |",
124-
"| 7 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30342f30312f3039 | 31 | 2009-04-01T00:01:00 |",
125-
"| 2 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30322f30312f3039 | 30 | 2009-02-01T00:00:00 |",
126-
"| 3 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30322f30312f3039 | 31 | 2009-02-01T00:01:00 |",
127-
"| 0 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30312f30312f3039 | 30 | 2009-01-01T00:00:00 |",
128-
"| 1 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30312f30312f3039 | 31 | 2009-01-01T00:01:00 |",
129-
"+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+"];
130-
131-
assert_batches_eq!(expected, &batches);
119+
assert_snapshot!(batches_to_string(&batches),@r###"
120+
+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+
121+
| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |
122+
+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+
123+
| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |
124+
| 5 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30332f30312f3039 | 31 | 2009-03-01T00:01:00 |
125+
| 6 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30342f30312f3039 | 30 | 2009-04-01T00:00:00 |
126+
| 7 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30342f30312f3039 | 31 | 2009-04-01T00:01:00 |
127+
| 2 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30322f30312f3039 | 30 | 2009-02-01T00:00:00 |
128+
| 3 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30322f30312f3039 | 31 | 2009-02-01T00:01:00 |
129+
| 0 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30312f30312f3039 | 30 | 2009-01-01T00:00:00 |
130+
| 1 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30312f30312f3039 | 31 | 2009-01-01T00:01:00 |
131+
+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+
132+
"###);
132133
Ok(())
133134
}
134135

datafusion/core/src/datasource/file_format/csv.rs

Lines changed: 79 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ mod tests {
3535
use datafusion_common::cast::as_string_array;
3636
use datafusion_common::internal_err;
3737
use datafusion_common::stats::Precision;
38-
use datafusion_common::test_util::arrow_test_data;
39-
use datafusion_common::{assert_batches_eq, Result};
38+
use datafusion_common::test_util::{arrow_test_data, batches_to_string};
39+
use datafusion_common::Result;
4040
use datafusion_datasource::decoder::{
4141
BatchDeserializer, DecoderDeserializer, DeserializerOutput,
4242
};
@@ -57,6 +57,7 @@ mod tests {
5757
use chrono::DateTime;
5858
use futures::stream::BoxStream;
5959
use futures::StreamExt;
60+
use insta::assert_snapshot;
6061
use object_store::local::LocalFileSystem;
6162
use object_store::path::Path;
6263
use object_store::{
@@ -557,15 +558,17 @@ mod tests {
557558
.select_columns(&["c2", "c3"])?
558559
.collect()
559560
.await?;
560-
#[rustfmt::skip]
561-
let expected = ["+----+------+",
562-
"| c2 | c3 |",
563-
"+----+------+",
564-
"| 5 | 36 |",
565-
"| 5 | -31 |",
566-
"| 5 | -101 |",
567-
"+----+------+"];
568-
assert_batches_eq!(expected, &record_batch);
561+
562+
assert_snapshot!(batches_to_string(&record_batch), @r###"
563+
+----+------+
564+
| c2 | c3 |
565+
+----+------+
566+
| 5 | 36 |
567+
| 5 | -31 |
568+
| 5 | -101 |
569+
+----+------+
570+
"###);
571+
569572
Ok(())
570573
}
571574

@@ -671,13 +674,15 @@ mod tests {
671674
let query_result = ctx.sql(query).await?.collect().await?;
672675
let actual_partitions = count_query_csv_partitions(&ctx, query).await?;
673676

674-
#[rustfmt::skip]
675-
let expected = ["+--------------+",
676-
"| sum(aggr.c2) |",
677-
"+--------------+",
678-
"| 285 |",
679-
"+--------------+"];
680-
assert_batches_eq!(expected, &query_result);
677+
insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###"
678+
+--------------+
679+
| sum(aggr.c2) |
680+
+--------------+
681+
| 285 |
682+
+--------------+
683+
"###);
684+
}
685+
681686
assert_eq!(n_partitions, actual_partitions);
682687

683688
Ok(())
@@ -708,13 +713,15 @@ mod tests {
708713
let query_result = ctx.sql(query).await?.collect().await?;
709714
let actual_partitions = count_query_csv_partitions(&ctx, query).await?;
710715

711-
#[rustfmt::skip]
712-
let expected = ["+--------------+",
713-
"| sum(aggr.c3) |",
714-
"+--------------+",
715-
"| 781 |",
716-
"+--------------+"];
717-
assert_batches_eq!(expected, &query_result);
716+
insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###"
717+
+--------------+
718+
| sum(aggr.c3) |
719+
+--------------+
720+
| 781 |
721+
+--------------+
722+
"###);
723+
}
724+
718725
assert_eq!(1, actual_partitions); // Compressed csv won't be scanned in parallel
719726

720727
Ok(())
@@ -743,13 +750,15 @@ mod tests {
743750
let query_result = ctx.sql(query).await?.collect().await?;
744751
let actual_partitions = count_query_csv_partitions(&ctx, query).await?;
745752

746-
#[rustfmt::skip]
747-
let expected = ["+--------------+",
748-
"| sum(aggr.c3) |",
749-
"+--------------+",
750-
"| 781 |",
751-
"+--------------+"];
752-
assert_batches_eq!(expected, &query_result);
753+
insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###"
754+
+--------------+
755+
| sum(aggr.c3) |
756+
+--------------+
757+
| 781 |
758+
+--------------+
759+
"###);
760+
}
761+
753762
assert_eq!(1, actual_partitions); // csv won't be scanned in parallel when newlines_in_values is set
754763

755764
Ok(())
@@ -772,10 +781,10 @@ mod tests {
772781
let query = "select * from empty where random() > 0.5;";
773782
let query_result = ctx.sql(query).await?.collect().await?;
774783

775-
#[rustfmt::skip]
776-
let expected = ["++",
777-
"++"];
778-
assert_batches_eq!(expected, &query_result);
784+
assert_snapshot!(batches_to_string(&query_result),@r###"
785+
++
786+
++
787+
"###);
779788

780789
Ok(())
781790
}
@@ -797,10 +806,10 @@ mod tests {
797806
let query = "select * from empty where random() > 0.5;";
798807
let query_result = ctx.sql(query).await?.collect().await?;
799808

800-
#[rustfmt::skip]
801-
let expected = ["++",
802-
"++"];
803-
assert_batches_eq!(expected, &query_result);
809+
assert_snapshot!(batches_to_string(&query_result),@r###"
810+
++
811+
++
812+
"###);
804813

805814
Ok(())
806815
}
@@ -839,10 +848,10 @@ mod tests {
839848
let query = "select * from empty where random() > 0.5;";
840849
let query_result = ctx.sql(query).await?.collect().await?;
841850

842-
#[rustfmt::skip]
843-
let expected = ["++",
844-
"++"];
845-
assert_batches_eq!(expected, &query_result);
851+
assert_snapshot!(batches_to_string(&query_result),@r###"
852+
++
853+
++
854+
"###);
846855

847856
Ok(())
848857
}
@@ -891,13 +900,14 @@ mod tests {
891900
let query_result = ctx.sql(query).await?.collect().await?;
892901
let actual_partitions = count_query_csv_partitions(&ctx, query).await?;
893902

894-
#[rustfmt::skip]
895-
let expected = ["+---------------------+",
896-
"| sum(empty.column_1) |",
897-
"+---------------------+",
898-
"| 10 |",
899-
"+---------------------+"];
900-
assert_batches_eq!(expected, &query_result);
903+
insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###"
904+
+---------------------+
905+
| sum(empty.column_1) |
906+
+---------------------+
907+
| 10 |
908+
+---------------------+
909+
"###);}
910+
901911
assert_eq!(n_partitions, actual_partitions); // Won't get partitioned if all files are empty
902912

903913
Ok(())
@@ -930,21 +940,23 @@ mod tests {
930940
let query_result = ctx.sql(query).await?.collect().await?;
931941
let actual_partitions = count_query_csv_partitions(&ctx, query).await?;
932942

933-
#[rustfmt::skip]
934-
let expected = ["+-----------------------+",
935-
"| sum(one_col.column_1) |",
936-
"+-----------------------+",
937-
"| 50 |",
938-
"+-----------------------+"];
939-
940943
let file_size = std::fs::metadata("tests/data/one_col.csv")?.len() as usize;
941944
// A 20-Byte file at most get partitioned into 20 chunks
942945
let expected_partitions = if n_partitions <= file_size {
943946
n_partitions
944947
} else {
945948
file_size
946949
};
947-
assert_batches_eq!(expected, &query_result);
950+
951+
insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###"
952+
+-----------------------+
953+
| sum(one_col.column_1) |
954+
+-----------------------+
955+
| 50 |
956+
+-----------------------+
957+
"###);
958+
}
959+
948960
assert_eq!(expected_partitions, actual_partitions);
949961

950962
Ok(())
@@ -975,13 +987,14 @@ mod tests {
975987
let query_result = ctx.sql(query).await?.collect().await?;
976988
let actual_partitions = count_query_csv_partitions(&ctx, query).await?;
977989

978-
#[rustfmt::skip]
979-
let expected = ["+---------------+",
980-
"| sum_of_5_cols |",
981-
"+---------------+",
982-
"| 15 |",
983-
"+---------------+"];
984-
assert_batches_eq!(expected, &query_result);
990+
insta::allow_duplicates! {assert_snapshot!(batches_to_string(&query_result),@r###"
991+
+---------------+
992+
| sum_of_5_cols |
993+
+---------------+
994+
| 15 |
995+
+---------------+
996+
"###);}
997+
985998
assert_eq!(n_partitions, actual_partitions);
986999

9871000
Ok(())

0 commit comments

Comments
 (0)