Skip to content

Commit 283c4b5

Browse files
alamb and metesynnada
authored
Document and scratch directory for sqllogictest and make test specific (#7312)
* Document and `scratch` directory for sqllogictest and make test specific * Fix name * Update test pats * Apply suggestions from code review Co-authored-by: Metehan Yıldırım <[email protected]> --------- Co-authored-by: Metehan Yıldırım <[email protected]>
1 parent 2fd704c commit 283c4b5

File tree

4 files changed

+64
-43
lines changed

4 files changed

+64
-43
lines changed

datafusion/sqllogictest/README.md

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,14 +177,32 @@ You can update the tests / generate expected output by passing the `--complete`
177177
cargo test --test sqllogictests -- ddl --complete
178178
```
179179

180-
#### sqllogictests
180+
#### Running tests: `scratchdir`
181181

182-
sqllogictest is a program originally written for SQLite to verify the correctness of SQL queries against the SQLite
183-
engine. The program is engine-agnostic and can parse sqllogictest files (`.slt`), runs queries against an SQL engine and
184-
compare the output to the expected output.
182+
The DataFusion sqllogictest runner automatically creates a directory
183+
named `test_files/scratch/<filename>`, creating it if needed and
184+
clearing any file contents if it exists.
185185

186-
Tests in the `.slt` file are a sequence of query record generally starting with `CREATE` statements to populate tables
187-
and then further queries to test the populated data (arrow-datafusion exception).
186+
For example, the `test_files/copy.slt` file should use scratch
187+
directory `test_files/scratch/copy`.
188+
189+
Tests that need to write temporary files should write (only) to this
190+
directory to ensure they do not interfere with other concurrently
191+
running tests.
192+
193+
#### `.slt` file format
194+
195+
[`sqllogictest`] was originally written for SQLite to verify the
196+
correctness of SQL queries against the SQLite engine. The format is
197+
engine-agnostic: a runner parses sqllogictest files (`.slt`), runs
198+
queries against an SQL engine and compares the output to the expected
199+
output.
200+
201+
[`sqllogictest`]: https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
202+
203+
Tests in the `.slt` file are a sequence of query records generally
204+
starting with `CREATE` statements to populate tables and then further
205+
queries to test the populated data.
188206

189207
Each `.slt` file runs in its own, isolated `SessionContext`, to make the test setup explicit and so they can run in
190208
parallel. Thus it is important to keep the tests from having externally visible side effects (like writing to a global

datafusion/sqllogictest/bin/sqllogictests.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,26 +55,28 @@ pub async fn main() -> Result<()> {
5555
run_tests().await
5656
}
5757

58-
/// Sets up an empty directory at test_files/scratch
58+
/// Sets up an empty directory at test_files/scratch/<name>
5959
/// creating it if needed and clearing any file contents if it exists
6060
/// This allows tests for inserting to external tables or `COPY ... TO`
6161
/// to persist data to disk and have consistent state when running
6262
/// a new test
63-
fn setup_scratch_dir() -> Result<()> {
64-
let path = std::path::Path::new("test_files/scratch");
63+
fn setup_scratch_dir(name: &Path) -> Result<()> {
64+
// go from copy.slt --> copy
65+
let file_stem = name.file_stem().expect("File should have a stem");
66+
let path = PathBuf::from("test_files").join("scratch").join(file_stem);
67+
68+
info!("Creating scratch dir in {path:?}");
6569
if path.exists() {
66-
fs::remove_dir_all(path)?;
70+
fs::remove_dir_all(&path)?;
6771
}
68-
fs::create_dir(path)?;
72+
fs::create_dir_all(&path)?;
6973
Ok(())
7074
}
7175

7276
async fn run_tests() -> Result<()> {
7377
// Enable logging (e.g. set RUST_LOG=debug to see debug logs)
7478
env_logger::init();
7579

76-
setup_scratch_dir()?;
77-
7880
let options = Options::new();
7981

8082
// Run all tests in parallel, reporting failures at the end
@@ -135,6 +137,7 @@ async fn run_test_file(test_file: TestFile) -> Result<()> {
135137
info!("Skipping: {}", path.display());
136138
return Ok(());
137139
};
140+
setup_scratch_dir(&relative_path)?;
138141
let mut runner = sqllogictest::Runner::new(|| async {
139142
Ok(DataFusion::new(
140143
test_ctx.session_ctx().clone(),

datafusion/sqllogictest/test_files/copy.slt

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,33 +21,33 @@ create table source_table(col1 integer, col2 varchar) as values (1, 'Foo'), (2,
2121

2222
# Copy to directory as multiple files
2323
query IT
24-
COPY source_table TO 'test_files/scratch/table' (format parquet, per_thread_output true);
24+
COPY source_table TO 'test_files/scratch/copy/table' (format parquet, per_thread_output true);
2525
----
2626
2
2727

2828
# Error case
2929
query error DataFusion error: Error during planning: Copy To format not explicitly set and unable to get file extension!
30-
EXPLAIN COPY source_table to 'test_files/scratch/table'
30+
EXPLAIN COPY source_table to 'test_files/scratch/copy/table'
3131

3232
query TT
33-
EXPLAIN COPY source_table to 'test_files/scratch/table' (format parquet, per_thread_output true)
33+
EXPLAIN COPY source_table to 'test_files/scratch/copy/table' (format parquet, per_thread_output true)
3434
----
3535
logical_plan
36-
CopyTo: format=parquet output_url=test_files/scratch/table per_thread_output=true options: (format parquet, per_thread_output true)
36+
CopyTo: format=parquet output_url=test_files/scratch/copy/table per_thread_output=true options: (format parquet, per_thread_output true)
3737
--TableScan: source_table projection=[col1, col2]
3838
physical_plan
3939
InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[])
4040
--MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
4141

4242
# Copy more files to directory via query
4343
query IT
44-
COPY (select * from source_table UNION ALL select * from source_table) to 'test_files/scratch/table' (format parquet, per_thread_output true);
44+
COPY (select * from source_table UNION ALL select * from source_table) to 'test_files/scratch/copy/table' (format parquet, per_thread_output true);
4545
----
4646
4
4747

4848
# validate multiple parquet file output
4949
statement ok
50-
CREATE EXTERNAL TABLE validate_parquet STORED AS PARQUET LOCATION 'test_files/scratch/table/';
50+
CREATE EXTERNAL TABLE validate_parquet STORED AS PARQUET LOCATION 'test_files/scratch/copy/table/';
5151

5252
query IT
5353
select * from validate_parquet;
@@ -61,13 +61,13 @@ select * from validate_parquet;
6161

6262
# Copy from table to single file
6363
query IT
64-
COPY source_table to 'test_files/scratch/table.parquet';
64+
COPY source_table to 'test_files/scratch/copy/table.parquet';
6565
----
6666
2
6767

6868
# validate single parquet file output
6969
statement ok
70-
CREATE EXTERNAL TABLE validate_parquet_single STORED AS PARQUET LOCATION 'test_files/scratch/table.parquet';
70+
CREATE EXTERNAL TABLE validate_parquet_single STORED AS PARQUET LOCATION 'test_files/scratch/copy/table.parquet';
7171

7272
query IT
7373
select * from validate_parquet_single;
@@ -77,13 +77,13 @@ select * from validate_parquet_single;
7777

7878
# copy from table to folder of csv files
7979
query IT
80-
COPY source_table to 'test_files/scratch/table_csv' (format csv, per_thread_output true);
80+
COPY source_table to 'test_files/scratch/copy/table_csv' (format csv, per_thread_output true);
8181
----
8282
2
8383

8484
# validate folder of csv files
8585
statement ok
86-
CREATE EXTERNAL TABLE validate_csv STORED AS csv WITH HEADER ROW LOCATION 'test_files/scratch/table_csv';
86+
CREATE EXTERNAL TABLE validate_csv STORED AS csv WITH HEADER ROW LOCATION 'test_files/scratch/copy/table_csv';
8787

8888
query IT
8989
select * from validate_csv;
@@ -93,13 +93,13 @@ select * from validate_csv;
9393

9494
# Copy from table to single csv
9595
query IT
96-
COPY source_table to 'test_files/scratch/table.csv';
96+
COPY source_table to 'test_files/scratch/copy/table.csv';
9797
----
9898
2
9999

100100
# Validate single csv output
101101
statement ok
102-
CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW LOCATION 'test_files/scratch/table.csv';
102+
CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW LOCATION 'test_files/scratch/copy/table.csv';
103103

104104
query IT
105105
select * from validate_single_csv;
@@ -109,13 +109,13 @@ select * from validate_single_csv;
109109

110110
# Copy from table to folder of json
111111
query IT
112-
COPY source_table to 'test_files/scratch/table_json' (format json, per_thread_output true);
112+
COPY source_table to 'test_files/scratch/copy/table_json' (format json, per_thread_output true);
113113
----
114114
2
115115

116116
# Validate json output
117117
statement ok
118-
CREATE EXTERNAL TABLE validate_json STORED AS json LOCATION 'test_files/scratch/table_json';
118+
CREATE EXTERNAL TABLE validate_json STORED AS json LOCATION 'test_files/scratch/copy/table_json';
119119

120120
query IT
121121
select * from validate_json;
@@ -125,13 +125,13 @@ select * from validate_json;
125125

126126
# Copy from table to single json file
127127
query IT
128-
COPY source_table to 'test_files/scratch/table.json';
128+
COPY source_table to 'test_files/scratch/copy/table.json';
129129
----
130130
2
131131

132132
# Validate single JSON file
133133
statement ok
134-
CREATE EXTERNAL TABLE validate_single_json STORED AS json LOCATION 'test_files/scratch/table_json';
134+
CREATE EXTERNAL TABLE validate_single_json STORED AS json LOCATION 'test_files/scratch/copy/table_json';
135135

136136
query IT
137137
select * from validate_single_json;
@@ -141,13 +141,13 @@ select * from validate_single_json;
141141

142142
# Copy from table with options
143143
query IT
144-
COPY source_table to 'test_files/scratch/table.json' (row_group_size 55);
144+
COPY source_table to 'test_files/scratch/copy/table.json' (row_group_size 55);
145145
----
146146
2
147147

148148
# Copy from table with options (and trailing comma)
149149
query IT
150-
COPY source_table to 'test_files/scratch/table.json' (row_group_size 55, row_group_limit_bytes 9,);
150+
COPY source_table to 'test_files/scratch/copy/table.json' (row_group_size 55, row_group_limit_bytes 9,);
151151
----
152152
2
153153

datafusion/sqllogictest/test_files/insert_to_external.slt

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ statement ok
4949
CREATE EXTERNAL TABLE
5050
single_file_test(a bigint, b bigint)
5151
STORED AS csv
52-
LOCATION 'test_files/scratch/single_csv_table.csv'
52+
LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv'
5353
OPTIONS(
5454
create_local_path 'true',
5555
single_file 'true',
@@ -70,7 +70,7 @@ statement ok
7070
CREATE EXTERNAL TABLE
7171
directory_test(a bigint, b bigint)
7272
STORED AS parquet
73-
LOCATION 'test_files/scratch/external_parquet_table_q0'
73+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0'
7474
OPTIONS(
7575
create_local_path 'true',
7676
);
@@ -87,10 +87,10 @@ select * from directory_test;
8787
3 4
8888

8989
statement ok
90-
CREATE EXTERNAL TABLE
90+
CREATE EXTERNAL TABLE
9191
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
9292
STORED AS parquet
93-
LOCATION 'test_files/scratch/external_parquet_table_q1'
93+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1'
9494
OPTIONS (create_local_path 'true');
9595

9696
query TT
@@ -153,10 +153,10 @@ drop table table_without_values;
153153

154154
# test_insert_into_as_select_multi_partitioned
155155
statement ok
156-
CREATE EXTERNAL TABLE
156+
CREATE EXTERNAL TABLE
157157
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
158158
STORED AS parquet
159-
LOCATION 'test_files/scratch/external_parquet_table_q2'
159+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2'
160160
OPTIONS (create_local_path 'true');
161161

162162
query TT
@@ -197,10 +197,10 @@ drop table table_without_values;
197197

198198
# test_insert_into_with_sort
199199
statement ok
200-
CREATE EXTERNAL TABLE
200+
CREATE EXTERNAL TABLE
201201
table_without_values(c1 varchar NULL)
202202
STORED AS parquet
203-
LOCATION 'test_files/scratch/external_parquet_table_q3'
203+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3'
204204
OPTIONS (create_local_path 'true');
205205

206206
# verify that the sort order of the insert query is maintained into the
@@ -237,10 +237,10 @@ drop table table_without_values;
237237

238238
# test insert with column names
239239
statement ok
240-
CREATE EXTERNAL TABLE
240+
CREATE EXTERNAL TABLE
241241
table_without_values(id BIGINT, name varchar)
242242
STORED AS parquet
243-
LOCATION 'test_files/scratch/external_parquet_table_q4'
243+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4'
244244
OPTIONS (create_local_path 'true');
245245

246246
query IT
@@ -276,10 +276,10 @@ drop table table_without_values;
276276

277277
# test insert with non-nullable column
278278
statement ok
279-
CREATE EXTERNAL TABLE
279+
CREATE EXTERNAL TABLE
280280
table_without_values(field1 BIGINT NOT NULL, field2 BIGINT NULL)
281281
STORED AS parquet
282-
LOCATION 'test_files/scratch/external_parquet_table_q5'
282+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5'
283283
OPTIONS (create_local_path 'true');
284284

285285
query II

0 commit comments

Comments
 (0)