@@ -26,6 +26,9 @@ use arrow::datatypes::{DataType, Field, Fields, Schema};
26
26
use datafusion:: datasource:: MemTable ;
27
27
use datafusion:: execution:: context:: SessionContext ;
28
28
use std:: sync:: Arc ;
29
+ use test_utils:: tpcds:: tpcds_schemas;
30
+ use test_utils:: tpch:: tpch_schemas;
31
+ use test_utils:: TableDef ;
29
32
use tokio:: runtime:: Runtime ;
30
33
31
34
/// Create a logical plan from the specified sql
@@ -48,116 +51,18 @@ fn physical_plan(ctx: &SessionContext, sql: &str) {
48
51
}
49
52
50
53
/// Create schema with the specified number of columns
51
- pub fn create_schema ( column_prefix : & str , num_columns : usize ) -> Schema {
54
+ fn create_schema ( column_prefix : & str , num_columns : usize ) -> Schema {
52
55
let fields: Fields = ( 0 ..num_columns)
53
56
. map ( |i| Field :: new ( format ! ( "{column_prefix}{i}" ) , DataType :: Int32 , true ) )
54
57
. collect ( ) ;
55
58
Schema :: new ( fields)
56
59
}
57
60
58
- pub fn create_table_provider ( column_prefix : & str , num_columns : usize ) -> Arc < MemTable > {
61
+ fn create_table_provider ( column_prefix : & str , num_columns : usize ) -> Arc < MemTable > {
59
62
let schema = Arc :: new ( create_schema ( column_prefix, num_columns) ) ;
60
63
MemTable :: try_new ( schema, vec ! [ ] ) . map ( Arc :: new) . unwrap ( )
61
64
}
62
65
63
- pub fn create_tpch_schemas ( ) -> [ ( String , Schema ) ; 8 ] {
64
- let lineitem_schema = Schema :: new ( vec ! [
65
- Field :: new( "l_orderkey" , DataType :: Int64 , false ) ,
66
- Field :: new( "l_partkey" , DataType :: Int64 , false ) ,
67
- Field :: new( "l_suppkey" , DataType :: Int64 , false ) ,
68
- Field :: new( "l_linenumber" , DataType :: Int32 , false ) ,
69
- Field :: new( "l_quantity" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
70
- Field :: new( "l_extendedprice" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
71
- Field :: new( "l_discount" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
72
- Field :: new( "l_tax" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
73
- Field :: new( "l_returnflag" , DataType :: Utf8 , false ) ,
74
- Field :: new( "l_linestatus" , DataType :: Utf8 , false ) ,
75
- Field :: new( "l_shipdate" , DataType :: Date32 , false ) ,
76
- Field :: new( "l_commitdate" , DataType :: Date32 , false ) ,
77
- Field :: new( "l_receiptdate" , DataType :: Date32 , false ) ,
78
- Field :: new( "l_shipinstruct" , DataType :: Utf8 , false ) ,
79
- Field :: new( "l_shipmode" , DataType :: Utf8 , false ) ,
80
- Field :: new( "l_comment" , DataType :: Utf8 , false ) ,
81
- ] ) ;
82
-
83
- let orders_schema = Schema :: new ( vec ! [
84
- Field :: new( "o_orderkey" , DataType :: Int64 , false ) ,
85
- Field :: new( "o_custkey" , DataType :: Int64 , false ) ,
86
- Field :: new( "o_orderstatus" , DataType :: Utf8 , false ) ,
87
- Field :: new( "o_totalprice" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
88
- Field :: new( "o_orderdate" , DataType :: Date32 , false ) ,
89
- Field :: new( "o_orderpriority" , DataType :: Utf8 , false ) ,
90
- Field :: new( "o_clerk" , DataType :: Utf8 , false ) ,
91
- Field :: new( "o_shippriority" , DataType :: Int32 , false ) ,
92
- Field :: new( "o_comment" , DataType :: Utf8 , false ) ,
93
- ] ) ;
94
-
95
- let part_schema = Schema :: new ( vec ! [
96
- Field :: new( "p_partkey" , DataType :: Int64 , false ) ,
97
- Field :: new( "p_name" , DataType :: Utf8 , false ) ,
98
- Field :: new( "p_mfgr" , DataType :: Utf8 , false ) ,
99
- Field :: new( "p_brand" , DataType :: Utf8 , false ) ,
100
- Field :: new( "p_type" , DataType :: Utf8 , false ) ,
101
- Field :: new( "p_size" , DataType :: Int32 , false ) ,
102
- Field :: new( "p_container" , DataType :: Utf8 , false ) ,
103
- Field :: new( "p_retailprice" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
104
- Field :: new( "p_comment" , DataType :: Utf8 , false ) ,
105
- ] ) ;
106
-
107
- let supplier_schema = Schema :: new ( vec ! [
108
- Field :: new( "s_suppkey" , DataType :: Int64 , false ) ,
109
- Field :: new( "s_name" , DataType :: Utf8 , false ) ,
110
- Field :: new( "s_address" , DataType :: Utf8 , false ) ,
111
- Field :: new( "s_nationkey" , DataType :: Int64 , false ) ,
112
- Field :: new( "s_phone" , DataType :: Utf8 , false ) ,
113
- Field :: new( "s_acctbal" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
114
- Field :: new( "s_comment" , DataType :: Utf8 , false ) ,
115
- ] ) ;
116
-
117
- let partsupp_schema = Schema :: new ( vec ! [
118
- Field :: new( "ps_partkey" , DataType :: Int64 , false ) ,
119
- Field :: new( "ps_suppkey" , DataType :: Int64 , false ) ,
120
- Field :: new( "ps_availqty" , DataType :: Int32 , false ) ,
121
- Field :: new( "ps_supplycost" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
122
- Field :: new( "ps_comment" , DataType :: Utf8 , false ) ,
123
- ] ) ;
124
-
125
- let customer_schema = Schema :: new ( vec ! [
126
- Field :: new( "c_custkey" , DataType :: Int64 , false ) ,
127
- Field :: new( "c_name" , DataType :: Utf8 , false ) ,
128
- Field :: new( "c_address" , DataType :: Utf8 , false ) ,
129
- Field :: new( "c_nationkey" , DataType :: Int64 , false ) ,
130
- Field :: new( "c_phone" , DataType :: Utf8 , false ) ,
131
- Field :: new( "c_acctbal" , DataType :: Decimal128 ( 15 , 2 ) , false ) ,
132
- Field :: new( "c_mktsegment" , DataType :: Utf8 , false ) ,
133
- Field :: new( "c_comment" , DataType :: Utf8 , false ) ,
134
- ] ) ;
135
-
136
- let nation_schema = Schema :: new ( vec ! [
137
- Field :: new( "n_nationkey" , DataType :: Int64 , false ) ,
138
- Field :: new( "n_name" , DataType :: Utf8 , false ) ,
139
- Field :: new( "n_regionkey" , DataType :: Int64 , false ) ,
140
- Field :: new( "n_comment" , DataType :: Utf8 , false ) ,
141
- ] ) ;
142
-
143
- let region_schema = Schema :: new ( vec ! [
144
- Field :: new( "r_regionkey" , DataType :: Int64 , false ) ,
145
- Field :: new( "r_name" , DataType :: Utf8 , false ) ,
146
- Field :: new( "r_comment" , DataType :: Utf8 , false ) ,
147
- ] ) ;
148
-
149
- [
150
- ( "lineitem" . to_string ( ) , lineitem_schema) ,
151
- ( "orders" . to_string ( ) , orders_schema) ,
152
- ( "part" . to_string ( ) , part_schema) ,
153
- ( "supplier" . to_string ( ) , supplier_schema) ,
154
- ( "partsupp" . to_string ( ) , partsupp_schema) ,
155
- ( "customer" . to_string ( ) , customer_schema) ,
156
- ( "nation" . to_string ( ) , nation_schema) ,
157
- ( "region" . to_string ( ) , region_schema) ,
158
- ]
159
- }
160
-
161
66
fn create_context ( ) -> SessionContext {
162
67
let ctx = SessionContext :: new ( ) ;
163
68
ctx. register_table ( "t1" , create_table_provider ( "a" , 200 ) )
@@ -168,16 +73,19 @@ fn create_context() -> SessionContext {
168
73
. unwrap ( ) ;
169
74
ctx. register_table ( "t1000" , create_table_provider ( "d" , 1000 ) )
170
75
. unwrap ( ) ;
76
+ ctx
77
+ }
171
78
172
- let tpch_schemas = create_tpch_schemas ( ) ;
173
- tpch_schemas. iter ( ) . for_each ( |( name, schema) | {
79
+ /// Register the table definitions as a MemTable with the context and return the
80
+ /// context
81
+ fn register_defs ( ctx : SessionContext , defs : Vec < TableDef > ) -> SessionContext {
82
+ defs. iter ( ) . for_each ( |TableDef { name, schema } | {
174
83
ctx. register_table (
175
84
name,
176
85
Arc :: new ( MemTable :: try_new ( Arc :: new ( schema. clone ( ) ) , vec ! [ ] ) . unwrap ( ) ) ,
177
86
)
178
87
. unwrap ( ) ;
179
88
} ) ;
180
-
181
89
ctx
182
90
}
183
91
@@ -236,40 +144,79 @@ fn criterion_benchmark(c: &mut Criterion) {
236
144
} )
237
145
} ) ;
238
146
147
+ // --- TPC-H ---
148
+
149
+ let tpch_ctx = register_defs ( SessionContext :: new ( ) , tpch_schemas ( ) ) ;
150
+
239
151
let tpch_queries = [
240
152
"q1" , "q2" , "q3" , "q4" , "q5" , "q6" , "q7" , "q8" , "q9" , "q10" , "q11" , "q12" , "q13" ,
241
153
"q14" , // "q15", q15 has multiple SQL statements which is not supported
242
154
"q16" , "q17" , "q18" , "q19" , "q20" , "q21" , "q22" ,
243
155
] ;
244
156
245
157
for q in tpch_queries {
246
- let sql = std :: fs :: read_to_string ( format ! ( "../../benchmarks/queries/{}.sql" , q ) )
247
- . unwrap ( ) ;
158
+ let sql =
159
+ std :: fs :: read_to_string ( format ! ( "../../benchmarks/queries/{q}.sql" ) ) . unwrap ( ) ;
248
160
c. bench_function ( & format ! ( "physical_plan_tpch_{}" , q) , |b| {
249
- b. iter ( || physical_plan ( & ctx , & sql) )
161
+ b. iter ( || physical_plan ( & tpch_ctx , & sql) )
250
162
} ) ;
251
163
}
252
164
253
165
let all_tpch_sql_queries = tpch_queries
254
166
. iter ( )
255
167
. map ( |q| {
256
- std:: fs:: read_to_string ( format ! ( "../../benchmarks/queries/{}.sql" , q) )
257
- . unwrap ( )
168
+ std:: fs:: read_to_string ( format ! ( "../../benchmarks/queries/{q}.sql" ) ) . unwrap ( )
258
169
} )
259
170
. collect :: < Vec < _ > > ( ) ;
260
171
261
172
c. bench_function ( "physical_plan_tpch_all" , |b| {
262
173
b. iter ( || {
263
174
for sql in & all_tpch_sql_queries {
264
- physical_plan ( & ctx , sql)
175
+ physical_plan ( & tpch_ctx , sql)
265
176
}
266
177
} )
267
178
} ) ;
268
179
269
180
c. bench_function ( "logical_plan_tpch_all" , |b| {
270
181
b. iter ( || {
271
182
for sql in & all_tpch_sql_queries {
272
- logical_plan ( & ctx, sql)
183
+ logical_plan ( & tpch_ctx, sql)
184
+ }
185
+ } )
186
+ } ) ;
187
+
188
+ // --- TPC-DS ---
189
+
190
+ let tpcds_ctx = register_defs ( SessionContext :: new ( ) , tpcds_schemas ( ) ) ;
191
+
192
+ // 10, 35: Physical plan does not support logical expression Exists(<subquery>)
193
+ // 45: Physical plan does not support logical expression (<subquery>)
194
+ // 41: Optimizing disjunctions not supported
195
+ let ignored = [ 10 , 35 , 41 , 45 ] ;
196
+
197
+ let raw_tpcds_sql_queries = ( 1 ..100 )
198
+ . filter ( |q| !ignored. contains ( q) )
199
+ . map ( |q| std:: fs:: read_to_string ( format ! ( "./tests/tpc-ds/{q}.sql" ) ) . unwrap ( ) )
200
+ . collect :: < Vec < _ > > ( ) ;
201
+
202
+ // some queries have multiple statements
203
+ let all_tpcds_sql_queries = raw_tpcds_sql_queries
204
+ . iter ( )
205
+ . flat_map ( |sql| sql. split ( ';' ) . filter ( |s| !s. trim ( ) . is_empty ( ) ) )
206
+ . collect :: < Vec < _ > > ( ) ;
207
+
208
+ c. bench_function ( "physical_plan_tpcds_all" , |b| {
209
+ b. iter ( || {
210
+ for sql in & all_tpcds_sql_queries {
211
+ physical_plan ( & tpcds_ctx, sql)
212
+ }
213
+ } )
214
+ } ) ;
215
+
216
+ c. bench_function ( "logical_plan_tpcds_all" , |b| {
217
+ b. iter ( || {
218
+ for sql in & all_tpcds_sql_queries {
219
+ logical_plan ( & tpcds_ctx, sql)
273
220
}
274
221
} )
275
222
} ) ;
0 commit comments