@@ -21,13 +21,11 @@ use std::collections::HashMap;
21
21
use std:: convert:: { TryFrom , TryInto } ;
22
22
use std:: sync:: Arc ;
23
23
24
- use crate :: common:: proto_error;
25
- use crate :: convert_required;
26
- use crate :: logical_plan:: { self , csv_writer_options_from_proto} ;
27
- use crate :: protobuf:: physical_expr_node:: ExprType ;
28
- use crate :: protobuf:: { self , copy_to_node} ;
29
-
30
24
use arrow:: compute:: SortOptions ;
25
+ use chrono:: { TimeZone , Utc } ;
26
+ use object_store:: path:: Path ;
27
+ use object_store:: ObjectMeta ;
28
+
31
29
use datafusion:: arrow:: datatypes:: Schema ;
32
30
use datafusion:: datasource:: file_format:: csv:: CsvSink ;
33
31
use datafusion:: datasource:: file_format:: json:: JsonSink ;
@@ -57,13 +55,15 @@ use datafusion_common::file_options::json_writer::JsonWriterOptions;
57
55
use datafusion_common:: parsers:: CompressionTypeVariant ;
58
56
use datafusion_common:: stats:: Precision ;
59
57
use datafusion_common:: { not_impl_err, DataFusionError , JoinSide , Result , ScalarValue } ;
60
-
61
- use chrono:: { TimeZone , Utc } ;
62
58
use datafusion_expr:: ScalarFunctionDefinition ;
63
- use object_store:: path:: Path ;
64
- use object_store:: ObjectMeta ;
65
59
66
- use super :: { DefaultPhysicalExtensionCodec , PhysicalExtensionCodec } ;
60
+ use crate :: common:: proto_error;
61
+ use crate :: convert_required;
62
+ use crate :: logical_plan:: { self , csv_writer_options_from_proto} ;
63
+ use crate :: protobuf:: physical_expr_node:: ExprType ;
64
+ use crate :: protobuf:: { self , copy_to_node} ;
65
+
66
+ use super :: PhysicalExtensionCodec ;
67
67
68
68
impl From < & protobuf:: PhysicalColumn > for Column {
69
69
fn from ( c : & protobuf:: PhysicalColumn ) -> Column {
@@ -76,9 +76,10 @@ impl From<&protobuf::PhysicalColumn> for Column {
76
76
/// # Arguments
77
77
///
78
78
/// * `proto` - Input proto with physical sort expression node
79
- /// * `registry` - A registry knows how to build logical expressions out of user-defined function' names
79
+ /// * `registry` - A registry that knows how to build logical expressions out of user-defined function names
80
80
/// * `input_schema` - The Arrow schema for the input, used for determining expression data types
81
81
/// when performing type coercion.
82
+ /// * `codec` - An extension codec used to decode custom UDFs.
82
83
pub fn parse_physical_sort_expr (
83
84
proto : & protobuf:: PhysicalSortExprNode ,
84
85
registry : & dyn FunctionRegistry ,
@@ -102,9 +103,10 @@ pub fn parse_physical_sort_expr(
102
103
/// # Arguments
103
104
///
104
105
/// * `proto` - Input proto with vector of physical sort expression node
105
- /// * `registry` - A registry knows how to build logical expressions out of user-defined function' names
106
+ /// * `registry` - A registry that knows how to build logical expressions out of user-defined function names
106
107
/// * `input_schema` - The Arrow schema for the input, used for determining expression data types
107
108
/// when performing type coercion.
109
+ /// * `codec` - An extension codec used to decode custom UDFs.
108
110
pub fn parse_physical_sort_exprs (
109
111
proto : & [ protobuf:: PhysicalSortExprNode ] ,
110
112
registry : & dyn FunctionRegistry ,
@@ -123,25 +125,26 @@ pub fn parse_physical_sort_exprs(
123
125
///
124
126
/// # Arguments
125
127
///
126
- /// * `proto` - Input proto with physical window exprression node.
128
+ /// * `proto` - Input proto with physical window expression node.
127
129
/// * `name` - Name of the window expression.
128
- /// * `registry` - A registry knows how to build logical expressions out of user-defined function' names
130
+ /// * `registry` - A registry that knows how to build logical expressions out of user-defined function names
129
131
/// * `input_schema` - The Arrow schema for the input, used for determining expression data types
130
132
/// when performing type coercion.
133
+ /// * `codec` - An extension codec used to decode custom UDFs.
131
134
pub fn parse_physical_window_expr (
132
135
proto : & protobuf:: PhysicalWindowExprNode ,
133
136
registry : & dyn FunctionRegistry ,
134
137
input_schema : & Schema ,
138
+ codec : & dyn PhysicalExtensionCodec ,
135
139
) -> Result < Arc < dyn WindowExpr > > {
136
- let codec = DefaultPhysicalExtensionCodec { } ;
137
140
let window_node_expr =
138
- parse_physical_exprs ( & proto. args , registry, input_schema, & codec) ?;
141
+ parse_physical_exprs ( & proto. args , registry, input_schema, codec) ?;
139
142
140
143
let partition_by =
141
- parse_physical_exprs ( & proto. partition_by , registry, input_schema, & codec) ?;
144
+ parse_physical_exprs ( & proto. partition_by , registry, input_schema, codec) ?;
142
145
143
146
let order_by =
144
- parse_physical_sort_exprs ( & proto. order_by , registry, input_schema, & codec) ?;
147
+ parse_physical_sort_exprs ( & proto. order_by , registry, input_schema, codec) ?;
145
148
146
149
let window_frame = proto
147
150
. window_frame
@@ -187,9 +190,10 @@ where
187
190
/// # Arguments
188
191
///
189
192
/// * `proto` - Input proto with physical expression node
190
- /// * `registry` - A registry knows how to build logical expressions out of user-defined function' names
193
+ /// * `registry` - A registry that knows how to build logical expressions out of user-defined function names
191
194
/// * `input_schema` - The Arrow schema for the input, used for determining expression data types
192
195
/// when performing type coercion.
196
+ /// * `codec` - An extension codec used to decode custom UDFs.
193
197
pub fn parse_physical_expr (
194
198
proto : & protobuf:: PhysicalExprNode ,
195
199
registry : & dyn FunctionRegistry ,
@@ -213,13 +217,15 @@ pub fn parse_physical_expr(
213
217
registry,
214
218
"left" ,
215
219
input_schema,
220
+ codec,
216
221
) ?,
217
222
logical_plan:: from_proto:: from_proto_binary_op ( & binary_expr. op ) ?,
218
223
parse_required_physical_expr (
219
224
binary_expr. r . as_deref ( ) ,
220
225
registry,
221
226
"right" ,
222
227
input_schema,
228
+ codec,
223
229
) ?,
224
230
) ) ,
225
231
ExprType :: AggregateExpr ( _) => {
@@ -241,6 +247,7 @@ pub fn parse_physical_expr(
241
247
registry,
242
248
"expr" ,
243
249
input_schema,
250
+ codec,
244
251
) ?) )
245
252
}
246
253
ExprType :: IsNotNullExpr ( e) => {
@@ -249,20 +256,23 @@ pub fn parse_physical_expr(
249
256
registry,
250
257
"expr" ,
251
258
input_schema,
259
+ codec,
252
260
) ?) )
253
261
}
254
262
ExprType :: NotExpr ( e) => Arc :: new ( NotExpr :: new ( parse_required_physical_expr (
255
263
e. expr . as_deref ( ) ,
256
264
registry,
257
265
"expr" ,
258
266
input_schema,
267
+ codec,
259
268
) ?) ) ,
260
269
ExprType :: Negative ( e) => {
261
270
Arc :: new ( NegativeExpr :: new ( parse_required_physical_expr (
262
271
e. expr . as_deref ( ) ,
263
272
registry,
264
273
"expr" ,
265
274
input_schema,
275
+ codec,
266
276
) ?) )
267
277
}
268
278
ExprType :: InList ( e) => in_list (
@@ -271,6 +281,7 @@ pub fn parse_physical_expr(
271
281
registry,
272
282
"expr" ,
273
283
input_schema,
284
+ codec,
274
285
) ?,
275
286
parse_physical_exprs ( & e. list , registry, input_schema, codec) ?,
276
287
& e. negated ,
@@ -290,12 +301,14 @@ pub fn parse_physical_expr(
290
301
registry,
291
302
"when_expr" ,
292
303
input_schema,
304
+ codec,
293
305
) ?,
294
306
parse_required_physical_expr (
295
307
e. then_expr . as_ref ( ) ,
296
308
registry,
297
309
"then_expr" ,
298
310
input_schema,
311
+ codec,
299
312
) ?,
300
313
) )
301
314
} )
@@ -311,6 +324,7 @@ pub fn parse_physical_expr(
311
324
registry,
312
325
"expr" ,
313
326
input_schema,
327
+ codec,
314
328
) ?,
315
329
convert_required ! ( e. arrow_type) ?,
316
330
None ,
@@ -321,6 +335,7 @@ pub fn parse_physical_expr(
321
335
registry,
322
336
"expr" ,
323
337
input_schema,
338
+ codec,
324
339
) ?,
325
340
convert_required ! ( e. arrow_type) ?,
326
341
) ) ,
@@ -371,12 +386,14 @@ pub fn parse_physical_expr(
371
386
registry,
372
387
"expr" ,
373
388
input_schema,
389
+ codec,
374
390
) ?,
375
391
parse_required_physical_expr (
376
392
like_expr. pattern . as_deref ( ) ,
377
393
registry,
378
394
"pattern" ,
379
395
input_schema,
396
+ codec,
380
397
) ?,
381
398
) ) ,
382
399
} ;
@@ -389,9 +406,9 @@ fn parse_required_physical_expr(
389
406
registry : & dyn FunctionRegistry ,
390
407
field : & str ,
391
408
input_schema : & Schema ,
409
+ codec : & dyn PhysicalExtensionCodec ,
392
410
) -> Result < Arc < dyn PhysicalExpr > > {
393
- let codec = DefaultPhysicalExtensionCodec { } ;
394
- expr. map ( |e| parse_physical_expr ( e, registry, input_schema, & codec) )
411
+ expr. map ( |e| parse_physical_expr ( e, registry, input_schema, codec) )
395
412
. transpose ( ) ?
396
413
. ok_or_else ( || {
397
414
DataFusionError :: Internal ( format ! ( "Missing required field {field:?}" ) )
@@ -433,15 +450,15 @@ pub fn parse_protobuf_hash_partitioning(
433
450
partitioning : Option < & protobuf:: PhysicalHashRepartition > ,
434
451
registry : & dyn FunctionRegistry ,
435
452
input_schema : & Schema ,
453
+ codec : & dyn PhysicalExtensionCodec ,
436
454
) -> Result < Option < Partitioning > > {
437
455
match partitioning {
438
456
Some ( hash_part) => {
439
- let codec = DefaultPhysicalExtensionCodec { } ;
440
457
let expr = parse_physical_exprs (
441
458
& hash_part. hash_expr ,
442
459
registry,
443
460
input_schema,
444
- & codec,
461
+ codec,
445
462
) ?;
446
463
447
464
Ok ( Some ( Partitioning :: Hash (
@@ -456,6 +473,7 @@ pub fn parse_protobuf_hash_partitioning(
456
473
pub fn parse_protobuf_file_scan_config (
457
474
proto : & protobuf:: FileScanExecConf ,
458
475
registry : & dyn FunctionRegistry ,
476
+ codec : & dyn PhysicalExtensionCodec ,
459
477
) -> Result < FileScanConfig > {
460
478
let schema: Arc < Schema > = Arc :: new ( convert_required ! ( proto. schema) ?) ;
461
479
let projection = proto
@@ -489,7 +507,7 @@ pub fn parse_protobuf_file_scan_config(
489
507
. collect :: < Result < Vec < _ > > > ( ) ?;
490
508
491
509
// Remove partition columns from the schema after recreating table_partition_cols
492
- // because the partition columns are not in the file. They are present to allow the
510
+ // because the partition columns are not in the file. They are present to allow
493
511
// the partition column types to be reconstructed after serde.
494
512
let file_schema = Arc :: new ( Schema :: new (
495
513
schema
@@ -502,12 +520,11 @@ pub fn parse_protobuf_file_scan_config(
502
520
503
521
let mut output_ordering = vec ! [ ] ;
504
522
for node_collection in & proto. output_ordering {
505
- let codec = DefaultPhysicalExtensionCodec { } ;
506
523
let sort_expr = parse_physical_sort_exprs (
507
524
& node_collection. physical_sort_expr_nodes ,
508
525
registry,
509
526
& schema,
510
- & codec,
527
+ codec,
511
528
) ?;
512
529
output_ordering. push ( sort_expr) ;
513
530
}
0 commit comments