@@ -22,7 +22,9 @@ use std::collections::HashMap;
22
22
use arrow_schema:: {
23
23
DataType , DECIMAL128_MAX_PRECISION , DECIMAL256_MAX_PRECISION , DECIMAL_DEFAULT_SCALE ,
24
24
} ;
25
- use datafusion_common:: tree_node:: { Transformed , TransformedResult , TreeNode } ;
25
+ use datafusion_common:: tree_node:: {
26
+ Transformed , TransformedResult , TreeNode , TreeNodeRecursion ,
27
+ } ;
26
28
use datafusion_common:: {
27
29
exec_err, internal_err, plan_err, Column , DataFusionError , Result , ScalarValue ,
28
30
} ;
@@ -267,11 +269,13 @@ pub(crate) fn normalize_ident(id: Ident) -> String {
267
269
/// - For list column: unnest(col) with type list -> unnest(col) with type list::item
268
270
/// - For struct column: unnest(struct(field1, field2)) -> unnest(struct).field1, unnest(struct).field2
269
271
/// The transformed exprs will be used in the outer projection
270
- pub ( crate ) fn recursive_transform_unnest (
272
+ /// If along the path from root to bottom, there are multiple unnest expressions, the transformation
273
+ /// is done only for the bottom expression
274
+ pub ( crate ) fn transform_bottom_unnest (
271
275
input : & LogicalPlan ,
272
276
unnest_placeholder_columns : & mut Vec < String > ,
273
277
inner_projection_exprs : & mut Vec < Expr > ,
274
- original_expr : Expr ,
278
+ original_expr : & Expr ,
275
279
) -> Result < Vec < Expr > > {
276
280
let mut transform =
277
281
|unnest_expr : & Expr , expr_in_unnest : & Expr | -> Result < Vec < Expr > > {
@@ -298,35 +302,53 @@ pub(crate) fn recursive_transform_unnest(
298
302
. collect :: < Vec < _ > > ( ) ;
299
303
Ok ( expr)
300
304
} ;
301
- // expr transformed maybe either the same, or different from the originals exprs
302
- // for example:
303
- // - unnest(struct_col) will be transformed into unnest(struct_col).field1, unnest(struct_col).field2
305
+ // This transformation is only done for list unnest
306
+ // struct unnest is done at the root level, and at a later stage
307
+ // because the TreeNode API only supports transforming into 1 Expr, while
308
+ // Unnest struct will be transformed into multiple Exprs
309
+ // TODO: This can be resolved after this issue is resolved: https://github.com/apache/datafusion/issues/10102
310
+ //
311
+ // The transformation looks like:
304
312
// - unnest(array_col) will be transformed into unnest(array_col)
305
313
// - unnest(array_col) + 1 will be transformed into unnest(array_col) + 1
306
-
307
- // Specifically handle root level unnest expr, this is the only place
308
- // unnest on struct can be handled
309
- if let Expr :: Unnest ( Unnest { expr : ref arg } ) = original_expr {
310
- return transform ( & original_expr, arg) ;
311
- }
312
314
let Transformed {
313
- data : transformed_expr,
314
- transformed,
315
- tnr : _,
316
- } = original_expr. transform_up ( |expr : Expr | {
317
- if let Expr :: Unnest ( Unnest { expr : ref arg } ) = expr {
318
- let ( data_type, _) = arg. data_type_and_nullable ( input. schema ( ) ) ?;
319
- if let DataType :: Struct ( _) = data_type {
320
- return internal_err ! ( "unnest on struct can ony be applied at the root level of select expression" ) ;
321
- }
322
- let transformed_exprs = transform ( & expr, arg) ?;
323
- Ok ( Transformed :: yes ( transformed_exprs[ 0 ] . clone ( ) ) )
324
- } else {
325
- Ok ( Transformed :: no ( expr) )
315
+ data : transformed_expr,
316
+ transformed,
317
+ tnr : _,
318
+ } = original_expr. clone ( ) . transform_up ( |expr : Expr | {
319
+ let is_root_expr = & expr == original_expr;
320
+ // Root expr is transformed separately
321
+ if is_root_expr {
322
+ return Ok ( Transformed :: no ( expr) ) ;
323
+ }
324
+ if let Expr :: Unnest ( Unnest { expr : ref arg } ) = expr {
325
+ let ( data_type, _) = arg. data_type_and_nullable ( input. schema ( ) ) ?;
326
+
327
+ if let DataType :: Struct ( _) = data_type {
328
+ return internal_err ! ( "unnest on struct can ony be applied at the root level of select expression" ) ;
326
329
}
327
- } ) ?;
330
+
331
+ let mut transformed_exprs = transform ( & expr, arg) ?;
332
+ // root_expr.push(transformed_exprs[0].clone());
333
+ Ok ( Transformed :: new (
334
+ transformed_exprs. swap_remove ( 0 ) ,
335
+ true ,
336
+ TreeNodeRecursion :: Stop ,
337
+ ) )
338
+ } else {
339
+ Ok ( Transformed :: no ( expr) )
340
+ }
341
+ } ) ?;
328
342
329
343
if !transformed {
344
+ // Because the root expr needs to be transformed separately,
345
+ // unnest struct is only possible here
346
+ // The transformation looks like
347
+ // - unnest(struct_col) will be transformed into unnest(struct_col).field1, unnest(struct_col).field2
348
+ if let Expr :: Unnest ( Unnest { expr : ref arg } ) = transformed_expr {
349
+ return transform ( & transformed_expr, arg) ;
350
+ }
351
+
330
352
if matches ! ( & transformed_expr, Expr :: Column ( _) ) {
331
353
inner_projection_exprs. push ( transformed_expr. clone ( ) ) ;
332
354
Ok ( vec ! [ transformed_expr] )
@@ -351,12 +373,13 @@ mod tests {
351
373
use arrow_schema:: Fields ;
352
374
use datafusion_common:: { DFSchema , Result } ;
353
375
use datafusion_expr:: { col, lit, unnest, EmptyRelation , LogicalPlan } ;
376
+ use datafusion_functions:: core:: expr_ext:: FieldAccessor ;
354
377
use datafusion_functions_aggregate:: expr_fn:: count;
355
378
356
- use crate :: utils:: { recursive_transform_unnest , resolve_positions_to_exprs } ;
379
+ use crate :: utils:: { resolve_positions_to_exprs , transform_bottom_unnest } ;
357
380
358
381
#[ test]
359
- fn test_recursive_transform_unnest ( ) -> Result < ( ) > {
382
+ fn test_transform_bottom_unnest ( ) -> Result < ( ) > {
360
383
let schema = Schema :: new ( vec ! [
361
384
Field :: new(
362
385
"struct_col" ,
@@ -390,11 +413,11 @@ mod tests {
390
413
391
414
// unnest(struct_col)
392
415
let original_expr = unnest ( col ( "struct_col" ) ) ;
393
- let transformed_exprs = recursive_transform_unnest (
416
+ let transformed_exprs = transform_bottom_unnest (
394
417
& input,
395
418
& mut unnest_placeholder_columns,
396
419
& mut inner_projection_exprs,
397
- original_expr,
420
+ & original_expr,
398
421
) ?;
399
422
assert_eq ! (
400
423
transformed_exprs,
@@ -413,11 +436,11 @@ mod tests {
413
436
414
437
// unnest(array_col) + 1
415
438
let original_expr = unnest ( col ( "array_col" ) ) . add ( lit ( 1i64 ) ) ;
416
- let transformed_exprs = recursive_transform_unnest (
439
+ let transformed_exprs = transform_bottom_unnest (
417
440
& input,
418
441
& mut unnest_placeholder_columns,
419
442
& mut inner_projection_exprs,
420
- original_expr,
443
+ & original_expr,
421
444
) ?;
422
445
assert_eq ! (
423
446
unnest_placeholder_columns,
@@ -440,6 +463,62 @@ mod tests {
440
463
]
441
464
) ;
442
465
466
+ // a nested structure struct[[]]
467
+ let schema = Schema :: new ( vec ! [
468
+ Field :: new(
469
+ "struct_col" , // {matrix: [[1,2,3]]}
470
+ ArrowDataType :: Struct ( Fields :: from( vec![ Field :: new(
471
+ "matrix" ,
472
+ ArrowDataType :: List ( Arc :: new( Field :: new(
473
+ "matrix_row" ,
474
+ ArrowDataType :: List ( Arc :: new( Field :: new(
475
+ "item" ,
476
+ ArrowDataType :: Int64 ,
477
+ true ,
478
+ ) ) ) ,
479
+ true ,
480
+ ) ) ) ,
481
+ true ,
482
+ ) ] ) ) ,
483
+ false ,
484
+ ) ,
485
+ Field :: new( "int_col" , ArrowDataType :: Int32 , false ) ,
486
+ ] ) ;
487
+
488
+ let dfschema = DFSchema :: try_from ( schema) ?;
489
+
490
+ let input = LogicalPlan :: EmptyRelation ( EmptyRelation {
491
+ produce_one_row : false ,
492
+ schema : Arc :: new ( dfschema) ,
493
+ } ) ;
494
+
495
+ let mut unnest_placeholder_columns = vec ! [ ] ;
496
+ let mut inner_projection_exprs = vec ! [ ] ;
497
+
498
+ // An expr with multiple unnest
499
+ let original_expr = unnest ( unnest ( col ( "struct_col" ) . field ( "matrix" ) ) ) ;
500
+ let transformed_exprs = transform_bottom_unnest (
501
+ & input,
502
+ & mut unnest_placeholder_columns,
503
+ & mut inner_projection_exprs,
504
+ & original_expr,
505
+ ) ?;
506
+ // Only the innermost (bottom-most) unnest is transformed
507
+ assert_eq ! (
508
+ transformed_exprs,
509
+ vec![ unnest( col( "unnest(struct_col[matrix])" ) ) ]
510
+ ) ;
511
+ assert_eq ! (
512
+ unnest_placeholder_columns,
513
+ vec![ "unnest(struct_col[matrix])" ]
514
+ ) ;
515
+ assert_eq ! (
516
+ inner_projection_exprs,
517
+ vec![ col( "struct_col" )
518
+ . field( "matrix" )
519
+ . alias( "unnest(struct_col[matrix])" ) , ]
520
+ ) ;
521
+
443
522
Ok ( ( ) )
444
523
}
445
524
0 commit comments