@@ -17,6 +17,8 @@ use std::collections::BTreeMap;
17
17
use std:: collections:: HashSet ;
18
18
use std:: sync:: Arc ;
19
19
20
+ use databend_common_ast:: parser:: token:: TokenKind ;
21
+ use databend_common_ast:: parser:: tokenize_sql;
20
22
use databend_common_catalog:: catalog:: CatalogManager ;
21
23
use databend_common_catalog:: plan:: DataSourcePlan ;
22
24
use databend_common_catalog:: plan:: Filters ;
@@ -29,20 +31,21 @@ use databend_common_catalog::plan::VirtualColumnInfo;
29
31
use databend_common_exception:: ErrorCode ;
30
32
use databend_common_exception:: Result ;
31
33
use databend_common_expression:: type_check:: check_function;
32
- use databend_common_expression:: type_check:: get_simple_cast_function;
33
34
use databend_common_expression:: types:: DataType ;
34
35
use databend_common_expression:: ConstantFolder ;
35
36
use databend_common_expression:: DataField ;
36
37
use databend_common_expression:: DataSchemaRef ;
37
38
use databend_common_expression:: DataSchemaRefExt ;
38
39
use databend_common_expression:: FieldIndex ;
39
40
use databend_common_expression:: RemoteExpr ;
41
+ use databend_common_expression:: Scalar ;
40
42
use databend_common_expression:: TableDataType ;
41
- use databend_common_expression:: TableField ;
42
43
use databend_common_expression:: TableSchema ;
43
44
use databend_common_expression:: TableSchemaRef ;
44
45
use databend_common_expression:: ROW_ID_COL_NAME ;
45
46
use databend_common_functions:: BUILTIN_FUNCTIONS ;
47
+ use jsonb:: keypath:: KeyPath ;
48
+ use jsonb:: keypath:: KeyPaths ;
46
49
use rand:: distributions:: Bernoulli ;
47
50
use rand:: distributions:: Distribution ;
48
51
use rand:: thread_rng;
@@ -124,7 +127,24 @@ impl PhysicalPlanBuilder {
124
127
let scan = if scan. columns . is_empty ( ) {
125
128
scan. clone ( )
126
129
} else {
127
- let columns = scan. columns . clone ( ) ;
130
+ let mut columns = scan. columns . clone ( ) ;
131
+
132
+ let required_column_ids: Vec < _ > = required. difference ( & columns) . cloned ( ) . collect ( ) ;
133
+ if !required_column_ids. is_empty ( ) {
134
+ // add virtual columns to table scan columns.
135
+ let read_guard = self . metadata . read ( ) ;
136
+ let virtual_column_id_set = read_guard
137
+ . virtual_columns_by_table_index ( scan. table_index )
138
+ . iter ( )
139
+ . map ( |column| column. index ( ) )
140
+ . collect :: < HashSet < _ > > ( ) ;
141
+ for required_column_id in required_column_ids {
142
+ if virtual_column_id_set. contains ( & required_column_id) {
143
+ columns. insert ( required_column_id) ;
144
+ }
145
+ }
146
+ }
147
+
128
148
let mut prewhere = scan. prewhere . clone ( ) ;
129
149
let mut used: ColumnSet = required. intersection ( & columns) . cloned ( ) . collect ( ) ;
130
150
if scan. is_lazy_table {
@@ -150,9 +170,9 @@ impl PhysicalPlanBuilder {
150
170
151
171
// 2. Build physical plan.
152
172
let mut has_inner_column = false ;
153
- let mut has_virtual_column = false ;
154
173
let mut name_mapping = BTreeMap :: new ( ) ;
155
174
let mut project_internal_columns = BTreeMap :: new ( ) ;
175
+ let mut project_virtual_columns = BTreeMap :: new ( ) ;
156
176
let metadata = self . metadata . read ( ) . clone ( ) ;
157
177
158
178
for index in scan. columns . iter ( ) {
@@ -171,8 +191,8 @@ impl PhysicalPlanBuilder {
171
191
} ) => {
172
192
project_internal_columns. insert ( * index, internal_column. to_owned ( ) ) ;
173
193
}
174
- ColumnEntry :: VirtualColumn ( _ ) => {
175
- has_virtual_column = true ;
194
+ ColumnEntry :: VirtualColumn ( virtual_column ) => {
195
+ project_virtual_columns . insert ( * index , virtual_column . clone ( ) ) ;
176
196
}
177
197
_ => { }
178
198
}
@@ -225,8 +245,12 @@ impl PhysicalPlanBuilder {
225
245
table_schema = Arc :: new ( schema) ;
226
246
}
227
247
228
- let push_downs =
229
- self . push_downs ( & scan, & table_schema, has_inner_column, has_virtual_column) ?;
248
+ let push_downs = self . push_downs (
249
+ & scan,
250
+ & table_schema,
251
+ project_virtual_columns,
252
+ has_inner_column,
253
+ ) ?;
230
254
231
255
let mut source = table
232
256
. read_plan (
@@ -338,8 +362,8 @@ impl PhysicalPlanBuilder {
338
362
& self ,
339
363
scan : & crate :: plans:: Scan ,
340
364
table_schema : & TableSchema ,
365
+ virtual_columns : BTreeMap < IndexType , VirtualColumn > ,
341
366
has_inner_column : bool ,
342
- has_virtual_column : bool ,
343
367
) -> Result < PushDownInfo > {
344
368
let metadata = self . metadata . read ( ) . clone ( ) ;
345
369
let projection = Self :: build_projection (
@@ -353,6 +377,7 @@ impl PhysicalPlanBuilder {
353
377
true ,
354
378
true ,
355
379
) ;
380
+ let has_virtual_column = !virtual_columns. is_empty ( ) ;
356
381
357
382
let output_columns = if has_virtual_column {
358
383
Some ( Self :: build_projection (
@@ -524,7 +549,7 @@ impl PhysicalPlanBuilder {
524
549
}
525
550
}
526
551
527
- let virtual_column = self . build_virtual_column ( & scan . columns ) ;
552
+ let virtual_column = self . build_virtual_column ( virtual_columns ) ? ;
528
553
529
554
Ok ( PushDownInfo {
530
555
projection : Some ( projection) ,
@@ -558,63 +583,83 @@ impl PhysicalPlanBuilder {
558
583
}
559
584
}
560
585
561
- fn build_virtual_column ( & self , indices : & ColumnSet ) -> Option < VirtualColumnInfo > {
562
- let mut source_column_ids = HashSet :: new ( ) ;
563
- let mut column_and_indices = Vec :: new ( ) ;
564
- for index in indices. iter ( ) {
565
- if let ColumnEntry :: VirtualColumn ( virtual_column) = self . metadata . read ( ) . column ( * index)
566
- {
567
- source_column_ids. insert ( virtual_column. source_column_id ) ;
568
- let cast_func_name =
569
- if virtual_column. data_type . remove_nullable ( ) != TableDataType :: Variant {
570
- let dest_type = DataType :: from ( & virtual_column. data_type . remove_nullable ( ) ) ;
571
- get_simple_cast_function ( true , & DataType :: Variant , & dest_type)
572
- } else {
573
- None
574
- } ;
575
- let virtual_column_field = VirtualColumnField {
576
- source_column_id : virtual_column. source_column_id ,
577
- source_name : virtual_column. source_column_name . clone ( ) ,
578
- column_id : virtual_column. column_id ,
579
- name : virtual_column. column_name . clone ( ) ,
580
- key_paths : virtual_column. key_paths . clone ( ) ,
581
- cast_func_name,
582
- data_type : Box :: new ( virtual_column. data_type . clone ( ) ) ,
583
- is_created : virtual_column. is_created ,
586
+ fn parse_virtual_column_name ( name : & str ) -> Result < Scalar > {
587
+ let tokens = tokenize_sql ( name) ?;
588
+ let mut i = 0 ;
589
+ let mut key_paths = Vec :: new ( ) ;
590
+ while i < tokens. len ( ) {
591
+ let token = & tokens[ i] ;
592
+ if token. kind == TokenKind :: LBracket {
593
+ i += 1 ;
594
+ if i >= tokens. len ( ) {
595
+ return Err ( ErrorCode :: Internal ( format ! (
596
+ "Invalid virtual column name {}" ,
597
+ name
598
+ ) ) ) ;
599
+ }
600
+ let path_token = & tokens[ i] ;
601
+ let path = path_token. text ( ) ;
602
+ let key_path = if path_token. kind == TokenKind :: LiteralString {
603
+ let s = & path[ 1 ..path. len ( ) - 1 ] ;
604
+ KeyPath :: QuotedName ( std:: borrow:: Cow :: Borrowed ( s) )
605
+ } else if path_token. kind == TokenKind :: LiteralInteger {
606
+ let idx = path. parse :: < i32 > ( ) . unwrap ( ) ;
607
+ KeyPath :: Index ( idx)
608
+ } else {
609
+ return Err ( ErrorCode :: Internal ( format ! (
610
+ "Invalid virtual column name {}" ,
611
+ name
612
+ ) ) ) ;
584
613
} ;
585
- column_and_indices. push ( ( virtual_column_field, * index) ) ;
614
+ key_paths. push ( key_path) ;
615
+ // skip TokenKind::RBracket
616
+ i += 1 ;
586
617
}
618
+ i += 1 ;
587
619
}
588
- if column_and_indices. is_empty ( ) {
589
- return None ;
620
+ let keypaths = KeyPaths { paths : key_paths } ;
621
+
622
+ Ok ( Scalar :: String ( format ! ( "{}" , keypaths) ) )
623
+ }
624
+
625
+ fn build_virtual_column (
626
+ & self ,
627
+ virtual_columns : BTreeMap < IndexType , VirtualColumn > ,
628
+ ) -> Result < Option < VirtualColumnInfo > > {
629
+ if virtual_columns. is_empty ( ) {
630
+ return Ok ( None ) ;
590
631
}
591
- // Make the order of virtual columns the same as their indexes.
592
- column_and_indices. sort_by_key ( |( _, index) | * index) ;
593
-
594
- let virtual_column_fields = column_and_indices
595
- . into_iter ( )
596
- . map ( |( column, _) | column)
597
- . collect :: < Vec < _ > > ( ) ;
598
-
599
- let mut fields = Vec :: with_capacity ( virtual_column_fields. len ( ) ) ;
600
- let next_column_id = virtual_column_fields[ 0 ] . column_id ;
601
- for virtual_column_field in & virtual_column_fields {
602
- let field = TableField :: new_from_column_id (
603
- & virtual_column_field. name ,
604
- * virtual_column_field. data_type . clone ( ) ,
605
- virtual_column_field. column_id ,
606
- ) ;
607
- fields. push ( field) ;
632
+ let mut source_column_ids = HashSet :: new ( ) ;
633
+ let mut virtual_column_fields = Vec :: with_capacity ( virtual_columns. len ( ) ) ;
634
+
635
+ for ( _, virtual_column) in virtual_columns. into_iter ( ) {
636
+ source_column_ids. insert ( virtual_column. source_column_id ) ;
637
+ let target_type = virtual_column. data_type . remove_nullable ( ) ;
638
+
639
+ let key_paths = Self :: parse_virtual_column_name ( & virtual_column. column_name ) ?;
640
+ let cast_func_name = if target_type != TableDataType :: Variant {
641
+ Some ( format ! ( "to_{}" , target_type. to_string( ) . to_lowercase( ) ) )
642
+ } else {
643
+ None
644
+ } ;
645
+
646
+ let virtual_column_field = VirtualColumnField {
647
+ source_column_id : virtual_column. source_column_id ,
648
+ source_name : virtual_column. source_column_name . clone ( ) ,
649
+ column_id : virtual_column. column_id ,
650
+ name : virtual_column. column_name . clone ( ) ,
651
+ key_paths,
652
+ cast_func_name,
653
+ data_type : Box :: new ( virtual_column. data_type . clone ( ) ) ,
654
+ } ;
655
+ virtual_column_fields. push ( virtual_column_field) ;
608
656
}
609
- let metadata = BTreeMap :: new ( ) ;
610
- let schema = TableSchema :: new_from_column_ids ( fields, metadata, next_column_id) ;
611
657
612
658
let virtual_column_info = VirtualColumnInfo {
613
- schema : Arc :: new ( schema) ,
614
659
source_column_ids,
615
660
virtual_column_fields,
616
661
} ;
617
- Some ( virtual_column_info)
662
+ Ok ( Some ( virtual_column_info) )
618
663
}
619
664
620
665
pub ( crate ) fn build_agg_index (
0 commit comments