@@ -27,7 +27,6 @@ use crate::pipelines::processors::transforms::range_join::TransformRangeJoinLeft
27
27
use crate :: pipelines:: processors:: transforms:: range_join:: TransformRangeJoinRight ;
28
28
use crate :: pipelines:: processors:: transforms:: HashJoinBuildState ;
29
29
use crate :: pipelines:: processors:: transforms:: HashJoinProbeState ;
30
- use crate :: pipelines:: processors:: transforms:: RuntimeFilterChannels ;
31
30
use crate :: pipelines:: processors:: transforms:: TransformHashJoinBuild ;
32
31
use crate :: pipelines:: processors:: transforms:: TransformHashJoinProbe ;
33
32
use crate :: pipelines:: processors:: HashJoinDesc ;
@@ -36,83 +35,42 @@ use crate::pipelines::PipelineBuilder;
36
35
use crate :: sessions:: QueryContext ;
37
36
38
37
impl PipelineBuilder {
39
- pub ( crate ) fn build_range_join ( & mut self , range_join : & RangeJoin ) -> Result < ( ) > {
40
- let state = Arc :: new ( RangeJoinState :: new ( self . ctx . clone ( ) , range_join) ) ;
41
- self . expand_right_side_pipeline ( range_join, state. clone ( ) ) ?;
42
- self . build_left_side ( range_join, state) ?;
43
- Ok ( ( ) )
44
- }
45
-
46
- fn build_left_side (
47
- & mut self ,
48
- range_join : & RangeJoin ,
49
- state : Arc < RangeJoinState > ,
50
- ) -> Result < ( ) > {
51
- self . build_pipeline ( & range_join. left ) ?;
52
- let max_threads = self . settings . get_max_threads ( ) ? as usize ;
53
- self . main_pipeline . try_resize ( max_threads) ?;
54
- self . main_pipeline . add_transform ( |input, output| {
55
- Ok ( ProcessorPtr :: create ( TransformRangeJoinLeft :: create (
56
- input,
57
- output,
58
- state. clone ( ) ,
59
- ) ) )
60
- } ) ?;
61
- Ok ( ( ) )
62
- }
63
-
64
- fn expand_right_side_pipeline (
65
- & mut self ,
66
- range_join : & RangeJoin ,
67
- state : Arc < RangeJoinState > ,
68
- ) -> Result < ( ) > {
69
- let right_side_context = QueryContext :: create_from ( self . ctx . as_ref ( ) ) ;
70
- let mut right_side_builder = PipelineBuilder :: create (
38
+ // Create a new pipeline builder with the same context as the current builder
39
+ fn create_sub_pipeline_builder ( & self ) -> PipelineBuilder {
40
+ let sub_context = QueryContext :: create_from ( self . ctx . as_ref ( ) ) ;
41
+ let mut sub_builder = PipelineBuilder :: create (
71
42
self . func_ctx . clone ( ) ,
72
43
self . settings . clone ( ) ,
73
- right_side_context ,
44
+ sub_context ,
74
45
self . main_pipeline . get_scopes ( ) ,
75
46
) ;
76
- right_side_builder. hash_join_states = self . hash_join_states . clone ( ) ;
77
-
78
- let mut right_res = right_side_builder. finalize ( & range_join. right ) ?;
79
- right_res. main_pipeline . add_sink ( |input| {
80
- Ok ( ProcessorPtr :: create (
81
- Sinker :: < TransformRangeJoinRight > :: create (
82
- input,
83
- TransformRangeJoinRight :: create ( state. clone ( ) ) ,
84
- ) ,
85
- ) )
86
- } ) ?;
87
- self . pipelines . push ( right_res. main_pipeline . finalize ( ) ) ;
88
- self . pipelines . extend ( right_res. sources_pipelines ) ;
89
- Ok ( ( ) )
47
+ sub_builder. hash_join_states = self . hash_join_states . clone ( ) ;
48
+ sub_builder
90
49
}
91
50
92
- pub ( crate ) fn build_join ( & mut self , join : & HashJoin ) -> Result < ( ) > {
93
- // for merge into target table as build side.
94
- let ( enable_merge_into_optimization, merge_into_is_distributed) =
95
- self . merge_into_get_optimization_flag ( join) ;
51
+ pub ( crate ) fn build_hash_join ( & mut self , join : & HashJoin ) -> Result < ( ) > {
52
+ // Get optimization flags for merge-into operations
53
+ let ( enable_optimization, is_distributed) = self . merge_into_get_optimization_flag ( join) ;
96
54
97
- let state = self . build_join_state (
98
- join,
99
- merge_into_is_distributed,
100
- enable_merge_into_optimization,
101
- ) ?;
55
+ // Create the join state with optimization flags
56
+ let state = self . build_hash_join_state ( join, is_distributed, enable_optimization) ?;
102
57
if let Some ( ( build_cache_index, _) ) = join. build_side_cache_info {
103
58
self . hash_join_states
104
59
. insert ( build_cache_index, state. clone ( ) ) ;
105
60
}
106
61
107
- self . expand_build_side_pipeline ( & join. build , join, state. clone ( ) ) ?;
108
- self . build_join_probe ( join, state) ?;
62
+ // Build both phases of the Hash Join
63
+ self . build_hash_join_build_side ( & join. build , join, state. clone ( ) ) ?;
64
+ self . build_hash_join_probe_side ( join, state) ?;
109
65
110
- // In the case of spilling, we need to share state among multiple threads. Quickly fetch all data from this round to quickly start the next round.
66
+ // In the case of spilling, we need to share state among multiple threads
67
+ // Quickly fetch all data from this round to quickly start the next round
111
68
self . main_pipeline
112
69
. resize ( self . main_pipeline . output_len ( ) , true )
113
70
}
114
71
115
- fn build_join_state (
72
+ // Create the Hash Join state
73
+ fn build_hash_join_state (
116
74
& mut self ,
117
75
join : & HashJoin ,
118
76
merge_into_is_distributed : bool ,
@@ -130,20 +88,14 @@ impl PipelineBuilder {
130
88
)
131
89
}
132
90
133
- fn expand_build_side_pipeline (
91
+ // Build the build-side pipeline for Hash Join
92
+ fn build_hash_join_build_side (
134
93
& mut self ,
135
94
build : & PhysicalPlan ,
136
95
hash_join_plan : & HashJoin ,
137
96
join_state : Arc < HashJoinState > ,
138
97
) -> Result < ( ) > {
139
- let build_side_context = QueryContext :: create_from ( self . ctx . as_ref ( ) ) ;
140
- let mut build_side_builder = PipelineBuilder :: create (
141
- self . func_ctx . clone ( ) ,
142
- self . settings . clone ( ) ,
143
- build_side_context,
144
- self . main_pipeline . get_scopes ( ) ,
145
- ) ;
146
- build_side_builder. hash_join_states = self . hash_join_states . clone ( ) ;
98
+ let build_side_builder = self . create_sub_pipeline_builder ( ) ;
147
99
let mut build_res = build_side_builder. finalize ( build) ?;
148
100
149
101
assert ! ( build_res. main_pipeline. is_pulling_pipeline( ) ?) ;
@@ -155,13 +107,6 @@ impl PipelineBuilder {
155
107
& hash_join_plan. build_projections ,
156
108
join_state. clone ( ) ,
157
109
output_len,
158
- hash_join_plan
159
- . runtime_filter_plan
160
- . as_ref ( )
161
- . map ( |_| RuntimeFilterChannels {
162
- rf_src_send : self . ctx . rf_src_send ( hash_join_plan. join_id ) ,
163
- rf_sink_recv : self . ctx . rf_sink_recv ( hash_join_plan. join_id ) ,
164
- } ) ,
165
110
) ?;
166
111
build_state. add_runtime_filter_ready ( ) ;
167
112
@@ -171,7 +116,7 @@ impl PipelineBuilder {
171
116
build_state. clone ( ) ,
172
117
) ?) )
173
118
} ;
174
- // for distributed merge into when source as build side.
119
+ // For distributed merge-into when the source is the build side
175
120
if hash_join_plan. need_hold_hash_table {
176
121
self . join_state = Some ( build_state. clone ( ) )
177
122
}
@@ -182,7 +127,12 @@ impl PipelineBuilder {
182
127
Ok ( ( ) )
183
128
}
184
129
185
- fn build_join_probe ( & mut self , join : & HashJoin , state : Arc < HashJoinState > ) -> Result < ( ) > {
130
+ // Build the probe-side pipeline for Hash Join
131
+ fn build_hash_join_probe_side (
132
+ & mut self ,
133
+ join : & HashJoin ,
134
+ state : Arc < HashJoinState > ,
135
+ ) -> Result < ( ) > {
186
136
self . build_pipeline ( & join. probe ) ?;
187
137
188
138
let max_block_size = self . settings . get_max_block_size ( ) ? as usize ;
@@ -212,16 +162,66 @@ impl PipelineBuilder {
212
162
) ?) )
213
163
} ) ?;
214
164
165
+ // For merge-into operations that need to hold the hash table
215
166
if join. need_hold_hash_table {
216
- let mut projected_probe_fields = vec ! [ ] ;
167
+ // Extract projected fields from probe schema
168
+ let mut projected_fields = vec ! [ ] ;
217
169
for ( i, field) in probe_state. probe_schema . fields ( ) . iter ( ) . enumerate ( ) {
218
170
if probe_state. probe_projections . contains ( & i) {
219
- projected_probe_fields . push ( field. clone ( ) ) ;
171
+ projected_fields . push ( field. clone ( ) ) ;
220
172
}
221
173
}
222
- self . merge_into_probe_data_fields = Some ( projected_probe_fields ) ;
174
+ self . merge_into_probe_data_fields = Some ( projected_fields ) ;
223
175
}
224
176
225
177
Ok ( ( ) )
226
178
}
179
+
180
+ pub ( crate ) fn build_range_join ( & mut self , range_join : & RangeJoin ) -> Result < ( ) > {
181
+ let state = Arc :: new ( RangeJoinState :: new ( self . ctx . clone ( ) , range_join) ) ;
182
+ self . build_range_join_right_side ( range_join, state. clone ( ) ) ?;
183
+ self . build_range_join_left_side ( range_join, state) ?;
184
+ Ok ( ( ) )
185
+ }
186
+
187
+ // Build the left-side pipeline for Range Join
188
+ fn build_range_join_left_side (
189
+ & mut self ,
190
+ range_join : & RangeJoin ,
191
+ state : Arc < RangeJoinState > ,
192
+ ) -> Result < ( ) > {
193
+ self . build_pipeline ( & range_join. left ) ?;
194
+ let max_threads = self . settings . get_max_threads ( ) ? as usize ;
195
+ self . main_pipeline . try_resize ( max_threads) ?;
196
+ self . main_pipeline . add_transform ( |input, output| {
197
+ Ok ( ProcessorPtr :: create ( TransformRangeJoinLeft :: create (
198
+ input,
199
+ output,
200
+ state. clone ( ) ,
201
+ ) ) )
202
+ } ) ?;
203
+ Ok ( ( ) )
204
+ }
205
+
206
+ // Build the right-side pipeline for Range Join
207
+ fn build_range_join_right_side (
208
+ & mut self ,
209
+ range_join : & RangeJoin ,
210
+ state : Arc < RangeJoinState > ,
211
+ ) -> Result < ( ) > {
212
+ let right_side_builder = self . create_sub_pipeline_builder ( ) ;
213
+
214
+ let mut right_res = right_side_builder. finalize ( & range_join. right ) ?;
215
+ right_res. main_pipeline . add_sink ( |input| {
216
+ Ok ( ProcessorPtr :: create (
217
+ Sinker :: < TransformRangeJoinRight > :: create (
218
+ input,
219
+ TransformRangeJoinRight :: create ( state. clone ( ) ) ,
220
+ ) ,
221
+ ) )
222
+ } ) ?;
223
+ self . pipelines . push ( right_res. main_pipeline . finalize ( ) ) ;
224
+ self . pipelines . extend ( right_res. sources_pipelines ) ;
225
+ Ok ( ( ) )
226
+ }
227
227
}
0 commit comments