@@ -44,23 +44,32 @@ pub(crate) trait SnapshotProduceOperation: Send + Sync {
44
44
) -> impl Future < Output = Result < Vec < ManifestEntry > > > + Send ;
45
45
fn existing_manifest (
46
46
& self ,
47
- table : & Table ,
47
+ snapshot_produce : & SnapshotProducer < ' _ > ,
48
48
) -> impl Future < Output = Result < Vec < ManifestFile > > > + Send ;
49
49
}
50
50
51
51
/// Field-less [`ManifestProcess`] implementation whose `process_manifests`
/// returns the manifest list it is given without modifying it.
pub ( crate ) struct DefaultManifestProcess ;
52
52
53
53
// Pass-through `ManifestProcess`: the manifest list is handed back untouched.
// The `-` line is the previous one-argument signature; the `+` lines add the
// `_snapshot_produce` parameter, which this implementation does not read
// (hence the leading underscore).
impl ManifestProcess for DefaultManifestProcess {
54
- fn process_manifests ( & self , manifests : Vec < ManifestFile > ) -> Vec < ManifestFile > {
54
+ fn process_manifests (
55
+ & self ,
56
+ _snapshot_produce : & SnapshotProducer < ' _ > ,
57
+ manifests : Vec < ManifestFile > ,
58
+ ) -> Vec < ManifestFile > {
55
59
// No filtering, merging or reordering: return the input as-is.
manifests
56
60
}
57
61
}
58
62
59
63
/// Hook applied to the manifest list that `SnapshotProducer::manifest_file`
/// assembles (the newly written manifest followed by the existing manifests).
/// The returned vector is what `manifest_file` hands back to its caller.
///
/// Implementations must be `Send + Sync`.
///
/// After this change the method also receives a shared reference to the
/// `SnapshotProducer`, which exposes the `table` the snapshot is built against.
pub ( crate ) trait ManifestProcess : Send + Sync {
60
- fn process_manifests ( & self , manifests : Vec < ManifestFile > ) -> Vec < ManifestFile > ;
64
+ fn process_manifests (
65
+ & self ,
66
+ snapshot_produce : & SnapshotProducer < ' _ > ,
67
+ manifests : Vec < ManifestFile > ,
68
+ ) -> Vec < ManifestFile > ;
61
69
}
62
70
63
- pub ( crate ) struct SnapshotProducer {
71
+ pub ( crate ) struct SnapshotProducer < ' a > {
72
+ pub ( crate ) table : & ' a Table ,
64
73
snapshot_id : i64 ,
65
74
commit_uuid : Uuid ,
66
75
key_metadata : Option < Vec < u8 > > ,
@@ -72,15 +81,16 @@ pub(crate) struct SnapshotProducer {
72
81
manifest_counter : RangeFrom < u64 > ,
73
82
}
74
83
75
- impl SnapshotProducer {
84
+ impl < ' a > SnapshotProducer < ' a > {
76
85
pub ( crate ) fn new (
77
- table : & Table ,
86
+ table : & ' a Table ,
78
87
commit_uuid : Uuid ,
79
88
key_metadata : Option < Vec < u8 > > ,
80
89
snapshot_properties : HashMap < String , String > ,
81
90
added_data_files : Vec < DataFile > ,
82
91
) -> Self {
83
92
Self {
93
+ table,
84
94
snapshot_id : Self :: generate_unique_snapshot_id ( table) ,
85
95
commit_uuid,
86
96
key_metadata,
@@ -90,10 +100,7 @@ impl SnapshotProducer {
90
100
}
91
101
}
92
102
93
- pub ( crate ) fn validate_added_data_files (
94
- table : & Table ,
95
- added_data_files : & [ DataFile ] ,
96
- ) -> Result < ( ) > {
103
+ pub ( crate ) fn validate_added_data_files ( & self , added_data_files : & [ DataFile ] ) -> Result < ( ) > {
97
104
for data_file in added_data_files {
98
105
if data_file. content_type ( ) != crate :: spec:: DataContentType :: Data {
99
106
return Err ( Error :: new (
@@ -102,23 +109,23 @@ impl SnapshotProducer {
102
109
) ) ;
103
110
}
104
111
// Check if the data file partition spec id matches the table default partition spec id.
105
- if table. metadata ( ) . default_partition_spec_id ( ) != data_file. partition_spec_id {
112
+ if self . table . metadata ( ) . default_partition_spec_id ( ) != data_file. partition_spec_id {
106
113
return Err ( Error :: new (
107
114
ErrorKind :: DataInvalid ,
108
115
"Data file partition spec id does not match table default partition spec id" ,
109
116
) ) ;
110
117
}
111
118
Self :: validate_partition_value (
112
119
data_file. partition ( ) ,
113
- table. metadata ( ) . default_partition_type ( ) ,
120
+ self . table . metadata ( ) . default_partition_type ( ) ,
114
121
) ?;
115
122
}
116
123
117
124
Ok ( ( ) )
118
125
}
119
126
120
127
pub ( crate ) async fn validate_duplicate_files (
121
- table : & Table ,
128
+ & self ,
122
129
added_data_files : & [ DataFile ] ,
123
130
) -> Result < ( ) > {
124
131
let new_files: HashSet < & str > = added_data_files
@@ -127,12 +134,14 @@ impl SnapshotProducer {
127
134
. collect ( ) ;
128
135
129
136
let mut referenced_files = Vec :: new ( ) ;
130
- if let Some ( current_snapshot) = table. metadata ( ) . current_snapshot ( ) {
137
+ if let Some ( current_snapshot) = self . table . metadata ( ) . current_snapshot ( ) {
131
138
let manifest_list = current_snapshot
132
- . load_manifest_list ( table. file_io ( ) , & table. metadata_ref ( ) )
139
+ . load_manifest_list ( self . table . file_io ( ) , & self . table . metadata_ref ( ) )
133
140
. await ?;
134
141
for manifest_list_entry in manifest_list. entries ( ) {
135
- let manifest = manifest_list_entry. load_manifest ( table. file_io ( ) ) . await ?;
142
+ let manifest = manifest_list_entry
143
+ . load_manifest ( self . table . file_io ( ) )
144
+ . await ?;
136
145
for entry in manifest. entries ( ) {
137
146
let file_path = entry. file_path ( ) ;
138
147
if new_files. contains ( file_path) && entry. is_alive ( ) {
@@ -177,28 +186,28 @@ impl SnapshotProducer {
177
186
snapshot_id
178
187
}
179
188
180
- fn new_manifest_writer (
181
- & mut self ,
182
- content : ManifestContentType ,
183
- table : & Table ,
184
- ) -> Result < ManifestWriter > {
189
+ fn new_manifest_writer ( & mut self , content : ManifestContentType ) -> Result < ManifestWriter > {
185
190
let new_manifest_path = format ! (
186
191
"{}/{}/{}-m{}.{}" ,
187
- table. metadata( ) . location( ) ,
192
+ self . table. metadata( ) . location( ) ,
188
193
META_ROOT_PATH ,
189
194
self . commit_uuid,
190
195
self . manifest_counter. next( ) . unwrap( ) ,
191
196
DataFileFormat :: Avro
192
197
) ;
193
- let output_file = table. file_io ( ) . new_output ( new_manifest_path) ?;
198
+ let output_file = self . table . file_io ( ) . new_output ( new_manifest_path) ?;
194
199
let builder = ManifestWriterBuilder :: new (
195
200
output_file,
196
201
Some ( self . snapshot_id ) ,
197
202
self . key_metadata . clone ( ) ,
198
- table. metadata ( ) . current_schema ( ) . clone ( ) ,
199
- table. metadata ( ) . default_partition_spec ( ) . as_ref ( ) . clone ( ) ,
203
+ self . table . metadata ( ) . current_schema ( ) . clone ( ) ,
204
+ self . table
205
+ . metadata ( )
206
+ . default_partition_spec ( )
207
+ . as_ref ( )
208
+ . clone ( ) ,
200
209
) ;
201
- if table. metadata ( ) . format_version ( ) == FormatVersion :: V1 {
210
+ if self . table . metadata ( ) . format_version ( ) == FormatVersion :: V1 {
202
211
Ok ( builder. build_v1 ( ) )
203
212
} else {
204
213
match content {
@@ -240,7 +249,7 @@ impl SnapshotProducer {
240
249
}
241
250
242
251
// Writes a manifest file for the added data files and returns the `ManifestFile` entry to include in the manifest list.
243
- async fn write_added_manifest ( & mut self , table : & Table ) -> Result < ManifestFile > {
252
+ async fn write_added_manifest ( & mut self ) -> Result < ManifestFile > {
244
253
let added_data_files = std:: mem:: take ( & mut self . added_data_files ) ;
245
254
if added_data_files. is_empty ( ) {
246
255
return Err ( Error :: new (
@@ -250,7 +259,7 @@ impl SnapshotProducer {
250
259
}
251
260
252
261
let snapshot_id = self . snapshot_id ;
253
- let format_version = table. metadata ( ) . format_version ( ) ;
262
+ let format_version = self . table . metadata ( ) . format_version ( ) ;
254
263
let manifest_entries = added_data_files. into_iter ( ) . map ( |data_file| {
255
264
let builder = ManifestEntry :: builder ( )
256
265
. status ( crate :: spec:: ManifestStatus :: Added )
@@ -263,7 +272,7 @@ impl SnapshotProducer {
263
272
builder. build ( )
264
273
}
265
274
} ) ;
266
- let mut writer = self . new_manifest_writer ( ManifestContentType :: Data , table ) ?;
275
+ let mut writer = self . new_manifest_writer ( ManifestContentType :: Data ) ?;
267
276
for entry in manifest_entries {
268
277
writer. add_entry ( entry) ?;
269
278
}
@@ -272,29 +281,27 @@ impl SnapshotProducer {
272
281
273
282
/// Collects the manifest files for the snapshot being produced.
///
/// Steps, in order:
/// 1. Write a manifest for the added data files (`write_added_manifest`).
/// 2. Ask `snapshot_produce_operation` for the existing manifests.
/// 3. Build one list with the added manifest first, then the existing ones.
/// 4. Pass that list through `manifest_process` and return its result.
///
/// # Errors
/// Any error from `write_added_manifest` or `existing_manifest` is propagated
/// with `?`.
///
/// The `-`/`+` lines below drop the separate `table` argument: the table is
/// now read from the producer itself, and the producer (`self`) is passed to
/// both `existing_manifest` and `process_manifests`.
async fn manifest_file < OP : SnapshotProduceOperation , MP : ManifestProcess > (
274
283
& mut self ,
275
- table : & Table ,
276
284
snapshot_produce_operation : & OP ,
277
285
manifest_process : & MP ,
278
286
) -> Result < Vec < ManifestFile > > {
279
- let added_manifest = self . write_added_manifest ( table ) . await ?;
280
- let existing_manifests = snapshot_produce_operation. existing_manifest ( table ) . await ?;
287
+ let added_manifest = self . write_added_manifest ( ) . await ?;
288
+ let existing_manifests = snapshot_produce_operation. existing_manifest ( self ) . await ?;
281
289
// # TODO
282
290
// Support process delete entries.
283
291
284
292
// The added manifest goes first; existing manifests are appended after it.
let mut manifest_files = vec ! [ added_manifest] ;
285
293
manifest_files. extend ( existing_manifests) ;
286
- let manifest_files = manifest_process. process_manifests ( manifest_files) ;
294
+ let manifest_files = manifest_process. process_manifests ( self , manifest_files) ;
287
295
Ok ( manifest_files)
288
296
}
289
297
290
298
// Returns a `Summary` of the current snapshot
291
299
fn summary < OP : SnapshotProduceOperation > (
292
300
& self ,
293
- table : & Table ,
294
301
snapshot_produce_operation : & OP ,
295
302
) -> Result < Summary > {
296
303
let mut summary_collector = SnapshotSummaryCollector :: default ( ) ;
297
- let table_metadata = table. metadata_ref ( ) ;
304
+ let table_metadata = self . table . metadata_ref ( ) ;
298
305
299
306
let partition_summary_limit = if let Some ( limit) = table_metadata
300
307
. properties ( )
@@ -339,10 +346,10 @@ impl SnapshotProducer {
339
346
)
340
347
}
341
348
342
- fn generate_manifest_list_file_path ( & self , table : & Table , attempt : i64 ) -> String {
349
+ fn generate_manifest_list_file_path ( & self , attempt : i64 ) -> String {
343
350
format ! (
344
351
"{}/{}/snap-{}-{}-{}.{}" ,
345
- table. metadata( ) . location( ) ,
352
+ self . table. metadata( ) . location( ) ,
346
353
META_ROOT_PATH ,
347
354
self . snapshot_id,
348
355
attempt,
@@ -354,34 +361,34 @@ impl SnapshotProducer {
354
361
/// Finishes building the action and returns the [`ActionCommit`] to the transaction.
355
362
pub ( crate ) async fn commit < OP : SnapshotProduceOperation , MP : ManifestProcess > (
356
363
mut self ,
357
- table : & Table ,
358
364
snapshot_produce_operation : OP ,
359
365
process : MP ,
360
366
) -> Result < ActionCommit > {
361
367
let new_manifests = self
362
- . manifest_file ( table , & snapshot_produce_operation, & process)
368
+ . manifest_file ( & snapshot_produce_operation, & process)
363
369
. await ?;
364
- let next_seq_num = table. metadata ( ) . next_sequence_number ( ) ;
370
+ let next_seq_num = self . table . metadata ( ) . next_sequence_number ( ) ;
365
371
366
- let summary = self
367
- . summary ( table, & snapshot_produce_operation)
368
- . map_err ( |err| {
369
- Error :: new ( ErrorKind :: Unexpected , "Failed to create snapshot summary." )
370
- . with_source ( err)
371
- } ) ?;
372
+ let summary = self . summary ( & snapshot_produce_operation) . map_err ( |err| {
373
+ Error :: new ( ErrorKind :: Unexpected , "Failed to create snapshot summary." ) . with_source ( err)
374
+ } ) ?;
372
375
373
- let manifest_list_path = self . generate_manifest_list_file_path ( table , 0 ) ;
376
+ let manifest_list_path = self . generate_manifest_list_file_path ( 0 ) ;
374
377
375
- let mut manifest_list_writer = match table. metadata ( ) . format_version ( ) {
378
+ let mut manifest_list_writer = match self . table . metadata ( ) . format_version ( ) {
376
379
FormatVersion :: V1 => ManifestListWriter :: v1 (
377
- table. file_io ( ) . new_output ( manifest_list_path. clone ( ) ) ?,
380
+ self . table
381
+ . file_io ( )
382
+ . new_output ( manifest_list_path. clone ( ) ) ?,
378
383
self . snapshot_id ,
379
- table. metadata ( ) . current_snapshot_id ( ) ,
384
+ self . table . metadata ( ) . current_snapshot_id ( ) ,
380
385
) ,
381
386
FormatVersion :: V2 => ManifestListWriter :: v2 (
382
- table. file_io ( ) . new_output ( manifest_list_path. clone ( ) ) ?,
387
+ self . table
388
+ . file_io ( )
389
+ . new_output ( manifest_list_path. clone ( ) ) ?,
383
390
self . snapshot_id ,
384
- table. metadata ( ) . current_snapshot_id ( ) ,
391
+ self . table . metadata ( ) . current_snapshot_id ( ) ,
385
392
next_seq_num,
386
393
) ,
387
394
} ;
@@ -392,10 +399,10 @@ impl SnapshotProducer {
392
399
let new_snapshot = Snapshot :: builder ( )
393
400
. with_manifest_list ( manifest_list_path)
394
401
. with_snapshot_id ( self . snapshot_id )
395
- . with_parent_snapshot_id ( table. metadata ( ) . current_snapshot_id ( ) )
402
+ . with_parent_snapshot_id ( self . table . metadata ( ) . current_snapshot_id ( ) )
396
403
. with_sequence_number ( next_seq_num)
397
404
. with_summary ( summary)
398
- . with_schema_id ( table. metadata ( ) . current_schema_id ( ) )
405
+ . with_schema_id ( self . table . metadata ( ) . current_schema_id ( ) )
399
406
. with_timestamp_ms ( commit_ts)
400
407
. build ( ) ;
401
408
@@ -414,11 +421,11 @@ impl SnapshotProducer {
414
421
415
422
let requirements = vec ! [
416
423
TableRequirement :: UuidMatch {
417
- uuid: table. metadata( ) . uuid( ) ,
424
+ uuid: self . table. metadata( ) . uuid( ) ,
418
425
} ,
419
426
TableRequirement :: RefSnapshotIdMatch {
420
427
r#ref: MAIN_BRANCH . to_string( ) ,
421
- snapshot_id: table. metadata( ) . current_snapshot_id( ) ,
428
+ snapshot_id: self . table. metadata( ) . current_snapshot_id( ) ,
422
429
} ,
423
430
] ;
424
431
0 commit comments