Skip to content

Commit 74e3aa9

Browse files
committed
feat: introduce DeleteVector struct to decouple consumers from implementation details
1 parent 2c92c28 commit 74e3aa9

File tree

4 files changed

+99
-6
lines changed

4 files changed

+99
-6
lines changed

crates/iceberg/src/arrow/delete_file_manager.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use roaring::RoaringTreemap;
19-
18+
use crate::delete_vector::DeleteVector;
2019
use crate::expr::BoundPredicate;
2120
use crate::io::FileIO;
2221
use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile};
@@ -42,7 +41,10 @@ impl DeleteFileManager for CachingDeleteFileManager {
4241
fn read_delete_file(_task: &FileScanTaskDeleteFile) -> Result<ArrowRecordBatchStream> {
4342
// TODO, implementation in https://github.com/apache/iceberg-rust/pull/982
4443

45-
unimplemented!()
44+
Err(Error::new(
45+
ErrorKind::FeatureUnsupported,
46+
"Reading delete files is not yet supported",
47+
))
4648
}
4749
}
4850

@@ -83,7 +85,7 @@ impl CachingDeleteFileManager {
8385
pub(crate) fn get_positional_delete_indexes_for_data_file(
8486
&self,
8587
data_file_path: &str,
86-
) -> Option<RoaringTreemap> {
88+
) -> Option<DeleteVector> {
8789
// TODO
8890

8991
None

crates/iceberg/src/arrow/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ use parquet::arrow::async_reader::AsyncFileReader;
3838
use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask, PARQUET_FIELD_ID_META_KEY};
3939
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData};
4040
use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
41-
use roaring::RoaringTreemap;
4241

4342
use crate::arrow::delete_file_manager::CachingDeleteFileManager;
4443
use crate::arrow::record_batch_transformer::RecordBatchTransformer;
4544
use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
45+
use crate::delete_vector::DeleteVector;
4646
use crate::error::Result;
4747
use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor};
4848
use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator;
@@ -346,7 +346,7 @@ impl ArrowReader {
346346
fn build_deletes_row_selection(
347347
row_group_metadata: &[RowGroupMetaData],
348348
selected_row_groups: &Option<Vec<usize>>,
349-
mut positional_deletes: RoaringTreemap,
349+
mut positional_deletes: DeleteVector,
350350
) -> Result<RowSelection> {
351351
// TODO
352352

crates/iceberg/src/delete_vector.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use roaring::RoaringTreemap;
19+
20+
#[allow(unused)]
21+
pub struct DeleteVector {
22+
inner: RoaringTreemap,
23+
}
24+
25+
impl DeleteVector {
26+
pub fn iter(&self) -> DeleteVectorIterator {
27+
let mut iter = self.inner.bitmaps();
28+
match iter.next() {
29+
Some((high_bits, bitmap)) => {
30+
DeleteVectorIterator {
31+
inner: Some(DeleteVectorIteratorInner {
32+
// iter,
33+
high_bits: (high_bits as u64) << 32,
34+
bitmap_iter: bitmap.iter(),
35+
}),
36+
}
37+
}
38+
_ => DeleteVectorIterator { inner: None },
39+
}
40+
}
41+
}
42+
43+
pub struct DeleteVectorIterator<'a> {
44+
inner: Option<DeleteVectorIteratorInner<'a>>,
45+
}
46+
47+
struct DeleteVectorIteratorInner<'a> {
48+
// TODO: roaring::treemap::iter::BitmapIter is currently private.
49+
// See https://github.com/RoaringBitmap/roaring-rs/issues/312
50+
// iter: roaring::treemap::iter::BitmapIter<'a>,
51+
high_bits: u64,
52+
bitmap_iter: roaring::bitmap::Iter<'a>,
53+
}
54+
55+
impl Iterator for DeleteVectorIterator<'_> {
56+
type Item = u64;
57+
58+
fn next(&mut self) -> Option<Self::Item> {
59+
let Some(ref mut inner) = &mut self.inner else {
60+
return None;
61+
};
62+
63+
if let Some(lower) = inner.bitmap_iter.next() {
64+
return Some(inner.high_bits & lower as u64);
65+
};
66+
67+
// TODO: roaring::treemap::iter::BitmapIter is currently private.
68+
// See https://github.com/RoaringBitmap/roaring-rs/issues/312
69+
70+
// replace with commented-out code below once BitmapIter is pub,
71+
// or use RoaringTreemap::iter if `advance_to` gets implemented natively
72+
None
73+
74+
// let Some((high_bits, bitmap)) = inner.iter.next() else {
75+
// self.inner = None;
76+
// return None;
77+
// };
78+
//
79+
// inner.high_bits = (high_bits as u64) << 32;
80+
// inner.bitmap_iter = bitmap.iter();
81+
//
82+
// self.next()
83+
}
84+
}
85+
86+
impl<'a> DeleteVectorIterator<'a> {
87+
pub fn advance_to(&'a mut self, _pos: u64) {
88+
// TODO
89+
}
90+
}

crates/iceberg/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,5 @@ pub(crate) mod delete_file_index;
8787
mod utils;
8888
pub mod writer;
8989

90+
mod delete_vector;
9091
mod puffin;

0 commit comments

Comments
 (0)