Skip to content

Commit 80a0801

Browse files
committed
feat: introduce DeleteVector struct to decouple consumers from implementation details
1 parent 2c92c28 commit 80a0801

File tree

4 files changed

+82
-6
lines changed

4 files changed

+82
-6
lines changed

crates/iceberg/src/arrow/delete_file_manager.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use roaring::RoaringTreemap;
19-
18+
use crate::delete_vector::DeleteVector;
2019
use crate::expr::BoundPredicate;
2120
use crate::io::FileIO;
2221
use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile};
@@ -42,7 +41,10 @@ impl DeleteFileManager for CachingDeleteFileManager {
4241
fn read_delete_file(_task: &FileScanTaskDeleteFile) -> Result<ArrowRecordBatchStream> {
4342
// TODO, implementation in https://github.com/apache/iceberg-rust/pull/982
4443

45-
unimplemented!()
44+
Err(Error::new(
45+
ErrorKind::FeatureUnsupported,
46+
"Reading delete files is not yet supported",
47+
))
4648
}
4749
}
4850

@@ -83,7 +85,7 @@ impl CachingDeleteFileManager {
8385
pub(crate) fn get_positional_delete_indexes_for_data_file(
8486
&self,
8587
data_file_path: &str,
86-
) -> Option<RoaringTreemap> {
88+
) -> Option<DeleteVector> {
8789
// TODO
8890

8991
None

crates/iceberg/src/arrow/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ use parquet::arrow::async_reader::AsyncFileReader;
3838
use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask, PARQUET_FIELD_ID_META_KEY};
3939
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData};
4040
use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
41-
use roaring::RoaringTreemap;
4241

4342
use crate::arrow::delete_file_manager::CachingDeleteFileManager;
4443
use crate::arrow::record_batch_transformer::RecordBatchTransformer;
4544
use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
45+
use crate::delete_vector::DeleteVector;
4646
use crate::error::Result;
4747
use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor};
4848
use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator;
@@ -346,7 +346,7 @@ impl ArrowReader {
346346
fn build_deletes_row_selection(
347347
row_group_metadata: &[RowGroupMetaData],
348348
selected_row_groups: &Option<Vec<usize>>,
349-
mut positional_deletes: RoaringTreemap,
349+
mut positional_deletes: DeleteVector,
350350
) -> Result<RowSelection> {
351351
// TODO
352352

crates/iceberg/src/delete_vector.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use roaring::RoaringTreemap;
2+
3+
#[allow(unused)]
4+
pub struct DeleteVector {
5+
inner: RoaringTreemap,
6+
}
7+
8+
impl DeleteVector {
9+
pub fn iter(&self) -> DeleteVectorIterator {
10+
let mut iter = self.inner.bitmaps();
11+
match iter.next() {
12+
Some((high_bits, bitmap)) => {
13+
DeleteVectorIterator {
14+
inner: Some(DeleteVectorIteratorInner {
15+
// iter,
16+
high_bits: (high_bits as u64) << 32,
17+
bitmap_iter: bitmap.iter(),
18+
}),
19+
}
20+
}
21+
_ => DeleteVectorIterator { inner: None },
22+
}
23+
}
24+
}
25+
26+
pub struct DeleteVectorIterator<'a> {
27+
inner: Option<DeleteVectorIteratorInner<'a>>,
28+
}
29+
30+
struct DeleteVectorIteratorInner<'a> {
31+
// TODO: roaring::treemap::iter::BitmapIter is currently private.
32+
// See https://github.com/RoaringBitmap/roaring-rs/issues/312
33+
// iter: roaring::treemap::iter::BitmapIter<'a>,
34+
high_bits: u64,
35+
bitmap_iter: roaring::bitmap::Iter<'a>,
36+
}
37+
38+
impl Iterator for DeleteVectorIterator<'_> {
39+
type Item = u64;
40+
41+
fn next(&mut self) -> Option<Self::Item> {
42+
let Some(ref mut inner) = &mut self.inner else {
43+
return None;
44+
};
45+
46+
if let Some(lower) = inner.bitmap_iter.next() {
47+
return Some(inner.high_bits & lower as u64);
48+
};
49+
50+
// TODO: roaring::treemap::iter::BitmapIter is currently private.
51+
// See https://github.com/RoaringBitmap/roaring-rs/issues/312
52+
53+
// replace with commented-out code below once BitmapIter is pub,
54+
// or use RoaringTreemap::iter if `advance_to` gets implemented natively
55+
None
56+
57+
// let Some((high_bits, bitmap)) = inner.iter.next() else {
58+
// self.inner = None;
59+
// return None;
60+
// };
61+
//
62+
// inner.high_bits = (high_bits as u64) << 32;
63+
// inner.bitmap_iter = bitmap.iter();
64+
//
65+
// self.next()
66+
}
67+
}
68+
69+
impl<'a> DeleteVectorIterator<'a> {
70+
pub fn advance_to(&'a mut self, _pos: u64) {
71+
// TODO
72+
}
73+
}

crates/iceberg/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,5 @@ pub(crate) mod delete_file_index;
8787
mod utils;
8888
pub mod writer;
8989

90+
mod delete_vector;
9091
mod puffin;

0 commit comments

Comments
 (0)