Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions rust/lance-table/src/rowids.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,14 +422,14 @@ impl RowIdSequence {
let mut holes_passed = 0;
ranges.extend(GroupingIterator::new(unsafe { ids.into_addr_iter() }.map(
|addr| {
let offset_no_holes = addr - range.start + offset_start;
while bitmap_iter_pos < offset_no_holes {
let position_in_range = addr - range.start;
while bitmap_iter_pos < position_in_range {
if !bitmap_iter.next().unwrap() {
holes_passed += 1;
}
bitmap_iter_pos += 1;
}
offset_no_holes - holes_passed
offset_start + position_in_range - holes_passed
},
)));
}
Expand Down
75 changes: 74 additions & 1 deletion rust/lance/tests/query/primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ use arrow_array::{
LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, StringViewArray,
};
use arrow_schema::DataType;
use lance::dataset::optimize::{compact_files, CompactionOptions};
use lance::dataset::WriteParams;
use lance::Dataset;

use lance_datagen::{array, gen_batch, ArrayGeneratorExt, RowCount};
use lance_index::IndexType;
use lance_index::{DatasetIndexExt, IndexType};

use super::{test_filter, test_scan, test_take};
use crate::utils::DatasetTestCases;
Expand Down Expand Up @@ -403,3 +405,74 @@ async fn test_query_decimal(#[case] data_type: DataType) {
})
.await
}

/// Regression test for a panic in filtered scans over a compacted dataset
/// that uses stable row IDs (SRID).
///
/// The failure occurred when a fragment's RowIdSequence contained a
/// RangeWithBitmap segment positioned after a Range segment: the bitmap
/// iterator was advanced by a global offset rather than by the position
/// local to the range, which exhausted the iterator.
///
/// Sequence: Write(2 frags) → Delete(from frag1) → Compact → CreateIndex → FilteredScan
#[tokio::test]
async fn test_filtered_scan_after_compact_with_srid() {
    use arrow::record_batch::RecordBatchIterator;

    // Build a single 100-row batch; `max_rows_per_file: 50` splits it into
    // two fragments on write, and stable row IDs are switched on.
    let int_values: ArrayRef = Arc::new(Int32Array::from_iter_values(0..100));
    let batch = RecordBatch::try_from_iter(vec![("int_col", int_values)]).unwrap();
    let schema = batch.schema();
    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
    let params = WriteParams {
        max_rows_per_file: 50,
        enable_stable_row_ids: true,
        ..Default::default()
    };
    let mut ds = Dataset::write(reader, "memory://compact_srid_test", Some(params))
        .await
        .unwrap();
    assert_eq!(ds.get_fragments().len(), 2);
    assert_eq!(ds.count_rows(None).await.unwrap(), 100);

    // Punch a hole of ten rows into the second fragment. Once compacted, the
    // row IDs of that fragment are expected to be stored as a RangeWithBitmap
    // segment.
    ds.delete("int_col >= 60 AND int_col < 70").await.unwrap();
    assert_eq!(ds.count_rows(None).await.unwrap(), 90);

    // Compaction is expected to merge the two fragments into one whose
    // RowIdSequence is Range(0..50) followed by RangeWithBitmap(50..100), so
    // the bitmap segment starts at offset 50 rather than 0.
    let compaction_options = CompactionOptions::default();
    compact_files(&mut ds, compaction_options, None)
        .await
        .unwrap();

    // A BTree index routes the filtered scan through mask_to_offset_ranges.
    let index_params = lance_index::scalar::ScalarIndexParams::default();
    ds.create_index(&["int_col"], IndexType::BTree, None, &index_params, true)
        .await
        .unwrap();

    // The index yields a RowAddrMask that is handed to mask_to_offset_ranges
    // on the multi-segment RowIdSequence. Prior to the fix this panicked with
    // "called Option::unwrap() on a None value".
    let mut scanner = ds.scan();
    scanner.filter("int_col < 200").unwrap();
    let results = scanner.try_into_batch().await.unwrap();

    let row_count = results.num_rows();
    assert_eq!(
        row_count, 90,
        "Expected 90 rows (100 written - 10 deleted) but got {}",
        row_count
    );
}