Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Level Compaction] Use binary search to find overlapped SsTables #136

Open
ppdogg opened this issue Mar 13, 2025 · 1 comment
Open

[Level Compaction] Use binary search to find overlapped SsTables #136

ppdogg opened this issue Mar 13, 2025 · 1 comment

Comments

@ppdogg
Copy link
Contributor

ppdogg commented Mar 13, 2025

I have codes for you if you don't mind:

    fn find_overlapping_ssts(
        &self,
        snapshot: &LsmStorageState,
        sst_ids: &[usize],
        in_level: usize,
    ) -> Vec<usize> {
        let begin_key = sst_ids
             .iter()
             .map(|id| snapshot.sstables[id].first_key())
             .min()
             .cloned()
             .unwrap();
         let end_key = sst_ids
             .iter()
             .map(|id| snapshot.sstables[id].last_key())
             .max()
             .cloned()
             .unwrap();

        let mut sstables = Vec::with_capacity(snapshot.levels[in_level - 1].1.len());
        for sst_id in &snapshot.levels[in_level - 1].1 {
            sstables.push(snapshot.sstables[sst_id].clone());
        }

        Self::find_level_ssts_range(
            &sstables,
            begin_key.as_key_slice(),
            end_key.as_key_slice(),
        )
    }

binary search:

    fn find_level_ssts_range(
        sstables: &Vec<Arc<SsTable>>,
        first_key: KeySlice,
        last_key: KeySlice,
    ) -> Vec<usize> {
        if sstables.is_empty() {
            return Vec::new();
        }

        let mut sst_ids = Vec::new();

        let lower = sstables
            .partition_point(|table| table.last_key().as_key_slice() < first_key);
        let upper = sstables
            .partition_point(|table| table.first_key().as_key_slice() <= last_key)
            .saturating_sub(1);

        sst_ids.extend(sstables[lower..=upper]
            .iter()
            .map(|table| table.sst_id())
            .collect::<Vec<usize>>()
        );

        sst_ids
    }
@ppdogg
Copy link
Contributor Author

ppdogg commented Mar 13, 2025

Also, refactor compact.rs:

     fn compact_two_levels(
        &self,
        snapshot: &LsmStorageState,
        upper_level: &Option<usize>,
        upper_level_sst_ids: &Vec<usize>,
        _lower_level: &usize,
        lower_level_sst_ids: &Vec<usize>,
        compact_to_bottom_level: bool,
    ) -> Result<Vec<Arc<SsTable>>> {
        match upper_level {
            Some(_) => {
               ...

                self.compact_generate_sst_from_iter(
                    TwoMergeIterator::create(
                        SstConcatIterator::create_and_seek_to_first(upper_sstables)?,
                        SstConcatIterator::create_and_seek_to_first(lower_sstables)?,
                    )?,
                    compact_to_bottom_level,
                )
            }
            None => {
                ...

                self.compact_generate_sst_from_iter(
                    TwoMergeIterator::create(
                        MergeIterator::create(upper_iters),
                        SstConcatIterator::create_and_seek_to_first(lower_sstables)?,
                    )?,
                    compact_to_bottom_level,
                )
            }
        }

    fn compact(&self, task: &CompactionTask) -> Result<Vec<Arc<SsTable>>> {
        let snapshot = {
            let state = self.state.read();
            state.clone()
        };
        match task {
            CompactionTask::ForceFullCompaction {
                l0_sstables,
                l1_sstables,
            } => self.compact_two_levels(
                &snapshot,
                &None,
                l0_sstables,
                &1,
                l1_sstables,
                task.compact_to_bottom_level(),
            ),
            CompactionTask::Simple(SimpleLeveledCompactionTask {
                upper_level,
                upper_level_sst_ids,
                lower_level,
                lower_level_sst_ids,
                ..
            })
            | CompactionTask::Leveled(LeveledCompactionTask {
                upper_level,
                upper_level_sst_ids,
                lower_level,
                lower_level_sst_ids,
                ..
            }) => self.compact_two_levels(
                &snapshot,
                upper_level,
                upper_level_sst_ids,
                lower_level,
                lower_level_sst_ids,
                task.compact_to_bottom_level(),
            ),
            CompactionTask::Tiered(TieredCompactionTask { tiers, .. }) => {
                ...
            }
        }
    }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant