|
| 1 | +// Copyright (c) 2024-present, fjall-rs |
| 2 | +// This source code is licensed under both the Apache 2.0 and MIT License |
| 3 | +// (found in the LICENSE-* files in the repository) |
| 4 | + |
| 5 | +use super::Tree; |
| 6 | +use crate::{ |
| 7 | + file::SEGMENTS_FOLDER, |
| 8 | + segment::{block_index::BlockIndexImpl, multi_writer::MultiWriter, SegmentInner}, |
| 9 | + AbstractTree, Segment, UserKey, UserValue, ValueType, |
| 10 | +}; |
| 11 | +use std::{ |
| 12 | + path::PathBuf, |
| 13 | + sync::{atomic::AtomicBool, Arc}, |
| 14 | +}; |
| 15 | + |
| 16 | +pub struct Ingestion<'a> { |
| 17 | + folder: PathBuf, |
| 18 | + tree: &'a Tree, |
| 19 | + writer: MultiWriter, |
| 20 | +} |
| 21 | + |
| 22 | +impl<'a> Ingestion<'a> { |
| 23 | + pub fn new(tree: &'a Tree) -> crate::Result<Self> { |
| 24 | + assert_eq!( |
| 25 | + 0, |
| 26 | + tree.segment_count(), |
| 27 | + "can only perform bulk_ingest on empty trees", |
| 28 | + ); |
| 29 | + |
| 30 | + let folder = tree.config.path.join(SEGMENTS_FOLDER); |
| 31 | + log::debug!("Ingesting into disk segments in {folder:?}"); |
| 32 | + |
| 33 | + let mut writer = MultiWriter::new( |
| 34 | + tree.segment_id_counter.clone(), |
| 35 | + 128 * 1_024 * 1_024, |
| 36 | + crate::segment::writer::Options { |
| 37 | + folder: folder.clone(), |
| 38 | + data_block_size: tree.config.data_block_size, |
| 39 | + index_block_size: tree.config.index_block_size, |
| 40 | + segment_id: 0, /* TODO: unused */ |
| 41 | + }, |
| 42 | + )? |
| 43 | + .use_compression(tree.config.compression); |
| 44 | + |
| 45 | + { |
| 46 | + use crate::segment::writer::BloomConstructionPolicy; |
| 47 | + |
| 48 | + if tree.config.bloom_bits_per_key >= 0 { |
| 49 | + writer = writer.use_bloom_policy(BloomConstructionPolicy::BitsPerKey( |
| 50 | + tree.config.bloom_bits_per_key.unsigned_abs(), |
| 51 | + )); |
| 52 | + } else { |
| 53 | + writer = writer.use_bloom_policy(BloomConstructionPolicy::BitsPerKey(0)); |
| 54 | + } |
| 55 | + } |
| 56 | + |
| 57 | + Ok(Self { |
| 58 | + folder, |
| 59 | + tree, |
| 60 | + writer, |
| 61 | + }) |
| 62 | + } |
| 63 | + |
| 64 | + pub fn write(&mut self, key: UserKey, value: UserValue) -> crate::Result<()> { |
| 65 | + self.writer.write(crate::InternalValue::from_components( |
| 66 | + key, |
| 67 | + value, |
| 68 | + 0, |
| 69 | + ValueType::Value, |
| 70 | + )) |
| 71 | + } |
| 72 | + |
| 73 | + pub fn finish(self) -> crate::Result<()> { |
| 74 | + use crate::{ |
| 75 | + compaction::MoveDown, segment::block_index::two_level_index::TwoLevelBlockIndex, |
| 76 | + }; |
| 77 | + |
| 78 | + let results = self.writer.finish()?; |
| 79 | + |
| 80 | + let created_segments = results |
| 81 | + .into_iter() |
| 82 | + .map(|trailer| -> crate::Result<Segment> { |
| 83 | + let segment_id = trailer.metadata.id; |
| 84 | + let segment_file_path = self.folder.join(segment_id.to_string()); |
| 85 | + |
| 86 | + let block_index = TwoLevelBlockIndex::from_file( |
| 87 | + &segment_file_path, |
| 88 | + &trailer.metadata, |
| 89 | + trailer.offsets.tli_ptr, |
| 90 | + (self.tree.id, segment_id).into(), |
| 91 | + self.tree.config.descriptor_table.clone(), |
| 92 | + self.tree.config.block_cache.clone(), |
| 93 | + )?; |
| 94 | + let block_index = BlockIndexImpl::TwoLevel(block_index); |
| 95 | + let block_index = Arc::new(block_index); |
| 96 | + |
| 97 | + Ok(SegmentInner { |
| 98 | + tree_id: self.tree.id, |
| 99 | + |
| 100 | + descriptor_table: self.tree.config.descriptor_table.clone(), |
| 101 | + block_cache: self.tree.config.block_cache.clone(), |
| 102 | + |
| 103 | + metadata: trailer.metadata, |
| 104 | + offsets: trailer.offsets, |
| 105 | + |
| 106 | + #[allow(clippy::needless_borrows_for_generic_args)] |
| 107 | + block_index, |
| 108 | + |
| 109 | + bloom_filter: Segment::load_bloom( |
| 110 | + &segment_file_path, |
| 111 | + trailer.offsets.bloom_ptr, |
| 112 | + )?, |
| 113 | + |
| 114 | + path: segment_file_path, |
| 115 | + is_deleted: AtomicBool::default(), |
| 116 | + } |
| 117 | + .into()) |
| 118 | + }) |
| 119 | + .collect::<crate::Result<Vec<_>>>()?; |
| 120 | + |
| 121 | + self.tree.register_segments(&created_segments)?; |
| 122 | + |
| 123 | + self.tree.compact(Arc::new(MoveDown(0, 6)), 0)?; |
| 124 | + |
| 125 | + for segment in &created_segments { |
| 126 | + let segment_file_path = self.folder.join(segment.id().to_string()); |
| 127 | + |
| 128 | + self.tree |
| 129 | + .config |
| 130 | + .descriptor_table |
| 131 | + .insert(&segment_file_path, segment.global_id()); |
| 132 | + } |
| 133 | + |
| 134 | + Ok(()) |
| 135 | + } |
| 136 | +} |
0 commit comments