Skip to content

Commit 42cc778

Browse files
committed
refactor: impl new BloomFilter
Signed-off-by: Kould <[email protected]>
1 parent 272c69f commit 42cc778

File tree

22 files changed

+563
-160
lines changed

22 files changed

+563
-160
lines changed

src/query/service/src/interpreters/interpreter_table_show_create.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use databend_common_expression::ComputedExpr;
2727
use databend_common_expression::DataBlock;
2828
use databend_common_expression::Scalar;
2929
use databend_common_expression::Value;
30+
use databend_common_meta_app::schema::TableIndexType;
3031
use databend_common_sql::plans::ShowCreateTablePlan;
3132
use databend_common_storages_fuse::FUSE_OPT_KEY_ATTACH_COLUMN_IDS;
3233
use databend_common_storages_stream::stream_table::StreamTable;
@@ -242,9 +243,14 @@ impl ShowCreateTableInterpreter {
242243
let option = format!("{} = '{}'", key, value);
243244
options.push(option);
244245
}
246+
let index_type = match index_field.index_type {
247+
TableIndexType::Inverted => "INVERTED",
248+
TableIndexType::Ngram => "NGRAM",
249+
};
245250
let mut index_str = format!(
246-
" {} INVERTED INDEX {} ({})",
251+
" {} {} INDEX {} ({})",
247252
sync,
253+
index_type,
248254
display_ident(
249255
&index_field.name,
250256
force_quoted_ident,

src/query/service/src/test_kits/block_writer.rs

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ impl<'a> BlockWriter<'a> {
109109
bloom_filter_index_size,
110110
None,
111111
None,
112+
None,
112113
Compression::Lz4Raw,
113114
Some(Utc::now()),
114115
);

src/query/sql/src/planner/binder/ddl/index.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ use crate::MetadataRef;
6565
use crate::RefreshAggregatingIndexRewriter;
6666
use crate::SUPPORTED_AGGREGATING_INDEX_FUNCTIONS;
6767

68-
const MAXIMUM_BLOOM_BITMAP_SIZE: usize = 128 * 1024 * 1024;
68+
const MAXIMUM_BLOOM_SIZE: usize = 1 << 30;
6969

7070
// valid values for inverted index option tokenizer
7171
static INDEX_TOKENIZER_VALUES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
@@ -598,17 +598,17 @@ impl Binder {
598598
}
599599
options.insert("gram_size".to_string(), value);
600600
}
601-
"bitmap_size" => {
601+
"bloom_size" => {
602602
match value.parse::<usize>() {
603603
Ok(num) => {
604604
if num == 0 {
605605
return Err(ErrorCode::IndexOptionInvalid(
606-
"`bitmap_size` cannot be 0",
606+
"`bloom_size` cannot be 0",
607607
));
608608
}
609-
if num > MAXIMUM_BLOOM_BITMAP_SIZE {
609+
if num > MAXIMUM_BLOOM_SIZE {
610610
return Err(ErrorCode::IndexOptionInvalid(format!(
611-
"bitmap_size: `{num}` is too large (bitmap_size is maximum: {MAXIMUM_BLOOM_BITMAP_SIZE})",
611+
"bloom_size: `{num}` is too large (bloom_size is maximum: {MAXIMUM_BLOOM_SIZE})",
612612
)));
613613
}
614614
}
@@ -618,7 +618,7 @@ impl Binder {
618618
)));
619619
}
620620
}
621-
options.insert("bitmap_size".to_string(), value);
621+
options.insert("bloom_size".to_string(), value);
622622
}
623623
_ => {
624624
return Err(ErrorCode::IndexOptionInvalid(format!(

src/query/storages/common/cache/src/cache_items.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pub use databend_common_catalog::plan::PartStatistics;
1717
pub use databend_common_catalog::plan::Partitions;
1818
pub use databend_common_catalog::table::Table;
1919
use databend_common_exception::ErrorCode;
20-
pub use databend_storages_common_index::filters::Xor8Filter;
20+
pub use databend_storages_common_index::filters::FilterImpl;
2121
pub use databend_storages_common_index::BloomIndexMeta;
2222
pub use databend_storages_common_index::InvertedIndexFile;
2323
pub use databend_storages_common_index::InvertedIndexMeta;

src/query/storages/common/cache/src/caches.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ pub type TableSnapshotCache = InMemoryLruCache<TableSnapshot>;
4545
pub type TableSnapshotStatisticCache = InMemoryLruCache<TableSnapshotStatistics>;
4646
/// In memory object cache of bloom filter.
4747
/// For each indexed data block, the bloom index filter (`FilterImpl`) of each column is cached individually
48-
pub type BloomIndexFilterCache = HybridCache<Xor8Filter>;
48+
pub type BloomIndexFilterCache = HybridCache<FilterImpl>;
4949
/// In memory object cache of parquet FileMetaData of bloom index data
5050
pub type BloomIndexMetaCache = HybridCache<BloomIndexMeta>;
5151

@@ -123,7 +123,7 @@ impl CachedObject<(PartStatistics, Partitions)> for (PartStatistics, Partitions)
123123
}
124124
}
125125

126-
impl CachedObject<Xor8Filter> for Xor8Filter {
126+
impl CachedObject<FilterImpl> for FilterImpl {
127127
type Cache = BloomIndexFilterCache;
128128
fn cache() -> Option<Self::Cache> {
129129
CacheManager::instance().get_bloom_index_filter_cache()
@@ -235,10 +235,10 @@ impl From<TableSnapshotStatistics> for CacheValue<TableSnapshotStatistics> {
235235
}
236236
}
237237

238-
impl From<Xor8Filter> for CacheValue<Xor8Filter> {
239-
fn from(value: Xor8Filter) -> Self {
238+
impl From<FilterImpl> for CacheValue<FilterImpl> {
239+
fn from(value: FilterImpl) -> Self {
240240
CacheValue {
241-
mem_bytes: std::mem::size_of::<Xor8Filter>() + value.filter.finger_prints.len(),
241+
mem_bytes: value.mem_bytes(),
242242
inner: Arc::new(value),
243243
}
244244
}

0 commit comments

Comments
 (0)