-
Notifications
You must be signed in to change notification settings - Fork 3.6k
[feature](search) add variant subcolumn support for search function #56718
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,39 +48,63 @@ Status collect_search_inputs(const VSearchExpr& expr, VExprContext* context, | |
|
|
||
| auto index_context = context->get_inverted_index_context(); | ||
| if (index_context == nullptr) { | ||
| return Status::OK(); | ||
| LOG(WARNING) << "collect_search_inputs: No inverted index context available"; | ||
| return Status::InternalError("No inverted index context available"); | ||
| } | ||
|
|
||
| // Get field bindings for variant subcolumn support | ||
| const auto& search_param = expr.get_search_param(); | ||
| const auto& field_bindings = search_param.field_bindings; | ||
|
|
||
| int child_index = 0; // Index for iterating through children | ||
| for (const auto& child : expr.children()) { | ||
| if (child->is_slot_ref()) { | ||
| auto* column_slot_ref = assert_cast<VSlotRef*>(child.get()); | ||
| int column_id = column_slot_ref->column_id(); | ||
| auto* iterator = index_context->get_inverted_index_iterator_by_column_id(column_id); | ||
| if (iterator == nullptr) { | ||
| continue; | ||
|
|
||
| // Determine the field_name from field_bindings (for variant subcolumns) | ||
| // field_bindings and children should have the same order | ||
| std::string field_name; | ||
| if (child_index < field_bindings.size()) { | ||
| // Use field_name from binding (may include "parent.subcolumn" for variant) | ||
| field_name = field_bindings[child_index].field_name; | ||
| } else { | ||
| // Fallback to column_name if binding not found | ||
| field_name = column_slot_ref->column_name(); | ||
| } | ||
|
|
||
| const auto* storage_name_type = | ||
| index_context->get_storage_name_and_type_by_column_id(column_id); | ||
| if (storage_name_type == nullptr) { | ||
| auto err_msg = fmt::format( | ||
| "storage_name_type cannot be found for column {} while in {} evaluate", | ||
| column_id, expr.expr_name()); | ||
| LOG(ERROR) << err_msg; | ||
| return Status::InternalError(err_msg); | ||
| // Only collect fields that have iterators (materialized columns with indexes) | ||
| if (iterator != nullptr) { | ||
| const auto* storage_name_type = | ||
| index_context->get_storage_name_and_type_by_column_id(column_id); | ||
| if (storage_name_type == nullptr) { | ||
| return Status::InternalError("storage_name_type not found for column {} in {}", | ||
| column_id, expr.expr_name()); | ||
| } | ||
|
|
||
| bundle->iterators.emplace(field_name, iterator); | ||
| bundle->field_types.emplace(field_name, *storage_name_type); | ||
| bundle->column_ids.emplace_back(column_id); | ||
| } | ||
|
|
||
| auto column_name = column_slot_ref->column_name(); | ||
| bundle->iterators.emplace(column_name, iterator); | ||
| bundle->field_types.emplace(column_name, *storage_name_type); | ||
| bundle->column_ids.emplace_back(column_id); | ||
| child_index++; | ||
| } else if (child->is_literal()) { | ||
| auto* literal = assert_cast<VLiteral*>(child.get()); | ||
| bundle->literal_args.emplace_back(literal->get_column_ptr(), literal->get_data_type(), | ||
| literal->expr_name()); | ||
| } else { | ||
| LOG(WARNING) << "VSearchExpr: Unsupported child node type encountered"; | ||
| return Status::InvalidArgument("search expression child type unsupported"); | ||
| // Check if this is ElementAt expression (for variant subcolumn access) | ||
| if (child->expr_name() == "element_at" && child_index < field_bindings.size() && | ||
| field_bindings[child_index].__isset.is_variant_subcolumn && | ||
| field_bindings[child_index].is_variant_subcolumn) { | ||
|
Comment on lines
+97
to
+100
|
||
| // Variant subcolumn not materialized - skip, will create empty BitmapQuery in function_search | ||
| child_index++; | ||
| continue; | ||
| } | ||
|
|
||
| // Not a supported child type | ||
| return Status::InvalidArgument("Unsupported child node type: {}", child->expr_name()); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -94,16 +118,6 @@ VSearchExpr::VSearchExpr(const TExprNode& node) : VExpr(node) { | |
| _search_param = node.search_param; | ||
| _original_dsl = _search_param.original_dsl; | ||
| } | ||
|
|
||
| LOG(INFO) << "VSearchExpr constructor: dsl='" << _original_dsl | ||
| << "', num_children=" << node.num_children | ||
| << ", has_search_param=" << node.__isset.search_param | ||
| << ", children_size=" << _children.size(); | ||
|
|
||
| for (size_t i = 0; i < _children.size(); i++) { | ||
| LOG(INFO) << "VSearchExpr constructor: child[" << i | ||
| << "] expr_name=" << _children[i]->expr_name(); | ||
| } | ||
| } | ||
|
|
||
| const std::string& VSearchExpr::expr_name() const { | ||
|
|
@@ -120,7 +134,7 @@ Status VSearchExpr::execute(VExprContext* context, Block* block, int* result_col | |
| } | ||
|
|
||
| Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) { | ||
| LOG(INFO) << "VSearchExpr::evaluate_inverted_index called with DSL: " << _original_dsl; | ||
| LOG(INFO) << "VSearchExpr::evaluate_inverted_index called, DSL: " << _search_param.original_dsl; | ||
|
|
||
| if (_search_param.original_dsl.empty()) { | ||
| return Status::InvalidArgument("search DSL is empty"); | ||
|
|
@@ -135,8 +149,14 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm | |
| SearchInputBundle bundle; | ||
| RETURN_IF_ERROR(collect_search_inputs(*this, context, &bundle)); | ||
|
|
||
| VLOG_DEBUG << "VSearchExpr: bundle.iterators.size()=" << bundle.iterators.size(); | ||
|
|
||
| if (bundle.iterators.empty()) { | ||
| LOG(WARNING) << "VSearchExpr: No indexed columns available for evaluation"; | ||
| LOG(WARNING) << "VSearchExpr: No indexed columns available for evaluation, DSL: " | ||
| << _original_dsl; | ||
| auto empty_bitmap = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(), | ||
| std::make_shared<roaring::Roaring>()); | ||
| index_context->set_inverted_index_result_for_expr(this, std::move(empty_bitmap)); | ||
| return Status::OK(); | ||
| } | ||
|
|
||
|
|
@@ -155,15 +175,6 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm | |
| index_context->set_true_for_inverted_index_status(this, column_id); | ||
| } | ||
|
|
||
| const auto& data_bitmap = result_bitmap.get_data_bitmap(); | ||
| const uint64_t match_count = data_bitmap ? data_bitmap->cardinality() : 0; | ||
| if (match_count > 0) { | ||
| LOG(INFO) << "VSearchExpr: Found " << match_count | ||
| << " matching rows for DSL: " << _search_param.original_dsl; | ||
| } else { | ||
| LOG(INFO) << "VSearchExpr: No matches found for DSL: " << _search_param.original_dsl; | ||
| } | ||
|
|
||
| return Status::OK(); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,7 @@ | |
| #include "olap/rowset/segment_v2/index_file_reader.h" | ||
| #include "olap/rowset/segment_v2/index_query_context.h" | ||
| #include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h" | ||
| #include "olap/rowset/segment_v2/inverted_index/query_v2/bitmap_query/bitmap_query.h" | ||
| #include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query.h" | ||
| #include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h" | ||
| #include "olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h" | ||
|
|
@@ -52,8 +53,21 @@ Status FieldReaderResolver::resolve(const std::string& field_name, | |
| InvertedIndexQueryType query_type, | ||
| FieldReaderBinding* binding) { | ||
| DCHECK(binding != nullptr); | ||
|
|
||
| // Check if this is a variant subcolumn | ||
| bool is_variant_sub = is_variant_subcolumn(field_name); | ||
|
|
||
| auto data_it = _data_type_with_names.find(field_name); | ||
| if (data_it == _data_type_with_names.end()) { | ||
| // For variant subcolumns, not finding the index is normal (the subcolumn may not exist in this segment) | ||
| // Return OK but with null binding to signal "no match" | ||
| if (is_variant_sub) { | ||
| VLOG_DEBUG << "Variant subcolumn '" << field_name | ||
| << "' not found in this segment, treating as no match"; | ||
| *binding = FieldReaderBinding(); | ||
| return Status::OK(); | ||
| } | ||
| // For normal fields, this is an error | ||
| return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( | ||
| "field '{}' not found in inverted index metadata", field_name); | ||
| } | ||
|
|
@@ -69,6 +83,13 @@ Status FieldReaderResolver::resolve(const std::string& field_name, | |
|
|
||
| auto iterator_it = _iterators.find(field_name); | ||
| if (iterator_it == _iterators.end() || iterator_it->second == nullptr) { | ||
| // For variant subcolumns, not finding the iterator is normal | ||
| if (is_variant_sub) { | ||
| VLOG_DEBUG << "Variant subcolumn '" << field_name | ||
| << "' iterator not found in this segment, treating as no match"; | ||
| *binding = FieldReaderBinding(); | ||
| return Status::OK(); | ||
| } | ||
| return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( | ||
| "iterator not found for field '{}'", field_name); | ||
| } | ||
|
|
@@ -171,27 +192,31 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param( | |
| data_type_with_names, | ||
| std::unordered_map<std::string, IndexIterator*> iterators, uint32_t num_rows, | ||
| InvertedIndexResultBitmap& bitmap_result) const { | ||
| VLOG_DEBUG << "search: Processing DSL '" << search_param.original_dsl << "' with " | ||
| << data_type_with_names.size() << " indexed columns and " << iterators.size() | ||
| << " iterators"; | ||
|
|
||
| if (iterators.empty() || data_type_with_names.empty()) { | ||
| LOG(INFO) << "No indexed columns or iterators available, returning empty result"; | ||
| LOG(INFO) << "No indexed columns or iterators available, returning empty result, dsl:" | ||
| << search_param.original_dsl; | ||
| bitmap_result = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(), | ||
| std::make_shared<roaring::Roaring>()); | ||
|
Comment on lines
195
to
+199
|
||
| return Status::OK(); | ||
| } | ||
|
|
||
| auto context = std::make_shared<IndexQueryContext>(); | ||
| context->collection_statistics = std::make_shared<CollectionStatistics>(); | ||
| context->collection_similarity = std::make_shared<CollectionSimilarity>(); | ||
|
|
||
| FieldReaderResolver resolver(data_type_with_names, iterators, context); | ||
| // Pass field_bindings to resolver for variant subcolumn detection | ||
| FieldReaderResolver resolver(data_type_with_names, iterators, context, | ||
| search_param.field_bindings); | ||
|
|
||
| query_v2::QueryPtr root_query; | ||
| std::string root_binding_key; | ||
| RETURN_IF_ERROR(build_query_recursive(*this, search_param.root, context, resolver, &root_query, | ||
| &root_binding_key)); | ||
| if (root_query == nullptr) { | ||
| LOG(INFO) << "search: Query tree resolved to empty query"; | ||
| LOG(INFO) << "search: Query tree resolved to empty query, dsl:" | ||
| << search_param.original_dsl; | ||
| bitmap_result = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(), | ||
| std::make_shared<roaring::Roaring>()); | ||
| return Status::OK(); | ||
| } | ||
|
|
||
|
|
@@ -389,9 +414,12 @@ Status FunctionSearch::build_query_recursive(const FunctionSearch& function, | |
| std::string child_binding_key; | ||
| RETURN_IF_ERROR(build_query_recursive(function, child_clause, context, resolver, | ||
| &child_query, &child_binding_key)); | ||
| if (child_query != nullptr) { | ||
| builder.add(child_query, std::move(child_binding_key)); | ||
| } | ||
| // Add all children including empty BitmapQuery | ||
| // BooleanQuery will handle the logic: | ||
| // - AND with empty bitmap → result is empty | ||
| // - OR with empty bitmap → empty bitmap is ignored by OR logic | ||
| // - NOT with empty bitmap → NOT(empty) = all rows (handled by BooleanQuery) | ||
| builder.add(child_query, std::move(child_binding_key)); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -425,6 +453,19 @@ Status FunctionSearch::build_leaf_query(const FunctionSearch& function, const TS | |
|
|
||
| FieldReaderBinding binding; | ||
| RETURN_IF_ERROR(resolver.resolve(field_name, query_type, &binding)); | ||
|
|
||
| // Check if binding is empty (variant subcolumn not found in this segment) | ||
| if (binding.lucene_reader == nullptr) { | ||
| VLOG_DEBUG << "build_leaf_query: Variant subcolumn '" << field_name | ||
| << "' has no index in this segment, creating empty BitmapQuery (no matches)"; | ||
| // Variant subcolumn doesn't exist - create empty BitmapQuery (no matches) | ||
| *out = std::make_shared<query_v2::BitmapQuery>(roaring::Roaring()); | ||
| if (binding_key) { | ||
| binding_key->clear(); | ||
| } | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| if (binding_key) { | ||
| *binding_key = binding.binding_key; | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.