Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,10 +605,16 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() {
{
if (_opts.runtime_state &&
_opts.runtime_state->query_options().enable_inverted_index_query &&
has_index_in_iterators()) {
(has_index_in_iterators() || !_common_expr_ctxs_push_down.empty())) {
SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer);
size_t input_rows = _row_bitmap.cardinality();
RETURN_IF_ERROR(_apply_inverted_index());
// Only apply column-level inverted index if we have iterators
if (has_index_in_iterators()) {
RETURN_IF_ERROR(_apply_inverted_index());
}
// Always apply expr-level index (e.g., search expressions) if we have common_expr_pushdown
// This allows search expressions with variant subcolumns to be evaluated even when
// the segment doesn't have all subcolumns
RETURN_IF_ERROR(_apply_index_expr());
for (auto it = _common_expr_ctxs_push_down.begin();
it != _common_expr_ctxs_push_down.end();) {
Expand Down
87 changes: 49 additions & 38 deletions be/src/vec/exprs/vsearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,39 +48,63 @@ Status collect_search_inputs(const VSearchExpr& expr, VExprContext* context,

auto index_context = context->get_inverted_index_context();
if (index_context == nullptr) {
return Status::OK();
LOG(WARNING) << "collect_search_inputs: No inverted index context available";
return Status::InternalError("No inverted index context available");
}

// Get field bindings for variant subcolumn support
const auto& search_param = expr.get_search_param();
const auto& field_bindings = search_param.field_bindings;

int child_index = 0; // Index for iterating through children
for (const auto& child : expr.children()) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
int column_id = column_slot_ref->column_id();
auto* iterator = index_context->get_inverted_index_iterator_by_column_id(column_id);
if (iterator == nullptr) {
continue;

// Determine the field_name from field_bindings (for variant subcolumns)
// field_bindings and children should have the same order
std::string field_name;
if (child_index < field_bindings.size()) {
// Use field_name from binding (may include "parent.subcolumn" for variant)
field_name = field_bindings[child_index].field_name;
} else {
// Fallback to column_name if binding not found
field_name = column_slot_ref->column_name();
}

const auto* storage_name_type =
index_context->get_storage_name_and_type_by_column_id(column_id);
if (storage_name_type == nullptr) {
auto err_msg = fmt::format(
"storage_name_type cannot be found for column {} while in {} evaluate",
column_id, expr.expr_name());
LOG(ERROR) << err_msg;
return Status::InternalError(err_msg);
// Only collect fields that have iterators (materialized columns with indexes)
if (iterator != nullptr) {
const auto* storage_name_type =
index_context->get_storage_name_and_type_by_column_id(column_id);
if (storage_name_type == nullptr) {
return Status::InternalError("storage_name_type not found for column {} in {}",
column_id, expr.expr_name());
}

bundle->iterators.emplace(field_name, iterator);
bundle->field_types.emplace(field_name, *storage_name_type);
bundle->column_ids.emplace_back(column_id);
}

auto column_name = column_slot_ref->column_name();
bundle->iterators.emplace(column_name, iterator);
bundle->field_types.emplace(column_name, *storage_name_type);
bundle->column_ids.emplace_back(column_id);
child_index++;
} else if (child->is_literal()) {
auto* literal = assert_cast<VLiteral*>(child.get());
bundle->literal_args.emplace_back(literal->get_column_ptr(), literal->get_data_type(),
literal->expr_name());
} else {
LOG(WARNING) << "VSearchExpr: Unsupported child node type encountered";
return Status::InvalidArgument("search expression child type unsupported");
// Check if this is ElementAt expression (for variant subcolumn access)
if (child->expr_name() == "element_at" && child_index < field_bindings.size() &&
field_bindings[child_index].__isset.is_variant_subcolumn &&
field_bindings[child_index].is_variant_subcolumn) {
Comment on lines +97 to +100
Copy link

Copilot AI Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded string 'element_at' should be defined as a constant to avoid magic strings and improve maintainability.

Copilot uses AI. Check for mistakes.
// Variant subcolumn not materialized - skip, will create empty BitmapQuery in function_search
child_index++;
continue;
}

// Not a supported child type
return Status::InvalidArgument("Unsupported child node type: {}", child->expr_name());
}
}

Expand All @@ -94,16 +118,6 @@ VSearchExpr::VSearchExpr(const TExprNode& node) : VExpr(node) {
_search_param = node.search_param;
_original_dsl = _search_param.original_dsl;
}

LOG(INFO) << "VSearchExpr constructor: dsl='" << _original_dsl
<< "', num_children=" << node.num_children
<< ", has_search_param=" << node.__isset.search_param
<< ", children_size=" << _children.size();

for (size_t i = 0; i < _children.size(); i++) {
LOG(INFO) << "VSearchExpr constructor: child[" << i
<< "] expr_name=" << _children[i]->expr_name();
}
}

const std::string& VSearchExpr::expr_name() const {
Expand All @@ -120,7 +134,7 @@ Status VSearchExpr::execute(VExprContext* context, Block* block, int* result_col
}

Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
LOG(INFO) << "VSearchExpr::evaluate_inverted_index called with DSL: " << _original_dsl;
LOG(INFO) << "VSearchExpr::evaluate_inverted_index called, DSL: " << _search_param.original_dsl;

if (_search_param.original_dsl.empty()) {
return Status::InvalidArgument("search DSL is empty");
Expand All @@ -135,8 +149,14 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm
SearchInputBundle bundle;
RETURN_IF_ERROR(collect_search_inputs(*this, context, &bundle));

VLOG_DEBUG << "VSearchExpr: bundle.iterators.size()=" << bundle.iterators.size();

if (bundle.iterators.empty()) {
LOG(WARNING) << "VSearchExpr: No indexed columns available for evaluation";
LOG(WARNING) << "VSearchExpr: No indexed columns available for evaluation, DSL: "
<< _original_dsl;
auto empty_bitmap = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
std::make_shared<roaring::Roaring>());
index_context->set_inverted_index_result_for_expr(this, std::move(empty_bitmap));
return Status::OK();
}

Expand All @@ -155,15 +175,6 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm
index_context->set_true_for_inverted_index_status(this, column_id);
}

const auto& data_bitmap = result_bitmap.get_data_bitmap();
const uint64_t match_count = data_bitmap ? data_bitmap->cardinality() : 0;
if (match_count > 0) {
LOG(INFO) << "VSearchExpr: Found " << match_count
<< " matching rows for DSL: " << _search_param.original_dsl;
} else {
LOG(INFO) << "VSearchExpr: No matches found for DSL: " << _search_param.original_dsl;
}

return Status::OK();
}

Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/exprs/vsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class VSearchExpr : public VExpr {

bool can_push_down_to_index() const override { return true; }

const TSearchParam& get_search_param() const { return _search_param; }

private:
TSearchParam _search_param;
std::string _original_dsl;
Expand Down
61 changes: 51 additions & 10 deletions be/src/vec/functions/function_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "olap/rowset/segment_v2/index_file_reader.h"
#include "olap/rowset/segment_v2/index_query_context.h"
#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/bitmap_query/bitmap_query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h"
Expand All @@ -52,8 +53,21 @@ Status FieldReaderResolver::resolve(const std::string& field_name,
InvertedIndexQueryType query_type,
FieldReaderBinding* binding) {
DCHECK(binding != nullptr);

// Check if this is a variant subcolumn
bool is_variant_sub = is_variant_subcolumn(field_name);

auto data_it = _data_type_with_names.find(field_name);
if (data_it == _data_type_with_names.end()) {
// For variant subcolumns, not finding the index is normal (the subcolumn may not exist in this segment)
// Return OK but with null binding to signal "no match"
if (is_variant_sub) {
VLOG_DEBUG << "Variant subcolumn '" << field_name
<< "' not found in this segment, treating as no match";
*binding = FieldReaderBinding();
return Status::OK();
}
// For normal fields, this is an error
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"field '{}' not found in inverted index metadata", field_name);
}
Expand All @@ -69,6 +83,13 @@ Status FieldReaderResolver::resolve(const std::string& field_name,

auto iterator_it = _iterators.find(field_name);
if (iterator_it == _iterators.end() || iterator_it->second == nullptr) {
// For variant subcolumns, not finding the iterator is normal
if (is_variant_sub) {
VLOG_DEBUG << "Variant subcolumn '" << field_name
<< "' iterator not found in this segment, treating as no match";
*binding = FieldReaderBinding();
return Status::OK();
}
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"iterator not found for field '{}'", field_name);
}
Expand Down Expand Up @@ -171,27 +192,31 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
data_type_with_names,
std::unordered_map<std::string, IndexIterator*> iterators, uint32_t num_rows,
InvertedIndexResultBitmap& bitmap_result) const {
VLOG_DEBUG << "search: Processing DSL '" << search_param.original_dsl << "' with "
<< data_type_with_names.size() << " indexed columns and " << iterators.size()
<< " iterators";

if (iterators.empty() || data_type_with_names.empty()) {
LOG(INFO) << "No indexed columns or iterators available, returning empty result";
LOG(INFO) << "No indexed columns or iterators available, returning empty result, dsl:"
<< search_param.original_dsl;
bitmap_result = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
std::make_shared<roaring::Roaring>());
Comment on lines 195 to +199
Copy link

Copilot AI Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The creation of an empty InvertedIndexResultBitmap is duplicated in multiple places. Consider extracting this into a helper function to reduce code duplication.

Copilot uses AI. Check for mistakes.
return Status::OK();
}

auto context = std::make_shared<IndexQueryContext>();
context->collection_statistics = std::make_shared<CollectionStatistics>();
context->collection_similarity = std::make_shared<CollectionSimilarity>();

FieldReaderResolver resolver(data_type_with_names, iterators, context);
// Pass field_bindings to resolver for variant subcolumn detection
FieldReaderResolver resolver(data_type_with_names, iterators, context,
search_param.field_bindings);

query_v2::QueryPtr root_query;
std::string root_binding_key;
RETURN_IF_ERROR(build_query_recursive(*this, search_param.root, context, resolver, &root_query,
&root_binding_key));
if (root_query == nullptr) {
LOG(INFO) << "search: Query tree resolved to empty query";
LOG(INFO) << "search: Query tree resolved to empty query, dsl:"
<< search_param.original_dsl;
bitmap_result = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
std::make_shared<roaring::Roaring>());
return Status::OK();
}

Expand Down Expand Up @@ -389,9 +414,12 @@ Status FunctionSearch::build_query_recursive(const FunctionSearch& function,
std::string child_binding_key;
RETURN_IF_ERROR(build_query_recursive(function, child_clause, context, resolver,
&child_query, &child_binding_key));
if (child_query != nullptr) {
builder.add(child_query, std::move(child_binding_key));
}
// Add all children including empty BitmapQuery
// BooleanQuery will handle the logic:
// - AND with empty bitmap → result is empty
// - OR with empty bitmap → empty bitmap is ignored by OR logic
// - NOT with empty bitmap → NOT(empty) = all rows (handled by BooleanQuery)
builder.add(child_query, std::move(child_binding_key));
}
}

Expand Down Expand Up @@ -425,6 +453,19 @@ Status FunctionSearch::build_leaf_query(const FunctionSearch& function, const TS

FieldReaderBinding binding;
RETURN_IF_ERROR(resolver.resolve(field_name, query_type, &binding));

// Check if binding is empty (variant subcolumn not found in this segment)
if (binding.lucene_reader == nullptr) {
VLOG_DEBUG << "build_leaf_query: Variant subcolumn '" << field_name
<< "' has no index in this segment, creating empty BitmapQuery (no matches)";
// Variant subcolumn doesn't exist - create empty BitmapQuery (no matches)
*out = std::make_shared<query_v2::BitmapQuery>(roaring::Roaring());
if (binding_key) {
binding_key->clear();
}
return Status::OK();
}

if (binding_key) {
*binding_key = binding.binding_key;
}
Expand Down
20 changes: 18 additions & 2 deletions be/src/vec/functions/function_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,28 @@ class FieldReaderResolver {
const std::unordered_map<std::string, vectorized::IndexFieldNameAndTypePair>&
data_type_with_names,
const std::unordered_map<std::string, IndexIterator*>& iterators,
std::shared_ptr<IndexQueryContext> context)
std::shared_ptr<IndexQueryContext> context,
const std::vector<TSearchFieldBinding>& field_bindings = {})
: _data_type_with_names(data_type_with_names),
_iterators(iterators),
_context(std::move(context)) {}
_context(std::move(context)),
_field_bindings(field_bindings) {
// Build a lookup map for quick variant subcolumn checks
for (const auto& binding : _field_bindings) {
if (binding.__isset.is_variant_subcolumn && binding.is_variant_subcolumn) {
_variant_subcolumn_fields.insert(binding.field_name);
}
}
}

Status resolve(const std::string& field_name, InvertedIndexQueryType query_type,
FieldReaderBinding* binding);

// Check if a field is a variant subcolumn
bool is_variant_subcolumn(const std::string& field_name) const {
return _variant_subcolumn_fields.count(field_name) > 0;
}

const std::vector<std::shared_ptr<lucene::index::IndexReader>>& readers() const {
return _readers;
}
Expand Down Expand Up @@ -94,6 +108,8 @@ class FieldReaderResolver {
_data_type_with_names;
const std::unordered_map<std::string, IndexIterator*>& _iterators;
std::shared_ptr<IndexQueryContext> _context;
std::vector<TSearchFieldBinding> _field_bindings;
std::unordered_set<std::string> _variant_subcolumn_fields;
std::unordered_map<std::string, FieldReaderBinding> _cache;
std::vector<std::shared_ptr<lucene::index::IndexReader>> _readers;
std::unordered_map<std::string, std::shared_ptr<lucene::index::IndexReader>> _binding_readers;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ NOT : 'NOT' | 'not' | '!' ;
LPAREN : '(' ;
RPAREN : ')' ;
COLON : ':' ;
DOT : '.' ; // Support for variant subcolumn access (e.g., field.subcolumn)

QUOTED : '"' QUOTED_CHAR* '"' ;
TERM : TERM_START_CHAR TERM_CHAR* ;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ orClause : andClause (OR andClause)* ;
andClause : notClause (AND notClause)* ;
notClause : NOT atomClause | atomClause ;
atomClause : LPAREN clause RPAREN | fieldQuery ;
fieldQuery : fieldName COLON searchValue ;
fieldName : TERM | QUOTED ;

// Support for variant subcolumn paths (e.g., field.subcolumn, field.sub1.sub2)
fieldQuery : fieldPath COLON searchValue ;
fieldPath : fieldSegment (DOT fieldSegment)* ;
fieldSegment : TERM | QUOTED ;

searchValue
: TERM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,29 @@ private TSearchParam buildThriftParam() {
for (int i = 0; i < qsPlan.fieldBindings.size(); i++) {
SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(i);
TSearchFieldBinding thriftBinding = new TSearchFieldBinding();
thriftBinding.setFieldName(binding.fieldName);

String fieldPath = binding.fieldName;
thriftBinding.setFieldName(fieldPath);

// Check if this is a variant subcolumn (contains dot)
if (fieldPath.contains(".")) {
// Parse variant subcolumn path
int firstDotPos = fieldPath.indexOf('.');
String parentField = fieldPath.substring(0, firstDotPos);
String subcolumnPath = fieldPath.substring(firstDotPos + 1);

thriftBinding.setIsVariantSubcolumn(true);
thriftBinding.setParentFieldName(parentField);
thriftBinding.setSubcolumnPath(subcolumnPath);

LOG.info("buildThriftParam: variant subcolumn field='{}', parent='{}', subcolumn='{}'",
fieldPath, parentField, subcolumnPath);
} else {
thriftBinding.setIsVariantSubcolumn(false);
}

// Set slot index - this is the index in the children array, not the slotId
thriftBinding.setSlotIndex(i);

if (i < this.children.size() && this.children.get(i) instanceof SlotRef) {
SlotRef slotRef = (SlotRef) this.children.get(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,6 @@ public class Rewriter extends AbstractBatchJobExecutor {
custom(RuleType.ADJUST_CONJUNCTS_RETURN_TYPE, AdjustConjunctsReturnType::new),
bottomUp(
new ExpressionRewrite(CheckLegalityAfterRewrite.INSTANCE),
new RewriteSearchToSlots(),
new CheckMatchExpression(),
new CheckMultiDistinct(),
new CheckRestorePartition(),
Expand Down Expand Up @@ -899,6 +898,12 @@ private static List<RewriteJob> getWholeTreeRewriteJobs(
rewriteJobs.addAll(jobs(topic("split multi distinct",
custom(RuleType.DISTINCT_AGG_STRATEGY_SELECTOR, () -> DistinctAggStrategySelector.INSTANCE))));

// Rewrite search function before VariantSubPathPruning
// so that ElementAt expressions from search can be processed
rewriteJobs.addAll(jobs(
bottomUp(new RewriteSearchToSlots())
));

if (needSubPathPushDown) {
rewriteJobs.addAll(jobs(
topic("variant element_at push down",
Expand Down
Loading
Loading