From 49322b0bd7d48bd2cc6ae471cda683f395e7cae6 Mon Sep 17 00:00:00 2001
From: Saksham
Date: Tue, 11 Feb 2025 16:33:35 +0100
Subject: [PATCH] services: records: CompositeSuggestQueryParser: Filter out stopwords

---
 .../services/records/queryparser/suggest.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/invenio_records_resources/services/records/queryparser/suggest.py b/invenio_records_resources/services/records/queryparser/suggest.py
index 4c7343a1..29d5b442 100644
--- a/invenio_records_resources/services/records/queryparser/suggest.py
+++ b/invenio_records_resources/services/records/queryparser/suggest.py
@@ -78,10 +78,13 @@ def __init__(self, identity=None, extra_params=None, clauses=None, **kwargs):
             # multiple fields (e.g. full name + affiliation + affiliation acronym).
             {"type": "cross_fields", "boost": 3},
             # "bool_prefix" is useful for search-as-you-type/auto completion features.
-            # It works in conjunction with having one or more search_as_you_type fields
-            # or custom ngram-analyzed fields.
-            {"type": "bool_prefix", "boost": 2, "fuzziness": "AUTO"},
-            # "most_fields" is just here to boost results where more fields match the
+            # Ref: https://opensearch.org/docs/latest/analyzers/tokenizers/edge-n-gram/
+            # It works in conjunction with having search_as_you_type fields but for custom
+            # edge-ngram-analyzed fields it is not needed because the expansions already
+            # exist in the index, so essentially, bool_prefix is doing the same work
+            # Ref: https://opensearch.org/docs/latest/analyzers/token-filters/edge-ngram/
+            # {"type": "bool_prefix", "boost": 2, "fuzziness": "AUTO"},
+            # "most_fields" is here to boost results where more fields match the
             # query. E.g. the query "john doe acme" would match "name:(john doe)" and
             # "affiliation.acronym:(acme)", instead of a case where only one field
             # like "name:(john doe acme)" matches.