From 49322b0bd7d48bd2cc6ae471cda683f395e7cae6 Mon Sep 17 00:00:00 2001
From: Saksham <sakshamarora1001@gmail.com>
Date: Tue, 11 Feb 2025 16:33:35 +0100
Subject: [PATCH] services: records: CompositeSuggestQueryParser: Filter out
 stopwords

---
 .../services/records/queryparser/suggest.py           | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/invenio_records_resources/services/records/queryparser/suggest.py b/invenio_records_resources/services/records/queryparser/suggest.py
index 4c7343a1..29d5b442 100644
--- a/invenio_records_resources/services/records/queryparser/suggest.py
+++ b/invenio_records_resources/services/records/queryparser/suggest.py
@@ -78,10 +78,13 @@ def __init__(self, identity=None, extra_params=None, clauses=None, **kwargs):
             # multiple fields (e.g. full name + affiliation + affiliation acronym).
             {"type": "cross_fields", "boost": 3},
             # "bool_prefix" is useful for search-as-you-type/auto completion features.
-            # It works in conjunction with having one or more search_as_you_type fields
-            # or custom ngram-analyzed fields.
-            {"type": "bool_prefix", "boost": 2, "fuzziness": "AUTO"},
-            # "most_fields" is just here to boost results where more fields match the
+            # Ref: https://opensearch.org/docs/latest/analyzers/tokenizers/edge-n-gram/
+            # It works in conjunction with search_as_you_type fields. For custom
+            # edge-ngram-analyzed fields it is not needed: the prefix expansions already
+            # exist in the index, so bool_prefix would only duplicate that work.
+            # Ref: https://opensearch.org/docs/latest/analyzers/token-filters/edge-ngram/
+            # {"type": "bool_prefix", "boost": 2, "fuzziness": "AUTO"},
+            # "most_fields" is here to boost results where more fields match the
             # query. E.g. the query "john doe acme" would match "name:(john doe)" and
             # "affiliation.acronym:(acme)", instead of a case where only one field
             # like "name:(john doe acme)" matches.