Skip to content

Commit a6893d0

Browse files
committed
SOLR-17959: Add alwaysStopwords option to edismax
1 parent b8ae627 commit a6893d0

File tree

5 files changed

+35
-5
lines changed

5 files changed

+35
-5
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
title: Add alwaysStopwords option to edismax so its "all stopwords" behaviour can be controlled
2+
type: changed
3+
authors:
4+
- name: Andy Webb
5+
links:
6+
- name: SOLR-17959
7+
url: https://issues.apache.org/jira/browse/SOLR-17959
8+
issues:
9+
- 17959

solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ private static interface DMP extends DisMaxParams {
9797

9898
/** If set to true, stopwords are removed from the query. */
9999
public static String STOPWORDS = "stopwords";
100+
101+
/** If set to true, the stopword filter is applied even when every query term is a stopword. */
102+
public static String ALWAYS_STOPWORDS = "alwaysStopwords";
100103
}
101104

102105
private ExtendedDismaxConfiguration config;
@@ -416,7 +419,7 @@ protected Query parseOriginalQuery(
416419
query = up.parse(mainUserQuery);
417420

418421
if (shouldRemoveStopFilter(config, query)) {
419-
// if the query was all stop words, remove none of them
422+
// if the query was all stopwords, remove none of them (unless alwaysStopwords is set)
420423
up.setRemoveStopFilter(true);
421424
query = up.parse(mainUserQuery);
422425
}
@@ -425,6 +428,8 @@ protected Query parseOriginalQuery(
425428
up.exceptions = false;
426429
}
427430

431+
// query may have become empty if it only contained tokenising characters or due to
432+
// stopword removal if alwaysStopwords is set
428433
if (query == null) {
429434
return null;
430435
}
@@ -447,11 +452,11 @@ protected Query parseOriginalQuery(
447452
/**
448453
* Determines if query should be re-parsed removing the stop filter.
449454
*
450-
* @return true if there are stopwords configured and the parsed query was empty false in any
451-
* other case.
455+
* @return true if stopwords are enabled, the alwaysStopwords option is not set, and the parsed
456+
* query was empty; false in any other case.
452457
*/
453458
protected boolean shouldRemoveStopFilter(ExtendedDismaxConfiguration config, Query query) {
454-
return config.stopwords && isEmpty(query);
459+
return config.stopwords && !config.alwaysStopwords && isEmpty(query);
455460
}
456461

457462
private String escapeUserQuery(List<Clause> clauses) {
@@ -1699,6 +1704,8 @@ public static class ExtendedDismaxConfiguration {
16991704

17001705
protected boolean stopwords;
17011706

1707+
protected boolean alwaysStopwords;
1708+
17021709
protected boolean mmAutoRelax;
17031710

17041711
protected String altQ;
@@ -1749,6 +1756,8 @@ public ExtendedDismaxConfiguration(
17491756

17501757
stopwords = solrParams.getBool(DMP.STOPWORDS, true);
17511758

1759+
alwaysStopwords = solrParams.getBool(DMP.ALWAYS_STOPWORDS, false);
1760+
17521761
mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);
17531762

17541763
altQ = solrParams.get(DisMaxParams.ALTQ);

solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,12 @@ public void testFocusQueryParser() {
379379
"q", "the big"),
380380
oner);
381381

382+
// by default, the stop filter is bypassed when all query terms are stopwords
383+
assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the"), oner);
384+
385+
// with alwaysStopwords set, the stop filter still applies when all query terms are stopwords
386+
assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the", "alwaysStopwords", "true"), nor);
387+
382388
// searching for a literal colon value when clearly not used for a field
383389
assertQ(
384390
"expected doc is missing (using standard)",

solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2941,6 +2941,8 @@ Spanish stemmer, Spanish words:
29412941
This filter discards, or _stops_ analysis of, tokens that are on the given stop words list.
29422942
A standard stop words list is included in the Solr `conf` directory, named `stopwords.txt`, which is appropriate for typical English language text.
29432943

2944+
Note that the xref:query-guide:edismax-query-parser.adoc[eDisMax] query parser bypasses the stop filter when all query terms are stop words, unless its `alwaysStopwords` option is enabled.
2945+
29442946
*Factory class:* `solr.StopFilterFactory`
29452947

29462948
*Arguments:*

solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ In addition to supporting all the DisMax query parser parameters, Extended DisMa
2727
* includes improved smart partial escaping in the case of syntax errors; fielded queries, +/-, and phrase queries are still supported in this mode.
2828
* improves proximity boosting by using word shingles; you do not need the query to match all words in the document before proximity boosting is applied.
2929
* includes advanced stopword handling: stopwords are not required in the mandatory part of the query but are still used in the proximity boosting part.
30-
If a query consists of all stopwords, such as "to be or not to be", then all words are required.
30+
If a query consists of all stopwords, such as "to be or not to be", then all words are required. (This behavior can be turned off by setting `alwaysStopwords` to `true`; see below.)
3131
* includes improved boost function: in Extended DisMax, the `boost` function is a multiplier xref:dismax-query-parser.adoc#bq-bf-shortcomings[rather than an addend], improving your boost results; the additive boost functions of DisMax (`bf` and `bq`) are also supported.
3232
* supports pure negative nested queries: queries such as `+foo (-foo)` will match all documents.
3333
* lets you specify which fields the end user is allowed to query, and to disallow direct fielded searches.
@@ -109,6 +109,10 @@ If not specified, `ps` is used.
109109
A Boolean parameter indicating if the `StopFilterFactory` configured in the query analyzer should be respected when parsing the query.
110110
If this is set to `false`, then the `StopFilterFactory` in the query analyzer is ignored.
111111

112+
`alwaysStopwords`::
113+
A Boolean parameter indicating that the `StopFilterFactory` configured in the query analyzer should always be respected, even if all query terms are stopwords.
114+
The default is `false`, in which case a query consisting entirely of stopwords, such as "to be or not to be", is parsed with the stop filter bypassed so that all words are required.
115+
112116
`uf`::
113117
Specifies which schema fields the end user is allowed to explicitly query and to toggle whether embedded Solr queries are supported.
114118
This parameter supports wildcards.

0 commit comments

Comments
 (0)