Skip to content

Commit bce7e6f

Browse files
committed
Add tests for index-level STOPWORDS configuration
1 parent 901490f commit bce7e6f

File tree

2 files changed

+447
-0
lines changed

2 files changed

+447
-0
lines changed
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
"""Integration tests for stopwords support."""
2+
3+
import pytest
4+
5+
from redisvl.index import SearchIndex
6+
from redisvl.schema import IndexSchema
7+
8+
9+
@pytest.fixture
def stopwords_disabled_schema():
    """Return a hash-index schema dict whose ``stopwords`` entry is an empty list.

    The empty list is the schema-level spelling of ``STOPWORDS 0``. The schema
    declares two text fields, ``title`` and ``description``.
    """
    index_settings = {
        "name": "test_stopwords_disabled",
        "prefix": "test_sw_disabled:",
        "storage_type": "hash",
        "stopwords": [],  # STOPWORDS 0
    }
    text_fields = [
        {"name": "title", "type": "text"},
        {"name": "description", "type": "text"},
    ]
    return {"index": index_settings, "fields": text_fields}
24+
25+
26+
@pytest.fixture
def custom_stopwords_schema():
    """Return a hash-index schema dict with an explicit three-word stopwords list.

    The index is configured with ``["the", "a", "an"]`` and a single text
    field, ``title``.
    """
    schema_dict = {}
    schema_dict["index"] = {
        "name": "test_custom_stopwords",
        "prefix": "test_sw_custom:",
        "storage_type": "hash",
        "stopwords": ["the", "a", "an"],
    }
    schema_dict["fields"] = [{"name": "title", "type": "text"}]
    return schema_dict
40+
41+
42+
@pytest.fixture
def default_stopwords_schema():
    """Return a hash-index schema dict that leaves ``stopwords`` unset.

    Omitting the key is what the tests treat as "default stopwords". The
    schema has a single text field, ``title``.
    """
    title_field = {"name": "title", "type": "text"}
    index_settings = {
        "name": "test_default_stopwords",
        "prefix": "test_sw_default:",
        "storage_type": "hash",
    }
    return {"index": index_settings, "fields": [title_field]}
55+
56+
57+
def test_create_index_with_stopwords_disabled(client, stopwords_disabled_schema):
    """An index built with ``stopwords: []`` reports an empty stopwords_list in FT.INFO."""
    search_index = SearchIndex(
        IndexSchema.from_dict(stopwords_disabled_schema),
        redis_client=client,
    )

    try:
        search_index.create(overwrite=True, drop=True)
        assert search_index.exists()

        # The key must be present and hold an empty list.
        ft_info = client.ft(search_index.name).info()
        assert "stopwords_list" in ft_info
        assert ft_info["stopwords_list"] == []
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            search_index.delete(drop=True)
        except Exception:
            pass
79+
80+
81+
def test_create_index_with_custom_stopwords(client, custom_stopwords_schema):
    """An index built with a custom stopwords list reports those words in FT.INFO."""
    search_index = SearchIndex(
        IndexSchema.from_dict(custom_stopwords_schema),
        redis_client=client,
    )

    try:
        search_index.create(overwrite=True, drop=True)
        assert search_index.exists()

        ft_info = client.ft(search_index.name).info()
        assert "stopwords_list" in ft_info

        # Entries may arrive as bytes or str; normalise to str before comparing.
        reported_words = set()
        for word in ft_info["stopwords_list"]:
            if isinstance(word, bytes):
                word = word.decode("utf-8")
            reported_words.add(word)
        assert reported_words == {"the", "a", "an"}
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            search_index.delete(drop=True)
        except Exception:
            pass
109+
110+
111+
def test_create_index_with_default_stopwords(client, default_stopwords_schema):
    """Test creating an index with default stopwords (no STOPWORDS clause).

    Besides checking that the index exists, this verifies that FT.INFO does
    not report the empty ``stopwords_list`` that a ``STOPWORDS 0`` index
    produces, so an index with default stopwords is distinguishable from one
    with stopwords disabled.
    """
    schema = IndexSchema.from_dict(default_stopwords_schema)
    index = SearchIndex(schema, redis_client=client)

    try:
        # Create the index
        index.create(overwrite=True, drop=True)

        # Verify index was created
        assert index.exists()

        # When no STOPWORDS clause is used, FT.INFO either omits
        # stopwords_list or (depending on the Redis version) includes the
        # default list. Both are accepted; an empty list is not, because that
        # is what STOPWORDS 0 reports.
        info = client.ft(index.name).info()
        assert "stopwords_list" not in info or info["stopwords_list"] != []

    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            index.delete(drop=True)
        except Exception:
            pass
135+
136+
137+
def test_from_existing_preserves_stopwords_disabled(client, stopwords_disabled_schema):
    """``from_existing()`` rebuilds a schema whose stopwords setting is ``[]``."""
    original = SearchIndex(
        IndexSchema.from_dict(stopwords_disabled_schema),
        redis_client=client,
    )

    try:
        original.create(overwrite=True, drop=True)

        # Rebuild the index object from what the server reports.
        rebuilt = SearchIndex.from_existing(original.name, redis_client=client)
        assert rebuilt.schema.index.stopwords == []
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            original.delete(drop=True)
        except Exception:
            pass
157+
158+
159+
def test_from_existing_preserves_custom_stopwords(client, custom_stopwords_schema):
    """``from_existing()`` rebuilds a schema carrying the custom stopwords."""
    original = SearchIndex(
        IndexSchema.from_dict(custom_stopwords_schema),
        redis_client=client,
    )

    try:
        original.create(overwrite=True, drop=True)

        # Rebuild the index object from what the server reports.
        rebuilt = SearchIndex.from_existing(original.name, redis_client=client)

        # Compare as sets, so the order of the recovered words is irrelevant.
        recovered_words = set(rebuilt.schema.index.stopwords)
        assert recovered_words == {"the", "a", "an"}
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            original.delete(drop=True)
        except Exception:
            pass
179+
180+
181+
def test_from_existing_default_stopwords(client, default_stopwords_schema):
    """``from_existing()`` yields ``stopwords is None`` for a default-stopwords index."""
    original = SearchIndex(
        IndexSchema.from_dict(default_stopwords_schema),
        redis_client=client,
    )

    try:
        original.create(overwrite=True, drop=True)

        # Rebuild the index object from what the server reports.
        rebuilt = SearchIndex.from_existing(original.name, redis_client=client)

        # No stopwords were configured, so the rebuilt schema should hold None.
        assert rebuilt.schema.index.stopwords is None
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            original.delete(drop=True)
        except Exception:
            pass
201+
202+
203+
def test_stopwords_disabled_allows_searching_common_words(
    client, stopwords_disabled_schema
):
    """With ``STOPWORDS 0``, a common word such as 'of' is searchable in ``title``."""
    search_index = SearchIndex(
        IndexSchema.from_dict(stopwords_disabled_schema),
        redis_client=client,
    )

    try:
        search_index.create(overwrite=True, drop=True)

        # Titles deliberately contain words that are usually stopwords.
        documents = [
            {"title": "Bank of America", "description": "A major bank"},
            {"title": "The Great Gatsby", "description": "A classic novel"},
            {
                "title": "An Introduction to Python",
                "description": "A programming guide",
            },
        ]

        # Write each document as a hash under the index's key prefix.
        for position, fields in enumerate(documents):
            client.hset(f"test_sw_disabled:{position}", mapping=fields)

        from redisvl.query import FilterQuery

        # Query the title field for the bare word "of".
        of_query = FilterQuery(
            filter_expression="@title:(of)",
            return_fields=["title"],
        )
        results = search_index.search(of_query.query, query_params=of_query.params)

        # At least one hit is expected, and some hit's title must contain "of".
        hits = results.docs
        assert len(hits) > 0
        titles_with_of = [doc for doc in hits if "of" in doc.title.lower()]
        assert titles_with_of
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            search_index.delete(drop=True)
        except Exception:
            pass

0 commit comments

Comments
 (0)