21
21
22
22
# Database and collections used by the retriever integration tests.
DB_NAME = "langchain_test_db"
COLLECTION_NAME = "langchain_test_retrievers"
COLLECTION_NAME_NESTED = "langchain_test_retrievers_nested"

# Search-index names and the document fields they cover.
VECTOR_INDEX_NAME = "vector_index"
EMBEDDING_FIELD = "embedding"
PAGE_CONTENT_FIELD = "text"
# Dotted field path used as the text key / full-text index field in the
# "nested" variants of the fixtures and tests.
PAGE_CONTENT_FIELD_NESTED = "title.text"
SEARCH_INDEX_NAME = "text_index"
SEARCH_INDEX_NAME_NESTED = "text_index_nested"

# Passed as ``wait_until_complete`` when creating search indexes
# (presumably seconds -- confirm against the index helper's documentation).
TIMEOUT = 60.0
INTERVAL = 0.5
@@ -71,6 +74,39 @@ def collection(client: MongoClient, dimensions: int) -> Collection:
71
74
return clxn
72
75
73
76
77
@pytest.fixture(scope="module")
def collection_nested(client: MongoClient, dimensions: int) -> Collection:
    """Provide an emptied collection with a vector index and a nested-field text index.

    The collection ``COLLECTION_NAME_NESTED`` is created in ``DB_NAME`` if it
    is not already listed, and all of its documents are deleted. A vector
    search index (``VECTOR_INDEX_NAME``) and a full-text search index
    (``SEARCH_INDEX_NAME_NESTED`` on ``PAGE_CONTENT_FIELD_NESTED``) are then
    created unless an index of that name is already present.

    Args:
        client: Client used to reach the test database.
        dimensions: Dimension count passed to the vector search index.

    Returns:
        The prepared collection.
    """
    database = client[DB_NAME]
    if COLLECTION_NAME_NESTED not in database.list_collection_names():
        clxn = database.create_collection(COLLECTION_NAME_NESTED)
    else:
        clxn = database[COLLECTION_NAME_NESTED]

    clxn.delete_many({})

    # List the search indexes once. The two names checked below are distinct,
    # so creating the vector index cannot change the outcome of the
    # full-text check.
    existing_index_names = {ix["name"] for ix in clxn.list_search_indexes()}

    if VECTOR_INDEX_NAME not in existing_index_names:
        create_vector_search_index(
            collection=clxn,
            index_name=VECTOR_INDEX_NAME,
            dimensions=dimensions,
            path="embedding",
            similarity="cosine",
            wait_until_complete=TIMEOUT,
        )

    if SEARCH_INDEX_NAME_NESTED not in existing_index_names:
        create_fulltext_search_index(
            collection=clxn,
            index_name=SEARCH_INDEX_NAME_NESTED,
            field=PAGE_CONTENT_FIELD_NESTED,
            wait_until_complete=TIMEOUT,
        )

    return clxn
108
+
109
+
74
110
@pytest .fixture (scope = "module" )
75
111
def indexed_vectorstore (
76
112
collection : Collection ,
@@ -93,6 +129,28 @@ def indexed_vectorstore(
93
129
vectorstore .collection .delete_many ({})
94
130
95
131
132
@pytest.fixture(scope="module")
def indexed_nested_vectorstore(
    collection_nested: Collection,
    example_documents: List[Document],
    embedding: Embeddings,
) -> Generator[MongoDBAtlasVectorSearch, None, None]:
    """Yield a vector store over the nested collection, loaded with the example documents.

    The store is built with ``PAGE_CONTENT_FIELD_NESTED`` as its text key and
    ``VECTOR_INDEX_NAME`` as its index name. After the consuming tests are
    done, every document in the store's collection is deleted.
    """
    store = PatchedMongoDBAtlasVectorSearch(
        collection=collection_nested,
        embedding=embedding,
        index_name=VECTOR_INDEX_NAME,
        text_key=PAGE_CONTENT_FIELD_NESTED,
    )
    store.add_documents(example_documents)

    yield store

    # Teardown: leave the collection empty for whoever uses it next.
    store.collection.delete_many({})
152
+
153
+
96
154
def test_vector_retriever (indexed_vectorstore : PatchedMongoDBAtlasVectorSearch ) -> None :
97
155
"""Test VectorStoreRetriever"""
98
156
retriever = indexed_vectorstore .as_retriever ()
@@ -125,6 +183,26 @@ def test_hybrid_retriever(indexed_vectorstore: PatchedMongoDBAtlasVectorSearch)
125
183
assert "New Orleans" in results [0 ].page_content
126
184
127
185
186
def test_hybrid_retriever_nested(
    indexed_nested_vectorstore: PatchedMongoDBAtlasVectorSearch,
) -> None:
    """Check MongoDBAtlasHybridSearchRetriever against the nested-text-field store."""
    hybrid = MongoDBAtlasHybridSearchRetriever(
        vectorstore=indexed_nested_vectorstore,
        search_index_name=SEARCH_INDEX_NAME_NESTED,
        top_k=3,
    )

    # First query: expect exactly top_k hits, with the Paris document first.
    france_hits = hybrid.invoke("What did I visit France?")
    assert len(france_hits) == 3
    assert "Paris" in france_hits[0].page_content

    # Second query: only the top-ranked document is checked.
    new_orleans_hits = hybrid.invoke("When was the last time I visited new orleans?")
    assert "New Orleans" in new_orleans_hits[0].page_content
204
+
205
+
128
206
def test_fulltext_retriever (
129
207
indexed_vectorstore : PatchedMongoDBAtlasVectorSearch ,
130
208
) -> None :
0 commit comments