You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
* format fixes and graph schema indication fix
* Update README.md
* added chat modes variable in env updated the readme
* spell fix
* added the chat mode in env table
* added the logos
* fixed the overflow issues
* removed the extra fix
* Fixed specific scenario "when the text from schema closes it should reopen the previous modal"
* readme changes
* removed dev console logs
* added new retrieval query (#533)
* format fixes and tab rendering fix
* fixed the setting modal reopen issue
---------
Co-authored-by: kartikpersistent <[email protected]>
Co-authored-by: vasanthasaikalluri <[email protected]>
Copy file name to clipboardExpand all lines: backend/src/shared/constants.py
+93-33
Original file line number
Diff line number
Diff line change
@@ -111,38 +111,102 @@
111
111
# """
112
112
113
113
114
+
# VECTOR_GRAPH_SEARCH_QUERY = """
115
+
# WITH node as chunk, score
116
+
# // find the document of the chunk
117
+
# MATCH (chunk)-[:PART_OF]->(d:Document)
118
+
# // fetch entities
119
+
# CALL { WITH chunk
120
+
# // entities connected to the chunk
121
+
# // todo only return entities that are actually in the chunk, remember we connect all extracted entities to all chunks
122
+
# MATCH (chunk)-[:HAS_ENTITY]->(e)
123
+
124
+
# // depending on match to query embedding either 1 or 2 step expansion
125
+
# WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95
126
+
# THEN
127
+
# collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,1}(:!Chunk&!Document) RETURN path }
128
+
# ELSE
129
+
# collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk&!Document) RETURN path }
130
+
# END as paths
131
+
132
+
# RETURN collect{ unwind paths as p unwind relationships(p) as r return distinct r} as rels,
133
+
# collect{ unwind paths as p unwind nodes(p) as n return distinct n} as nodes
134
+
# }
135
+
# // aggregate chunk-details and de-duplicate nodes and relationships
136
+
# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels,
137
+
138
+
# // TODO sort by relevancy (embedding comparison?) cut off after X (e.g. 25) nodes?
139
+
# apoc.coll.toSet(apoc.coll.flatten(collect(
140
+
# [r in rels |[startNode(r),endNode(r)]]),true)) as nodes
141
+
142
+
# // generate metadata and text components for chunks, nodes and relationships
143
+
# WITH d, avg_score,
144
+
# [c IN chunks | c.chunk.text] AS texts,
145
+
# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails,
# RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata
172
+
# """
173
+
174
+
VECTOR_GRAPH_SEARCH_ENTITY_LIMIT=25
175
+
114
176
VECTOR_GRAPH_SEARCH_QUERY="""
115
177
WITH node as chunk, score
116
178
// find the document of the chunk
117
179
MATCH (chunk)-[:PART_OF]->(d:Document)
180
+
181
+
// aggregate chunk-details
182
+
WITH d, collect(DISTINCT {{chunk: chunk, score: score}}) AS chunks, avg(score) as avg_score
118
183
// fetch entities
119
-
CALL { WITH chunk
184
+
CALL {{ WITH chunks
185
+
UNWIND chunks as chunkScore
186
+
WITH chunkScore.chunk as chunk
120
187
// entities connected to the chunk
121
188
// todo only return entities that are actually in the chunk, remember we connect all extracted entities to all chunks
122
-
MATCH (chunk)-[:HAS_ENTITY]->(e)
123
-
189
+
// todo sort by relevancy (embedding comparison?) cut off after X (e.g. 25) nodes?
190
+
OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e)
191
+
WITH e, count(*) as numChunks
192
+
ORDER BY numChunks DESC LIMIT {no_of_entites}
124
193
// depending on match to query embedding either 1 or 2 step expansion
125
194
WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95
RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata
172
-
"""
173
-
174
-
175
225
226
+
WITH d, avg_score,chunkdetails,
227
+
"Text Content:\\n" +
228
+
apoc.text.join(texts,"\\n----\\n") +
229
+
"\\n----\\nEntities:\\n"+
230
+
apoc.text.join(nodeTexts,"\\n") +
231
+
"\\n----\\nRelationships:\\n" +
232
+
apoc.text.join(relTexts,"\\n")
176
233
234
+
as text,entities
177
235
236
+
RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata
0 commit comments