Skip to content

Commit bef8e8a

Browse files
Merge branch 'dev' of https://github.com/neo4j-labs/llm-graph-builder into backend_connection_config
2 parents ab58528 + 6f3f863 commit bef8e8a

36 files changed

+556
-532
lines changed

backend/example.env

+2
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,5 @@ BEDROCK_EMBEDDING_MODEL="model_name,aws_access_key,aws_secret_key,region_name"
4949
LLM_MODEL_CONFIG_bedrock_nova_micro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-micro-v1:0"
5050
LLM_MODEL_CONFIG_bedrock_nova_lite_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-lite-v1:0"
5151
LLM_MODEL_CONFIG_bedrock_nova_pro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-pro-v1:0"
52+
LLM_MODEL_CONFIG_fireworks_deepseek_r1="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-r1"
53+
LLM_MODEL_CONFIG_fireworks_deepseek_v3="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-v3"

backend/requirements.txt

+19-20
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,33 @@
11
asyncio==3.4.3
2-
boto3==1.35.90
3-
botocore==1.35.90
2+
boto3==1.36.2
3+
botocore==1.36.2
44
certifi==2024.8.30
55
fastapi==0.115.6
66
fastapi-health==0.4.0
7-
google-api-core==2.23.0
8-
google-auth==2.36.0
7+
google-api-core==2.24.0
8+
google-auth==2.37.0
99
google_auth_oauthlib==1.2.1
1010
google-cloud-core==2.4.1
1111
json-repair==0.30.2
1212
pip-install==1.3.5
13-
langchain==0.3.13
14-
langchain-aws==0.2.10
15-
langchain-anthropic==0.3.0
16-
langchain-fireworks==0.2.5
17-
langchain-community==0.3.13
18-
langchain-core==0.3.28
13+
langchain==0.3.15
14+
langchain-aws==0.2.11
15+
langchain-anthropic==0.3.3
16+
langchain-fireworks==0.2.6
17+
langchain-community==0.3.15
18+
langchain-core==0.3.31
1919
langchain-experimental==0.3.4
20-
langchain-google-vertexai==2.0.7
21-
langchain-groq==0.2.1
22-
langchain-openai==0.2.14
23-
langchain-text-splitters==0.3.4
20+
langchain-google-vertexai==2.0.11
21+
langchain-groq==0.2.3
22+
langchain-openai==0.3.1
23+
langchain-text-splitters==0.3.5
2424
langchain-huggingface==0.1.2
2525
langdetect==1.0.9
26-
langsmith==0.2.4
27-
langserve==0.3.0
26+
langsmith==0.2.11
27+
langserve==0.3.1
2828
neo4j-rust-ext
2929
nltk==3.9.1
30-
openai==1.58.1
30+
openai==1.59.9
3131
opencv-python==4.10.0.84
3232
psutil==6.1.0
3333
pydantic==2.9.2
@@ -56,7 +56,6 @@ google-cloud-logging==3.11.3
5656
pypandoc==1.13
5757
graphdatascience==1.12
5858
Secweb==1.11.0
59-
ragas==0.2.6
59+
ragas==0.2.11
6060
rouge_score==0.1.2
61-
langchain-neo4j==0.2.0
62-
61+
langchain-neo4j==0.3.0

backend/score.py

+29-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from langchain_google_vertexai import ChatVertexAI
1212
from src.api_response import create_api_response
1313
from src.graphDB_dataAccess import graphDBdataAccess
14-
from src.graph_query import get_graph_results,get_chunktext_results
14+
from src.graph_query import get_graph_results,get_chunktext_results,visualize_schema
1515
from src.chunkid_entities import get_entities_from_chunkids
1616
from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding, graph_schema_consolidation
1717
from sse_starlette.sse import EventSourceResponse
@@ -82,7 +82,7 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send):
8282
app = FastAPI()
8383
app.add_middleware(XContentTypeOptions)
8484
app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'})
85-
app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"])
85+
app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext","/schema_visualization"])
8686
app.add_middleware(
8787
CORSMiddleware,
8888
allow_origins=["*"],
@@ -1033,6 +1033,32 @@ async def backend_connection_configuration():
10331033
return create_api_response(job_status, message=message, error=error_message.rstrip('.') + ', or fill from the login dialog.', data=graph_connection)
10341034
finally:
10351035
gc.collect()
1036-
1036+
1037+
@app.post("/schema_visualization")
async def get_schema_visualization(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None)):
    """Return the graph schema (nodes and relationships) for visualization.

    Runs ``visualize_schema`` in a worker thread via ``asyncio.to_thread``,
    logs a structured timing record, and wraps the outcome with
    ``create_api_response``.

    Args:
        uri: Neo4j connection URI (form field, optional).
        userName: Neo4j user name (form field, optional).
        password: Neo4j password (form field, optional).
        database: Neo4j database name (form field, optional).

    Returns:
        A 'Success' API response carrying the schema in ``data`` and the
        elapsed time in ``message``, or a 'Failed' API response carrying the
        error text when any step raises.
    """
    try:
        start = time.time()
        result = await asyncio.to_thread(
            visualize_schema,
            uri=uri,
            userName=userName,
            password=password,
            database=database,
        )
        elapsed_time = time.time() - start
        logging.info("Graph schema visualization query successful")
        logging.info(f'Schema result from DB: {result}')
        json_obj = {
            'api_name': 'schema_visualization',
            'db_url': uri,
            'userName': userName,
            'database': database,
            'logging_time': formatted_time(datetime.now(timezone.utc)),
            'elapsed_api_time': f'{elapsed_time:.2f}',
        }
        logger.log_struct(json_obj, "INFO")
        # Respond unconditionally: the previous `if result:` guard let a falsy
        # result fall through, so the endpoint returned None instead of an
        # API response.
        return create_api_response('Success', data=result, message=f"Total elapsed API time {elapsed_time:.2f}")
    except Exception as e:
        message = "Unable to get schema visualization from neo4j database"
        error_message = str(e)
        logging.info(message)
        logging.exception(f'Exception:{error_message}')
        return create_api_response("Failed", message=message, error=error_message)
    finally:
        gc.collect()
1062+
10371063
if __name__ == "__main__":
10381064
uvicorn.run(app)

backend/src/diffbot_transformer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ def get_graph_from_diffbot(graph,chunkId_chunkDoc_list:List):
88
combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list)
99
llm,model_name = get_llm('diffbot')
1010
graph_documents = llm.convert_to_graph_documents(combined_chunk_document_list)
11-
return graph_documents
11+
return graph_documents

backend/src/document_sources/local_file.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,4 @@ def get_pages_with_page_numbers(unstructured_pages):
6262
'filetype':page.metadata['filetype']}
6363
if page == unstructured_pages[-1]:
6464
pages.append(Document(page_content = page_content, metadata=metadata_with_custom_page_number))
65-
return pages
65+
return pages

backend/src/document_sources/web_pages.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ def get_documents_from_web_page(source_url:str):
1313
file_name = last_url_segment(source_url)
1414
return file_name, pages
1515
except Exception as e:
16-
raise LLMGraphBuilderException(str(e))
16+
raise LLMGraphBuilderException(str(e))

backend/src/graph_query.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from neo4j import GraphDatabase
44
import os
55
import json
6-
from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY
6+
7+
from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY,SCHEMA_VISUALIZATION_QUERY
78

89
def get_graphDB_driver(uri, username, password,database="neo4j"):
910
"""
@@ -259,4 +260,23 @@ def get_chunktext_results(uri, username, password, database, document_name, page
259260
raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
260261
finally:
261262
if driver:
262-
driver.close()
263+
driver.close()
264+
265+
266+
def visualize_schema(uri, userName, password, database):
    """Retrieve the graph schema from a Neo4j database.

    Opens a driver with the given connection details, runs
    ``SCHEMA_VISUALIZATION_QUERY`` and returns the ``nodes`` and
    ``relationships`` values of the first record.

    Args:
        uri: Neo4j connection URI.
        userName: Neo4j user name.
        password: Neo4j password.
        database: Name of the database passed to the driver.

    Returns:
        dict: ``{"nodes": [...], "relationships": [...]}``. Both lists are
        empty when the query yields no record.

    Raises:
        Exception: If connecting or querying fails; the underlying error is
            chained as the cause.
    """
    driver = None
    try:
        logging.info("Starting visualizing graph schema")
        driver = GraphDatabase.driver(uri, auth=(userName, password), database=database)
        records, _summary, _keys = driver.execute_query(SCHEMA_VISUALIZATION_QUERY)
        # Guard against an empty result so an IndexError is not reported as a
        # generic retrieval failure.
        if not records:
            return {"nodes": [], "relationships": []}
        first_record = records[0]
        return {
            "nodes": first_record.get("nodes", []),
            "relationships": first_record.get("relationships", []),
        }
    except Exception as e:
        logging.error(f"An error occurred schema retrieval. Error: {str(e)}")
        # Chain the cause explicitly, matching the other query helpers in this module.
        raise Exception(f"An error occurred schema retrieval. Error: {str(e)}") from e
    finally:
        # Always release the driver's connections, on success or failure.
        if driver:
            driver.close()

backend/src/make_relationships.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,8 @@ def create_chunk_vector_index(graph):
161161
graph=graph,
162162
node_label="Chunk",
163163
embedding_node_property="embedding",
164-
index_name="vector"
164+
index_name="vector",
165+
embedding_dimension=EMBEDDING_DIMENSION
165166
)
166167
vector_store.create_new_index()
167168
logging.info(f"Index created successfully. Time taken: {time.time() - start_time:.2f} seconds")

backend/src/post_processing.py

-3
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,3 @@ def graph_schema_consolidation(graph):
233233
graph.query(query)
234234

235235
return None
236-
237-
238-

backend/src/shared/constants.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"]
32
GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"]
43
GROQ_MODELS = ["groq-llama3"]
@@ -893,3 +892,19 @@
893892
types such as dates, numbers, revenues, and other non-entity information are not extracted as separate nodes.
894893
Instead, treat these as properties associated with the relevant entities."""
895894

895+
# Cypher query that calls db.schema.visualization() and returns two columns:
# `nodes` (maps with element_id, labels, properties) and `relationships`
# (maps with type, properties, element_id and the start/end node element ids).
# NOTE(review): it calls apoc.any.properties, so the target database
# presumably needs the APOC plugin installed - confirm.
SCHEMA_VISUALIZATION_QUERY = """
896+
CALL db.schema.visualization() YIELD nodes, relationships
897+
RETURN
898+
[n IN nodes | {
899+
element_id: elementId(n),
900+
labels: labels(n),
901+
properties: apoc.any.properties(n)
902+
}] AS nodes,
903+
[r IN relationships | {
904+
type: type(r),
905+
properties: apoc.any.properties(r),
906+
element_id: elementId(r),
907+
start_node_element_id: elementId(startNode(r)),
908+
end_node_element_id: elementId(endNode(r))
909+
}] AS relationships;
910+
"""

0 commit comments

Comments
 (0)