feat(llm): added the process of text2gql in graphrag V1.0 (#105)
Addresses #10

1. Added the process of intelligently generated Gremlin retrieval
2. Added a text2gremlin block in the RAG app
3. Added the text2gremlin prompt & config
4. Fixed a log bug in py-client
5. ....

We also add a `flag` value to the response of the graph query interface `rag/graph` (see the client-side sketch after this list):
- `1` means text2gql accurate matching succeeded
- `0` means (k-neighbor) generalization matching succeeded
- `-1` means no relevant graph info was found
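
For illustration, here is a minimal client-side sketch of how that `flag` field might be consumed. It is only a sketch: the host/port follow the README example, and the request path, payload field, and response keys other than `flag` are assumptions rather than the documented API.

```python
import requests

# Hypothetical request: host/port follow the README example; the payload field
# ("query") and the response keys besides "flag" are assumptions.
resp = requests.post(
    "http://127.0.0.1:18001/rag/graph",
    json={"query": "Tell me about Al Pacino."},
    timeout=30,
)
result = resp.json()

flag = result.get("flag")
if flag == 1:
    print("text2gql accurate matching succeeded")
elif flag == 0:
    print("fell back to (k-neighbor) generalized matching")
elif flag == -1:
    print("no relevant graph info was found")
print(result)
```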

---------

Co-authored-by: Simon Cheung <[email protected]>
Co-authored-by: imbajin <[email protected]>
Co-authored-by: HaoJin Yang <[email protected]>
4 people authored Dec 9, 2024
1 parent 71b6261 commit cbfca3c
Showing 23 changed files with 644 additions and 405 deletions.
11 changes: 4 additions & 7 deletions hugegraph-llm/README.md
@@ -45,21 +45,18 @@ graph systems and large language models.
```bash
python3 -m hugegraph_llm.demo.rag_demo.app --host 127.0.0.1 --port 18001
```
6. Or start the gradio interactive demo of **Text2Gremlin**, you can run with the following command, and open http://127.0.0.1:8002 after starting. You can also change the default host `0.0.0.0` and port `8002` as above. (🚧ing)
```bash
python3 -m hugegraph_llm.demo.gremlin_generate_web_demo
```
7. After running the web demo, the config file `.env` will be automatically generated at the path `hugegraph-llm/.env`. Additionally, a prompt-related configuration file `config_prompt.yaml` will also be generated at the path `hugegraph-llm/src/hugegraph_llm/resources/demo/config_prompt.yaml`.

6. After running the web demo, the config file `.env` will be automatically generated at the path `hugegraph-llm/.env`. Additionally, a prompt-related configuration file `config_prompt.yaml` will also be generated at the path `hugegraph-llm/src/hugegraph_llm/resources/demo/config_prompt.yaml`.
You can modify the content on the web page, and it will be automatically saved to the configuration file after the corresponding feature is triggered. You can also modify the file directly without restarting the web application; simply refresh the page to load your latest changes.
(Optional) To regenerate the config file, you can use `config.generate` with `-u` or `--update`.
```bash
python3 -m hugegraph_llm.config.generate --update
```
8. (__Optional__) You could use
7. (__Optional__) You could use
[hugegraph-hubble](https://hugegraph.apache.org/docs/quickstart/hugegraph-hubble/#21-use-docker-convenient-for-testdev)
to visit the graph data, could run it via [Docker/Docker-Compose](https://hub.docker.com/r/hugegraph/hubble)
for guidance. (Hubble is a graph-analysis dashboard that includes data loading/schema management/graph traverser/display).
9. (__Optional__) offline download NLTK stopwords
8. (__Optional__) offline download NLTK stopwords
```bash
python ./hugegraph_llm/operators/common_op/nltk_helper.py
```
3 changes: 2 additions & 1 deletion hugegraph-llm/src/hugegraph_llm/api/rag_api.py
@@ -39,7 +39,8 @@ def graph_rag_recall(
) -> dict:
from hugegraph_llm.operators.graph_rag_task import RAGPipeline
rag = RAGPipeline()
rag.extract_keywords().keywords_to_vid().query_graphdb().merge_dedup_rerank(

rag.extract_keywords().keywords_to_vid().import_schema(settings.graph_name).query_graphdb().merge_dedup_rerank(
rerank_method=rerank_method,
near_neighbor_first=near_neighbor_first,
custom_related_information=custom_related_information,
2 changes: 1 addition & 1 deletion hugegraph-llm/src/hugegraph_llm/config/__init__.py
@@ -16,7 +16,7 @@
# under the License.


__all__ = ["settings", "resource_path"]
__all__ = ["settings", "prompt", "resource_path"]

import os
from .config import Config, PromptConfig
8 changes: 8 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -118,6 +118,8 @@ def ensure_yaml_file_exists(self):

def save_to_yaml(self):
indented_schema = "\n".join([f" {line}" for line in self.graph_schema.splitlines()])
indented_text2gql_schema = "\n".join([f" {line}" for line in self.text2gql_graph_schema.splitlines()])
indented_gremlin_prompt = "\n".join([f" {line}" for line in self.gremlin_generate_prompt.splitlines()])
indented_example_prompt = "\n".join([f" {line}" for line in self.extract_graph_prompt.splitlines()])
indented_question = "\n".join([f" {line}" for line in self.default_question.splitlines()])
indented_custom_related_information = (
@@ -132,6 +134,9 @@
yaml_content = f"""graph_schema: |
{indented_schema}
text2gql_graph_schema: |
{indented_text2gql_schema}
extract_graph_prompt: |
{indented_example_prompt}
@@ -147,6 +152,9 @@
keywords_extract_prompt: |
{indented_keywords_extract_template}
gremlin_generate_prompt: |
{indented_gremlin_prompt}
"""
with open(yaml_file_path, "w", encoding="utf-8") as file:
file.write(yaml_content)
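
As a side note on the block-scalar format produced above, the following standalone sketch shows the same two-space indentation trick round-tripping through a YAML parser. PyYAML is assumed to be available, and the prompt value is a placeholder rather than the real default.

```python
import yaml  # PyYAML, assumed available

# Placeholder standing in for prompt.gremlin_generate_prompt.
gremlin_generate_prompt = "Given the graph schema:\n{schema}\nThe generated gremlin is:"

# Same indentation trick as save_to_yaml: prefix each line with two spaces so the
# text nests under a YAML `|` (literal block scalar) key.
indented = "\n".join(f"  {line}" for line in gremlin_generate_prompt.splitlines())
yaml_content = f"gremlin_generate_prompt: |\n{indented}\n"

loaded = yaml.safe_load(yaml_content)
# Literal block scalars keep a trailing newline, hence strip() before comparing.
assert loaded["gremlin_generate_prompt"].strip() == gremlin_generate_prompt
```
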
24 changes: 24 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/config_data.py
@@ -219,6 +219,9 @@ class PromptData:
}
"""

# TODO: we should provide a better example to reduce the useless information
text2gql_graph_schema = ConfigData.graph_name

# Extracted from llm_op/keyword_extract.py
keywords_extract_prompt = """指令:
请对以下文本执行以下任务:
@@ -266,3 +269,24 @@
# Text:
# {question}
# """

gremlin_generate_prompt = """\
Given the example query-gremlin pairs:
{example}
Given the graph schema:
```json
{schema}
```
Given the extracted vertex vid:
{vertices}
Generate gremlin from the following user query.
{query}
The output format must be like:
```gremlin
g.V().limit(10)
```
The generated gremlin is:
"""
208 changes: 0 additions & 208 deletions hugegraph-llm/src/hugegraph_llm/demo/gremlin_generate_web_demo.py

This file was deleted.

18 changes: 13 additions & 5 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -36,6 +36,7 @@
apply_graph_config,
)
from hugegraph_llm.demo.rag_demo.other_block import create_other_block
from hugegraph_llm.demo.rag_demo.text2gremlin_block import create_text2gremlin_block
from hugegraph_llm.demo.rag_demo.rag_block import create_rag_block, rag_answer
from hugegraph_llm.demo.rag_demo.vector_graph_block import create_vector_graph_block
from hugegraph_llm.resources.demo.css import CSS
@@ -55,6 +56,7 @@ def authenticate(credentials: HTTPAuthorizationCredentials = Depends(sec)):
headers={"WWW-Authenticate": "Bearer"},
)


# pylint: disable=C0301
def init_rag_ui() -> gr.Interface:
with gr.Blocks(
@@ -93,9 +95,11 @@ def init_rag_ui() -> gr.Interface:
textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
textbox_inp, textbox_answer_prompt_input, textbox_keywords_extract_prompt_input = create_rag_block()
with gr.Tab(label="3. Graph Tools 🚧"):
with gr.Tab(label="3. Text2gremlin ⚙️"):
textbox_gremlin_inp, textbox_gremlin_schema, textbox_gremlin_prompt = create_text2gremlin_block()
with gr.Tab(label="4. Graph Tools 🚧"):
create_other_block()
with gr.Tab(label="4. Admin Tools ⚙️"):
with gr.Tab(label="5. Admin Tools 🛠"):
create_admin_block()

def refresh_ui_config_prompt() -> tuple:
@@ -104,10 +108,11 @@ def refresh_ui_config_prompt() -> tuple:
return (
settings.graph_ip, settings.graph_port, settings.graph_name, settings.graph_user,
settings.graph_pwd, settings.graph_space, prompt.graph_schema, prompt.extract_graph_prompt,
prompt.default_question, prompt.answer_prompt, prompt.keywords_extract_prompt
prompt.default_question, prompt.answer_prompt, prompt.keywords_extract_prompt,
prompt.default_question, settings.graph_name, prompt.gremlin_generate_prompt
)

hugegraph_llm_ui.load(fn=refresh_ui_config_prompt, outputs=[ #pylint: disable=E1101
hugegraph_llm_ui.load(fn=refresh_ui_config_prompt, outputs=[ # pylint: disable=E1101
textbox_array_graph_config[0],
textbox_array_graph_config[1],
textbox_array_graph_config[2],
Expand All @@ -118,7 +123,10 @@ def refresh_ui_config_prompt() -> tuple:
textbox_info_extract_template,
textbox_inp,
textbox_answer_prompt_input,
textbox_keywords_extract_prompt_input
textbox_keywords_extract_prompt_input,
textbox_gremlin_inp,
textbox_gremlin_schema,
textbox_gremlin_prompt
])

return hugegraph_llm_ui