feat(llm): support ragas in back-testing #92

Status: Open · wants to merge 8 commits into base: main
1 change: 1 addition & 0 deletions hugegraph-llm/requirements.txt
@@ -14,3 +14,4 @@ python-dotenv>=1.0.1
pyarrow~=17.0.0 # TODO: a temporary dependency for pandas, figure out why ImportError
pandas~=2.2.2
openpyxl~=3.1.5
git+https://github.com/jasinliu/ragas.git@patch-2 # TODO: wait for release
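
Note: the new requirement is a temporary git pin on a ragas fork. A minimal sanity check after installing, assuming the pinned branch installs under the usual `ragas` package name, is to import the pieces this PR uses (the same imports added to `rag_block.py`):

```python
# Import check for the pinned ragas fork (assumption: it installs as the
# regular `ragas` package alongside the `datasets` dependency).
from datasets import Dataset                 # used to build the evaluation dataset
from ragas import evaluate                   # evaluation entry point used in rag_block.py
from ragas.llms import LangchainLLMWrapper   # wraps a LangChain chat model as the judge LLM

print("ragas imports OK:", evaluate.__module__, LangchainLLMWrapper.__name__)
```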
4 changes: 2 additions & 2 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -90,9 +90,9 @@ def init_rag_ui() -> gr.Interface:

with gr.Tab(label="1. Build RAG Index 💡"):
textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
with gr.Tab(label="2,3. (Graph)RAG & User Functions 📖"):
textbox_inp, textbox_answer_prompt_input = create_rag_block()
with gr.Tab(label="3. Others Tools 🚧"):
with gr.Tab(label="4. Others Tools 🚧"):
create_other_block()


183 changes: 145 additions & 38 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
@@ -17,29 +17,35 @@

# pylint: disable=E1101

import json
import os
from typing import Tuple, Literal, Optional
from typing import List, Literal, Optional, Tuple

import gradio as gr
import pandas as pd
from datasets import Dataset
from gradio.utils import NamedString
from langchain_openai.chat_models import ChatOpenAI
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper

from hugegraph_llm.config import resource_path, prompt
from hugegraph_llm.config import prompt, resource_path, settings
from hugegraph_llm.operators.graph_rag_task import RAGPipeline
from hugegraph_llm.utils.log import log
from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT, RAGAS_METRICS_ZH_DICT


def rag_answer(
text: str,
raw_answer: bool,
vector_only_answer: bool,
graph_only_answer: bool,
graph_vector_answer: bool,
graph_ratio: float,
rerank_method: Literal["bleu", "reranker"],
near_neighbor_first: bool,
custom_related_information: str,
answer_prompt: str,
text: str,
raw_answer: bool,
vector_only_answer: bool,
graph_only_answer: bool,
graph_vector_answer: bool,
graph_ratio: float,
rerank_method: Literal["bleu", "reranker"],
near_neighbor_first: bool,
custom_related_information: str,
answer_prompt: str,
) -> Tuple:
"""
Generate an answer using the RAG (Retrieval-Augmented Generation) pipeline.
@@ -69,17 +75,29 @@ def rag_answer(
rag.extract_keywords().keywords_to_vid().query_graphdb()
# TODO: add more user-defined search strategies
rag.merge_dedup_rerank(graph_ratio, rerank_method, near_neighbor_first, custom_related_information)
rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt)
rag.synthesize_answer(answer_prompt)

try:
context = rag.run(verbose=True, query=text, vector_search=vector_search, graph_search=graph_search)
context = rag.run(
verbose=True,
query=text,
raw_answer=raw_answer,
vector_only_answer=vector_only_answer,
graph_only_answer=graph_only_answer,
graph_vector_answer=graph_vector_answer,
)
if context.get("switch_to_bleu"):
gr.Warning("Online reranker fails, automatically switches to local bleu rerank.")
return (
context.get("raw_answer", ""),
context.get("vector_only_answer", ""),
context.get("graph_only_answer", ""),
context.get("graph_vector_answer", ""),
context.get("raw_answer_result", ""),
context.get("vector_only_answer_result", ""),
context.get("graph_only_answer_result", ""),
context.get("graph_vector_answer_result", ""),
{
"vector_contexts": context.get("vector_contexts"),
"graph_contexts": context.get("graph_contexts"),
"graph_vector_contexts": context.get("graph_vector_contexts"),
},
)
except ValueError as e:
log.critical(e)
@@ -124,9 +142,7 @@ def toggle_slider(enable):
)
graph_ratio = gr.Slider(0, 1, 0.5, label="Graph Ratio", step=0.1, interactive=False)

graph_vector_radio.change(
toggle_slider, inputs=graph_vector_radio, outputs=graph_ratio
) # pylint: disable=no-member
graph_vector_radio.change(toggle_slider, inputs=graph_vector_radio, outputs=graph_ratio) # pylint: disable=no-member
near_neighbor_first = gr.Checkbox(
value=False,
label="Near neighbor first(Optional)",
@@ -135,6 +151,10 @@ def toggle_slider(enable):
custom_related_information = gr.Text(
prompt.custom_rerank_info,
label="Custom related information(Optional)",
info=(
"Used for rerank, can increase the weight of knowledge related to it, such as `law`. "
"Multiple values can be separated by commas."
),
)
btn = gr.Button("Answer Question", variant="primary")

@@ -160,34 +180,46 @@ def toggle_slider(enable):
> 2. Upload the file & click the button to generate answers. (Preview shows the first 40 lines)
> 3. The answer options are the same as the above RAG/Q&A frame
""")

# TODO: Replace string with python constant
tests_df_headers = [
"Question",
"Expected Answer",
"Basic LLM Answer",
"Vector-only Answer",
"Graph-only Answer",
"Graph-Vector Answer",
"Vector-only Answer",
"Basic LLM Answer",
"Expected Answer",
]
rag_answer_header_dict = {
"Vector-only Answer": "Vector Contexts",
"Graph-only Answer": "Graph Contexts",
"Graph-Vector Answer": "Graph-Vector Contexts",
}

answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx")
questions_path = os.path.join(resource_path, "demo", "questions.xlsx")
questions_template_path = os.path.join(resource_path, "demo", "questions_template.xlsx")

ragas_metrics_list = list(RAGAS_METRICS_DICT.keys())

def read_file_to_excel(file: NamedString, line_count: Optional[int] = None):
df = None
if os.path.exists(answers_path):
os.remove(answers_path)
df = pd.DataFrame()
if not file:
return pd.DataFrame(), 1
if file.name.endswith(".xlsx"):
df = pd.read_excel(file.name, nrows=line_count) if file else pd.DataFrame()
elif file.name.endswith(".csv"):
df = pd.read_csv(file.name, nrows=line_count) if file else pd.DataFrame()
df.to_excel(questions_path, index=False)
if df.empty:
df = pd.DataFrame([[""] * len(tests_df_headers)], columns=tests_df_headers)
else:
df.columns = tests_df_headers
raise gr.Error("Only support .xlsx and .csv files.")
df.to_excel(questions_path, index=False)
# truncate the dataframe if it's too long
if len(df) > 40:
return df.head(40), 40
if len(df) == 0:
gr.Warning("No data in the file.")
return df, len(df)

def change_showing_excel(line_count):
@@ -216,7 +248,7 @@ def several_rag_answer(
total_rows = len(df)
for index, row in df.iterrows():
question = row.iloc[0]
basic_llm_answer, vector_only_answer, graph_only_answer, graph_vector_answer = rag_answer(
llm_answer, vector_only_answer, graph_only_answer, graph_vector_answer, contexts = rag_answer(
question,
is_raw_answer,
is_vector_only_answer,
@@ -228,18 +260,30 @@
custom_related_information,
answer_prompt,
)
df.at[index, "Basic LLM Answer"] = basic_llm_answer
df.at[index, "Vector-only Answer"] = vector_only_answer
df.at[index, "Graph-only Answer"] = graph_only_answer
df.at[index, "Graph-Vector Answer"] = graph_vector_answer
df.at[index, "Basic LLM Answer"] = llm_answer if llm_answer else None
df.at[index, "Vector-only Answer"] = vector_only_answer if vector_only_answer else None
df.at[index, "Graph-only Answer"] = graph_only_answer if graph_only_answer else None
df.at[index, "Graph-Vector Answer"] = graph_vector_answer if graph_vector_answer else None
if "Vector Contexts" not in df.columns:
df["Vector Contexts"] = None
df["Graph Contexts"] = None
df["Graph-Vector Contexts"] = None
df.at[index, "Vector Contexts"] = contexts.get("vector_contexts")
df.at[index, "Graph Contexts"] = contexts.get("graph_contexts")
df.at[index, "Graph-Vector Contexts"] = contexts.get("graph_vector_contexts")
progress((index + 1, total_rows))
answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx")

df = df.dropna(axis=1, how="all")
df_to_show = df[[col for col in tests_df_headers if col in df.columns]]
for rag_context_header in rag_answer_header_dict.values():
if rag_context_header in df.columns:
df[rag_context_header] = df[rag_context_header].apply(lambda x: json.dumps(x, ensure_ascii=False))
df.to_excel(answers_path, index=False)
return df.head(answer_max_line_count), answers_path
return df_to_show.head(answer_max_line_count), answers_path

with gr.Row():
with gr.Column():
questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & csv)")
questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & .csv)")
with gr.Column():
test_template_file = os.path.join(resource_path, "demo", "questions_template.xlsx")
gr.File(value=test_template_file, label="Download Template File")
@@ -265,4 +309,67 @@
)
questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)
return inp, answer_prompt_input

def evaluate_rag(metrics: List[str], num: int, language: Literal["english", "chinese"]):
answers_df = pd.read_excel(answers_path)
answers_df = answers_df.head(num)
if not any(answers_df.columns.isin(rag_answer_header_dict)):
raise gr.Error("No RAG answers found in the answer file.")
if language == "chinese":
eval_metrics = [RAGAS_METRICS_ZH_DICT[metric] for metric in metrics]
else:
eval_metrics = [RAGAS_METRICS_DICT[metric] for metric in metrics]
rag_method_names = [answer for answer in rag_answer_header_dict if answer in answers_df.columns]
score_df = pd.DataFrame()

for answer in rag_method_names:
context_header = rag_answer_header_dict[answer]
answers_df[context_header] = answers_df[context_header].apply(json.loads)
rag_data = {
"user_input": answers_df["Question"].to_list(),
"response": answers_df[answer].to_list(),
"retrieved_contexts": answers_df[rag_answer_header_dict[answer]].to_list(),
"reference": answers_df["Expected Answer"].to_list(),
}
eval_llm = LangchainLLMWrapper(
ChatOpenAI(
model="gpt-4o-mini",
temperature=0,
base_url=settings.openai_api_base,
api_key=settings.openai_api_key,
)
)

dataset = Dataset.from_dict(rag_data)
score = evaluate(
dataset,
metrics=eval_metrics,
llm=eval_llm,
)
score_df = pd.concat([score_df, score.to_pandas()])
score_df.insert(0, "method", rag_method_names)
return score_df

with gr.Row():
with gr.Column():
ragas_metrics = gr.Dropdown(
choices=ragas_metrics_list,
value=ragas_metrics_list[:4],
multiselect=True,
label="Metrics",
info=(
"Several evaluation metrics from `ragas`, ",
"please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html",
),
)
with gr.Column():
with gr.Row():
dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1)
language = gr.Radio(["english", "chinese"], label="Language", value="chinese")
ragas_btn = gr.Button("Evaluate RAG", variant="primary")
ragas_btn.click(
evaluate_rag,
inputs=[ragas_metrics, dataset_nums, language],
outputs=[gr.DataFrame(label="RAG Evaluation Results", headers=ragas_metrics_list)],
)
return inp, answer_prompt_input
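
For reference, the evaluation added above boils down to: read the answers spreadsheet written by `several_rag_answer`, rebuild the JSON-encoded context columns, and hand a `datasets.Dataset` to `ragas.evaluate` with a LangChain judge LLM. A standalone sketch of that flow follows; the spreadsheet path, the chosen answer column, and the metric instances (`faithfulness`, `answer_relevancy`) are assumptions and depend on the installed ragas version and on `OPENAI_API_KEY` being set.

```python
# Minimal sketch of the ragas back-testing evaluation (assumed file path and
# metric names; mirrors evaluate_rag above rather than replacing it).
import json

import pandas as pd
from datasets import Dataset
from langchain_openai.chat_models import ChatOpenAI
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import answer_relevancy, faithfulness

answers_df = pd.read_excel("questions_answers.xlsx")  # hypothetical path

rag_data = {
    "user_input": answers_df["Question"].to_list(),
    "response": answers_df["Graph-Vector Answer"].to_list(),
    # Contexts were serialized with json.dumps before saving, so decode them here.
    "retrieved_contexts": answers_df["Graph-Vector Contexts"].apply(json.loads).to_list(),
    "reference": answers_df["Expected Answer"].to_list(),
}

# Judge LLM: reads OPENAI_API_KEY from the environment by default.
eval_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini", temperature=0))

score = evaluate(Dataset.from_dict(rag_data), metrics=[faithfulness, answer_relevancy], llm=eval_llm)
print(score.to_pandas())
```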
@@ -66,21 +66,21 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
if self.custom_related_information:
query = query + self.custom_related_information
context["graph_ratio"] = self.graph_ratio
vector_search = context.get("vector_search", False)
graph_search = context.get("graph_search", False)
if graph_search and vector_search:
graph_length = int(self.topk * self.graph_ratio)
vector_length = self.topk - graph_length
else:
graph_length = self.topk
vector_length = self.topk


raw_answer = context.get("raw_answer", False)
vector_only_answer = context.get("vector_only_answer", False)
graph_only_answer = context.get("graph_only_answer", False)
graph_vector_answer = context.get("graph_vector_answer", False)

if raw_answer and not (vector_only_answer or graph_only_answer or graph_vector_answer):
return context
vector_result = context.get("vector_result", [])
vector_length = min(len(vector_result), vector_length)
vector_length = min(len(vector_result), self.topk)
vector_result = self._dedup_and_rerank(query, vector_result, vector_length)

graph_result = context.get("graph_result", [])
graph_length = min(len(graph_result), graph_length)
graph_length = min(len(graph_result), self.topk)
if self.near_neighbor_first:
graph_result = self._rerank_with_vertex_degree(
query,
@@ -94,12 +94,17 @@
else:
graph_result = self._dedup_and_rerank(query, graph_result, graph_length)

context["graph_rerank_length"] = min(graph_length, int(self.topk * self.graph_ratio))
context["vector_rerank_length"] = min(vector_length, self.topk - int(self.topk * self.graph_ratio))

context["vector_result"] = vector_result
context["graph_result"] = graph_result

return context

def _dedup_and_rerank(self, query: str, results: List[str], topn: int) -> List[str]:
if topn == 0:
return []
results = list(set(results))
if self.method == "bleu":
return _bleu_rerank(query, results)[:topn]
@@ -116,6 +121,9 @@ def _rerank_with_vertex_degree(
vertex_degree_list: Optional[List[List[str]]],
knowledge_with_degree: Dict[str, List[str]],
) -> List[str]:
if topn == 0:
return []

if vertex_degree_list is None or len(vertex_degree_list) == 0:
return self._dedup_and_rerank(query, results, topn)

12 changes: 0 additions & 12 deletions hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
@@ -169,28 +169,16 @@ def merge_dedup_rerank(

def synthesize_answer(
self,
raw_answer: bool = False,
vector_only_answer: bool = True,
graph_only_answer: bool = False,
graph_vector_answer: bool = False,
answer_prompt: Optional[str] = None,
):
"""
Add an answer synthesis operator to the pipeline.

:param raw_answer: Whether to return raw answers.
:param vector_only_answer: Whether to return vector-only answers.
:param graph_only_answer: Whether to return graph-only answers.
:param graph_vector_answer: Whether to return graph-vector combined answers.
:param answer_prompt: Template for the answer synthesis prompt.
:return: Self-instance for chaining.
"""
self._operators.append(
AnswerSynthesize(
raw_answer=raw_answer,
vector_only_answer=vector_only_answer,
graph_only_answer=graph_only_answer,
graph_vector_answer=graph_vector_answer,
prompt_template=answer_prompt,
)
)
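
Taken together with the `rag_block.py` hunk, the effect of this file's change is that the four answer-type flags move from `synthesize_answer()` to the `run()` call, leaving only the prompt template bound at build time. A hedged sketch of the new call pattern, assuming a default-constructed `RAGPipeline` and the argument order shown in `rag_answer`:

```python
# Sketch of the updated pipeline usage after this PR (assumed construction;
# chained calls and run() keyword arguments taken from rag_block.py above).
from hugegraph_llm.operators.graph_rag_task import RAGPipeline

rag = RAGPipeline()
rag.extract_keywords().keywords_to_vid().query_graphdb()
rag.merge_dedup_rerank(0.5, "bleu", False, "")   # graph_ratio, rerank_method, near_neighbor_first, custom info
rag.synthesize_answer(answer_prompt="...")       # only the prompt template is set here now

context = rag.run(
    verbose=True,
    query="example question",
    raw_answer=False,            # answer-type flags are now passed at run time
    vector_only_answer=True,
    graph_only_answer=True,
    graph_vector_answer=False,
)
```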