Skip to content

Commit 1830588

Browse files
pavanjavapavanmantha
andauthored
Llama index agentic rag (#43)
* removed the secret * -added controllable agents with observability, -release version added, -modified the cli options --------- Co-authored-by: pavanmantha <[email protected]>
1 parent 43534a2 commit 1830588

File tree

23 files changed

+525
-6
lines changed

23 files changed

+525
-6
lines changed

bootstraprag/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def create(project_name, framework, template, observability):
3434
'rag-with-hyde',
3535
'rag-with-flare',
3636
'rag-with-self-correction',
37+
'rag-with-controllable-agents',
3738
'llama-deploy-with-simplemq',
3839
'llama-deploy-with-rabbitmq',
3940
'llama-deploy-with-kafka'
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
DB_URL='http://localhost:6333'
2+
DB_API_KEY='th3s3cr3tk3y'
3+
COLLECTION_NAME='CONTROLLABLE_AGENTS_COLLECTION'
4+
5+
OPENAI_API_KEY='sk-'
6+
OPENAI_EMBED_MODEL='text-embedding-3-small'
7+
# both of these models can be the same as well.
8+
OPENAI_AGENT_MODEL='gpt-4-turbo-2024-04-09'
9+
OPENAI_QUERY_MODEL='gpt-4o'
10+
11+
# use this in case you prefer to experiment with local models.
12+
OLLAMA_BASE_URL='http://localhost:11434'
13+
OLLAMA_LLM_MODEL='llama3.1'
14+
OLLAMA_EMBED_MODEL='nomic-embed-text:latest'
15+
16+
# logger can be controlled using env
17+
CRITICAL = 50
18+
FATAL = 50
19+
ERROR = 40
20+
WARNING = 30
21+
WARN = 30
22+
INFO = 20
23+
DEBUG = 10
24+
NOTSET = 0
25+
26+
LIT_SERVER_PORT=8000
27+
LIT_SERVER_WORKERS_PER_DEVICE=4
28+
29+
IS_EVALUATION_NEEDED=true
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Use the official Python image from the Docker Hub
2+
FROM python:3.9-slim
3+
4+
# Set the working directory in the container
5+
WORKDIR /app
6+
7+
# Copy the requirements file to the container
8+
COPY requirements.txt .
9+
10+
# Install the required dependencies
11+
RUN pip install --no-cache-dir -r requirements.txt
12+
13+
# Copy the current directory contents into the container at /app
14+
COPY . .
15+
16+
# Set environment variables (you can replace these with values from your .env file or other configs)
17+
ENV DB_URL='http://host.docker.internal:6333' \
18+
OLLAMA_BASE_URL='http://host.docker.internal:11434'
19+
20+
# Expose port 8000 for external access
21+
EXPOSE 8000
22+
23+
# Command to run your application
24+
CMD ["python", "api_server.py"]

bootstraprag/templates/llamaindex/rag_with_controllable_agents/__init__.py

Whitespace-only changes.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright The Lightning AI team.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import requests
15+
16+
# Send one sample request to the locally running server's /predict endpoint.
# An explicit timeout is set because requests waits indefinitely by default,
# which would make this script hang if the server never answers.
response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0}, timeout=60)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import os
2+
from llama_index.core import (
3+
SimpleDirectoryReader,
4+
VectorStoreIndex,
5+
StorageContext,
6+
Settings,
7+
)
8+
from llama_index.core.node_parser import SentenceSplitter
9+
from llama_index.llms.openai import OpenAI
10+
from llama_index.core.tools import QueryEngineTool, ToolMetadata
11+
from llama_index.core.agent import ReActAgent
12+
from llama_index.vector_stores.qdrant import QdrantVectorStore
13+
from rag_evaluator import RAGEvaluator
14+
from dotenv import load_dotenv, find_dotenv
15+
import qdrant_client
16+
import logging
17+
18+
19+
class ControllableAgentsWithHumanInLoop:
    """Step-wise ReAct agent over a Qdrant-backed vector index with human feedback.

    On construction the class loads documents from ``input_dir``, exposes them
    through a single query-engine tool and builds a ``ReActAgent`` on top of it.
    ``chat_repl`` then runs the agent one step at a time and asks the operator
    for feedback on the console between steps.

    Environment variables read (after loading ``.env``): ``OPENAI_QUERY_MODEL``,
    ``OPENAI_AGENT_MODEL``, ``DB_URL``, ``DB_API_KEY``, ``COLLECTION_NAME`` and
    ``IS_EVALUATION_NEEDED``.
    """

    def __init__(self, input_dir: str, show_progress: bool = True, required_exts: list[str] = ['.pdf', '.txt'],
                 similarity_top_k: int = 3, chunk_size: int = 512, chunk_overlap: int = 200, max_iterations: int = 20):
        """Load the documents, prepare the vector store and build the agent.

        Args:
            input_dir: Directory the documents are read from.
            show_progress: Forwarded to ``SimpleDirectoryReader.load_data``.
            required_exts: File extensions the reader is restricted to.
            similarity_top_k: Number of nodes the query engine retrieves.
            chunk_size: Stored in ``Settings.chunk_size``.
            chunk_overlap: Stored in ``Settings.chunk_overlap``.
            max_iterations: Forwarded to ``ReActAgent.from_tools``.

        Raises:
            KeyError: If ``DB_URL``, ``DB_API_KEY`` or ``COLLECTION_NAME`` is
                missing from the environment.
        """
        load_dotenv(find_dotenv())

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        llm = OpenAI(model=os.environ.get('OPENAI_QUERY_MODEL'))

        Settings.chunk_size = chunk_size
        Settings.chunk_overlap = chunk_overlap
        Settings.llm = llm

        self.rag_evaluator = RAGEvaluator()

        self.similarity_top_k = similarity_top_k

        # Kept as an attribute; indexing in get_tool uses Settings.transformations.
        self.text_parser = SentenceSplitter(chunk_size=Settings.chunk_size, chunk_overlap=Settings.chunk_overlap)

        self.client = qdrant_client.QdrantClient(url=os.environ['DB_URL'], api_key=os.environ['DB_API_KEY'])
        self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])
        self.vector_index = None

        # Copy the extensions so the shared default list object is never handed
        # to (and possibly mutated by) the reader.
        self.mlops_data = SimpleDirectoryReader(input_dir=input_dir, required_exts=list(required_exts)).load_data(
            show_progress=show_progress)

        self.mlops_tool = self.get_tool("mlops_tool", "MLOps Tool", documents=self.mlops_data)

        self.query_engine_tools = [self.mlops_tool]

        agent_llm = OpenAI(model=os.environ.get('OPENAI_AGENT_MODEL'))
        self.agent = ReActAgent.from_tools(
            self.query_engine_tools, llm=agent_llm, verbose=True, max_iterations=max_iterations
        )

    def get_tool(self, name, full_name, documents=None):
        """Build a ``QueryEngineTool`` backed by the configured Qdrant collection.

        When the collection does not exist yet, ``documents`` are indexed into
        it; otherwise the index is attached to the existing vector store and
        ``documents`` is not used.

        Args:
            name: Tool name placed in the tool metadata.
            full_name: Human readable name appended to the tool description.
            documents: Documents to index when the collection is missing.

        Returns:
            The query-engine tool wrapping the vector index.
        """
        self.logger.info("initializing the storage context")
        storage_context = StorageContext.from_defaults(vector_store=self.vector_store)

        # Same lookup style as __init__, which already requires this variable.
        collection_name = os.environ['COLLECTION_NAME']
        if not self.client.collection_exists(collection_name=collection_name):
            self.logger.info("indexing the nodes in VectorStoreIndex")
            self.vector_index = VectorStoreIndex.from_documents(
                documents=documents,
                storage_context=storage_context,
                transformations=Settings.transformations,
            )
        else:
            self.vector_index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)

        query_engine = self.vector_index.as_query_engine(similarity_top_k=self.similarity_top_k, llm=Settings.llm)
        return QueryEngineTool(
            query_engine=query_engine,
            metadata=ToolMetadata(
                name=name,
                description=(
                    "Provides information about mlops and its details"
                    f" {full_name}"
                ),
            ),
        )

    def chat_repl(self, user_query: str, exit_when_done: bool = True):
        """Run the agent step by step, asking for console feedback between steps.

        Args:
            user_query: The task handed to the agent.
            exit_when_done: When true, stop as soon as a step reports
                ``is_last`` instead of prompting for more feedback.

        Returns:
            The finalized agent response, or ``None`` when the operator typed
            ``exit`` before the agent reached its last step.
        """
        task = self.agent.create_task(user_query)

        response = None
        step_output = None
        message = None
        # The loop body always runs at least once because message starts as None.
        while message != "exit":
            if message is None or message == "":
                step_output = self.agent.run_step(task.task_id)
            else:
                step_output = self.agent.run_step(task.task_id, input=message)
            if exit_when_done and step_output.is_last:
                print(">> Task marked as finished by the agent, executing task execution.")
                break

            message = input(">> Add feedback during step? (press enter/leave blank to continue, "
                            "and type 'exit' to stop): ")

        if step_output.is_last:
            response = self.agent.finalize_response(task.task_id)
            print(f"Agent: {str(response)}")
        else:
            print(">> The agent hasn't finished yet. Task is discarded.")

        # A discarded task has no response; the evaluator needs one to read
        # source nodes from, so evaluation is skipped in that case.
        if response is not None and os.environ.get('IS_EVALUATION_NEEDED') == 'true':
            self.rag_evaluator.evaluate(user_query=user_query, response_obj=response)
        return response
Binary file not shown.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# driver code
2+
from controllable_agents_with_human_in_the_loop import ControllableAgentsWithHumanInLoop
3+
4+
5+
controllable_agent = ControllableAgentsWithHumanInLoop(input_dir='data', show_progress=True)

# Keep asking the user for queries until they type 'bye' or 'exit'.
while True:
    try:
        user_query = input("Enter your query [type 'bye' to 'exit']: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
        break

    # Nothing typed: prompt again instead of sending an empty task to the agent.
    if not user_query:
        continue

    # Check if the user wants to terminate the loop
    if user_query.lower() in ("bye", "exit"):
        break

    response = controllable_agent.chat_repl(user_query=user_query)
    print(response)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from deepeval.integrations.llama_index import (
2+
DeepEvalFaithfulnessEvaluator,
3+
DeepEvalAnswerRelevancyEvaluator,
4+
DeepEvalContextualRelevancyEvaluator
5+
)
6+
from dotenv import load_dotenv, find_dotenv
7+
from typing import Any
8+
import os
9+
import logging
10+
11+
# Load .env first so the log level below can come from it.
_ = load_dotenv(find_dotenv())
# The level is read from the INFO variable; fall back to logging.INFO so that
# importing this module does not raise KeyError when the variable is not set.
logging.basicConfig(level=int(os.environ.get('INFO', logging.INFO)))
logger = logging.getLogger(__name__)
14+
15+
16+
class RAGEvaluator:
    """Scores a response with the DeepEval faithfulness, answer-relevancy and
    contextual-relevancy evaluators and logs each score."""

    def __init__(self):
        """Create one instance of each DeepEval evaluator."""
        self.faithfulness_evaluator = DeepEvalFaithfulnessEvaluator()
        self.answer_relevancy_evaluator = DeepEvalAnswerRelevancyEvaluator()
        self.context_relevancy_evaluator = DeepEvalContextualRelevancyEvaluator()

    def evaluate(self, user_query: str, response_obj: Any):
        """Run the three evaluators on a response and log their scores.

        Args:
            user_query: The query that produced the response.
            response_obj: Object exposing ``source_nodes`` (each with
                ``get_content()``) and ``response``. ``None`` is accepted and
                skips the evaluation.

        Returns:
            None. Scores are only written to the log.
        """
        if response_obj is None:
            # No response to score; reading source_nodes from None would raise.
            logger.warning("skipping evaluation, no response for user_query: %s", user_query)
            return

        logger.info("calling evaluation, user_query: %s, response_obj: %s", user_query, response_obj)
        retrieval_context = [node.get_content() for node in response_obj.source_nodes]
        actual_output = response_obj.response

        labelled_evaluators = (
            ("faithfulness_evaluation_response", self.faithfulness_evaluator),
            ("answer_relevancy_response", self.answer_relevancy_evaluator),
            ("context_relevancy_response", self.context_relevancy_evaluator),
        )
        # Run every evaluator before logging, so a failing evaluator leaves no partial score output.
        results = [
            (label, evaluator.evaluate(query=user_query, response=actual_output, contexts=retrieval_context))
            for label, evaluator in labelled_evaluators
        ]
        for label, result in results:
            logger.info("%s: %s", label, result.score)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
## Instructions to run the code
2+
3+
- Navigate to the root of the project and run the below command
4+
- `pip install -r requirements.txt`
5+
- open the `.env` file and update your qdrant password in the property `DB_API_KEY`
6+
- In the `data` folder, place your data, preferably ".pdf" files
7+
#### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running
8+
- run `python main.py`
9+
10+
Note: This is a human-in-the-loop agent, so keep a watch on the console to pass human feedback to the agent.

0 commit comments

Comments
 (0)