
Commit aff1b24

pavanjava and pavanmantha authored

Enhanced retrievals (#60)

* tested and upgraded version
* implemented ragas evals
* implemented rag with llama_parse

Co-authored-by: pavanmantha <[email protected]>
1 parent c4bd288 commit aff1b24

File tree: 7 files changed, +339 −0 lines changed

Lines changed: 26 additions & 0 deletions (example environment configuration)
@@ -0,0 +1,26 @@
llama_cloud=llx-<your_key>

DB_URL='http://localhost:6333'
DB_API_KEY='th3s3cr3tk3y'
COLLECTION_NAME='LLAMA_PARSE_COLLECTION'

OPENAI_API_KEY=''
OPENAI_EMBED_MODEL=''

# use these in case you prefer to experiment with local models
OLLAMA_BASE_URL='http://localhost:11434'
OLLAMA_LLM_MODEL='llama3.2:latest'
OLLAMA_EMBED_MODEL='nomic-embed-text:latest'

# the logging level can be controlled via env; values mirror Python's logging constants
CRITICAL = 50
FATAL = 50
ERROR = 40
WARNING = 30
WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
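A small sketch of how these values are consumed at startup, assuming python-dotenv (pinned in the requirements below); the level names deliberately mirror Python's logging constants:

import logging
import os

from dotenv import load_dotenv, find_dotenv

# pull the key/value pairs above into the process environment
load_dotenv(find_dotenv())

# 'INFO' resolves to the string '20', which basicConfig accepts once cast to int
logging.basicConfig(level=int(os.environ.get('INFO', logging.INFO)))

qdrant_url = os.environ['DB_URL']             # Qdrant endpoint
collection = os.environ['COLLECTION_NAME']    # target collection name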

bootstraprag/templates/llamaindex/rag_with_llama_parse/__init__.py

Whitespace-only changes.
Binary file not shown.
Lines changed: 145 additions & 0 deletions (hyde_rag.py, the HyDE engine imported by the driver below)
@@ -0,0 +1,145 @@
import os
import logging
from typing import Optional, Union

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    Settings,
    get_response_synthesizer)
from llama_index.core.query_engine import RetrieverQueryEngine, TransformQueryEngine
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode, MetadataMode
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.ollama import OllamaEmbedding
# enable if you are using openai
# from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.ollama import Ollama
# enable if you are using openai
# from llama_index.llms.openai import OpenAI
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.base.response.schema import Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
from llama_parse import LlamaParse
from dotenv import load_dotenv, find_dotenv
import qdrant_client

_ = load_dotenv(find_dotenv())

# the .env maps level names to Python's logging constants; default to INFO if unset
logging.basicConfig(level=int(os.environ.get('INFO', logging.INFO)))
logger = logging.getLogger(__name__)


class RAGWithHyDeEngine:
    RESPONSE_TYPE = Union[
        Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
    ]

    def __init__(self, data_path: str, chunk_size: int = 512, chunk_overlap: int = 200,
                 similarity_top_k: int = 3):
        # load the local data directory and chunk the data for further processing
        self.docs = self._docs_with_llama_parse(data_path=data_path)
        self.text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

        # create a local Qdrant vector store
        logger.info("initializing the vector store related objects")
        self.client = qdrant_client.QdrantClient(url=os.environ['DB_URL'], api_key=os.environ['DB_API_KEY'])
        self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])

        # use your preferred vector embeddings model
        logger.info("initializing the OllamaEmbedding")
        embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'],
                                      base_url=os.environ['OLLAMA_BASE_URL'])
        # openai embeddings, embedding_model_name="text-embedding-3-large"
        # embed_model = OpenAIEmbedding(embed_batch_size=10, model=embedding_model_name)

        # use your preferred llm
        llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], base_url=os.environ['OLLAMA_BASE_URL'], request_timeout=600)
        # llm = OpenAI(model="gpt-4o")

        logger.info("initializing the global settings")
        Settings.embed_model = embed_model
        Settings.llm = llm
        Settings.transformations = [self.text_parser]

        self.text_chunks = []
        self.doc_ids = []
        self.nodes = []

        self.similarity_top_k = similarity_top_k
        self.hyde_query_engine: Optional[TransformQueryEngine] = None

        # preprocess the data: chunking, node creation, metadata, embeddings
        self._pre_process()

    def _docs_with_llama_parse(self, data_path: str):
        # set up parser
        parser = LlamaParse(
            result_type="markdown",  # "markdown" and "text" are available
            api_key=os.environ.get('llama_cloud')
        )

        # use SimpleDirectoryReader to parse our files; note that file_extractor
        # keys are file suffixes including the leading dot
        file_extractor = {".pdf": parser}
        documents = SimpleDirectoryReader(input_dir=data_path, file_extractor=file_extractor).load_data(
            show_progress=True)
        return documents

    def _pre_process(self):
        logger.info("enumerating docs")
        for doc_idx, doc in enumerate(self.docs):
            curr_text_chunks = self.text_parser.split_text(doc.text)
            self.text_chunks.extend(curr_text_chunks)
            # remember which source document each chunk came from
            self.doc_ids.extend([doc_idx] * len(curr_text_chunks))

        logger.info("enumerating text_chunks")
        for idx, text_chunk in enumerate(self.text_chunks):
            node = TextNode(text=text_chunk)
            src_doc = self.docs[self.doc_ids[idx]]
            node.metadata = src_doc.metadata
            self.nodes.append(node)

        logger.info("enumerating nodes")
        for node in self.nodes:
            node_embedding = Settings.embed_model.get_text_embedding(
                node.get_content(metadata_mode=MetadataMode.ALL)
            )
            node.embedding = node_embedding

        # create vector store, index documents and create the retriever
        self._create_index_and_retriever()

    def _create_index_and_retriever(self):
        logger.info("initializing the storage context")
        storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
        logger.info("indexing the nodes in VectorStoreIndex")
        # index the nodes only when the collection does not exist yet;
        # otherwise reuse the vectors already persisted in Qdrant
        if not self.client.collection_exists(collection_name=os.environ['COLLECTION_NAME']):
            index = VectorStoreIndex(
                nodes=self.nodes,
                storage_context=storage_context,
                transformations=Settings.transformations,
            )
        else:
            index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)

        logger.info(f"initializing the VectorIndexRetriever with top_k as {self.similarity_top_k}")
        vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=self.similarity_top_k)
        response_synthesizer = get_response_synthesizer()
        logger.info("creating the RetrieverQueryEngine instance")
        vector_query_engine = RetrieverQueryEngine(
            retriever=vector_retriever,
            response_synthesizer=response_synthesizer,
        )
        logger.info("creating the HyDEQueryTransform instance")
        hyde = HyDEQueryTransform(include_original=True)
        hyde_query_engine = TransformQueryEngine(vector_query_engine, hyde)

        self.hyde_query_engine = hyde_query_engine

    def query(self, query_string: str) -> RESPONSE_TYPE:
        try:
            response = self.hyde_query_engine.query(str_or_query_bundle=query_string)
            return response
        except Exception as e:
            # the caller receives None when inference fails
            logger.error(f'Error while inference: {e}')
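For context, HyDE retrieves with a hypothetical answer rather than the raw question. A minimal sketch of the transform in isolation, assuming Settings.llm is already configured (as importing the module above does); the question string is only an example:

from llama_index.core.indices.query.query_transform import HyDEQueryTransform

# HyDE asks the LLM to write a hypothetical document that answers the question;
# retrieval then embeds that document (optionally alongside the original query),
# which usually lands closer to answer-shaped passages in the vector space
hyde = HyDEQueryTransform(include_original=True)
query_bundle = hyde("What does the indexed report say about revenue growth?")

# the generated hypothetical document(s) that will be embedded for retrieval
print(query_bundle.custom_embedding_strs)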
Lines changed: 26 additions & 0 deletions (the driver script that wires both engines)
@@ -0,0 +1,26 @@
# driver code
from react_rag import ReActWithQueryEngine
from hyde_rag import RAGWithHyDeEngine

technique = 'react'  # 'react' or 'hyde'

# build the engine once, outside the input loop: pre-processing, indexing in
# the vector store and retriever creation may take some time depending on your
# document size and chunk strategy, and should not be repeated on every query
if technique == 'hyde':
    # leaving all the defaults; override them in the constructor if needed
    rag_engine = RAGWithHyDeEngine(data_path='data')
else:
    rag_engine = ReActWithQueryEngine(input_dir='data', show_progress=True)

# start a loop to continually get input from the user
while True:
    # get a query from the user
    user_query = input("Enter your query [type 'bye' to 'exit']: ")

    # check if the user wants to terminate the loop
    if user_query.lower() in ("bye", "exit"):
        break

    if technique == 'hyde':
        response = rag_engine.query(query_string=user_query)
    else:
        response = rag_engine.query(user_query=user_query)

    print(response)
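Since every other knob already lives in .env, the technique switch could follow the same pattern; a hedged variant (the TECHNIQUE variable is an assumption, not part of this template):

import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

# hypothetical TECHNIQUE entry in .env; falls back to 'react' like the driver above
technique = os.environ.get('TECHNIQUE', 'react').lower()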
Lines changed: 138 additions & 0 deletions (react_rag.py, the ReAct engine imported by the driver)
@@ -0,0 +1,138 @@
import os
import logging
from typing import Optional, Union

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    Settings
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.agent import ReActAgent
from llama_index.llms.ollama import Ollama
from llama_index.core.base.response.schema import Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
from llama_parse import LlamaParse
from dotenv import load_dotenv, find_dotenv
import qdrant_client

_ = load_dotenv(find_dotenv())

# the .env maps level names to Python's logging constants; default to INFO if unset
logging.basicConfig(level=int(os.environ.get('INFO', logging.INFO)))
logger = logging.getLogger(__name__)


class ReActWithQueryEngine:
    RESPONSE_TYPE = Union[
        Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
    ]

    def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100,
                 show_progress: bool = False, no_of_iterations: int = 5,
                 required_exts: Optional[list[str]] = None):
        self.index_loaded = False
        self.similarity_top_k = similarity_top_k
        self.input_dir = input_dir
        self._index = None
        self._engine = None
        self.agent: Optional[ReActAgent] = None
        self.query_engine_tools = []
        self.show_progress = show_progress
        self.no_of_iterations = no_of_iterations
        # avoid a mutable default argument; fall back to pdf and txt
        self.required_exts = required_exts or ['.pdf', '.txt']

        # use your preferred vector embeddings model
        logger.info("initializing the OllamaEmbedding")
        embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'],
                                      base_url=os.environ['OLLAMA_BASE_URL'])
        # openai embeddings, embedding_model_name="text-embedding-3-large"
        # embed_model = OpenAIEmbedding(embed_batch_size=10, model=embedding_model_name)

        # use your preferred llm
        llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], base_url=os.environ['OLLAMA_BASE_URL'], request_timeout=600)
        # llm = OpenAI(model="gpt-4o")

        logger.info("initializing the global settings")
        Settings.embed_model = embed_model
        Settings.llm = llm
        Settings.chunk_size = chunk_size
        Settings.chunk_overlap = chunk_overlap

        # create a local Qdrant vector store
        logger.info("initializing the vector store related objects")
        self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['DB_URL'],
                                                                             api_key=os.environ['DB_API_KEY'])
        self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])
        self._load_data_and_create_engine()

    def _docs_with_llama_parse(self, data_path: str):
        # set up parser
        parser = LlamaParse(
            result_type="markdown",  # "markdown" and "text" are available
            api_key=os.environ.get('llama_cloud')
        )

        # use SimpleDirectoryReader to parse our files; note that file_extractor
        # keys are file suffixes including the leading dot, and pass the
        # configured extensions so the required_exts field actually takes effect
        file_extractor = {".pdf": parser}
        documents = SimpleDirectoryReader(input_dir=data_path, file_extractor=file_extractor,
                                          required_exts=self.required_exts).load_data(show_progress=True)
        return documents

    def _load_data_and_create_engine(self):
        # reuse the existing Qdrant collection when possible instead of re-indexing
        if self.client.collection_exists(collection_name=os.environ['COLLECTION_NAME']):
            try:
                self._index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)
                self.index_loaded = True
            except Exception as e:
                logger.warning(f'could not load the existing index, re-indexing: {e}')
                self.index_loaded = False

        if not self.index_loaded:
            # load data
            _docs = self._docs_with_llama_parse(data_path=self.input_dir)

            # build and persist index
            storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
            logger.info("indexing the docs in VectorStoreIndex")
            self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context,
                                                          show_progress=self.show_progress)

        self._engine = self._index.as_query_engine(similarity_top_k=self.similarity_top_k)
        self._create_query_engine_tools()

    def _create_query_engine_tools(self):
        # more than one tool can be registered as per the requirement
        self.query_engine_tools.append(
            QueryEngineTool(
                query_engine=self._engine,
                metadata=ToolMetadata(
                    name="test_tool_engine",  # change this accordingly
                    description=(
                        "Provides information about the user query based on the indexed documents. "
                        "Use a detailed plain text question as input to the tool."
                    ),
                ),
            )
        )
        self._create_react_agent()

    def _create_react_agent(self):
        # [Optional] Add Context
        # context = """\
        # You are a stock market sorcerer who is an expert on the companies Lyft and Uber.\
        # You will answer questions about Uber and Lyft as in the persona of a sorcerer \
        # and veteran stock market investor.
        # """
        self.agent = ReActAgent.from_tools(
            self.query_engine_tools,
            llm=Settings.llm,
            verbose=True,
            # context=context
            max_iterations=self.no_of_iterations
        )

    def query(self, user_query: str) -> RESPONSE_TYPE:
        try:
            response = self.agent.query(str_or_query_bundle=user_query)
            return response
        except Exception as e:
            # the caller receives None when the agent fails
            logger.error(f'Error while generating response: {e}')
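A minimal standalone usage sketch of the class above, assuming Qdrant is reachable at DB_URL and the Ollama models named in .env are pulled:

from react_rag import ReActWithQueryEngine

# the first run parses and indexes the documents under ./data; later runs
# reuse the existing Qdrant collection, so construction is much faster
engine = ReActWithQueryEngine(input_dir='data', show_progress=True)

response = engine.query(user_query="Summarize the key points of the indexed documents.")
print(response)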
Lines changed: 4 additions & 0 deletions (pinned requirements)
@@ -0,0 +1,4 @@
llama-index==0.11.19
llama-parse==0.5.10
llama-index-readers-file==0.2.2
python-dotenv==1.0.1
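Note that the llama-index metapackage does not bundle the Qdrant or Ollama integrations imported by hyde_rag.py and react_rag.py, so these likely need installing alongside the pins above (left unpinned here, since compatible versions track the llama-index 0.11.x line):

llama-index-vector-stores-qdrant
llama-index-embeddings-ollama
llama-index-llms-ollama

With those added, pip install -r requirements.txt covers everything the two modules import.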
