Commit 0d96634

initial code
0 parents  commit 0d96634

File tree

.gitignore
app.py
chat.py
scrape.py

4 files changed: +105 -0 lines changed


.gitignore

+3
@@ -0,0 +1,3 @@
.venv/*
__pycache__
db/*

app.py

+26
@@ -0,0 +1,26 @@
from flask import Flask, request
import scrape
import chat

app = Flask(__name__)

# POST /scrape: fetch the article at "url" and persist it to the vector store
@app.route("/scrape", methods=["POST"])
def scrapeUrl():
    json_content = request.json
    url = json_content.get("url")

    messages = scrape.fetch_and_persist_article(url)

    return {"url": url, "messages": messages}

# POST /ask_bot: answer "question" using previously scraped articles as context
@app.route("/ask_bot", methods=["POST"])
def askBot():
    json_content = request.json
    question = json_content.get("question")

    response = chat.answer_question_with_context(question)

    return response

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8080, debug=True)
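
With the server above running, the two endpoints can be exercised from a short client script. A minimal sketch using the requests library; the base URL, example article URL, and question are illustrative assumptions, not part of the commit:

import requests

BASE = "http://localhost:8080"  # assumes app.py is running locally on port 8080

# Index an article first (placeholder URL; substitute any page to ingest)
resp = requests.post(f"{BASE}/scrape", json={"url": "https://example.com/article"})
print(resp.json())  # {"url": ..., "messages": [...]}

# Then ask a question against the indexed content
resp = requests.post(f"{BASE}/ask_bot", json={"question": "What is the article about?"})
print(resp.json())  # {"response": ..., "messages": [...]}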

chat.py

+45
@@ -0,0 +1,45 @@
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

def answer_question_with_context(question):
    messages = []
    persist_directory = "db"
    local_embeddings = OllamaEmbeddings(model="llama3.1:8b")

    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=local_embeddings)

    docs = vectorstore.similarity_search(question)
    if not docs:
        messages.append("No relevant information was found")
        # Return the same shape as the success path so the Flask route
        # never hands None back to the client
        return {"response": None, "messages": messages}

    # Define the RAG prompt template
    RAG_TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Answer in about 3 lines and keep the answer concise.

<context>
{context}
</context>

Answer the following question:

{question}"""

    rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)
    model = ChatOllama(model="llama3.1:8b")

    # Format the retrieved docs into the prompt, run the local model,
    # and parse the reply down to a plain string
    chain = (
        RunnablePassthrough.assign(context=lambda input: format_docs(input["context"]))
        | rag_prompt
        | model
        | StrOutputParser()
    )

    response = chain.invoke({"context": docs, "question": question})
    return {"response": response, "messages": messages}
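
Called outside Flask, answer_question_with_context returns a plain dict. A quick smoke test, assuming Ollama is serving llama3.1:8b locally and the db directory has already been populated via /scrape:

from chat import answer_question_with_context

result = answer_question_with_context("What is the article about?")
print(result["messages"])  # empty on the happy path, diagnostics otherwise
print(result["response"])  # the model's concise answer, or None if no docs matched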

scrape.py

+31
@@ -0,0 +1,31 @@
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
import os

def fetch_and_persist_article(url):
    messages = []
    local_embeddings = OllamaEmbeddings(model="llama3.1:8b")
    persist_directory = "db"

    # Chroma opens an existing store or creates a new one at persist_directory;
    # the branch only decides which status message to record
    if os.path.exists(persist_directory):
        vectorstore = Chroma(persist_directory=persist_directory, embedding_function=local_embeddings)
        messages.append("Loaded the existing Chroma DB")
    else:
        vectorstore = Chroma(persist_directory=persist_directory, embedding_function=local_embeddings)
        messages.append("Created the Chroma DB")

    loader = WebBaseLoader(url)
    data = loader.load()
    messages.append("URL Loaded")

    # Split the page into 500-character chunks (no overlap) before embedding
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    all_splits = text_splitter.split_documents(data)

    vectorstore.add_documents(documents=all_splits)
    messages.append("Added to Chroma DB")

    return messages
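
The scraping step can likewise be smoke-tested without the HTTP layer. A minimal sketch; the article URL is a placeholder, and it assumes the langchain packages are installed and Ollama is running with llama3.1:8b pulled:

from scrape import fetch_and_persist_article

for msg in fetch_and_persist_article("https://example.com/article"):
    print(msg)
# Expected progression: "Created the Chroma DB" (or "Loaded the existing Chroma DB"),
# then "URL Loaded", then "Added to Chroma DB"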
