
Commit defbbba

updated to langchain latest

1 parent 4589594 · commit defbbba

File tree

6 files changed: +1431 additions, −990 deletions

Pipfile

Lines changed: 4 additions & 0 deletions
```diff
@@ -19,6 +19,10 @@ streamlit = "*"
 streamlit-chat = "*"
 tqdm = "*"
 langchain-pinecone = "*"
+langchain-openai = "*"
+langchain-community = "*"
+isort = "*"
+langchainhub = "*"
 
 [dev-packages]
 
```

Pipfile.lock

Lines changed: 1355 additions & 965 deletions
Some generated files are not rendered by default.

backend/core.py

Lines changed: 56 additions & 12 deletions
```diff
@@ -1,27 +1,71 @@
 from dotenv import load_dotenv
 
 load_dotenv()
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
 
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 
 from typing import Any, Dict, List
-from langchain.chains import ConversationalRetrievalChain
-from langchain_pinecone import PineconeVectorStore
 
+from langchain import hub
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains.history_aware_retriever import create_history_aware_retriever
+from langchain.chains.retrieval import create_retrieval_chain
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_pinecone import PineconeVectorStore
 
-INDEX_NAME = "langchain-doc-index"
+from consts import INDEX_NAME
 
 
 def run_llm(query: str, chat_history: List[Dict[str, Any]] = []):
-    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
-    docsearch = PineconeVectorStore(embedding=embeddings, index_name=INDEX_NAME)
+    embeddings = OpenAIEmbeddings()
+    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
+    chat = ChatOpenAI(verbose=True, temperature=0)
+
+    rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")
+
+    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
+    stuff_documents_chain = create_stuff_documents_chain(chat, retrieval_qa_chat_prompt)
+
+    history_aware_retriever = create_history_aware_retriever(
+        llm=chat, retriever=docsearch.as_retriever(), prompt=rephrase_prompt
+    )
+    qa = create_retrieval_chain(
+        retriever=history_aware_retriever, combine_docs_chain=stuff_documents_chain
+    )
 
-    chat = ChatOpenAI(
-        verbose=True,
-        temperature=0,
+    result = qa.invoke(input={"input": query, "chat_history": chat_history})
+    return result
+
+
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
+
+
+def run_llm2(query: str, chat_history: List[Dict[str, Any]] = []):
+    embeddings = OpenAIEmbeddings()
+    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
+    chat = ChatOpenAI(model_name="gpt-4o", verbose=True, temperature=0)
+
+    rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")
+
+    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
+
+    rag_chain = (
+        {
+            "context": docsearch.as_retriever() | format_docs,
+            "input": RunnablePassthrough(),
+        }
+        | retrieval_qa_chat_prompt
+        | chat
+        | StrOutputParser()
     )
 
-    qa = ConversationalRetrievalChain.from_llm(
-        llm=chat, retriever=docsearch.as_retriever(), return_source_documents=True
+    retrieve_docs_chain = (lambda x: x["input"]) | docsearch.as_retriever()
+
+    chain = RunnablePassthrough.assign(context=retrieve_docs_chain).assign(
+        answer=rag_chain
     )
-    return qa.invoke({"question": query, "chat_history": chat_history})
+
+    result = chain.invoke({"input": query, "chat_history": chat_history})
+    return result
```
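The change above swaps the legacy ConversationalRetrievalChain for the create_history_aware_retriever / create_retrieval_chain pair, with an LCEL variant in run_llm2. A minimal usage sketch of the refactored run_llm, assuming OPENAI_API_KEY and PINECONE_API_KEY are set and the Pinecone index already exists; the query string is made up:

```python
# Sketch: calling the refactored run_llm (query text is hypothetical).
from backend.core import run_llm

chat_history = []
result = run_llm(query="What is a LangChain retriever?", chat_history=chat_history)

# create_retrieval_chain returns a dict; main.py reads exactly these two keys:
print(result["answer"])  # the generated answer string
print({doc.metadata["source"] for doc in result["context"]})  # retrieved Documents

# History is kept as (role, text) tuples, matching what main.py appends:
chat_history.append(("human", "What is a LangChain retriever?"))
chat_history.append(("ai", result["answer"]))
```

run_llm2 produces the same "context" and "answer" keys via RunnablePassthrough.assign, so the Streamlit frontend can consume either function's output.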

consts.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -0,0 +1 @@
+INDEX_NAME = "langchain-doc-index"
```

ingestion.py

Lines changed: 3 additions & 6 deletions
```diff
@@ -2,15 +2,12 @@
 
 load_dotenv()
 
-import os
-
-from langchain_community.document_loaders import ReadTheDocsLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_pinecone import PineconeVectorStore
+from langchain_community.document_loaders import ReadTheDocsLoader
 from langchain_openai import OpenAIEmbeddings
+from langchain_pinecone import PineconeVectorStore
 
-
-INDEX_NAME = "langchain-doc-index"
+from consts import INDEX_NAME
 
 embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
 
```
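Only the import block of ingestion.py appears in this diff; the loading and indexing code below it is unchanged and not shown. For orientation, a hedged sketch of how such an ingestion script typically continues, reusing the names from the imports above; the docs path and chunk sizes are assumptions, not taken from this commit:

```python
# Hedged sketch of the ingestion flow below the imports shown in the diff.
from dotenv import load_dotenv

load_dotenv()

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import ReadTheDocsLoader
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

from consts import INDEX_NAME

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")


def ingest_docs() -> None:
    # Load the scraped ReadTheDocs HTML (the path is hypothetical).
    raw_documents = ReadTheDocsLoader("langchain-docs/").load()

    # Split into overlapping chunks sized for embedding (sizes assumed).
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    documents = splitter.split_documents(raw_documents)

    # Embed the chunks and upsert them into the Pinecone index from consts.py.
    PineconeVectorStore.from_documents(documents, embeddings, index_name=INDEX_NAME)


if __name__ == "__main__":
    ingest_docs()
```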

main.py

Lines changed: 12 additions & 7 deletions
```diff
@@ -1,9 +1,13 @@
+from dotenv import load_dotenv
+
+load_dotenv()
 from typing import Set
 
-from backend.core import run_llm
 import streamlit as st
 from streamlit_chat import message
 
+from backend.core import run_llm
+
 
 def create_sources_string(source_urls: Set[str]) -> str:
     if not source_urls:
@@ -37,16 +41,17 @@ def create_sources_string(source_urls: Set[str]) -> str:
             query=prompt, chat_history=st.session_state["chat_history"]
         )
 
-        sources = set(
-            [doc.metadata["source"] for doc in generated_response["source_documents"]]
-        )
+        sources = set(doc.metadata["source"] for doc in generated_response["context"])
+
         formatted_response = (
             f"{generated_response['answer']} \n\n {create_sources_string(sources)}"
         )
 
-        st.session_state.chat_history.append((prompt, generated_response["answer"]))
-        st.session_state.user_prompt_history.append(prompt)
-        st.session_state.chat_answers_history.append(formatted_response)
+        st.session_state["user_prompt_history"].append(prompt)
+        st.session_state["chat_answers_history"].append(formatted_response)
+        st.session_state["chat_history"].append(("human", prompt))
+        st.session_state["chat_history"].append(("ai", generated_response["answer"]))
+
 
 if st.session_state["chat_answers_history"]:
     for generated_response, user_query in zip(
```
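Note the new chat_history format: ("human", ...) / ("ai", ...) tuples rather than (question, answer) pairs. Such 2-tuples are LangChain's message shorthand, which the hub prompts' history placeholder can consume directly; a quick sketch of the coercion (example strings are made up):

```python
# Sketch: (role, text) tuples are valid LangChain message shorthand.
from langchain_core.messages import convert_to_messages

history = [
    ("human", "What is a retriever?"),
    ("ai", "A retriever returns documents relevant to a query."),
]
print(convert_to_messages(history))
# [HumanMessage(content='What is a retriever?'),
#  AIMessage(content='A retriever returns documents relevant to a query.')]
```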
