
Commit 6343fa1

Merge pull request #2504 from ayush-09/chatbot
Support ChatBot
2 parents 6acae1a + 872f87f commit 6343fa1

File tree: 5 files changed (+170, -0 lines)

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import streamlit as st
from utils import *
import constants

# Create session state variables for the API keys
if 'HuggingFace_API_Key' not in st.session_state:
    st.session_state['HuggingFace_API_Key'] = ''
if 'Pinecone_API_Key' not in st.session_state:
    st.session_state['Pinecone_API_Key'] = ''


st.title('🤖 AI Assistant For Website')

#******** Sidebar functionality starts *******

# Sidebar to capture the API keys
st.sidebar.title("😎🗝️")
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input("What's your HuggingFace API key?", type="password")
st.session_state['Pinecone_API_Key'] = st.sidebar.text_input("What's your Pinecone API key?", type="password")

load_button = st.sidebar.button("Load data to Pinecone", key="load_button")

# If the above button is clicked, push the data to Pinecone...
if load_button:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Fetch data from the site
        site_data = get_website_data(constants.WEBSITE_URL)
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Splitting data done...")

        # Create an embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(st.session_state['Pinecone_API_Key'], constants.PINECONE_ENVIRONMENT, constants.PINECONE_INDEX, embeddings, chunks_data)
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Oops! Please provide API keys...")

#******** Sidebar functionality ends *******

# Capture user inputs
prompt = st.text_input('How can I help you, my friend ❓', key="prompt")  # The box for the text prompt
document_count = st.slider('No. of links to return 🔗 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)

submit = st.button("Search")

if submit:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Create an embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Pull the index from Pinecone
        index = pull_from_pinecone(st.session_state['Pinecone_API_Key'], constants.PINECONE_ENVIRONMENT, constants.PINECONE_INDEX, embeddings)
        st.write("Pinecone index retrieval done...")

        # Fetch relevant documents from the Pinecone index
        relevant_docs = get_similar_docs(index, prompt, document_count)
        st.write(relevant_docs)

        # Display search results
        st.success("Please find the search results:")
        for rank, document in enumerate(relevant_docs, start=1):
            st.write("👉 **Result " + str(rank) + "**")
            st.write("**Info**: " + document.page_content)
            st.write("**Link**: " + document.metadata['source'])

    else:
        st.sidebar.error("Oops! Please provide API keys...")
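For quick debugging outside the browser, the same search path can be exercised as a plain script. A minimal sketch, assuming the constants and utils modules from this commit and that data has already been pushed via the sidebar flow; the key placeholder and query string are hypothetical:

import constants
from utils import create_embeddings, pull_from_pinecone, get_similar_docs

PINECONE_KEY = "YOUR_PINECONE_API_KEY"  # hypothetical placeholder, supply your own key

embeddings = create_embeddings()
index = pull_from_pinecone(PINECONE_KEY, constants.PINECONE_ENVIRONMENT,
                           constants.PINECONE_INDEX, embeddings)

# Hypothetical query; any text works once the index holds data
for rank, doc in enumerate(get_similar_docs(index, "example question", k=2), start=1):
    print(f"Result {rank}: {doc.metadata['source']}")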
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
WEBSITE_URL="https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml"
PINECONE_ENVIRONMENT="us-west1-gcp-free"
PINECONE_INDEX="chatbot"
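These values assume a Pinecone index named chatbot already exists in the us-west1-gcp-free environment. If it does not, a one-time setup along these lines should work, a sketch assuming the classic pinecone-client 2.x API that utils.py also relies on; the dimension must be 384 because that is the output size of the all-MiniLM-L6-v2 embedding model used in this commit:

import pinecone

pinecone.init(api_key="YOUR_PINECONE_API_KEY",  # hypothetical placeholder key
              environment="us-west1-gcp-free")

if "chatbot" not in pinecone.list_indexes():
    # all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings
    pinecone.create_index(name="chatbot", dimension=384, metric="cosine")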
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
langchain
pinecone-client
openai
tiktoken
nest_asyncio
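As written, the code imports packages this list does not mention: the UI needs streamlit, SentenceTransformerEmbeddings needs sentence-transformers, and SitemapLoader parses pages with beautifulsoup4 (typically alongside lxml). The pinecone.init calls in utils.py also predate pinecone-client 3.x, which removed that function, so pinning the 2.x line is safer. A fuller list might look like this (the additions and the version pin are assumptions, not part of the diff):

langchain
pinecone-client<3.0
openai
tiktoken
nest_asyncio
streamlit
sentence-transformers
beautifulsoup4
lxml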
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
import pinecone
import asyncio
from langchain.document_loaders.sitemap import SitemapLoader


# Function to fetch data from the website
# https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/sitemap
def get_website_data(sitemap_url):
    # SitemapLoader scrapes pages asynchronously, so give this thread its own event loop
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loader = SitemapLoader(sitemap_url)

    docs = loader.load()

    return docs

# Function to split data into smaller chunks
def split_data(docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    docs_chunks = text_splitter.split_documents(docs)
    return docs_chunks

# Function to create an embeddings instance
def create_embeddings():
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings

# Function to push data to Pinecone
def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
    pinecone.init(
        api_key=pinecone_apikey,
        environment=pinecone_environment
    )
    index_name = pinecone_index_name
    index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
    return index

# Function to pull index data from Pinecone
def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
    pinecone.init(
        api_key=pinecone_apikey,
        environment=pinecone_environment
    )
    index_name = pinecone_index_name
    index = Pinecone.from_existing_index(index_name, embeddings)
    return index

# This function fetches the top relevant documents from our vector store (the Pinecone index)
def get_similar_docs(index, query, k=2):
    similar_docs = index.similarity_search(query, k=k)
    return similar_docs
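Taken together, the load path these helpers implement runs sitemap → chunks → embeddings → Pinecone. A minimal end-to-end sketch, assuming valid credentials and the constants module from this commit; the key placeholder and query string are hypothetical:

import constants
from utils import (get_website_data, split_data, create_embeddings,
                   push_to_pinecone, get_similar_docs)

docs = get_website_data(constants.WEBSITE_URL)   # crawl every page listed in the sitemap
chunks = split_data(docs)                        # ~1000-char chunks with 200-char overlap
embeddings = create_embeddings()                 # all-MiniLM-L6-v2 sentence embeddings

index = push_to_pinecone("YOUR_PINECONE_API_KEY", constants.PINECONE_ENVIRONMENT,
                         constants.PINECONE_INDEX, embeddings, chunks)
print(get_similar_docs(index, "example question", k=2))  # hypothetical query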
