
Commit 6343fa1

Merge pull request #2504 from ayush-09/chatbot
Support ChatBot
2 parents 6acae1a + 872f87f commit 6343fa1

File tree: 5 files changed (+170, -0 lines)

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import streamlit as st
from utils import *
import constants

# Create session state variables for the API keys
if 'HuggingFace_API_Key' not in st.session_state:
    st.session_state['HuggingFace_API_Key'] = ''
if 'Pinecone_API_Key' not in st.session_state:
    st.session_state['Pinecone_API_Key'] = ''


st.title('🤖 AI Assistant For Website')

#******** Sidebar functionality starts *******

# Sidebar to capture the API keys
st.sidebar.title("😎🗝️")
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input("What's your HuggingFace API key?", type="password")
st.session_state['Pinecone_API_Key'] = st.sidebar.text_input("What's your Pinecone API key?", type="password")

load_button = st.sidebar.button("Load data to Pinecone", key="load_button")

# If the above button is clicked, push the data to Pinecone...
if load_button:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Fetch data from the site
        site_data = get_website_data(constants.WEBSITE_URL)
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Splitting data done...")

        # Create an embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(st.session_state['Pinecone_API_Key'], constants.PINECONE_ENVIRONMENT, constants.PINECONE_INDEX, embeddings, chunks_data)
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Oops! Please provide API keys...")

#******** Sidebar functionality ends *******

# Capture user inputs
prompt = st.text_input('How can I help you, my friend ❓', key="prompt")  # The box for the text prompt
document_count = st.slider('No. of links to return 🔗 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)

submit = st.button("Search")

if submit:
    # Proceed only if API keys are provided
    if st.session_state['HuggingFace_API_Key'] != "" and st.session_state['Pinecone_API_Key'] != "":

        # Create an embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Pull the index from Pinecone
        index = pull_from_pinecone(st.session_state['Pinecone_API_Key'], constants.PINECONE_ENVIRONMENT, constants.PINECONE_INDEX, embeddings)
        st.write("Pinecone index retrieval done...")

        # Fetch relevant documents from the Pinecone index
        relevant_docs = get_similar_docs(index, prompt, document_count)
        st.write(relevant_docs)

        # Display search results
        st.success("Please find the search results:")
        for rank, document in enumerate(relevant_docs, start=1):
            st.write("👉 **Result " + str(rank) + "**")
            st.write("**Info**: " + document.page_content)
            st.write("**Link**: " + document.metadata['source'])

    else:
        st.sidebar.error("Oops! Please provide API keys...")
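For quick debugging outside the browser, the same search path can be exercised as a plain script. A minimal sketch, assuming the constants and utils modules from this commit and that data has already been pushed via the sidebar flow; the key placeholder and query string are hypothetical:

import constants
from utils import create_embeddings, pull_from_pinecone, get_similar_docs

PINECONE_KEY = "YOUR_PINECONE_API_KEY"  # hypothetical placeholder, supply your own key

embeddings = create_embeddings()
index = pull_from_pinecone(PINECONE_KEY, constants.PINECONE_ENVIRONMENT,
                           constants.PINECONE_INDEX, embeddings)

# Hypothetical query; any text works once the index holds data
for rank, doc in enumerate(get_similar_docs(index, "example question", k=2), start=1):
    print(f"Result {rank}: {doc.metadata['source']}")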
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
WEBSITE_URL="https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml"
PINECONE_ENVIRONMENT="us-west1-gcp-free"
PINECONE_INDEX="chatbot"
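These values assume a Pinecone index named chatbot already exists in the us-west1-gcp-free environment. If it does not, a one-time setup along these lines should work, a sketch assuming the classic pinecone-client 2.x API that utils.py also relies on; the dimension must be 384 because that is the output size of the all-MiniLM-L6-v2 embedding model used in this commit:

import pinecone

pinecone.init(api_key="YOUR_PINECONE_API_KEY",  # hypothetical placeholder key
              environment="us-west1-gcp-free")

if "chatbot" not in pinecone.list_indexes():
    # all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings
    pinecone.create_index(name="chatbot", dimension=384, metric="cosine")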
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
langchain
pinecone-client
openai
tiktoken
nest_asyncio
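As written, the code imports packages this list does not mention: the UI needs streamlit, SentenceTransformerEmbeddings needs sentence-transformers, and SitemapLoader parses pages with beautifulsoup4 (typically alongside lxml). The pinecone.init calls in utils.py also predate pinecone-client 3.x, which removed that function, so pinning the 2.x line is safer. A fuller list might look like this (the additions and the version pin are assumptions, not part of the diff):

langchain
pinecone-client<3.0
openai
tiktoken
nest_asyncio
streamlit
sentence-transformers
beautifulsoup4
lxml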
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
import pinecone
import asyncio
from langchain.document_loaders.sitemap import SitemapLoader


# Function to fetch data from the website
# https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/sitemap
def get_website_data(sitemap_url):
    # SitemapLoader scrapes pages asynchronously, so give this thread its own event loop
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loader = SitemapLoader(sitemap_url)

    docs = loader.load()

    return docs

# Function to split data into smaller chunks
def split_data(docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    docs_chunks = text_splitter.split_documents(docs)
    return docs_chunks

# Function to create an embeddings instance
def create_embeddings():
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings

# Function to push data to Pinecone
def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
    pinecone.init(
        api_key=pinecone_apikey,
        environment=pinecone_environment
    )
    index_name = pinecone_index_name
    index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
    return index

# Function to pull index data from Pinecone
def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
    pinecone.init(
        api_key=pinecone_apikey,
        environment=pinecone_environment
    )
    index_name = pinecone_index_name
    index = Pinecone.from_existing_index(index_name, embeddings)
    return index

# This function fetches the top relevant documents from our vector store (the Pinecone index)
def get_similar_docs(index, query, k=2):
    similar_docs = index.similarity_search(query, k=k)
    return similar_docs
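Taken together, the load path these helpers implement runs sitemap → chunks → embeddings → Pinecone. A minimal end-to-end sketch, assuming valid credentials and the constants module from this commit; the key placeholder and query string are hypothetical:

import constants
from utils import (get_website_data, split_data, create_embeddings,
                   push_to_pinecone, get_similar_docs)

docs = get_website_data(constants.WEBSITE_URL)   # crawl every page listed in the sitemap
chunks = split_data(docs)                        # ~1000-char chunks with 200-char overlap
embeddings = create_embeddings()                 # all-MiniLM-L6-v2 sentence embeddings

index = push_to_pinecone("YOUR_PINECONE_API_KEY", constants.PINECONE_ENVIRONMENT,
                         constants.PINECONE_INDEX, embeddings, chunks)
print(get_similar_docs(index, "example question", k=2))  # hypothetical query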
