-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathqachat.py
executable file
·90 lines (70 loc) · 3.27 KB
/
qachat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from dotenv import load_dotenv
load_dotenv()
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import os
import streamlit as st
from transformers import pipeline
from langchain_community.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
import warnings
# Suppress all warnings
warnings.filterwarnings("ignore")
# Streamlit re-executes this script from the top on every widget interaction.
# The setup below (reading the transcript, embedding it, loading two Hugging
# Face models) is wrapped in st.cache_resource so it runs once per server
# process instead of once per rerun.
#
# show_spinner=False: intended to keep these calls from rendering anything
# before st.set_page_config() runs further down the script.
# NOTE(review): the cache is keyed on the function arguments only, so edits to
# transcript.txt are not picked up until the cache is cleared or the app is
# restarted.


@st.cache_resource(show_spinner=False)
def _build_index(path="transcript.txt", chunk_size=1000, chunk_overlap=0):
    """Load the transcript, split it into chunks and index them in Chroma.

    Args:
        path: Text file to load.
        chunk_size: Chunk size passed to CharacterTextSplitter.
        chunk_overlap: Chunk overlap passed to CharacterTextSplitter.

    Returns:
        A tuple ``(documents, texts, embeddings, vectorstore)``: the loaded
        documents, the split chunks, the embedding model and the Chroma store
        built from the chunks.
    """
    # Load the text file
    loader = TextLoader(path)
    documents = loader.load()

    # Split the text into smaller chunks
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # Create embeddings and index
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = Chroma.from_documents(texts, embeddings)
    return documents, texts, embeddings, vectorstore


@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Create the summarization and question-answering pipelines.

    Returns:
        A tuple ``(summarizer, qa_pipeline)``.
    """
    summarization = pipeline("summarization", model="facebook/bart-large-cnn")
    question_answering = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
    return summarization, question_answering


# Module-level names used by the rest of the script.
documents, texts, embeddings, vectorstore = _build_index()
summarizer, qa_pipeline = _load_pipelines()
def summarize_transcript(transcript: str, max_length: int = 150) -> str:
    """Return a summary of *transcript* produced by the summarization pipeline.

    The raw transcript is passed to the model without an instruction prefix:
    the summarizer condenses whatever text it receives, so any prefix would
    be treated as part of the content to summarize.

    Args:
        transcript: Text to summarize.
        max_length: Upper bound forwarded to the pipeline as ``max_length``.
            NOTE(review): Hugging Face generation lengths are counted in
            tokens, not characters.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when *transcript* is empty or whitespace-only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not transcript or not transcript.strip():
        return ""

    # Keep the lower bound from exceeding a caller-supplied smaller maximum.
    min_length = min(50, max_length)

    summary = summarizer(
        transcript,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Clip inputs longer than the model's maximum input size instead of
        # failing; only the leading part of a long transcript is summarized.
        truncation=True,
    )
    return summary[0]['summary_text']
def get_qa_response(question: str, context: str) -> str:
    """Answer *question* from *context* using the question-answering pipeline.

    The pipeline receives only the question and the context; no instruction
    prompt is built because the pipeline call takes just these two inputs.

    Args:
        question: The user's question.
        context: Text the answer should be drawn from.

    Returns:
        The ``answer`` field of the pipeline result, or a fallback message
        when the question or the context is empty or whitespace-only.
    """
    # The pipeline rejects empty inputs, so answer gracefully instead of
    # surfacing an exception in the UI.
    if not question or not question.strip() or not context or not context.strip():
        return "Sorry, I could not find anything relevant to answer that."

    result = qa_pipeline(question=question, context=context)
    return result['answer']
# Streamlit app
st.set_page_config(page_title="Q&A Demo")
st.header("SpamBot: Your everyday assistant!")

# The chat history lives in session state so it survives script reruns.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

option = st.selectbox("Select mode:", ["Q&A", "Summarization"])

if option == "Q&A":
    # Named `question` rather than `input` to avoid shadowing the builtin;
    # stripped so a whitespace-only entry is treated as no question.
    question = st.text_input("Input: ", key="input").strip()
    submit = st.button("Ask the question")
    clear_chat = st.button("Clear Chat")

    if clear_chat:
        st.session_state['chat_history'] = []

    if submit and question:
        # Retrieve relevant information from the text data
        docs = vectorstore.similarity_search(question, k=3)

        # Combine the relevant information
        context = "\n".join(doc.page_content for doc in docs)

        # Get the response from the Q&A pipeline
        response = get_qa_response(question, context)

        st.session_state['chat_history'].append(("You", question))
        st.subheader("The Response is")
        st.write(response)
        st.session_state['chat_history'].append(("Bot", response))

elif option == "Summarization":
    if documents:
        # Summarize the first loaded document.
        transcript = documents[0].page_content
        summary = summarize_transcript(transcript)
        st.subheader("Summary:")
        st.write(summary)
    else:
        st.warning("No transcript available for summarization.")

# The history is rendered in both modes, below the mode-specific output.
st.subheader("Chat History")
for role, text in st.session_state['chat_history']:
    st.write(f"{role}: {text}")