|
20 | 20 | from langchain.chat_models import AzureChatOpenAI
|
21 | 21 | from langchain.embeddings import AzureOpenAIEmbeddings
|
22 | 22 | from azure.identity import ManagedIdentityCredential
|
| 23 | +import qdrant_client |
23 | 24 |
|
24 | 25 | import streamlit as st
|
| 26 | + |
| 27 | +from llama_index.vector_stores.qdrant import QdrantVectorStore |
25 | 28 | from llama_index.core import (
|
26 | 29 | SimpleDirectoryReader,
|
27 |
| - GPTVectorStoreIndex, |
| 30 | + VectorStoreIndex, |
28 | 31 | PromptHelper,
|
29 | 32 | ServiceContext,
|
30 | 33 | StorageContext,
|
31 |
| - load_index_from_storage, |
32 | 34 | Settings
|
33 |
| - |
34 | 35 | )
|
35 | 36 |
|
36 | 37 | from llama_index.llms.langchain import LangChainLLM
|
|
39 | 40 |
|
40 | 41 | from dotenv import load_dotenv, dotenv_values
|
41 | 42 |
|
42 |
| - |
# Verbose logging to stdout so ingestion/query steps are traceable during development.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger("llama_index").setLevel(logging.DEBUG)

# Module-level state: the active vector index (built or reloaded further below).
index = None
# Staging directory where the uploaded document is written before ingestion.
doc_path = "./data/"

client = qdrant_client.QdrantClient(
    # you can use :memory: mode for fast and light-weight experiments,
    # it does not require Qdrant to be deployed anywhere
    # but requires qdrant-client >= 1.1.1
    # location=":memory:"
    # otherwise set Qdrant instance address with:
    # url="http://<host>:<port>"
    # otherwise set Qdrant instance with host and port:
    host="chatbot-qdrant",  # assumes a reachable container/service named "chatbot-qdrant" — TODO confirm
    port=6333
    # set API KEY for Qdrant Cloud
    # api_key="<qdrant-api-key>",
)
49 | 63 |
|
50 | 64 | if "config" not in st.session_state:
|
51 | 65 | # Read the environment variables
|
@@ -119,47 +133,48 @@ def send_click():
|
119 | 133 | )
|
120 | 134 |
|
# Handle a newly uploaded document: stage it on disk, ingest it into a
# Qdrant-backed vector index (one collection per file name), then reload the
# index for querying from whatever file is currently active in the session.
if uploaded_file and uploaded_file.name != st.session_state.current_file:
    st.session_state.current_file = uploaded_file.name
    st.session_state.response = ""  # clean up the response when new file is uploaded
    # Only ingest when no collection exists for this file yet; an existing
    # collection is reused by the reload branch below.
    if not client.collection_exists(collection_name=uploaded_file.name):
        # Ingest the document and create the index
        with st.spinner('Ingesting the file..'):
            # Clear the staging directory so only the new file gets indexed.
            doc_files = os.listdir(doc_path)
            for doc_file in doc_files:
                os.remove(doc_path + doc_file)

            bytes_data = uploaded_file.read()
            with open(f"{doc_path}{uploaded_file.name}", "wb") as f:
                f.write(bytes_data)

            loader = SimpleDirectoryReader(doc_path, recursive=True, exclude_hidden=True)
            documents = loader.load_data()
            sidebar_placeholder.header("Current Processing Document:")
            sidebar_placeholder.subheader(uploaded_file.name)

            # Persist embeddings into a Qdrant collection named after the file.
            vector_store = QdrantVectorStore(client=client, collection_name=uploaded_file.name)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            index = VectorStoreIndex.from_documents(
                documents,
                service_context=service_context,
                storage_context=storage_context,
            )
            index.set_index_id("vector_index")
            st.success('Done!')

if st.session_state.current_file:
    # Use the persisted file name, NOT `uploaded_file.name`: on a Streamlit
    # rerun `uploaded_file` can be None while `current_file` survives in
    # session_state, and the old code would raise AttributeError here.
    doc_filename = st.session_state.current_file
    vector_store = QdrantVectorStore(client=client, collection_name=doc_filename)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # Rehydrate the index from the already-populated Qdrant collection;
    # no re-ingestion happens on this path.
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        service_context=service_context,
        storage_context=storage_context,
        index_id="vector_index",
    )
    sidebar_placeholder.header("Current Processing Document:")
    sidebar_placeholder.subheader(doc_filename)
|
162 |
| -if index: |
| 177 | +if index or st.session_state.response != "": |
163 | 178 | st.text_input("Ask something: ", key="prompt", on_change=send_click)
|
164 | 179 | st.button("Send", on_click=send_click)
|
165 | 180 | if st.session_state.response:
|
|
0 commit comments