
Commit b4939c0

Authored Mar 10, 2025

Merge pull request #17 from pamelafox/ragdemos

Add way more RAG demos

2 parents 2584b63 + e70ba13 · commit b4939c0

13 files changed: +225688 −2 lines
 

README.md (+30 −2)

This repository contains a collection of Python scripts that demonstrate how to use the OpenAI API to generate chat completions.

## OpenAI package

These scripts use the OpenAI package to demonstrate how to use the OpenAI API.
In increasing order of complexity, the scripts are:

1. [`chat.py`](./chat.py): A simple script that demonstrates how to use the OpenAI API to generate chat completions.

Plus these scripts to demonstrate additional features:

* [`chat_safety.py`](./chat_safety.py): The simple script with exception handling for Azure AI Content Safety filter errors.
* [`chat_async.py`](./chat_async.py): Uses the async clients to make asynchronous calls, including an example of sending off multiple requests at once using `asyncio.gather`.

## Popular LLM libraries

These scripts use popular LLM libraries to demonstrate how to use the OpenAI API with them:

* [`chat_langchain.py`](./chat_langchain.py): Uses the Langchain package to generate chat completions. [Learn more from Langchain docs](https://python.langchain.com/docs/get_started/quickstart)
* [`chat_llamaindex.py`](./chat_llamaindex.py): Uses the LlamaIndex package to generate chat completions. [Learn more from LlamaIndex docs](https://docs.llamaindex.ai/en/stable/)
* [`chat_pydanticai.py`](./chat_pydanticai.py): Uses the PydanticAI package to generate chat completions. [Learn more from PydanticAI docs](https://ai.pydantic.dev/)

## Retrieval-Augmented Generation (RAG)

These scripts demonstrate how to use the OpenAI API for Retrieval-Augmented Generation (RAG) tasks, where the model retrieves relevant information from a source and uses it to generate a response. A minimal sketch of this shared pattern follows the list below.

First install the RAG dependencies:

```bash
python -m pip install -r requirements-rag.txt
```

Then run the scripts (in order of increasing complexity):

* [`rag_csv.py`](./rag_csv.py): Retrieves matching results from a CSV file and uses them to answer the user's question.
* [`rag_multiturn.py`](./rag_multiturn.py): The same idea, but with a back-and-forth chat interface using `input()`, which keeps track of past messages and sends them with each chat completion call.
* [`rag_queryrewrite.py`](./rag_queryrewrite.py): Adds a query rewriting step to the RAG process, where the user's question is rewritten to improve the retrieval results.
* [`rag_documents_ingestion.py`](./rag_documents_ingestion.py): Ingests PDFs by using pymupdf to convert them to markdown, then using Langchain to split them into chunks, then using OpenAI to embed the chunks, and finally storing them in a local JSON file.
* [`rag_documents_flow.py`](./rag_documents_flow.py): A RAG flow that retrieves matching results from the local JSON file created by `rag_documents_ingestion.py`.
* [`rag_documents_hybrid.py`](./rag_documents_hybrid.py): A RAG flow that implements hybrid retrieval with both vector and keyword search, merging the results with Reciprocal Rank Fusion (RRF), and semantically re-ranking them with a cross-encoder model.
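All of these scripts share the same retrieve-then-generate loop. Here is a minimal sketch of that pattern, assuming the OpenAI.com host for brevity (the real scripts also support Azure OpenAI, Ollama, and GitHub Models); `retrieve` is a hypothetical stand-in for the lunr keyword search or vector search that each script implements:

```python
import os

import openai

client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
MODEL_NAME = os.environ["OPENAI_MODEL"]


def retrieve(query: str) -> list[str]:
    # Hypothetical stand-in for the real retrieval step (lunr keyword search,
    # vector similarity search, or both): return snippets matching the query.
    corpus = ["example snippet one", "example snippet two"]
    return [text for text in corpus if any(word in text.lower() for word in query.lower().split())]


user_question = "how fast is the prius v?"
sources = "\n".join(retrieve(user_question))

# Generate an answer grounded in the retrieved snippets.
response = client.chat.completions.create(
    model=MODEL_NAME,
    temperature=0.3,
    messages=[
        {"role": "system", "content": "Answer the question using only the provided sources."},
        {"role": "user", "content": f"{user_question}\nSources: {sources}"},
    ],
)
print(response.choices[0].message.content)
```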

## Setting up the environment

Data files added (binary, not shown):

* data/Aphideater_hoverfly.pdf (255 KB)
* data/California_carpenter_bee.pdf (316 KB)
* data/Centris_pallida.pdf (970 KB)
* data/Western_honey_bee.pdf (1010 KB)

rag_csv.py (new file, +73)
```python
import csv
import os

import azure.identity
import openai
from dotenv import load_dotenv
from lunr import lunr

# Set up the OpenAI client to use the Azure OpenAI, OpenAI.com, Ollama, or GitHub Models API
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST")

if API_HOST == "azure":
    token_provider = azure.identity.get_bearer_token_provider(
        azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    client = openai.AzureOpenAI(
        api_version=os.environ["AZURE_OPENAI_VERSION"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        azure_ad_token_provider=token_provider,
    )
    MODEL_NAME = os.environ["AZURE_OPENAI_DEPLOYMENT"]

elif API_HOST == "ollama":
    client = openai.OpenAI(base_url=os.environ["OLLAMA_ENDPOINT"], api_key="nokeyneeded")
    MODEL_NAME = os.environ["OLLAMA_MODEL"]

elif API_HOST == "github":
    client = openai.OpenAI(base_url="https://models.inference.ai.azure.com", api_key=os.environ["GITHUB_TOKEN"])
    MODEL_NAME = os.environ["GITHUB_MODEL"]

else:
    client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
    MODEL_NAME = os.environ["OPENAI_MODEL"]

# Index the data from the CSV (lunr builds a keyword index; ids are 1-based row numbers)
with open("hybrid.csv") as file:
    reader = csv.reader(file)
    rows = list(reader)
documents = [{"id": (i + 1), "body": " ".join(row)} for i, row in enumerate(rows[1:])]
index = lunr(ref="id", fields=["body"], documents=documents)

# Get the user question
user_question = "how fast is the prius v?"

# Search the index for the user question (lunr returns refs as strings, hence the int() cast)
results = index.search(user_question)
matching_rows = [rows[int(result["ref"])] for result in results]

# Format as a markdown table, since language models understand markdown
matches_table = " | ".join(rows[0]) + "\n" + " | ".join(" --- " for _ in range(len(rows[0]))) + "\n"
matches_table += "\n".join(" | ".join(row) for row in matching_rows)

print("Found matches:")
print(matches_table)

# Now we can use the matches to generate a response
SYSTEM_MESSAGE = """
You are a helpful assistant that answers questions about cars based off a hybrid car data set.
You must use the data set to answer the questions, you should not provide any info that is not in the provided sources.
"""

response = client.chat.completions.create(
    model=MODEL_NAME,
    temperature=0.3,
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": f"{user_question}\nSources: {matches_table}"},
    ],
)

print(f"\nResponse from {API_HOST}: \n")
print(response.choices[0].message.content)
```
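For reference, here is how the markdown table is assembled, run on a hypothetical two-column data set (illustrative header and row only; the real columns come from `hybrid.csv`):

```python
# Illustrative only: hypothetical rows standing in for the contents of hybrid.csv.
rows = [["model", "year"], ["Prius v", "2012"]]
matching_rows = rows[1:]

# Same table-building code as the script above.
matches_table = " | ".join(rows[0]) + "\n" + " | ".join(" --- " for _ in range(len(rows[0]))) + "\n"
matches_table += "\n".join(" | ".join(row) for row in matching_rows)
print(matches_table)
# model | year
#  ---  |  --- 
# Prius v | 2012
```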

rag_documents_flow.py (new file, +70)
```python
import json
import os

import azure.identity
import openai
from dotenv import load_dotenv
from lunr import lunr

# Set up the OpenAI client to use the Azure OpenAI, OpenAI.com, Ollama, or GitHub Models API
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST")

if API_HOST == "azure":
    token_provider = azure.identity.get_bearer_token_provider(
        azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    client = openai.AzureOpenAI(
        api_version=os.environ["AZURE_OPENAI_VERSION"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        azure_ad_token_provider=token_provider,
    )
    MODEL_NAME = os.environ["AZURE_OPENAI_DEPLOYMENT"]

elif API_HOST == "ollama":
    client = openai.OpenAI(base_url=os.environ["OLLAMA_ENDPOINT"], api_key="nokeyneeded")
    MODEL_NAME = os.environ["OLLAMA_MODEL"]

elif API_HOST == "github":
    client = openai.OpenAI(base_url="https://models.inference.ai.azure.com", api_key=os.environ["GITHUB_TOKEN"])
    MODEL_NAME = os.environ["GITHUB_MODEL"]

else:
    client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
    MODEL_NAME = os.environ["OPENAI_MODEL"]

# Index the data from the JSON - each object has id, text, and embedding
with open("rag_ingested_chunks.json") as file:
    documents = json.load(file)
documents_by_id = {doc["id"]: doc for doc in documents}
index = lunr(ref="id", fields=["text"], documents=documents)

# Get the user question
user_question = "where do digger bees live?"

# Search the index for the user question
results = index.search(user_question)
retrieved_documents = [documents_by_id[result["ref"]] for result in results]
print(f"Retrieved {len(retrieved_documents)} matching documents, only sending the first 5.")
context = "\n".join([f"{doc['id']}: {doc['text']}" for doc in retrieved_documents[0:5]])

# Now we can use the matches to generate a response
SYSTEM_MESSAGE = """
You are a helpful assistant that answers questions about bees and other insects.
You must use the data set to answer the questions,
you should not provide any info that is not in the provided sources.
Cite the sources you used to answer the question inside square brackets.
The sources are in the format: <id>: <text>.
"""

response = client.chat.completions.create(
    model=MODEL_NAME,
    temperature=0.3,
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": f"{user_question}\nSources: {context}"},
    ],
)

print(f"\nResponse from {MODEL_NAME} on {API_HOST}: \n")
print(response.choices[0].message.content)
```
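The `rag_ingested_chunks.json` file read above is produced by `rag_documents_ingestion.py` (shown later in this commit): each entry has an `id` (source filename plus a 1-based chunk number), the chunk `text`, and its `embedding`. A quick way to inspect the expected shape:

```python
import json

# Peek at the ingested chunks written by rag_documents_ingestion.py.
with open("rag_ingested_chunks.json") as file:
    documents = json.load(file)

doc = documents[0]
print(doc["id"])              # e.g. "California_carpenter_bee.pdf-1" (filename + chunk number)
print(doc["text"][:100])      # the chunk's markdown text (chunks are roughly 500 tokens)
print(len(doc["embedding"]))  # 1536 floats for text-embedding-3-small
```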

rag_documents_hybrid.py (new file, +143)
```python
# pip install sentence-transformers
import json
import os

import azure.identity
import openai
from dotenv import load_dotenv
from lunr import lunr
from sentence_transformers import CrossEncoder

# Set up the OpenAI client to use the Azure OpenAI, OpenAI.com, Ollama, or GitHub Models API
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST")

if API_HOST == "azure":
    token_provider = azure.identity.get_bearer_token_provider(
        azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    client = openai.AzureOpenAI(
        api_version=os.environ["AZURE_OPENAI_VERSION"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        azure_ad_token_provider=token_provider,
    )
    MODEL_NAME = os.environ["AZURE_OPENAI_DEPLOYMENT"]

elif API_HOST == "ollama":
    client = openai.OpenAI(base_url=os.environ["OLLAMA_ENDPOINT"], api_key="nokeyneeded")
    MODEL_NAME = os.environ["OLLAMA_MODEL"]

elif API_HOST == "github":
    client = openai.OpenAI(base_url="https://models.inference.ai.azure.com", api_key=os.environ["GITHUB_TOKEN"])
    MODEL_NAME = os.environ["GITHUB_MODEL"]

else:
    client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
    MODEL_NAME = os.environ["OPENAI_MODEL"]

# Index the data from the JSON - each object has id, text, and embedding
with open("rag_ingested_chunks.json") as file:
    documents = json.load(file)
documents_by_id = {doc["id"]: doc for doc in documents}
index = lunr(ref="id", fields=["text"], documents=documents)


def full_text_search(query, limit):
    """
    Perform a full-text search on the indexed documents.
    """
    results = index.search(query)
    retrieved_documents = [documents_by_id[result["ref"]] for result in results[:limit]]
    return retrieved_documents


def vector_search(query, limit):
    """
    Perform a vector search on the indexed documents
    using a simple cosine similarity function.
    """

    def cosine_similarity(a, b):
        return sum(x * y for x, y in zip(a, b)) / ((sum(x * x for x in a) ** 0.5) * (sum(y * y for y in b) ** 0.5))

    query_embedding = client.embeddings.create(model="text-embedding-3-small", input=query).data[0].embedding
    similarities = []
    for doc in documents:
        doc_embedding = doc["embedding"]
        similarity = cosine_similarity(query_embedding, doc_embedding)
        similarities.append((doc, similarity))
    similarities.sort(key=lambda x: x[1], reverse=True)

    retrieved_documents = [doc for doc, _ in similarities[:limit]]
    return retrieved_documents


def reciprocal_rank_fusion(text_results, vector_results, alpha=0.5):
    """
    Perform Reciprocal Rank Fusion on the results from text and vector searches.
    """
    text_ids = {doc["id"] for doc in text_results}
    vector_ids = {doc["id"] for doc in vector_results}

    combined_results = []
    for doc in text_results:
        if doc["id"] in vector_ids:
            combined_results.append((doc, alpha))
        else:
            combined_results.append((doc, 1 - alpha))
    for doc in vector_results:
        if doc["id"] not in text_ids:
            combined_results.append((doc, alpha))
    combined_results.sort(key=lambda x: x[1], reverse=True)
    return [doc for doc, _ in combined_results]


def rerank(query, retrieved_documents):
    """
    Rerank the results using a cross-encoder model.
    """
    encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    scores = encoder.predict([(query, doc["text"]) for doc in retrieved_documents])
    # Sort by score only: dicts are not comparable, so a plain tuple sort would fail on tied scores
    scored_documents = [doc for _, doc in sorted(zip(scores, retrieved_documents), key=lambda pair: pair[0], reverse=True)]
    return scored_documents


def hybrid_search(query, limit):
    """
    Perform a hybrid search using both full-text and vector search.
    """
    text_results = full_text_search(query, limit * 2)
    vector_results = vector_search(query, limit * 2)
    combined_results = reciprocal_rank_fusion(text_results, vector_results)
    combined_results = rerank(query, combined_results)
    return combined_results[:limit]


# Get the user question
user_question = "cute gray fuzzsters"

# Search the index for the user question
retrieved_documents = hybrid_search(user_question, limit=5)
print(f"Retrieved {len(retrieved_documents)} matching documents.")
context = "\n".join([f"{doc['id']}: {doc['text']}" for doc in retrieved_documents[0:5]])

# Now we can use the matches to generate a response
SYSTEM_MESSAGE = """
You are a helpful assistant that answers questions about bees and other insects.
You must use the data set to answer the questions,
you should not provide any info that is not in the provided sources.
Cite the sources you used to answer the question inside square brackets.
The sources are in the format: <id>: <text>.
"""

response = client.chat.completions.create(
    model=MODEL_NAME,
    temperature=0.3,
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": f"{user_question}\nSources: {context}"},
    ],
)

print(f"\nResponse from {MODEL_NAME} on {API_HOST}: \n")
print(response.choices[0].message.content)
```
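One note on the merge step: despite its name, `reciprocal_rank_fusion` above assigns fixed alpha-based weights rather than using rank positions (and with the default `alpha=0.5`, every document receives the same weight, so the stable sort keeps the input order). If you want the textbook RRF behavior, which scores each document as the sum of 1/(k + rank) across the result lists with the conventional k = 60, a sketch:

```python
def classic_rrf(result_lists, k=60):
    """Classic Reciprocal Rank Fusion: each document scores 1 / (k + rank)
    per result list it appears in (rank is 1-based), and scores are summed."""
    scores = {}
    docs_by_id = {}
    for results in result_lists:
        for rank, doc in enumerate(results, start=1):
            docs_by_id[doc["id"]] = doc
            scores[doc["id"]] = scores.get(doc["id"], 0.0) + 1.0 / (k + rank)
    ranked_ids = sorted(scores, key=scores.get, reverse=True)
    return [docs_by_id[doc_id] for doc_id in ranked_ids]


# Drop-in usage inside hybrid_search:
# combined_results = classic_rrf([text_results, vector_results])
```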

rag_documents_ingestion.py (new file, +61)
```python
import json
import os
import pathlib

import azure.identity
import openai
import pymupdf4llm
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Set up the OpenAI client to use the Azure OpenAI, OpenAI.com, Ollama, or GitHub Models API
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST")

if API_HOST == "azure":
    token_provider = azure.identity.get_bearer_token_provider(
        azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    client = openai.AzureOpenAI(
        api_version=os.environ["AZURE_OPENAI_VERSION"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        azure_ad_token_provider=token_provider,
    )
    MODEL_NAME = os.environ["AZURE_OPENAI_DEPLOYMENT"]

elif API_HOST == "ollama":
    client = openai.OpenAI(base_url=os.environ["OLLAMA_ENDPOINT"], api_key="nokeyneeded")
    MODEL_NAME = os.environ["OLLAMA_MODEL"]

elif API_HOST == "github":
    client = openai.OpenAI(base_url="https://models.inference.ai.azure.com", api_key=os.environ["GITHUB_TOKEN"])
    MODEL_NAME = os.environ["GITHUB_MODEL"]

else:
    client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
    MODEL_NAME = os.environ["OPENAI_MODEL"]

data_dir = pathlib.Path(os.path.dirname(__file__)) / "data"
filenames = ["California_carpenter_bee.pdf", "Centris_pallida.pdf", "Western_honey_bee.pdf", "Aphideater_hoverfly.pdf"]
all_chunks = []
for filename in filenames:
    # Extract text from the PDF file as markdown
    md_text = pymupdf4llm.to_markdown(data_dir / filename)

    # Split the text into smaller chunks (~500 tokens each, measured with the gpt-4o tokenizer)
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-4o", chunk_size=500, chunk_overlap=0
    )
    texts = text_splitter.create_documents([md_text])
    file_chunks = [{"id": f"{filename}-{(i + 1)}", "text": text.page_content} for i, text in enumerate(texts)]

    # Generate embeddings using the OpenAI SDK for each text chunk
    for file_chunk in file_chunks:
        file_chunk["embedding"] = (
            client.embeddings.create(model="text-embedding-3-small", input=file_chunk["text"]).data[0].embedding
        )
    all_chunks.extend(file_chunks)

# Save the documents with embeddings to a JSON file
with open("rag_ingested_chunks.json", "w") as f:
    json.dump(all_chunks, f, indent=4)
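```

Ingestion is a one-time step (rerun it only when the PDFs change); the document RAG scripts then read its JSON output:

```bash
python rag_documents_ingestion.py   # writes rag_ingested_chunks.json
python rag_documents_flow.py        # keyword (lunr) retrieval over the ingested chunks
python rag_documents_hybrid.py      # hybrid retrieval: keyword + vector, RRF merge, cross-encoder re-rank
```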
