-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
912c5d3
commit 61d7c2c
Showing
1 changed file
with
224 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Advanced RAG: Self-querying\n", | ||
"\n", | ||
"### Before doing anything\n", | ||
"\n", | ||
"### Ollama Running in background\n", | ||
"\n", | ||
"- [ollama pull nomic-embed-text](https://ollama.com/library/nomic-embed-text)\n", | ||
"- [ollama run qwen2](https://ollama.com/library/qwen2)\n", | ||
"\n", | ||
"### Install these packages\n", | ||
"\n", | ||
"- `!pip install ollama`\n", | ||
"- `!pip install langchain`\n", | ||
"- `!pip install --upgrade --quiet lark langchain-chroma`\n", | ||
"- `!pip install langchain_community`\n", | ||
"\n", | ||
"________________________________________________________________________________" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Prepare Documents" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain_chroma import Chroma\n", | ||
"from langchain_core.documents import Document\n", | ||
"from langchain_community.embeddings import OllamaEmbeddings\n", | ||
"\n", | ||
"# You can make document from different loaders (e.g PDF, HTML, JSON, etc.)\n", | ||
"# different loaders here: https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/\n", | ||
"\n", | ||
"\n", | ||
"docs = [\n", | ||
" Document(\n", | ||
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", | ||
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n", | ||
" ),\n", | ||
" Document(\n", | ||
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", | ||
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n", | ||
" ),\n", | ||
" Document(\n", | ||
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", | ||
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n", | ||
" ),\n", | ||
" Document(\n", | ||
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", | ||
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n", | ||
" ),\n", | ||
" Document(\n", | ||
" page_content=\"Toys come alive and have a blast doing so\",\n", | ||
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n", | ||
" ),\n", | ||
" Document(\n", | ||
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", | ||
" metadata={\n", | ||
" \"year\": 1979,\n", | ||
" \"director\": \"Andrei Tarkovsky\",\n", | ||
" \"genre\": \"thriller\",\n", | ||
" \"rating\": 9.9,\n", | ||
" },\n", | ||
" ),\n", | ||
"]\n", | ||
"vectorstore = Chroma.from_documents(docs, OllamaEmbeddings(model=\"nomic-embed-text\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Creating our self-querying retriever" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.chains.query_constructor.base import AttributeInfo\n", | ||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n", | ||
"\n", | ||
"\n", | ||
"metadata_field_info = [\n", | ||
" AttributeInfo(\n", | ||
" name=\"genre\",\n", | ||
" description=\"The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']\",\n", | ||
" type=\"string\",\n", | ||
" ),\n", | ||
" AttributeInfo(\n", | ||
" name=\"year\", #name=\"email\",\n", | ||
" description=\"The year the movie was released\",\n", | ||
" type=\"integer\",\n", | ||
" ),\n", | ||
" AttributeInfo(\n", | ||
" name=\"director\", # eg. name=\"purchase_history\"\n", | ||
" description=\"The name of the movie director\",\n", | ||
" type=\"string\",\n", | ||
" ),\n", | ||
" AttributeInfo(\n", | ||
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n", | ||
" ),\n", | ||
" \n", | ||
"]\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain_community.chat_models import ChatOllama\n", | ||
"\n", | ||
"document_content_description = \"Brief summary of a movie\"\n", | ||
"\n", | ||
"llm = ChatOllama(model=\"qwen2\",temperature=0)\n", | ||
"\n", | ||
"retriever = SelfQueryRetriever.from_llm(\n", | ||
" llm,\n", | ||
" vectorstore,\n", | ||
" document_content_description,\n", | ||
" metadata_field_info,\n", | ||
")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Query" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),\n", | ||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979})]" | ||
] | ||
}, | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# This example only specifies a filter\n", | ||
"retriever.invoke(\"I want to watch a movie rated higher than 8.5\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995}),\n", | ||
" Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}),\n", | ||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),\n", | ||
" Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'director': 'Greta Gerwig', 'rating': 8.3, 'year': 2019})]" | ||
] | ||
}, | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# This example specifies a query and a filter\n", | ||
"retriever.invoke(\"is there a movie about toys\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "base", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.1.undefined" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |