
Commit e2d57ee

szymondudycz authored and Manul from Pathway committed
Context documents in Adaptive RAG (#8333)
GitOrigin-RevId: 8e1c8053bb4e00c9814fa010afce3f919b00835c
1 parent 947e69a commit e2d57ee

5 files changed: +443 -249 lines changed

Diff for: docs/2.developers/7.templates/.private_rag_ollama_mistral/article.py (-1 line)

@@ -255,7 +255,6 @@ class InputSchema(pw.Schema):
         n_starting_documents=2,
         factor=2,
         max_iterations=4,
-        strict_prompt=True,  # needed for open source models, instructs LLM to give JSON output strictly
     ),
 )
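Note: the parameters left in place control the geometric retrieval schedule of `answer_with_geometric_rag_strategy` (named in the prompts.py diff below). A runnable sketch of the document counts these values imply, assuming each retry multiplies the request size by `factor` (an assumption about the strategy's internals, not part of this diff):

    # Sketch under assumptions: the adaptive strategy starts with
    # n_starting_documents and multiplies by factor on each retry,
    # stopping after max_iterations rounds.
    n_starting_documents, factor, max_iterations = 2, 2, 4
    schedule = [n_starting_documents * factor**i for i in range(max_iterations)]
    print(schedule)  # [2, 4, 8, 16]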

Diff for: examples/notebooks/showcases/mistral_adaptive_rag_question_answering.ipynb (+154 -1 lines)

@@ -350,6 +350,160 @@
     "id": "25",
     "metadata": {},
     "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4cb9b53338984b4ca3782660fd369ac5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config_sentence_transformers.json: 0%| | 0.00/124 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a13d1f303c0b475da7cadfcd2519fbb7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "README.md: 0%| | 0.00/68.0k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f2c90530720a49e981bf4f93573966ad",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "sentence_bert_config.json: 0%| | 0.00/52.0 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f70a76e14fc34714979c1721b7faf546",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json: 0%| | 0.00/719 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c472a492c4844bdfbdd8b21ddf159d47",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors: 0%| | 0.00/133M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2d1b971f1430455e88c69a79e510d6cb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json: 0%| | 0.00/1.24k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "318728fd211d40c5b1f8c2202f67120a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "77c7c8d87fba49ce82365b931b506013",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json: 0%| | 0.00/711k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "219f45c9280a4cd4b6bbc93aac5e7a98",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "special_tokens_map.json: 0%| | 0.00/695 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3b612c8b15ad49f088535c1c70a35648",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",

@@ -526,7 +680,6 @@
     " n_starting_documents=2,\n",
     " factor=2,\n",
     " max_iterations=4,\n",
-    " strict_prompt=True, # needed for open source models, instructs LLM to give JSON output strictly\n",
     " ),\n",
     ")"
    ]

Diff for: python/pathway/xpacks/llm/document_store.py (+15 -18 lines)

@@ -31,6 +31,21 @@
 import llama_index.core.schema


+@pw.udf
+def _get_jmespath_filter(metadata_filter: str, filepath_globpattern: str) -> str | None:
+    ret_parts = []
+    if metadata_filter:
+        metadata_filter = (
+            metadata_filter.replace("'", r"\'").replace("`", "'").replace('"', "")
+        )
+        ret_parts.append(f"({metadata_filter})")
+    if filepath_globpattern:
+        ret_parts.append(f"globmatch('{filepath_globpattern}', path)")
+    if ret_parts:
+        return " && ".join(ret_parts)
+    return None
+
+
 class DocumentStore:
     """
     Builds a document indexing pipeline for processing documents and querying closest documents

@@ -360,24 +375,6 @@ def format_stats(counts, last_modified, last_indexed) -> pw.Json:

     @staticmethod
     def merge_filters(queries: pw.Table):
-        @pw.udf
-        def _get_jmespath_filter(
-            metadata_filter: str, filepath_globpattern: str
-        ) -> str | None:
-            ret_parts = []
-            if metadata_filter:
-                metadata_filter = (
-                    metadata_filter.replace("'", r"\'")
-                    .replace("`", "'")
-                    .replace('"', "")
-                )
-                ret_parts.append(f"({metadata_filter})")
-            if filepath_globpattern:
-                ret_parts.append(f"globmatch('{filepath_globpattern}', path)")
-            if ret_parts:
-                return " && ".join(ret_parts)
-            return None
-
         queries = queries.without(
             *DocumentStore.FilterSchema.__columns__.keys()
         ) + queries.select(
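Note: the hoisted UDF glues an optional JMESPath metadata filter and an optional glob pattern into a single filter expression. A plain-Python rendering of the body shown above, with a hypothetical input to illustrate the quoting rules (backticks become single quotes, single quotes are escaped, double quotes are dropped):

    def get_jmespath_filter(metadata_filter: str, filepath_globpattern: str) -> str | None:
        # Same logic as the _get_jmespath_filter UDF above, minus the @pw.udf wrapper.
        ret_parts = []
        if metadata_filter:
            # Normalize quoting before embedding the filter into JMESPath.
            metadata_filter = (
                metadata_filter.replace("'", r"\'").replace("`", "'").replace('"', "")
            )
            ret_parts.append(f"({metadata_filter})")
        if filepath_globpattern:
            ret_parts.append(f"globmatch('{filepath_globpattern}', path)")
        return " && ".join(ret_parts) if ret_parts else None

    # Hypothetical filter values, for illustration only:
    print(get_jmespath_filter("owner == `alice`", "**/*.pdf"))
    # -> (owner == 'alice') && globmatch('**/*.pdf', path)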

Diff for: python/pathway/xpacks/llm/prompts.py (+20 -61 lines)

@@ -219,77 +219,36 @@ def prompt_qa(
     return prompt


-# prompt for `answer_with_geometric_rag_strategy`, it is the same as in the research project
-# `docs` argument will be deprecated in favor of `context: str` argument
-# this will require the use of `BaseContextProcessor`
 @pw.udf
 def prompt_qa_geometric_rag(
+    context: str,
     query: str,
-    docs: list[pw.Json] | list[str],
     information_not_found_response="No information found.",
     additional_rules: str = "",
-    strict_prompt: bool = False,  # instruct LLM to return json for local models, improves performance
 ):
-    context_pieces = []
-
-    for i, doc in enumerate(docs, 1):
-        if isinstance(doc, str):
-            context_pieces.append(f"Source {i}: {doc}")
-        else:
-            context_pieces.append(f"Source {i}: {doc['text']}")  # type: ignore
-    context_str = "\n".join(context_pieces)
-
-    if strict_prompt:
-        prompt = f"""
-Use the below articles to answer the subsequent question. If the answer cannot be found in the articles, write "{information_not_found_response}" Do not explain.
-ONLY RESPOND IN PARSABLE JSON WITH THE ONLY KEY `answer`.
-When referencing information from a source, cite the appropriate source(s) using their corresponding numbers. Every answer should include at least one source citation.
-Only cite a source when you are explicitly referencing it.
-For example:
-Given following sources and query
-Example 1: "Source 1: The sky is red in the evening and blue in the morning.\nSource 2: Water is wet when the sky is red.
-Query: When is water wet?
-Response: {{"answer": "When the sky is red [2], which occurs in the evening [1]."}}
-Example 2: "Source 1: LLM stands for Large language models.
-Query: Who is the current pope?
-Response: {{"answer": "{information_not_found_response}"}}
-"""  # noqa
-    else:
-        prompt = f"""
-Use the below articles to answer the subsequent question. If the answer cannot be found in the articles, write "{information_not_found_response}" Do not answer in full sentences.
-When referencing information from a source, cite the appropriate source(s) using their corresponding numbers. Every answer should include at least one source citation.
-Only cite a source when you are explicitly referencing it. For example:
-"Source 1:
-The sky is red in the evening and blue in the morning.
-Source 2:
-Water is wet when the sky is red.\n
-Query: When is water wet?
-Answer: When the sky is red [2], which occurs in the evening [1]."
-"""  # noqa
+    prompt = f"""
+Use the below articles to answer the subsequent question. If the answer cannot be found in the articles, write "{information_not_found_response}" Do not explain.
+For example:
+Given following sources and query
+Example 1: "Source 1: The sky is red in the evening and blue in the morning.\nSource 2: Water is wet when the sky is red.
+Query: When is water wet?
+Answer: "When the sky is red [2], which occurs in the evening [1]."
+Example 2: "Source 1: LLM stands for Large language models.
+Query: Who is the current pope?
+Answer: {information_not_found_response}
+"""  # noqa

     prompt += additional_rules + " "

-    if strict_prompt:  # further instruction is needed for smaller models
-        prompt += (
-            "\n------\n"
-            f"{context_str}"
-            f"Query: {query}\n"
-            "ONLY RESPOND IN PARSABLE JSON WITH THE ONLY KEY `answer` containing your response. "
-        )
-
-        response_str = "Response"
-    else:
-        prompt += (
-            "Now it's your turn. "
-            "\n------\n"
-            f"{context_str}"
-            "\n------\n"
-            f"Query: {query}\n"
-        )
-
-        response_str = "Answer"
-
-    prompt += f"{response_str}:"
+    prompt += (
+        "Now it's your turn. "
+        "\n------\n"
+        f"{context}"
+        "\n------\n"
+        f"Query: {query}\n"
+        "Answer: "
+    )
     return prompt
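Note: after this change the caller formats retrieved documents into a single context string before the prompt UDF runs; the numbered "Source i:" layout that the removed docs branch built inline now has to be produced upstream (the removed comments point at a `BaseContextProcessor` for this). A minimal sketch of that hand-off; the document texts and the `queries` table are hypothetical:

    import pathway as pw
    from pathway.xpacks.llm.prompts import prompt_qa_geometric_rag

    # Hypothetical retrieved documents; in the pipeline these come from the index.
    docs = [
        "The sky is red in the evening and blue in the morning.",
        "Water is wet when the sky is red.",
    ]

    # The "Source i:" formatting the old docs-based path did inline:
    context = "\n".join(f"Source {i}: {text}" for i, text in enumerate(docs, 1))
    print(context)

    # Applied column-wise inside a Pathway pipeline, e.g. on a hypothetical
    # `queries` table that already carries `context` and `query` columns:
    # prompts = queries.select(
    #     prompt=prompt_qa_geometric_rag(pw.this.context, pw.this.query)
    # )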
