Add more preset values in yaml files (#7718)
GitOrigin-RevId: ea8c6eb12b2defdd7cf1c25577d676c24bada14e
szymondudycz authored and Manul from Pathway committed Dec 2, 2024
1 parent b545bdc commit 1f63aa7
Showing 7 changed files with 23 additions and 14 deletions.
7 changes: 4 additions & 3 deletions examples/pipelines/adaptive-rag/app.yaml
@@ -28,18 +28,19 @@ $llm: !pw.xpacks.llm.llms.OpenAIChat
   model: "gpt-3.5-turbo"
   retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy
     max_retries: 6
-  cache_strategy: !pw.udfs.DiskCache
-  temperature: 0.05
+  cache_strategy: !pw.udfs.DefaultCache
+  temperature: 0
   capacity: 8
 
 $embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
   model: "text-embedding-ada-002"
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
 
 $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
 $parser: !pw.xpacks.llm.parsers.ParseUnstructured
+  cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
   reserved_space: 1000
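
Note: `cache_strategy` on these components is the generic Pathway UDF caching knob, so the switch from `DiskCache` to `DefaultCache` applies anywhere a UDF is defined. A minimal sketch, assuming `pw.udfs.DefaultCache` is a drop-in replacement for `pw.udfs.DiskCache` and that caching only takes effect when the pipeline runs with persistence enabled; `expensive_call` is a hypothetical UDF, not part of this commit:

    import pathway as pw

    # Hypothetical UDF; the OpenAIChat / OpenAIEmbedder wrappers configured
    # in app.yaml accept the same cache_strategy argument.
    @pw.udf(cache_strategy=pw.udfs.DefaultCache())
    def expensive_call(prompt: str) -> str:
        # Results are memoized per distinct input, so repeated prompts
        # do not trigger recomputation (or repeated API calls).
        return prompt.upper()
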
1 change: 1 addition & 0 deletions examples/pipelines/demo-document-indexing/app.yaml
@@ -35,6 +35,7 @@ $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
 $parser: !pw.xpacks.llm.parsers.ParseUnstructured
+  cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
   reserved_space: 1000
1 change: 1 addition & 0 deletions examples/pipelines/demo-question-answering/app.py
@@ -33,6 +33,7 @@ def run(self) -> None:
         server.run(
             with_cache=self.with_cache,
             terminate_on_error=self.terminate_on_error,
+            cache_backend=pw.persistence.Backend.filesystem("Cache"),
         )
 
     model_config = ConfigDict(extra="forbid")
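
The new `cache_backend` argument tells the server where persisted UDF-cache entries live. A sketch of the resulting call, assuming `server` is the REST server instance built earlier in this app.py; the concrete keyword values stand in for the `self.*` fields, and the relative "Cache" directory is created wherever the pipeline is launched:

    import pathway as pw

    server.run(
        with_cache=True,  # enable caching of UDF results
        terminate_on_error=False,
        # Filesystem-backed store for the cache, in a local "Cache" directory.
        cache_backend=pw.persistence.Backend.filesystem("Cache"),
    )
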
9 changes: 6 additions & 3 deletions examples/pipelines/demo-question-answering/app.yaml
@@ -28,18 +28,19 @@ $llm: !pw.xpacks.llm.llms.OpenAIChat
   model: "gpt-3.5-turbo"
   retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy
     max_retries: 6
-  cache_strategy: !pw.udfs.DiskCache
-  temperature: 0.05
+  cache_strategy: !pw.udfs.DefaultCache
+  temperature: 0
   capacity: 8
 
 $embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
   model: "text-embedding-ada-002"
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
 
 $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
 $parser: !pw.xpacks.llm.parsers.ParseUnstructured
+  cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
   reserved_space: 1000
@@ -56,6 +57,8 @@ $document_store: !pw.xpacks.llm.document_store.DocumentStore
 question_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer
   llm: $llm
   indexer: $document_store
+  # You can set the number of documents to be included as the context of the query
+  # search_topk: 6
 
 # Change host and port by uncommenting these lines
 # host: "0.0.0.0"
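
For reference, uncommenting `search_topk` bounds how many retrieved documents are passed as context for each query. The same wiring as a Python sketch, assuming the YAML tags map directly onto the constructors and that `llm` and `document_store` are built as above:

    from pathway.xpacks.llm.question_answering import BaseRAGQuestionAnswerer

    # Retrieve the 6 highest-ranked documents as context for each query,
    # mirroring the commented-out `search_topk: 6` preset in app.yaml.
    question_answerer = BaseRAGQuestionAnswerer(
        llm=llm,
        indexer=document_store,
        search_topk=6,
    )
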
12 changes: 7 additions & 5 deletions examples/pipelines/gpt_4o_multimodal_rag/app.yaml
@@ -28,22 +28,22 @@ $llm: !pw.xpacks.llm.llms.OpenAIChat
   model: "gpt-3.5-turbo"
   retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy
     max_retries: 6
-  cache_strategy: !pw.udfs.DiskCache
-  temperature: 0.05
+  cache_strategy: !pw.udfs.DefaultCache
+  temperature: 0
   capacity: 8
 
 $embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
   model: "text-embedding-ada-002"
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
 
 $parsing_llm: !pw.xpacks.llm.llms.OpenAIChat
   model: "gpt-4o"
   retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy
     max_retries: 6
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
 
 $parser: !pw.xpacks.llm.parsers.OpenParse
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
   table_args:
     parsing_algorithm: "llm"
     llm: $parsing_llm
@@ -67,6 +67,8 @@ $document_store: !pw.xpacks.llm.document_store.DocumentStore
 question_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer
   llm: $llm
   indexer: $document_store
+  # You can set the number of documents to be included as the context of the query
+  # search_topk: 6
 
 # Change host and port by uncommenting these lines
 # host: "0.0.0.0"
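
In this pipeline the cache change also covers the secondary gpt-4o model that OpenParse uses for table parsing. A Python sketch of that wiring, assuming the YAML tags correspond one-to-one to these constructors:

    import pathway as pw
    from pathway.xpacks.llm import llms, parsers

    # gpt-4o handles only table parsing inside OpenParse; its responses are
    # cached with the same DefaultCache strategy as the main chat model.
    parsing_llm = llms.OpenAIChat(
        model="gpt-4o",
        retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy(max_retries=6),
        cache_strategy=pw.udfs.DefaultCache(),
    )

    parser = parsers.OpenParse(
        cache_strategy=pw.udfs.DefaultCache(),
        table_args={"parsing_algorithm": "llm", "llm": parsing_llm},
    )
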
3 changes: 2 additions & 1 deletion examples/pipelines/private-rag/app.yaml
@@ -30,7 +30,7 @@ $llm: !pw.xpacks.llm.llms.LiteLLMChat
   model: $llm_model
   retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy
     max_retries: 6
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
   temperature: 0
   top_p: 1
   format: "json" # only available in Ollama local deploy, not usable in Mistral API
@@ -47,6 +47,7 @@ $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
 $parser: !pw.xpacks.llm.parsers.ParseUnstructured
+  cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
   reserved_space: 1000
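
The private-rag preset now caches both the LiteLLM chat model and the parser. A sketch of the LLM block in Python; the model string is an assumption (the actual `$llm_model` variable is defined elsewhere in this file), and `format="json"` is forwarded to the model, which per the inline comment only works on a local Ollama deployment:

    import pathway as pw
    from pathway.xpacks.llm import llms

    llm = llms.LiteLLMChat(
        model="ollama/mistral",  # assumed value of $llm_model
        retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy(max_retries=6),
        cache_strategy=pw.udfs.DefaultCache(),
        temperature=0,
        top_p=1,
        format="json",  # Ollama-only; not usable with the Mistral API
    )
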
4 changes: 2 additions & 2 deletions examples/pipelines/slides_ai_search/app.yaml
@@ -30,12 +30,12 @@ llm: !pw.xpacks.llm.llms.OpenAIChat
     max_retries: 6
     initial_delay: 2500
     backoff_factor: 2.5
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
   temperature: 0.0
   capacity: 8 # reduce this in case you are hitting API throttle limits
 
 embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
-  cache_strategy: !pw.udfs.DiskCache
+  cache_strategy: !pw.udfs.DefaultCache
 
 details_schema:
   category:
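
The slides pipeline tunes retries more aggressively than the other presets. A small sketch of what the three retry numbers mean, assuming `initial_delay` is in milliseconds as in Pathway's UDF documentation:

    import pathway as pw

    # A failed API call is retried up to 6 times, waiting 2.5 s before the
    # first retry and multiplying the delay by 2.5 each time (2.5 s, 6.25 s,
    # ...), which smooths over transient throttling.
    retry_strategy = pw.udfs.ExponentialBackoffRetryStrategy(
        max_retries=6,
        initial_delay=2500,  # milliseconds
        backoff_factor=2.5,
    )
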
