From 35569900ee1217e131800979d2126e58feb35a45 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Fri, 10 Jan 2025 19:21:13 +0000 Subject: [PATCH] Improve documentation & revert protocol --- .../framework/model-clients.ipynb | 102 +++++++++--------- python/packages/autogen-core/pyproject.toml | 2 + .../src/autogen_core/_cache_store.py | 10 +- .../src/autogen_core/models/__init__.py | 3 +- .../src/autogen_core/models/_cache.py | 57 +++++----- .../tests/test_chat_completion_cache.py | 12 +-- python/uv.lock | 25 +++++ 7 files changed, 119 insertions(+), 92 deletions(-) diff --git a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb index 5c2124993ff5..9bb8f79be571 100644 --- a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb +++ b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb @@ -180,58 +180,6 @@ "**NB the default usage response is to return zero values**" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Caching Wrapper\n", - "\n", - "`autogen_core` implements a {py:class}`~autogen_core.models.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times. \n", - "\n", - "{py:class}`~autogen_core.models.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol to allow duck-typing any storage object that has a pair of `get` & `set` methods (such as `redis.Redis` or `diskcache.Cache`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Any, Dict, Optional\n", - "\n", - "from autogen_core import CacheStore\n", - "from autogen_core.models import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache\n", - "\n", - "\n", - "# Simple CacheStore implementation using in-memory dict,\n", - "# you can also use redis.Redis or diskcache.Cache\n", - "class DictStore(CacheStore[CHAT_CACHE_VALUE_TYPE]):\n", - " def __init__(self) -> None:\n", - " self._store: dict[str, CHAT_CACHE_VALUE_TYPE] = {}\n", - "\n", - " def get(self, key: str, default: Optional[CHAT_CACHE_VALUE_TYPE] = None) -> Optional[CHAT_CACHE_VALUE_TYPE]:\n", - " return self._store.get(key, default)\n", - "\n", - " def set(self, key: str, value: CHAT_CACHE_VALUE_TYPE) -> None:\n", - " self._store[key] = value\n", - "\n", - "\n", - "cached_client = ChatCompletionCache(model_client, DictStore())\n", - "response = await cached_client.create(messages=messages)\n", - "\n", - "cached_response = await cached_client.create(messages=messages)\n", - "print(cached_response.cached)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Inspecting `cached_client.total_usage()` (or `model_client.total_usage()`) before and after a cached response should yield idential counts.\n", - "\n", - "Note that the caching is sensitive to the exact arguments provided to `cached_client.create` or `cached_client.create_stream`, so changing `tools` or `json_output` arguments might lead to a cache miss." - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -373,6 +321,54 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Caching Wrapper\n", + "\n", + "`autogen_core` implements a {py:class}`~autogen_core.models.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. 
Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times. \n", + "\n", + "{py:class}`~autogen_core.models.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol to allow duck-typing any storage object that has a pair of `get` & `set` methods (such as `redis.Redis` or `diskcache.Cache`). Here's an example of using `diskcache` for local caching:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "from typing import Any, Dict, Optional\n", + "\n", + "from autogen_core.models import ChatCompletionCache\n", + "from diskcache import Cache\n", + "\n", + "diskcache_client = Cache(\"/tmp/diskcache\")\n", + "\n", + "cached_client = ChatCompletionCache(model_client, diskcache_client)\n", + "response = await cached_client.create(messages=messages)\n", + "\n", + "cached_response = await cached_client.create(messages=messages)\n", + "print(cached_response.cached)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Inspecting `cached_client.total_usage()` (or `model_client.total_usage()`) before and after a cached response should yield identical counts.\n", + "\n", + "Note that the caching is sensitive to the exact arguments provided to `cached_client.create` or `cached_client.create_stream`, so changing `tools` or `json_output` arguments might lead to a cache miss." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -673,7 +669,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.12.1" } }, "nbformat": 4, diff --git a/python/packages/autogen-core/pyproject.toml b/python/packages/autogen-core/pyproject.toml index 4d6aa4ba6410..f66ae81c2d85 100644 --- a/python/packages/autogen-core/pyproject.toml +++ b/python/packages/autogen-core/pyproject.toml @@ -72,6 +72,8 @@ dev = [ "autogen_ext==0.4.0", # Documentation tooling + "diskcache", + "redis", "sphinx-autobuild", ] diff --git a/python/packages/autogen-core/src/autogen_core/_cache_store.py b/python/packages/autogen-core/src/autogen_core/_cache_store.py index 2f2019bf2f93..92bafde1d02a 100644 --- a/python/packages/autogen-core/src/autogen_core/_cache_store.py +++ b/python/packages/autogen-core/src/autogen_core/_cache_store.py @@ -1,9 +1,7 @@ -from typing import Generic, Optional, Protocol, TypeVar +from typing import Any, Optional, Protocol -T = TypeVar("T") - -class CacheStore(Protocol, Generic[T]): +class CacheStore(Protocol): """ This protocol defines the basic interface for store/cache operations. @@ -11,7 +9,7 @@ class CacheStore(Protocol, Generic[T]): such as redis or diskcache interfaces. """ - def get(self, key: str, default: Optional[T] = None) -> Optional[T]: + def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]: """ Retrieve an item from the store. @@ -25,7 +23,7 @@ def get(self, key: str, default: Optional[T] = None) -> Optional[T]: """ ... - def set(self, key: str, value: T) -> None: + def set(self, key: str, value: Any) -> Optional[Any]: """ Set an item in the store. 
diff --git a/python/packages/autogen-core/src/autogen_core/models/__init__.py b/python/packages/autogen-core/src/autogen_core/models/__init__.py index 11a4db5ae510..9c958a540721 100644 --- a/python/packages/autogen-core/src/autogen_core/models/__init__.py +++ b/python/packages/autogen-core/src/autogen_core/models/__init__.py @@ -1,4 +1,4 @@ -from ._cache import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache +from ._cache import ChatCompletionCache from ._model_client import ChatCompletionClient, ModelCapabilities, ModelFamily, ModelInfo # type: ignore from ._replay_chat_completion_client import ReplayChatCompletionClient from ._types import ( @@ -17,7 +17,6 @@ __all__ = [ "ModelCapabilities", - "CHAT_CACHE_VALUE_TYPE", "ChatCompletionCache", "ChatCompletionClient", "SystemMessage", diff --git a/python/packages/autogen-core/src/autogen_core/models/_cache.py b/python/packages/autogen-core/src/autogen_core/models/_cache.py index c13761ba8d5c..fe85381c6c79 100644 --- a/python/packages/autogen-core/src/autogen_core/models/_cache.py +++ b/python/packages/autogen-core/src/autogen_core/models/_cache.py @@ -17,47 +17,54 @@ RequestUsage, ) -CHAT_CACHE_VALUE_TYPE = Union[CreateResult, List[Union[str, CreateResult]]] - class ChatCompletionCache(ChatCompletionClient): """ A wrapper around a ChatCompletionClient that caches creation results from an underlying client. Cache hits do not contribute to token usage of the original client. - """ - def __init__(self, client: ChatCompletionClient, store: CacheStore[CHAT_CACHE_VALUE_TYPE]): - """ - Initialize a new ChatCompletionCache. + Typical Usage: - First initialize (for eg) a Redis store: + Let's use caching with `openai` as an example: - ```python - import redis + .. code-block:: bash - redis_client = redis.Redis(host="localhost", port=6379, db=0) - ``` + pip install "autogen-ext[openai]==0.4.0.dev13" - or diskcache store: + And use it as: - ```python - from diskcache import Cache + .. 
code-block:: python + + # Initialize the original client + from autogen_ext.models.openai import OpenAIChatCompletionClient + + openai_client = OpenAIChatCompletionClient( + model="gpt-4o-2024-08-06", + # api_key="sk-...", # Optional if you have an OPENAI_API_KEY environment variable set. + ) - diskcache_client = Cache("/tmp/diskcache") - ``` + # Then initialize the CacheStore. Either a Redis store: + import redis - Then initialize the ChatCompletionCache with the store: + redis_client = redis.Redis(host="localhost", port=6379, db=0) - ```python - from autogen_core.models import ChatCompletionCache - from autogen_ext.models import OpenAIChatCompletionClient + # or diskcache: + from diskcache import Cache - # Original client - client = OpenAIChatCompletionClient(...) + diskcache_client = Cache("/tmp/diskcache") - # Cached version - cached_client = ChatCompletionCache(client, redis_client) - ``` + # Then initialize the ChatCompletionCache with the store: + from autogen_core.models import ChatCompletionCache + + # Cached client + cached_client = ChatCompletionCache(openai_client, diskcache_client) + + You can now use the `cached_client` as you would the original client, but with caching enabled. + """ + + def __init__(self, client: ChatCompletionClient, store: CacheStore): + """ + Initialize a new ChatCompletionCache. Args: client (ChatCompletionClient): The original ChatCompletionClient to wrap. 
diff --git a/python/packages/autogen-core/tests/test_chat_completion_cache.py b/python/packages/autogen-core/tests/test_chat_completion_cache.py index 41437fc345b3..c5e2f46cb9da 100644 --- a/python/packages/autogen-core/tests/test_chat_completion_cache.py +++ b/python/packages/autogen-core/tests/test_chat_completion_cache.py @@ -1,10 +1,9 @@ import copy -from typing import List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union import pytest from autogen_core import CacheStore from autogen_core.models import ( - CHAT_CACHE_VALUE_TYPE, ChatCompletionCache, ChatCompletionClient, CreateResult, @@ -15,15 +14,16 @@ ) -class DictStore(CacheStore[CHAT_CACHE_VALUE_TYPE]): +class DictStore(CacheStore): def __init__(self) -> None: - self._store: dict[str, CHAT_CACHE_VALUE_TYPE] = {} + self._store: dict[str, Any] = {} - def get(self, key: str, default: Optional[CHAT_CACHE_VALUE_TYPE] = None) -> Optional[CHAT_CACHE_VALUE_TYPE]: + def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]: return self._store.get(key, default) - def set(self, key: str, value: CHAT_CACHE_VALUE_TYPE) -> None: + def set(self, key: str, value: Any) -> Optional[Any]: self._store[key] = value + return None def get_test_data() -> Tuple[list[str], list[str], SystemMessage, ChatCompletionClient, ChatCompletionCache]: diff --git a/python/uv.lock b/python/uv.lock index 219b5176d2d1..560ce94d2785 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -383,6 +383,7 @@ dev = [ { name = "azure-identity" }, { name = "chess" }, { name = "colorama" }, + { name = "diskcache" }, { name = "langchain-openai" }, { name = "langgraph" }, { name = "llama-index" }, @@ -400,6 +401,7 @@ dev = [ { name = "pydata-sphinx-theme" }, { name = "pygments" }, { name = "python-dotenv" }, + { name = "redis" }, { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, @@ -439,6 +441,7 @@ dev = [ { name = "azure-identity" }, { name = "chess" }, { name = "colorama" }, + { name = 
"diskcache" }, { name = "langchain-openai" }, { name = "langgraph" }, { name = "llama-index" }, @@ -456,6 +459,7 @@ dev = [ { name = "pydata-sphinx-theme", specifier = "==0.15.4" }, { name = "pygments" }, { name = "python-dotenv" }, + { name = "redis" }, { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, @@ -1197,6 +1201,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 }, ] +[[package]] +name = "diskcache" +version = "5.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550 }, +] + [[package]] name = "distro" version = "1.9.0" @@ -4251,6 +4264,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/d2/3b2ab40f455a256cb6672186bea95cd97b459ce4594050132d71e76f0d6f/pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", size = 550762 }, ] +[[package]] +name = "redis" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/da/d283a37303a995cd36f8b92db85135153dc4f7a8e4441aa827721b442cfb/redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f", 
size = 4608355 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/5f/fa26b9b2672cbe30e07d9a5bdf39cf16e3b80b42916757c5f92bca88e4ba/redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4", size = 261502 }, +] + [[package]] name = "referencing" version = "0.35.1"