From 35569900ee1217e131800979d2126e58feb35a45 Mon Sep 17 00:00:00 2001
From: Sachin Joglekar <srjoglekar246@gmail.com>
Date: Fri, 10 Jan 2025 19:21:13 +0000
Subject: [PATCH] Improve documentation & revert protocol

---
 .../framework/model-clients.ipynb             | 102 +++++++++---------
 python/packages/autogen-core/pyproject.toml   |   2 +
 .../src/autogen_core/_cache_store.py          |  10 +-
 .../src/autogen_core/models/__init__.py       |   3 +-
 .../src/autogen_core/models/_cache.py         |  57 +++++-----
 .../tests/test_chat_completion_cache.py       |  12 +--
 python/uv.lock                                |  25 +++++
 7 files changed, 119 insertions(+), 92 deletions(-)

diff --git a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb
index 5c2124993ff5..9bb8f79be571 100644
--- a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb
+++ b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/framework/model-clients.ipynb
@@ -180,58 +180,6 @@
     "**NB the default usage response is to return zero values**"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Caching Wrapper\n",
-    "\n",
-    "`autogen_core` implements a {py:class}`~autogen_core.models.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times. \n",
-    "\n",
-    "{py:class}`~autogen_core.models.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol to allow duck-typing any storage object that has a pair of `get` & `set` methods (such as `redis.Redis` or `diskcache.Cache`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from typing import Any, Dict, Optional\n",
-    "\n",
-    "from autogen_core import CacheStore\n",
-    "from autogen_core.models import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache\n",
-    "\n",
-    "\n",
-    "# Simple CacheStore implementation using in-memory dict,\n",
-    "# you can also use redis.Redis or diskcache.Cache\n",
-    "class DictStore(CacheStore[CHAT_CACHE_VALUE_TYPE]):\n",
-    "    def __init__(self) -> None:\n",
-    "        self._store: dict[str, CHAT_CACHE_VALUE_TYPE] = {}\n",
-    "\n",
-    "    def get(self, key: str, default: Optional[CHAT_CACHE_VALUE_TYPE] = None) -> Optional[CHAT_CACHE_VALUE_TYPE]:\n",
-    "        return self._store.get(key, default)\n",
-    "\n",
-    "    def set(self, key: str, value: CHAT_CACHE_VALUE_TYPE) -> None:\n",
-    "        self._store[key] = value\n",
-    "\n",
-    "\n",
-    "cached_client = ChatCompletionCache(model_client, DictStore())\n",
-    "response = await cached_client.create(messages=messages)\n",
-    "\n",
-    "cached_response = await cached_client.create(messages=messages)\n",
-    "print(cached_response.cached)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Inspecting `cached_client.total_usage()` (or `model_client.total_usage()`) before and after a cached response should yield idential counts.\n",
-    "\n",
-    "Note that the caching is sensitive to the exact arguments provided to `cached_client.create` or `cached_client.create_stream`, so changing `tools` or `json_output` arguments might lead to a cache miss."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -373,6 +321,54 @@
     "```"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Caching Wrapper\n",
+    "\n",
+    "`autogen_core` implements a {py:class}`~autogen_core.models.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times. \n",
+    "\n",
+    "{py:class}`~autogen_core.models.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol to allow duck-typing any storage object that has a pair of `get` & `set` methods (such as `redis.Redis` or `diskcache.Cache`). Here's an example of using `diskcache` for local caching:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n"
+     ]
+    }
+   ],
+   "source": [
+    "from typing import Any, Dict, Optional\n",
+    "\n",
+    "from autogen_core.models import ChatCompletionCache\n",
+    "from diskcache import Cache\n",
+    "\n",
+    "diskcache_client = Cache(\"/tmp/diskcache\")\n",
+    "\n",
+    "cached_client = ChatCompletionCache(model_client, diskcache_client)\n",
+    "response = await cached_client.create(messages=messages)\n",
+    "\n",
+    "cached_response = await cached_client.create(messages=messages)\n",
+    "print(cached_response.cached)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Inspecting `cached_client.total_usage()` (or `model_client.total_usage()`) before and after a cached response should yield identical counts.\n",
+    "\n",
+    "Note that the caching is sensitive to the exact arguments provided to `cached_client.create` or `cached_client.create_stream`, so changing `tools` or `json_output` arguments might lead to a cache miss."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -673,7 +669,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.12.1"
   }
  },
  "nbformat": 4,
diff --git a/python/packages/autogen-core/pyproject.toml b/python/packages/autogen-core/pyproject.toml
index 4d6aa4ba6410..f66ae81c2d85 100644
--- a/python/packages/autogen-core/pyproject.toml
+++ b/python/packages/autogen-core/pyproject.toml
@@ -72,6 +72,8 @@ dev = [
     "autogen_ext==0.4.0",
 
     # Documentation tooling
+    "diskcache",
+    "redis",
     "sphinx-autobuild",
 ]
 
diff --git a/python/packages/autogen-core/src/autogen_core/_cache_store.py b/python/packages/autogen-core/src/autogen_core/_cache_store.py
index 2f2019bf2f93..92bafde1d02a 100644
--- a/python/packages/autogen-core/src/autogen_core/_cache_store.py
+++ b/python/packages/autogen-core/src/autogen_core/_cache_store.py
@@ -1,9 +1,7 @@
-from typing import Generic, Optional, Protocol, TypeVar
+from typing import Any, Optional, Protocol
 
-T = TypeVar("T")
 
-
-class CacheStore(Protocol, Generic[T]):
+class CacheStore(Protocol):
     """
     This protocol defines the basic interface for store/cache operations.
 
@@ -11,7 +9,7 @@ class CacheStore(Protocol, Generic[T]):
     such as redis or diskcache interfaces.
     """
 
-    def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
+    def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
         """
         Retrieve an item from the store.
 
@@ -25,7 +23,7 @@ def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
         """
         ...
 
-    def set(self, key: str, value: T) -> None:
+    def set(self, key: str, value: Any) -> Optional[Any]:
         """
         Set an item in the store.
 
diff --git a/python/packages/autogen-core/src/autogen_core/models/__init__.py b/python/packages/autogen-core/src/autogen_core/models/__init__.py
index 11a4db5ae510..9c958a540721 100644
--- a/python/packages/autogen-core/src/autogen_core/models/__init__.py
+++ b/python/packages/autogen-core/src/autogen_core/models/__init__.py
@@ -1,4 +1,4 @@
-from ._cache import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache
+from ._cache import ChatCompletionCache
 from ._model_client import ChatCompletionClient, ModelCapabilities, ModelFamily, ModelInfo  # type: ignore
 from ._replay_chat_completion_client import ReplayChatCompletionClient
 from ._types import (
@@ -17,7 +17,6 @@
 
 __all__ = [
     "ModelCapabilities",
-    "CHAT_CACHE_VALUE_TYPE",
     "ChatCompletionCache",
     "ChatCompletionClient",
     "SystemMessage",
diff --git a/python/packages/autogen-core/src/autogen_core/models/_cache.py b/python/packages/autogen-core/src/autogen_core/models/_cache.py
index c13761ba8d5c..fe85381c6c79 100644
--- a/python/packages/autogen-core/src/autogen_core/models/_cache.py
+++ b/python/packages/autogen-core/src/autogen_core/models/_cache.py
@@ -17,47 +17,54 @@
     RequestUsage,
 )
 
-CHAT_CACHE_VALUE_TYPE = Union[CreateResult, List[Union[str, CreateResult]]]
-
 
 class ChatCompletionCache(ChatCompletionClient):
     """
     A wrapper around a ChatCompletionClient that caches creation results from an underlying client.
     Cache hits do not contribute to token usage of the original client.
-    """
 
-    def __init__(self, client: ChatCompletionClient, store: CacheStore[CHAT_CACHE_VALUE_TYPE]):
-        """
-        Initialize a new ChatCompletionCache.
+    Typical Usage:
 
-        First initialize (for eg) a Redis store:
+        Let's use caching with `openai` as an example:
 
-        ```python
-        import redis
+        .. code-block:: bash
 
-        redis_client = redis.Redis(host="localhost", port=6379, db=0)
-        ```
+            pip install "autogen-ext[openai]==0.4.0.dev13"
 
-        or diskcache store:
+        And use it as:
 
-        ```python
-        from diskcache import Cache
+        .. code-block:: python
+
+            # Initialize the original client
+            from autogen_ext.models.openai import OpenAIChatCompletionClient
+
+            openai_client = OpenAIChatCompletionClient(
+                model="gpt-4o-2024-08-06",
+                # api_key="sk-...", # Optional if you have an OPENAI_API_KEY environment variable set.
+            )
 
-        diskcache_client = Cache("/tmp/diskcache")
-        ```
+            # Then initialize the CacheStore. Either a Redis store:
+            import redis
 
-        Then initialize the ChatCompletionCache with the store:
+            redis_client = redis.Redis(host="localhost", port=6379, db=0)
 
-        ```python
-        from autogen_core.models import ChatCompletionCache
-        from autogen_ext.models import OpenAIChatCompletionClient
+            # or diskcache:
+            from diskcache import Cache
 
-        # Original client
-        client = OpenAIChatCompletionClient(...)
+            diskcache_client = Cache("/tmp/diskcache")
 
-        # Cached version
-        cached_client = ChatCompletionCache(client, redis_client)
-        ```
+            # Then initialize the ChatCompletionCache with the store:
+            from autogen_core.models import ChatCompletionCache
+
+            # Cached client
+            cached_client = ChatCompletionCache(openai_client, diskcache_client)
+
+        You can now use the `cached_client` as you would the original client, but with caching enabled.
+    """
+
+    def __init__(self, client: ChatCompletionClient, store: CacheStore):
+        """
+        Initialize a new ChatCompletionCache.
 
         Args:
             client (ChatCompletionClient): The original ChatCompletionClient to wrap.
diff --git a/python/packages/autogen-core/tests/test_chat_completion_cache.py b/python/packages/autogen-core/tests/test_chat_completion_cache.py
index 41437fc345b3..c5e2f46cb9da 100644
--- a/python/packages/autogen-core/tests/test_chat_completion_cache.py
+++ b/python/packages/autogen-core/tests/test_chat_completion_cache.py
@@ -1,10 +1,9 @@
 import copy
-from typing import List, Optional, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
 
 import pytest
 from autogen_core import CacheStore
 from autogen_core.models import (
-    CHAT_CACHE_VALUE_TYPE,
     ChatCompletionCache,
     ChatCompletionClient,
     CreateResult,
@@ -15,15 +14,16 @@
 )
 
 
-class DictStore(CacheStore[CHAT_CACHE_VALUE_TYPE]):
+class DictStore(CacheStore):
     def __init__(self) -> None:
-        self._store: dict[str, CHAT_CACHE_VALUE_TYPE] = {}
+        self._store: dict[str, Any] = {}
 
-    def get(self, key: str, default: Optional[CHAT_CACHE_VALUE_TYPE] = None) -> Optional[CHAT_CACHE_VALUE_TYPE]:
+    def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
         return self._store.get(key, default)
 
-    def set(self, key: str, value: CHAT_CACHE_VALUE_TYPE) -> None:
+    def set(self, key: str, value: Any) -> Optional[Any]:
         self._store[key] = value
+        return None
 
 
 def get_test_data() -> Tuple[list[str], list[str], SystemMessage, ChatCompletionClient, ChatCompletionCache]:
diff --git a/python/uv.lock b/python/uv.lock
index 219b5176d2d1..560ce94d2785 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -383,6 +383,7 @@ dev = [
     { name = "azure-identity" },
     { name = "chess" },
     { name = "colorama" },
+    { name = "diskcache" },
     { name = "langchain-openai" },
     { name = "langgraph" },
     { name = "llama-index" },
@@ -400,6 +401,7 @@ dev = [
     { name = "pydata-sphinx-theme" },
     { name = "pygments" },
     { name = "python-dotenv" },
+    { name = "redis" },
     { name = "requests" },
     { name = "sphinx" },
     { name = "sphinx-autobuild" },
@@ -439,6 +441,7 @@ dev = [
     { name = "azure-identity" },
     { name = "chess" },
     { name = "colorama" },
+    { name = "diskcache" },
     { name = "langchain-openai" },
     { name = "langgraph" },
     { name = "llama-index" },
@@ -456,6 +459,7 @@ dev = [
     { name = "pydata-sphinx-theme", specifier = "==0.15.4" },
     { name = "pygments" },
     { name = "python-dotenv" },
+    { name = "redis" },
     { name = "requests" },
     { name = "sphinx" },
     { name = "sphinx-autobuild" },
@@ -1197,6 +1201,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
 ]
 
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550 },
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -4251,6 +4264,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ec/d2/3b2ab40f455a256cb6672186bea95cd97b459ce4594050132d71e76f0d6f/pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", size = 550762 },
 ]
 
+[[package]]
+name = "redis"
+version = "5.2.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "async-timeout", marker = "python_full_version < '3.11.3'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/47/da/d283a37303a995cd36f8b92db85135153dc4f7a8e4441aa827721b442cfb/redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f", size = 4608355 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/5f/fa26b9b2672cbe30e07d9a5bdf39cf16e3b80b42916757c5f92bca88e4ba/redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4", size = 261502 },
+]
+
 [[package]]
 name = "referencing"
 version = "0.35.1"