Improve documentation & revert protocol
srjoglekar246 committed Jan 10, 2025
1 parent 434b25e commit 3556990
Showing 7 changed files with 119 additions and 92 deletions.
@@ -180,58 +180,6 @@
"**NB the default usage response is to return zero values**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Caching Wrapper\n",
"\n",
"`autogen_core` implements a {py:class}`~autogen_core.models.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times. \n",
"\n",
"{py:class}`~autogen_core.models.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol to allow duck-typing any storage object that has a pair of `get` & `set` methods (such as `redis.Redis` or `diskcache.Cache`)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Any, Dict, Optional\n",
"\n",
"from autogen_core import CacheStore\n",
"from autogen_core.models import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache\n",
"\n",
"\n",
"# Simple CacheStore implementation using in-memory dict,\n",
"# you can also use redis.Redis or diskcache.Cache\n",
"class DictStore(CacheStore[CHAT_CACHE_VALUE_TYPE]):\n",
" def __init__(self) -> None:\n",
" self._store: dict[str, CHAT_CACHE_VALUE_TYPE] = {}\n",
"\n",
" def get(self, key: str, default: Optional[CHAT_CACHE_VALUE_TYPE] = None) -> Optional[CHAT_CACHE_VALUE_TYPE]:\n",
" return self._store.get(key, default)\n",
"\n",
" def set(self, key: str, value: CHAT_CACHE_VALUE_TYPE) -> None:\n",
" self._store[key] = value\n",
"\n",
"\n",
"cached_client = ChatCompletionCache(model_client, DictStore())\n",
"response = await cached_client.create(messages=messages)\n",
"\n",
"cached_response = await cached_client.create(messages=messages)\n",
"print(cached_response.cached)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Inspecting `cached_client.total_usage()` (or `model_client.total_usage()`) before and after a cached response should yield idential counts.\n",
"\n",
"Note that the caching is sensitive to the exact arguments provided to `cached_client.create` or `cached_client.create_stream`, so changing `tools` or `json_output` arguments might lead to a cache miss."
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -373,6 +321,54 @@
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Caching Wrapper\n",
"\n",
"`autogen_core` implements a {py:class}`~autogen_core.models.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times. \n",
"\n",
"{py:class}`~autogen_core.models.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol to allow duck-typing any storage object that has a pair of `get` & `set` methods (such as `redis.Redis` or `diskcache.Cache`). Here's an example of using `diskcache` for local caching:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"from typing import Any, Dict, Optional\n",
"\n",
"from autogen_core.models import ChatCompletionCache\n",
"from diskcache import Cache\n",
"\n",
"diskcache_client = Cache(\"/tmp/diskcache\")\n",
"\n",
"cached_client = ChatCompletionCache(model_client, diskcache_client)\n",
"response = await cached_client.create(messages=messages)\n",
"\n",
"cached_response = await cached_client.create(messages=messages)\n",
"print(cached_response.cached)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Inspecting `cached_client.total_usage()` (or `model_client.total_usage()`) before and after a cached response should yield idential counts.\n",
"\n",
"Note that the caching is sensitive to the exact arguments provided to `cached_client.create` or `cached_client.create_stream`, so changing `tools` or `json_output` arguments might lead to a cache miss."
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -673,7 +669,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.1"
}
},
"nbformat": 4,
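The notebook cell above states that a cache hit leaves `total_usage()` unchanged and that changing `create` arguments such as `json_output` produces a cache miss. A minimal sketch of how that could be observed, written in notebook style with top-level `await`, assuming `model_client`, `messages`, and `diskcache_client` are defined as in the cells above (illustrative only, not part of this commit):

```python
# Sketch only: assumes model_client, messages, and diskcache_client exist as in the notebook above.
from autogen_core.models import ChatCompletionCache

cached_client = ChatCompletionCache(model_client, diskcache_client)

await cached_client.create(messages=messages)  # first call is forwarded to model_client
usage_before = cached_client.total_usage()

cached_response = await cached_client.create(messages=messages)  # identical arguments: served from cache
print(cached_response.cached)  # True
print(cached_client.total_usage() == usage_before)  # True: cache hits add no token usage

# A different argument (here json_output) changes the cache key, so this call misses the cache.
miss = await cached_client.create(messages=messages, json_output=True)
print(miss.cached)  # False
```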
2 changes: 2 additions & 0 deletions python/packages/autogen-core/pyproject.toml
@@ -72,6 +72,8 @@ dev = [
"autogen_ext==0.4.0",

# Documentation tooling
"diskcache",
"redis",
"sphinx-autobuild",
]

10 changes: 4 additions & 6 deletions python/packages/autogen-core/src/autogen_core/_cache_store.py
@@ -1,17 +1,15 @@
from typing import Generic, Optional, Protocol, TypeVar
from typing import Any, Optional, Protocol

T = TypeVar("T")


class CacheStore(Protocol, Generic[T]):
class CacheStore(Protocol):
"""
This protocol defines the basic interface for store/cache operations.
Allows duck-typing with any object that implements the get and set methods,
such as redis or diskcache interfaces.
"""

def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
"""
Retrieve an item from the store.
@@ -25,7 +23,7 @@ def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
"""
...

Codecov / codecov/patch check warning: added line #L24 in python/packages/autogen-core/src/autogen_core/_cache_store.py was not covered by tests.

def set(self, key: str, value: T) -> None:
def set(self, key: str, value: Any) -> Optional[Any]:
"""
Set an item in the store.
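With the protocol reverted to untyped `get`/`set`, any object exposing those two methods can serve as the store. As a quick illustration (an editor's sketch, not part of the diff), an in-memory store conforming to the reverted protocol mirrors the `DictStore` used in the tests further down:

```python
# Sketch: a minimal in-memory store satisfying the reverted CacheStore protocol.
# redis.Redis or diskcache.Cache instances can be passed to ChatCompletionCache directly instead.
from typing import Any, Optional

from autogen_core import CacheStore


class InMemoryStore(CacheStore):
    def __init__(self) -> None:
        self._store: dict[str, Any] = {}

    def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
        return self._store.get(key, default)

    def set(self, key: str, value: Any) -> Optional[Any]:
        self._store[key] = value
        return None
```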
@@ -1,4 +1,4 @@
from ._cache import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache
from ._cache import ChatCompletionCache
from ._model_client import ChatCompletionClient, ModelCapabilities, ModelFamily, ModelInfo # type: ignore
from ._replay_chat_completion_client import ReplayChatCompletionClient
from ._types import (
@@ -17,7 +17,6 @@

__all__ = [
"ModelCapabilities",
"CHAT_CACHE_VALUE_TYPE",
"ChatCompletionCache",
"ChatCompletionClient",
"SystemMessage",
57 changes: 32 additions & 25 deletions python/packages/autogen-core/src/autogen_core/models/_cache.py
@@ -17,47 +17,54 @@
RequestUsage,
)

CHAT_CACHE_VALUE_TYPE = Union[CreateResult, List[Union[str, CreateResult]]]


class ChatCompletionCache(ChatCompletionClient):
    """
    A wrapper around a ChatCompletionClient that caches creation results from an underlying client.
    Cache hits do not contribute to token usage of the original client.
    """

    def __init__(self, client: ChatCompletionClient, store: CacheStore[CHAT_CACHE_VALUE_TYPE]):
        """
        Initialize a new ChatCompletionCache.

        First initialize (e.g.) a Redis store:

        ```python
        import redis

        redis_client = redis.Redis(host="localhost", port=6379, db=0)
        ```

        or diskcache store:

        ```python
        from diskcache import Cache

        diskcache_client = Cache("/tmp/diskcache")
        ```

        Then initialize the ChatCompletionCache with the store:

        ```python
        from autogen_core.models import ChatCompletionCache
        from autogen_ext.models import OpenAIChatCompletionClient

        # Original client
        client = OpenAIChatCompletionClient(...)

        # Cached version
        cached_client = ChatCompletionCache(client, redis_client)
        ```

class ChatCompletionCache(ChatCompletionClient):
    """
    A wrapper around a ChatCompletionClient that caches creation results from an underlying client.
    Cache hits do not contribute to token usage of the original client.

    Typical Usage:

    Let's use caching with `openai` as an example:

    .. code-block:: bash

        pip install "autogen-ext[openai]==0.4.0.dev13"

    And use it as:

    .. code-block:: python

        # Initialize the original client
        from autogen_ext.models.openai import OpenAIChatCompletionClient

        openai_client = OpenAIChatCompletionClient(
            model="gpt-4o-2024-08-06",
            # api_key="sk-...", # Optional if you have an OPENAI_API_KEY environment variable set.
        )

        # Then initialize the CacheStore. Either a Redis store:
        import redis

        redis_client = redis.Redis(host="localhost", port=6379, db=0)

        # or diskcache:
        from diskcache import Cache

        diskcache_client = Cache("/tmp/diskcache")

        # Then initialize the ChatCompletionCache with the store:
        from autogen_core.models import ChatCompletionCache

        # Cached client
        cached_client = ChatCompletionCache(openai_client, diskcache_client)

    You can now use the `cached_client` as you would the original client, but with caching enabled.
    """

    def __init__(self, client: ChatCompletionClient, store: CacheStore):
        """
        Initialize a new ChatCompletionCache.

        Args:
            client (ChatCompletionClient): The original ChatCompletionClient to wrap.
12 changes: 6 additions & 6 deletions python/packages/autogen-core/tests/test_chat_completion_cache.py
@@ -1,10 +1,9 @@
import copy
from typing import List, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

import pytest
from autogen_core import CacheStore
from autogen_core.models import (
CHAT_CACHE_VALUE_TYPE,
ChatCompletionCache,
ChatCompletionClient,
CreateResult,
@@ -15,15 +14,16 @@
)


class DictStore(CacheStore[CHAT_CACHE_VALUE_TYPE]):
class DictStore(CacheStore):
def __init__(self) -> None:
self._store: dict[str, CHAT_CACHE_VALUE_TYPE] = {}
self._store: dict[str, Any] = {}

def get(self, key: str, default: Optional[CHAT_CACHE_VALUE_TYPE] = None) -> Optional[CHAT_CACHE_VALUE_TYPE]:
def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
return self._store.get(key, default)

def set(self, key: str, value: CHAT_CACHE_VALUE_TYPE) -> None:
def set(self, key: str, value: Any) -> Optional[Any]:
self._store[key] = value
return None


def get_test_data() -> Tuple[list[str], list[str], SystemMessage, ChatCompletionClient, ChatCompletionCache]:
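A test exercising the cached flag with this `DictStore` might look like the following sketch (not part of the shown diff; it assumes `ReplayChatCompletionClient` accepts a list of canned string responses, and uses the `DictStore` defined in the test file above):

```python
# Sketch of a possible test; DictStore is the class defined in the diff above.
import asyncio

from autogen_core.models import (
    ChatCompletionCache,
    ReplayChatCompletionClient,
    SystemMessage,
    UserMessage,
)


async def check_cached_flag() -> None:
    replay_client = ReplayChatCompletionClient(["Hello!"])
    cached_client = ChatCompletionCache(replay_client, DictStore())  # DictStore from the test above

    messages = [SystemMessage(content="You are helpful."), UserMessage(content="Hi", source="user")]

    first = await cached_client.create(messages=messages)
    assert not first.cached  # first call goes through to the replay client

    second = await cached_client.create(messages=messages)
    assert second.cached  # identical arguments are served from the store


asyncio.run(check_cached_flag())
```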
25 changes: 25 additions & 0 deletions python/uv.lock

Some generated files are not rendered by default.
