Add documentation for token usage tracking (#1518)

habema · seratch · web-flow · commit ebc5443b0b6a · 2025-08-19T02:19:39.000Z
Co-authored-by: Kazuhiro Sera <seratch@openai.com>
diff --git a/docs/usage.md b/docs/usage.md
@@ -0,0 +1,54 @@
+# Usage
+
+The Agents SDK automatically tracks token usage for every run. You can access it from the run context and use it to monitor costs, enforce limits, or record analytics.
+
+## What is tracked
+
+- **requests**: number of LLM API calls made
+- **input_tokens**: total input tokens sent
+- **output_tokens**: total output tokens received
+- **total_tokens**: input + output
+- **details**:
+  - `input_tokens_details.cached_tokens`
+  - `output_tokens_details.reasoning_tokens`
+
+## Accessing usage from a run
+
+After `Runner.run(...)`, access usage via `result.context_wrapper.usage`.
+
+```python
+result = await Runner.run(agent, "What's the weather in Tokyo?")
+usage = result.context_wrapper.usage
+
+print("Requests:", usage.requests)
+print("Input tokens:", usage.input_tokens)
+print("Output tokens:", usage.output_tokens)
+print("Total tokens:", usage.total_tokens)
+```
+
+Usage is aggregated across all model calls during the run (including tool calls and handoffs).
+
+## Accessing usage with sessions
+
+When you use a `Session` (e.g., `SQLiteSession`), each call to `Runner.run(...)` returns usage for that specific run only. The session preserves conversation history between runs, but usage is not accumulated across them; because earlier turns are sent to the model again as input, later runs typically report more input tokens.
+
+```python
+session = SQLiteSession("my_conversation")
+
+first = await Runner.run(agent, "Hi!", session=session)
+print(first.context_wrapper.usage.total_tokens)  # usage for the first run
+
+second = await Runner.run(agent, "Can you elaborate?", session=session)
+print(second.context_wrapper.usage.total_tokens)  # usage for the second run only
+```
+
+## Using usage in hooks
+
+If you’re using `RunHooks`, the `context` object passed to each hook contains `usage`. This lets you log usage at key lifecycle moments.
+
+```python
+class MyHooks(RunHooks):
+    async def on_agent_end(self, context: RunContextWrapper, agent: Agent, output: Any) -> None:
+        u = context.usage
+        print(f"{agent.name} → {u.requests} requests, {u.total_tokens} total tokens")
+```
diff --git a/examples/basic/usage_tracking.py b/examples/basic/usage_tracking.py
@@ -0,0 +1,46 @@
+import asyncio
+
+from pydantic import BaseModel
+
+from agents import Agent, Runner, Usage, function_tool
+
+
+class Weather(BaseModel):
+    city: str
+    temperature_range: str
+    conditions: str
+
+
+@function_tool
+def get_weather(city: str) -> Weather:
+    """Get the current weather information for a specified city."""
+    return Weather(city=city, temperature_range="14-20C", conditions="Sunny with wind.")
+
+
+def print_usage(usage: Usage) -> None:
+    print("\n=== Usage ===")
+    print(f"Requests: {usage.requests}")
+    print(f"Input tokens: {usage.input_tokens}")
+    print(f"Output tokens: {usage.output_tokens}")
+    print(f"Total tokens: {usage.total_tokens}")
+
+
+async def main() -> None:
+    agent = Agent(
+        name="Usage Demo",
+        instructions="You are a concise assistant. Use tools if needed.",
+        tools=[get_weather],
+    )
+
+    result = await Runner.run(agent, "What's the weather in Tokyo?")
+
+    print("\nFinal output:")
+    print(result.final_output)
+
+    # Access usage from the run context
+    print_usage(result.context_wrapper.usage)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -68,6 +68,7 @@ plugins:
                 - context.md
                 - guardrails.md
                 - multi_agent.md
+                - usage.md
                 - Models:
                     - models/index.md
                     - models/litellm.md
@@ -165,6 +166,7 @@ plugins:
                 - context.md
                 - guardrails.md
                 - multi_agent.md
+                - usage.md
                 - モデル:
                     - models/index.md
                     - models/litellm.md