24 changes: 24 additions & 0 deletions docs/models/openai.md
@@ -143,6 +143,30 @@ As of 7:48 AM on Wednesday, April 2, 2025, in Tokyo, Japan, the weather is cloud

You can learn more about the differences between the Responses API and Chat Completions API in the [OpenAI API docs](https://platform.openai.com/docs/guides/responses-vs-chat-completions).

The Responses API also supports referencing earlier model responses in a new request. This is available through the `openai_previous_response_id` field in
[`OpenAIResponsesModelSettings`][pydantic_ai.models.openai.OpenAIResponsesModelSettings].

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings

model = OpenAIResponsesModel('gpt-4o')
agent = Agent(model=model)

result = agent.run_sync('The secret is 1234')
model_settings = OpenAIResponsesModelSettings(
openai_previous_response_id=result.all_messages()[-1].provider_response_id
)
result = agent.run_sync('What is the secret code?', model_settings=model_settings)
print(result.output)
#> 1234
```

By passing the `provider_response_id` from an earlier run, you can allow the model to build on its own prior reasoning without needing to resend the full message history.

If message history is provided and all responses come from the same OpenAI model,
Collaborator:
I think we should account for https://ai.pydantic.dev/message-history/#processing-message-history. It would be unexpected if that feature is ignored entirely when using the Responses API. Maybe we should not automatically use previous_response_id if a history processor was configured?
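For reference, a history processor is configured roughly like this (a minimal sketch based on the linked docs; `keep_recent` is just an illustrative name):

```python
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage


def keep_recent(messages: list[ModelMessage]) -> list[ModelMessage]:
    # Trim the history before every model request.
    return messages[-5:]


agent = Agent('openai:gpt-4o', history_processors=[keep_recent])
```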

Collaborator:
Additionally, https://platform.openai.com/docs/guides/reasoning?api-mode=responses#encrypted-reasoning-items mentions the Responses API "stateless mode (either with store set to false, or when an organization is enrolled in zero data retention)", in which case previous_response_id wouldn't work. So there needs to be a way to turn it off.

Perhaps it's better to make it opt-in by setting OpenAIModelSettings(openai_previous_response_id='auto') or something?
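That is, something like this hypothetical opt-in (sketch only; note the field actually lives on `OpenAIResponsesModelSettings`):

```python
# Hypothetical opt-in: only reuse server-side state when explicitly requested.
settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto')
```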

Contributor Author:

1. For `history_processors`, I did some quick experiments and it doesn't seem to be interfering. Also, checking the code, I can see that the history is processed inside `_prepare_request`, a step before we pass the messages to the model request, where the `previous_response_id` logic happens:
```python
model_settings, model_request_parameters, message_history, _ = await self._prepare_request(ctx)
model_response = await ctx.deps.model.request(message_history, model_settings, model_request_parameters)
ctx.state.usage.requests += 1
return self._finish_handling(ctx, model_response)
```

```python
async def _prepare_request(
    self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
) -> tuple[ModelSettings | None, models.ModelRequestParameters, list[_messages.ModelMessage], RunContext[DepsT]]:
    ctx.state.message_history.append(self.request)
    ctx.state.run_step += 1
    run_context = build_run_context(ctx)
    # This will raise errors for any tool name conflicts
    ctx.deps.tool_manager = await ctx.deps.tool_manager.for_run_step(run_context)
    message_history = await _process_message_history(ctx.state, ctx.deps.history_processors, run_context)
```

Please let me know if there are other scenarios where this is not the case, or if I misunderstood the flow.

Contributor Author:

2. Regarding "stateless mode", thanks for pointing that out. In that case we can modify this:
```python
previous_response_id = model_settings.get('openai_previous_response_id')
if not previous_response_id:
    messages, previous_response_id = self._get_response_id_and_trim(messages)
```

to this, right?

```python
previous_response_id = model_settings.get('openai_previous_response_id')
if previous_response_id == 'auto':
    messages, previous_response_id = self._get_response_id_and_trim(messages)
```

-> When the user passes `message_history`, it will be trimmed only when `openai_previous_response_id = 'auto'` and the `ModelResponse`s are from the same model.
But in this case, we'll still have to explicitly set `openai_previous_response_id = 'auto'` when multiple requests are made within the same `agent.run` (as part of a tool call, etc.).
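Under that proposal, usage would look something like this (hypothetical sketch; `'auto'` is not implemented in this PR):

```python
settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto')
result = await agent.run(
    'What is the first secret key?',
    message_history=history,  # trimmed only because 'auto' is set and all responses match the model
    model_settings=settings,
)
```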

Pydantic AI will automatically send only the latest request and the `previous_response_id` from the latest response to the API for efficiency.
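For example, continuing the conversation by passing the full history (a minimal sketch reusing the `agent` from the example above; the trimming happens transparently when the request is made):

```python
result = agent.run_sync('The secret is 1234')
# All responses in this history come from the same OpenAI model, so only the
# latest request plus the previous_response_id are sent to the API.
result = agent.run_sync('What is the secret code?', message_history=result.all_messages())
print(result.output)
#> 1234
```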

## OpenAI-compatible Models

Many providers and models are compatible with the OpenAI API, and can be used with `OpenAIChatModel` in Pydantic AI.
37 changes: 37 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -190,6 +190,14 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False):
    `medium`, and `high`.
    """

    openai_previous_response_id: str
    """The identifier of the most recent response to include in the API request.

    This enables the model to reference previous reasoning traces.
    See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context)
    for more information.
    """


@dataclass(init=False)
class OpenAIChatModel(Model):
@@ -890,6 +898,10 @@ async def _responses_create(
        else:
            tool_choice = 'auto'

        previous_response_id = model_settings.get('openai_previous_response_id')
        if not previous_response_id:
            messages, previous_response_id = self._get_response_id_and_trim(messages)

        instructions, openai_messages = await self._map_messages(messages)
        reasoning = self._get_reasoning(model_settings)

@@ -935,6 +947,7 @@ async def _responses_create(
            truncation=model_settings.get('openai_truncation', NOT_GIVEN),
            timeout=model_settings.get('timeout', NOT_GIVEN),
            service_tier=model_settings.get('openai_service_tier', NOT_GIVEN),
            previous_response_id=previous_response_id,
            reasoning=reasoning,
            user=model_settings.get('openai_user', NOT_GIVEN),
            text=text or NOT_GIVEN,
@@ -999,6 +1012,30 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam
            ),
        }

    def _get_response_id_and_trim(self, messages: list[ModelMessage]) -> tuple[list[ModelMessage], str | None]:
        # If the message history contains only OpenAI responses,
        # we can limit the history to the most recent ModelRequest.
        # The provider_response_id from the latest ModelResponse is
        # then passed as previous_response_id to preserve context.
        response_id = None
        latest_model_request: ModelRequest | None = None
        for m in messages:
            # OpenAI may return a dated model_name that differs from self.model_name
            # (e.g., "gpt-5" vs "gpt-5-2025-08-07").
            if isinstance(m, ModelResponse) and m.model_name and (self.model_name in m.model_name):
                response_id = m.provider_response_id
            elif isinstance(m, ModelRequest):
                latest_model_request = m
            else:
                # Mixed model responses invalidate response_id,
                # so the history is kept intact.
                response_id = None
                break
        if response_id and latest_model_request:
            return [latest_model_request], response_id
        else:
            return messages, None

    async def _map_messages(
        self, messages: list[ModelMessage]
    ) -> tuple[str | NotGiven, list[responses.ResponseInputItemParam]]:
@@ -0,0 +1,131 @@
interactions:
- request:
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-type:
      - application/json
      host:
      - api.openai.com
    method: POST
    parsed_body:
      input:
      - content: The secret key is sesame
        role: user
      instructions: ''
      model: gpt-5
      text:
        format:
          type: text
    uri: https://api.openai.com/v1/responses
  response:
    headers:
      content-type:
      - application/json
    parsed_body:
      created_at: 1743075629
      error: null
      id: resp_1234
      incomplete_details: null
      instructions: ''
      max_output_tokens: null
      metadata: {}
      model: gpt-5
      object: response
      output:
      - content:
        - annotations: []
          text: "Open sesame! What would you like to unlock?"
          type: output_text
        id: msg_test_previous_response_id
        role: assistant
        status: completed
        type: message
      parallel_tool_calls: true
      previous_response_id: null
      reasoning: null
      status: complete
      status_details: null
      tool_calls: null
      total_tokens: 15
      usage:
        input_tokens: 10
        input_tokens_details:
          cached_tokens: 0
        output_tokens: 1
        output_tokens_details:
          reasoning_tokens: 0
        total_tokens: 11
    status:
      code: 200
      message: OK
- request:
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-type:
      - application/json
      host:
      - api.openai.com
    method: POST
    parsed_body:
      input:
      - content: What is the secret key again?
        role: user
      instructions: ''
      model: gpt-5
      text:
        format:
          type: text
      previous_response_id: resp_1234
    uri: https://api.openai.com/v1/responses
  response:
    headers:
      content-type:
      - application/json
    parsed_body:
      created_at: 1743075630
      error: null
      id: resp_5678
      incomplete_details: null
      instructions: ''
      max_output_tokens: null
      metadata: {}
      model: gpt-5
      object: response
      output:
      - content:
        - annotations: []
          text: "sesame"
          type: output_text
        id: msg_test_previous_response_id
        role: assistant
        status: completed
        type: message
      parallel_tool_calls: true
      previous_response_id: resp_1234
      reasoning: null
      status: complete
      status_details: null
      tool_calls: null
      total_tokens: 15
      usage:
        input_tokens: 10
        input_tokens_details:
          cached_tokens: 0
        output_tokens: 1
        output_tokens_details:
          reasoning_tokens: 0
        total_tokens: 11
    status:
      code: 200
      message: OK
version: 1
133 changes: 133 additions & 0 deletions tests/models/test_openai_responses.py
@@ -1085,6 +1085,139 @@ async def test_openai_responses_verbosity(allow_model_requests: None, openai_api
    assert result.output == snapshot('4')


@pytest.mark.vcr()
async def test_openai_previous_response_id(allow_model_requests: None, openai_api_key: str):
    """Test if previous responses are detected via previous_response_id in settings"""
    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
    agent = Agent(model=model)
    result = await agent.run('The secret key is sesame')
    settings = OpenAIResponsesModelSettings(openai_previous_response_id=result.all_messages()[-1].provider_response_id)  # type: ignore
    result = await agent.run('What is the secret code?', model_settings=settings)
    assert result.output == snapshot('sesame')


async def test_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str):
    """Test if invalid previous response id is ignored when history contains non-OpenAI responses"""
    history = [
        ModelRequest(
            parts=[
                UserPromptPart(
                    content='The first secret key is sesame',
                ),
            ],
        ),
        ModelResponse(
            parts=[
                TextPart(content='Open sesame! What would you like to unlock?'),
            ],
            model_name='gpt-5',
            provider_name='openai',
            provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099',
        ),
        ModelRequest(
            parts=[
                UserPromptPart(
                    content='The second secret key is olives',
                ),
            ],
        ),
        ModelResponse(
            parts=[
                TextPart(content='Understood'),
            ],
            model_name='claude-3-5-sonnet-latest',
            provider_name='anthropic',
            provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj',
        ),
        ModelRequest(
            parts=[
                UserPromptPart(
                    content='what is the first secret key?',
                ),
            ],
        ),
    ]

    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
    history, previous_response_id = model._get_response_id_and_trim(history)  # type: ignore
    assert not previous_response_id
    assert history == snapshot(
        [
            ModelRequest(parts=[UserPromptPart(content='The first secret key is sesame', timestamp=IsDatetime())]),
            ModelResponse(
                parts=[TextPart(content='Open sesame! What would you like to unlock?')],
                usage=RequestUsage(),
                model_name='gpt-5',
                timestamp=IsDatetime(),
                provider_name='openai',
                provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099',
            ),
            ModelRequest(parts=[UserPromptPart(content='The second secret key is olives', timestamp=IsDatetime())]),
            ModelResponse(
                parts=[TextPart(content='Understood')],
                usage=RequestUsage(),
                model_name='claude-3-5-sonnet-latest',
                timestamp=IsDatetime(),
                provider_name='anthropic',
                provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj',
            ),
            ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]),
        ]
    )


async def test_previous_response_id_same_model_history(allow_model_requests: None, openai_api_key: str):
    """Test if message history is trimmed when model responses are from same model"""
    history = [
        ModelRequest(
            parts=[
                UserPromptPart(
                    content='The first secret key is sesame',
                ),
            ],
        ),
        ModelResponse(
            parts=[
                TextPart(content='Open sesame! What would you like to unlock?'),
            ],
            model_name='gpt-5',
            provider_name='openai',
            provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099',
        ),
        ModelRequest(
            parts=[
                UserPromptPart(
                    content='The second secret key is olives',
                ),
            ],
        ),
        ModelResponse(
            parts=[
                TextPart(content='Understood'),
            ],
            model_name='gpt-5',
            provider_name='openai',
            provider_response_id='resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b',
        ),
        ModelRequest(
            parts=[
                UserPromptPart(
                    content='what is the first secret key?',
                ),
            ],
        ),
    ]

    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
    history, previous_response_id = model._get_response_id_and_trim(history)  # type: ignore
    assert previous_response_id == 'resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b'
    assert history == snapshot(
        [
            ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]),
        ]
    )


async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None):
    c = response_message(
        [
4 changes: 4 additions & 0 deletions tests/test_examples.py
@@ -622,6 +622,10 @@ async def model_logic( # noqa: C901
                return ModelResponse(parts=list(response))
            else:
                return ModelResponse(parts=[response])
        elif m.content == 'The secret is 1234':
            return ModelResponse(parts=[TextPart('The secret is safe with me')])
        elif m.content == 'What is the secret code?':
            return ModelResponse(parts=[TextPart('1234')])

    elif isinstance(m, ToolReturnPart) and m.tool_name == 'roulette_wheel':
        win = m.content == 'winner'