Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion langfuse/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,34 @@ def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
return _with_langfuse


def _extract_responses_prompt(kwargs: Any) -> Any:
    """Build the prompt payload to trace for an OpenAI Responses API call.

    Merges the ``instructions`` kwarg with the ``input`` kwarg so the captured
    prompt reflects the model context:

    - no instructions: the raw ``input`` value (possibly ``None``)
    - instructions only: ``{"instructions": ...}``
    - instructions + string input: a two-message chat list (system + user)
    - instructions + list input: instructions prepended as a system message

    ``NotGiven`` sentinels are normalized to ``None`` before merging.
    """
    input_value = kwargs.get("input", None)
    instructions = kwargs.get("instructions", None)

    # The OpenAI SDK uses NotGiven as an "argument omitted" sentinel;
    # treat it the same as an explicit None.
    if isinstance(input_value, NotGiven):
        input_value = None

    if isinstance(instructions, NotGiven):
        instructions = None

    # Nothing to merge — capture the input exactly as the caller passed it.
    if instructions is None:
        return input_value

    # Instructions without input: surface them under an explicit key.
    if input_value is None:
        return {"instructions": instructions}

    # String input: represent the pair as a standard chat-message list.
    if isinstance(input_value, str):
        return [
            {"role": "system", "content": instructions},
            {"role": "user", "content": input_value},
        ]

    # Message-list input: prepend instructions as the leading system message
    # (presumably mirroring how the Responses API inserts `instructions`
    # first in context — see the review thread; confirm against API docs).
    # NOTE(review): this prepends unconditionally, so a caller-supplied
    # system message in the list will appear after it.
    if isinstance(input_value, list):
        return [{"role": "system", "content": instructions}, *input_value]
Comment on lines +271 to +272
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible duplicate system message in captured prompt

When input_value is a list, instructions is prepended unconditionally as a role: system entry. If the caller already included a {"role": "system", ...} message inside their input list (valid in the Responses API), the captured Langfuse prompt will contain two system-role entries, which can confuse prompt replay or evaluation flows.

Consider checking whether a system message is already present before prepending:

if isinstance(input_value, list):
    already_has_system = any(
        isinstance(m, dict) and m.get("role") == "system"
        for m in input_value
    )
    if already_has_system:
        return input_value
    return [{"role": "system", "content": instructions}, *input_value]

Alternatively, document clearly that instructions will always be surfaced as the first system message in the captured prompt regardless of existing list contents.

Copy link
Author

@D-Joey-G D-Joey-G Mar 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple developer/system messages aren't uncommon, so that concern doesn't seem important.

I believe that it is safe/correct to put the instructions arg first in what we trace, as earlier docs for the Responses API said the parameter inserts a system (or developer) message as the first item in the model's context.

That said, the docs now say:

A system (or developer) message inserted into the model's context.

When using along with previous_response_id, the instructions from a previous response will not be carried over to the next response. This makes it simple to swap out system (or developer) messages in new responses.

That is admittedly less certain on that point.


return {"instructions": instructions, "input": input_value}


def _extract_chat_prompt(kwargs: Any) -> Any:
"""Extracts the user input from prompts. Returns an array of messages or dict with messages and functions"""
prompt = {}
Expand Down Expand Up @@ -403,7 +431,7 @@ def _get_langfuse_data_from_kwargs(resource: OpenAiDefinition, kwargs: Any) -> A
if resource.type == "completion":
prompt = kwargs.get("prompt", None)
elif resource.object == "Responses" or resource.object == "AsyncResponses":
prompt = kwargs.get("input", None)
prompt = _extract_responses_prompt(kwargs)
elif resource.type == "chat":
prompt = _extract_chat_prompt(kwargs)
elif resource.type == "embedding":
Expand Down
5 changes: 4 additions & 1 deletion tests/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -1407,7 +1407,10 @@ def test_response_api_streaming(openai):
assert len(generation.data) != 0
generationData = generation.data[0]
assert generationData.name == generation_name
assert generation.data[0].input == "Hello!"
assert generation.data[0].input == [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
]
assert generationData.type == "GENERATION"
assert "gpt-4o" in generationData.model
assert generationData.start_time is not None
Expand Down
46 changes: 46 additions & 0 deletions tests/test_openai_prompt_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest

try:
# Compatibility across OpenAI SDK versions where NOT_GIVEN export moved.
from openai import NOT_GIVEN
except ImportError:
from openai._types import NOT_GIVEN

from langfuse.openai import _extract_responses_prompt


@pytest.mark.parametrize(
    ("call_kwargs", "expected_prompt"),
    [
        pytest.param({"input": "Hello!"}, "Hello!", id="input-only"),
        pytest.param(
            {"instructions": "You are helpful.", "input": "Hello!"},
            [
                {"role": "system", "content": "You are helpful."},
                {"role": "user", "content": "Hello!"},
            ],
            id="instructions-with-string-input",
        ),
        pytest.param(
            {
                "instructions": "You are helpful.",
                "input": [{"role": "user", "content": "Hello!"}],
            },
            [
                {"role": "system", "content": "You are helpful."},
                {"role": "user", "content": "Hello!"},
            ],
            id="instructions-with-list-input",
        ),
        pytest.param(
            {"instructions": "You are helpful."},
            {"instructions": "You are helpful."},
            id="instructions-only",
        ),
        pytest.param(
            {"instructions": "You are helpful.", "input": NOT_GIVEN},
            {"instructions": "You are helpful."},
            id="input-not-given",
        ),
        pytest.param(
            {"instructions": NOT_GIVEN, "input": "Hello!"},
            "Hello!",
            id="instructions-not-given",
        ),
        pytest.param(
            {"instructions": NOT_GIVEN, "input": NOT_GIVEN},
            None,
            id="everything-not-given",
        ),
    ],
)
def test_extract_responses_prompt(call_kwargs, expected_prompt):
    """Each parametrized case pins one branch of _extract_responses_prompt."""
    assert _extract_responses_prompt(call_kwargs) == expected_prompt