livekit · giulio-leone · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
diff --git a/livekit-agents/livekit/agents/llm/utils.py b/livekit-agents/livekit/agents/llm/utils.py
@@ -366,20 +366,134 @@ def function_arguments_to_pydantic_model(func: Callable[..., Any]) -> type[BaseM
     return create_model(model_name, **fields)
 
 
+def _try_repair_json(raw: str) -> Any:
+    """Best-effort repair of truncated JSON from LLM tool call arguments.
+
+    LLMs sometimes return truncated JSON in streaming tool calls, e.g.:
+    '{"success":true,"reason":"The message explicitly asks the user'
+    One scan tracks string/escape state and the open containers; the text
+    is then completed by closing an open string, dropping a dangling comma,
+    and appending the missing closers innermost first.
+
+    Args:
+        raw: The (possibly truncated) JSON text.
+
+    Returns:
+        The value ``from_json`` parses from the repaired text.
+
+    Raises:
+        Whatever ``from_json`` raises when the repaired text is still not
+        valid JSON (e.g. input cut off after a key or a colon).
+    """
+
+    pending_closers: list[str] = []
+    in_string = False
+    in_escape = False
+    for ch in raw:
+        if in_escape:
+            in_escape = False
+        elif ch == "\\":
+            in_escape = True
+        elif ch == '"':
+            in_string = not in_string
+        elif in_string:
+            continue
+        elif ch == "{":
+            pending_closers.append("}")
+        elif ch == "[":
+            pending_closers.append("]")
+        elif ch in ("}", "]") and pending_closers and pending_closers[-1] == ch:
+            pending_closers.pop()
+
+    repaired = raw
+    if in_string:
+        # Ending mid-escape means the last char is a lone backslash; drop it
+        # so the quote appended below closes the string rather than being
+        # escaped as '\"'.
+        if in_escape:
+            repaired = repaired[:-1]
+        repaired += '"'
+    elif pending_closers:
+        # Truncation right after a separator (e.g. '{"a": 1,') leaves a comma
+        # that is invalid directly before a closer; drop it.
+        repaired = repaired.rstrip(" \t\r\n")
+        if repaired.endswith(","):
+            repaired = repaired[:-1]
+
+    # Close containers innermost first.
+    repaired += "".join(reversed(pending_closers))
+
+    return from_json(repaired)
+
+
 def prepare_function_arguments(
     *,
     fnc: FunctionTool | RawFunctionTool,
     json_arguments: str,  # raw function output from the LLM
     call_ctx: RunContext[Any] | None = None,
+    repair_json: bool = True,
 ) -> tuple[tuple[Any, ...], dict[str, Any]]:  # returns args, kwargs
     """
     Create the positional and keyword arguments to call a function tool from
     the raw function output from the LLM.
+
+    Args:
+        repair_json: When True (default), attempt to repair truncated JSON from
+            LLM streaming before raising. For a FunctionTool the repaired
+            arguments are validated against its schema; if that fails, a
+            ValueError chained to the original parse error is raised so the
+            caller can retry the LLM call. Set to False to raise a ValueError
+            on malformed JSON without attempting repair.
     """
 
     signature = inspect.signature(fnc)
     type_hints = get_type_hints(fnc, include_extras=True)
-    args_dict = from_json(json_arguments)
+    try:
+        args_dict = from_json(json_arguments)
+    except ValueError as original_parse_error:
+        if not repair_json:
+            raise ValueError(
+                f"Failed to parse tool call arguments as JSON: "
+                f"{json_arguments[:200]!r}{'…' if len(json_arguments) > 200 else ''}"
+            ) from None
+
+        # LLMs may return truncated JSON in streaming tool calls (e.g., EOF
+        # while parsing a string). Attempt to repair the JSON before giving up.
+        try:
+            args_dict = _try_repair_json(json_arguments)
+        except Exception:
+            raise ValueError(
+                f"Failed to parse tool call arguments as JSON "
+                f"(and repair attempt failed): "
+                f"{json_arguments[:200]!r}{'…' if len(json_arguments) > 200 else ''}"
+            ) from None
+
+        # Validate repaired args against the tool's Pydantic schema before
+        # executing.  Repair can produce structurally valid JSON that is
+        # semantically incomplete — e.g. '{"arr": [{"a": 1' repairs to
+        # '{"arr": [{"a": 1}]}' but the tool may require a "b" field too.
+        # If validation fails, we re-raise so the caller can retry the LLM
+        # call rather than invoking the tool with wrong arguments.
+        if isinstance(fnc, FunctionTool):
+            try:
+                model_type = function_arguments_to_pydantic_model(fnc)
+                model_type.model_validate(args_dict)
+            except Exception:
+                raise ValueError(
+                    f"Repaired JSON failed schema validation for tool "
+                    f"'{fnc.id}'; raising so the caller can retry the "
+                    f"LLM call. Args preview: "
+                    f"{json_arguments[:200]!r}"
+                ) from original_parse_error
+
+        logger.warning(
+            "repaired truncated JSON in tool call arguments",
+            extra={
+                "tool_name": fnc.id,
+                "raw_arguments_preview": json_arguments[:200],
+                "raw_arguments_length": len(json_arguments),
+            },
+        )
 
     if isinstance(fnc, FunctionTool):
         model_type = function_arguments_to_pydantic_model(fnc)

diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -68,6 +68,16 @@ async def raw_tool_2() -> str:
     return "raw2"
 
 
+@function_tool
+async def mock_tool_required_fields(a: int, b: int) -> dict[str, int]:
+    """Test tool with two required fields.
+    Args:
+        a: First required integer
+        b: Second required integer
+    """
+    return {"a": a, "b": b}  # echo both inputs back so callers can inspect them
+
+
 class DummyAgent(Agent):
     def __init__(self):
         super().__init__(instructions="You are a dummy agent.")
@@ -392,3 +402,87 @@ def test_unexpected_arguments(self):
             prepare_function_arguments(
                 fnc=agent.mock_tool_in_agent, json_arguments='{"opt_arg2": "test2"}'
             )
+
+
+class TestTruncatedJsonRepair:
+    """Tests for repairing truncated JSON in LLM tool call arguments."""
+
+    def test_truncated_string_value(self):
+        """LLM returns JSON with an unfinished string — should repair and parse."""
+        # Real-world example from issue #4240: GPT-4.1 on Azure
+        args, _ = prepare_function_arguments(
+            fnc=mock_tool_1,
+            json_arguments='{"arg1":"The message explicitly asks the user',
+        )
+        assert args == ("The message explicitly asks the user", None)
+
+    def test_truncated_closing_brace(self):
+        """JSON missing closing brace — should repair."""
+        args, _ = prepare_function_arguments(
+            fnc=mock_tool_1,
+            json_arguments='{"arg1": "hello"',
+        )
+        assert args == ("hello", None)
+
+    def test_valid_json_not_affected(self):
+        """Valid JSON should still work unchanged."""
+        args, _ = prepare_function_arguments(
+            fnc=mock_tool_1,
+            json_arguments='{"arg1": "test"}',
+        )
+        assert args == ("test", None)
+
+    def test_completely_invalid_json_raises(self):
+        """Completely broken JSON should still raise ValueError."""
+        with pytest.raises(ValueError):
+            prepare_function_arguments(
+                fnc=mock_tool_1,
+                json_arguments="this is not json at all",
+            )
+
+    def test_nested_object_in_array_repair(self):
+        """Object inside array should close in correct nesting order."""
+        from livekit.agents.llm.utils import _try_repair_json
+
+        result = _try_repair_json('{"arr": [{"a": 1')
+        assert result == {"arr": [{"a": 1}]}
+
+    def test_triple_trailing_backslash_repair(self):
+        """Odd number of trailing backslashes (1 and 3) should strip only the last one."""
+        from livekit.agents.llm.utils import _try_repair_json
+
+        prefix = '{"path": "C:\\\\Users\\\\name'  # JSON text with escaped backslashes
+        assert _try_repair_json(prefix + "\\") == {"path": "C:\\Users\\name"}
+        assert _try_repair_json(prefix + "\\" * 3) == {"path": "C:\\Users\\name\\"}
+
+    def test_repaired_json_rejected_when_schema_validation_fails(self):
+        """Repaired JSON that drops required fields must raise ValueError.
+
+        This is the exact scenario from the PR review: '{"a": 1' might have been
+        '{"a": 1, "b": 2}' but repair produces '{"a": 1}' — missing required "b".
+        The schema validation safety net catches this and raises so the caller
+        can retry the LLM call.
+        """
+        with pytest.raises(ValueError, match="schema validation"):
+            prepare_function_arguments(
+                fnc=mock_tool_required_fields,
+                json_arguments='{"a": 1',  # truncated; "b" is missing
+            )
+
+    def test_repair_disabled_raises_immediately(self):
+        """When repair_json=False, malformed JSON raises without attempting repair."""
+        with pytest.raises(ValueError, match="Failed to parse"):
+            prepare_function_arguments(
+                fnc=mock_tool_1,
+                json_arguments='{"arg1": "hello"',
+                repair_json=False,
+            )
+
+    def test_repair_succeeds_when_all_required_fields_present(self):
+        """Repair should succeed when all required fields are present in the
+        truncated JSON — only structural closers are missing."""
+        args, _ = prepare_function_arguments(
+            fnc=mock_tool_required_fields,
+            json_arguments='{"a": 1, "b": 2',  # just missing closing brace
+        )
+        assert args == (1, 2)