microsoft · jgarrison929 · Mar 13, 2026
@@ -345,16 +345,45 @@ def from_rendered_prompt(cls: type[_T], rendered_prompt: str) -> _T:
         except ParseError as exc:
             logger.info(f"Could not parse prompt {prompt} as xml, treating as text, error was: {exc}")
             return cls(messages=[ChatMessageContent(role=AuthorRole.USER, content=unescape(prompt))])
-        if xml_prompt.text and xml_prompt.text.strip():
-            messages.append(ChatMessageContent(role=AuthorRole.SYSTEM, content=unescape(xml_prompt.text.strip())))
+        # Accumulate text content that should be combined into a single message.
+        # This handles HTML-like tags (e.g., <p>, <div>) that are valid XML but not
+        # recognized as chat message tags — their content should be preserved as text.
+        pending_text_parts: list[str] = []
+        if xml_prompt.text:
+            pending_text_parts.append(xml_prompt.text)
+
+        def flush_pending_text(role: AuthorRole = AuthorRole.SYSTEM) -> None:
+            """Flush accumulated text as a chat message if non-empty."""
+            if pending_text_parts:
+                combined = "".join(pending_text_parts).strip()
+                if combined:
+                    messages.append(ChatMessageContent(role=role, content=unescape(combined)))
+                pending_text_parts.clear()
+
         for item in xml_prompt:
             if item.tag == CHAT_MESSAGE_CONTENT_TAG:
+                # Flush any pending text before a structured message
+                flush_pending_text()
                 messages.append(ChatMessageContent.from_element(item))
+                # Tail text after a recognized message element is treated as USER content
+                if item.tail and item.tail.strip():
+                    messages.append(ChatMessageContent(role=AuthorRole.USER, content=unescape(item.tail.strip())))
             elif item.tag == CHAT_HISTORY_TAG:
+                flush_pending_text()
                 for message in item:
                     messages.append(ChatMessageContent.from_element(message))
-            if item.tail and item.tail.strip():
-                messages.append(ChatMessageContent(role=AuthorRole.USER, content=unescape(item.tail.strip())))
+                # Tail text after a recognized history element is treated as USER content
+                if item.tail and item.tail.strip():
+                    messages.append(ChatMessageContent(role=AuthorRole.USER, content=unescape(item.tail.strip())))
+            else:
+                # Unrecognized element (e.g., <p>, <b>, <div>) — serialize it back
+                # to XML so the original content is preserved in the prompt text.
+                # This fixes #13632 where HTML tags caused content to be silently dropped.
+                # Note: tostring() includes the element's tail, so we don't add it separately.
+                pending_text_parts.append(tostring(item, encoding="unicode"))
+
+        # Flush any remaining text (as USER if messages exist, SYSTEM otherwise)
+        flush_pending_text(role=AuthorRole.USER if messages else AuthorRole.SYSTEM)
         if len(messages) == 1 and messages[0].role == AuthorRole.SYSTEM:
             messages[0].role = AuthorRole.USER
         return cls(messages=messages)

@@ -592,6 +592,48 @@ async def test_template_empty_history(chat_history: ChatHistory):
     assert chat_history_2.messages[1].role == AuthorRole.USER
 
 
+def test_chat_history_from_rendered_prompt_with_html_tags():
+    """Regression test for #13632: HTML tags in prompts caused content to be dropped.
+
+    When the prompt contains valid XML tags like <p>, <div>, etc., the XML parser
+    was treating them as elements and silently discarding their content because
+    they weren't recognized as chat message tags. The fix serializes unrecognized
+    elements back to their string representation.
+    """
+    # Prompt whose quoted payload is wrapped in a well-formed <p> element
+    prompt_with_html = 'Translate this: "<p>What is your name?</p>"'
+    # Control: the same prompt with the <p> markup removed
+    prompt_without_html = 'Translate this: "What is your name?"'
+
+    history_with_html = ChatHistory.from_rendered_prompt(prompt_with_html)
+    history_without_html = ChatHistory.from_rendered_prompt(prompt_without_html)
+
+    # Both should produce exactly one message (only the count is checked, not the role)
+    assert len(history_with_html.messages) == 1
+    assert len(history_without_html.messages) == 1
+
+    # Both should contain the question text
+    assert "What is your name?" in history_with_html.messages[0].content
+    assert "What is your name?" in history_without_html.messages[0].content
+
+    # The HTML version should keep both the opening and closing <p> tags verbatim
+    assert "<p>" in history_with_html.messages[0].content
+    assert "</p>" in history_with_html.messages[0].content
+
+
+def test_chat_history_from_rendered_prompt_with_nested_html():
+    """Test that nested HTML-like tags are preserved."""
+    prompt = "Format this: <div><p>Hello</p><p>World</p></div>"  # two <p> inside one <div>
+
+    history = ChatHistory.from_rendered_prompt(prompt)
+
+    assert len(history.messages) == 1  # leading text and nested markup stay in one message
+    assert "Hello" in history.messages[0].content  # text of the first inner <p>
+    assert "World" in history.messages[0].content  # text of the second inner <p>
+    assert "<div>" in history.messages[0].content  # outer wrapper tag is kept
+    assert "<p>" in history.messages[0].content  # inner tag is kept
+
+
 def test_to_from_file(chat_history: ChatHistory, tmp_path):
     chat_history.add_system_message("You are an AI assistant")
     chat_history.add_user_message("What is the weather in Seattle?")