microsoft · roli-lpci · Mar 1, 2026 · Mar 4, 2026 · Mar 14, 2026 · moonbox3
@@ -66,6 +66,7 @@ def locate_safe_reduction_index(
     target_count: int,
     threshold_count: int = 0,
     offset_count: int = 0,
+    has_system_message: bool = False,
 ) -> int | None:
     """Identify the index of the first message at or beyond the specified target_count.
 
@@ -83,11 +84,27 @@ def locate_safe_reduction_index(
         threshold_count: The threshold beyond target_count required to trigger reduction.
                          If total messages <= (target_count + threshold_count), no reduction occurs.
         offset_count: Optional number of messages to skip at the start (e.g. existing summary messages).
+        has_system_message: Whether the history contains a system message that will be preserved
+                           separately. When True, target_count is reduced by one to leave a slot
+                           for the system message that the caller re-adds after reduction.
 
     Returns:
         The index that identifies the starting point for a reduced history that does not orphan
         sensitive content. Returns None if reduction is not needed.
     """
+    # Adjust target_count to account for the system message that will be preserved separately.
+    # This matches the .NET SDK behavior.
+    if has_system_message:
+        target_count -= 1
+        if target_count <= 0:
+            logger.warning(
+                "target_count after accounting for system message is %d; reduction will keep only the system message.",
+                target_count,
+            )
+            # Reduce to just the system message: return an index past the end of history so the
+            # retained tail is empty. The caller will prepend the system message to that tail.
+            return len(history)
+
     total_count = len(history)
     threshold_index = total_count - (threshold_count or 0) - target_count
     if threshold_index <= offset_count:

@@ -26,6 +26,7 @@
     locate_safe_reduction_index,
     locate_summarization_boundary,
 )
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException
 from semantic_kernel.utils.feature_stage_decorator import experimental
 
@@ -89,19 +90,41 @@ async def reduce(self) -> Self | None:
 
         logger.info("Performing chat history summarization check...")
 
+        # Preserve system/developer messages so they are not lost during summarization.
+        # This matches the .NET SDK behavior and the truncation reducer.
+        # Only the first system/developer message is preserved; this mirrors .NET semantics.
+        # Exclude summary messages (which may have SYSTEM role) — they are generated content,
+        # not original system prompts.
+        system_message_index = next(
+            (
+                i
+                for i, msg in enumerate(history)
+                if msg.role in (AuthorRole.SYSTEM, AuthorRole.DEVELOPER) and not msg.metadata.get(SUMMARY_METADATA_KEY)
+            ),
+            -1,
+        )
+        system_message = history[system_message_index] if system_message_index >= 0 else None
+
         # 1. Identify where existing summary messages end
         insertion_point = locate_summarization_boundary(history)
         if insertion_point == len(history):
             # fallback fix: force boundary to something reasonable
             logger.warning("All messages are summaries, forcing boundary to 0.")
             insertion_point = 0
 
+        # Only adjust target_count if the system message would be truncated away.
+        # If the system message is already in the retained portion, no adjustment needed.
+        system_would_be_truncated = (
+            system_message is not None and system_message_index < len(history) - self.target_count
+        )
+
         # 2. Locate the safe reduction index
         truncation_index = locate_safe_reduction_index(
             history,
             self.target_count,
             self.threshold_count,
             offset_count=insertion_point,
+            has_system_message=system_would_be_truncated,
         )
         if truncation_index is None:
             logger.info("No valid truncation index found.")
@@ -138,7 +161,13 @@ async def reduce(self) -> Self | None:
                 keep_existing_summaries = history[:insertion_point]
 
             remainder = history[truncation_index:]
+
+            # Prepend the system/developer message if it was summarized away.
+            # Use identity comparison to avoid false matches from value-equal messages.
             new_history = [*keep_existing_summaries, summary_msg, *remainder]
+            if system_message is not None and not any(m is system_message for m in new_history):
+                new_history = [system_message, *new_history]
+
             self.messages = new_history
 
             return self
@@ -151,8 +180,6 @@ async def reduce(self) -> Self | None:
 
     async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageContent | None:
         """Use the ChatCompletion service to generate a single summary message."""
-        from semantic_kernel.contents.utils.author_role import AuthorRole
-
         chat_history = ChatHistory(messages=messages)
         execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_from_settings(
             PromptExecutionSettings()

@@ -15,9 +15,9 @@
 
 from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
 from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import (
-    extract_range,
     locate_safe_reduction_index,
 )
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.utils.feature_stage_decorator import experimental
 
 logger = logging.getLogger(__name__)
@@ -45,15 +45,42 @@ async def reduce(self) -> Self | None:
 
         logger.info("Performing chat history truncation check...")
 
-        truncation_index = locate_safe_reduction_index(history, self.target_count, self.threshold_count)
+        # Preserve system/developer messages so they are not lost during truncation.
+        # This matches the .NET SDK behavior where system messages are always retained.
+        # Only the first system/developer message is preserved; this mirrors .NET semantics.
+        system_message_index = next(
+            (i for i, msg in enumerate(history) if msg.role in (AuthorRole.SYSTEM, AuthorRole.DEVELOPER)),
+            -1,
+        )
+        system_message = history[system_message_index] if system_message_index >= 0 else None
+
+        # Only adjust target_count if the system message would be truncated away
+        # (i.e., it falls before the naive tail). If the system message is already in the
+        # retained portion, no adjustment is needed — it naturally occupies a slot.
+        system_would_be_truncated = (
+            system_message is not None and system_message_index < len(history) - self.target_count
+        )
+
+        truncation_index = locate_safe_reduction_index(
+            history,
+            self.target_count,
+            self.threshold_count,
+            has_system_message=system_would_be_truncated,
+        )
         if truncation_index is None:
             logger.info(
                 f"No truncation index found. Target count: {self.target_count}, Threshold: {self.threshold_count}"
             )
             return None
 
         logger.info(f"Truncating history to {truncation_index} messages.")
-        truncated_list = extract_range(history, start=truncation_index)
+        truncated_list = history[truncation_index:]
+
+        # Prepend the system/developer message if it was truncated away.
+        # Use identity comparison (is) to avoid false matches from value-equal messages.
+        if system_message is not None and all(msg is not system_message for msg in truncated_list):
+            truncated_list = [system_message, *truncated_list]
+
         self.messages = truncated_list
         return self
 

@@ -226,3 +226,89 @@ async def test_summarization_reducer_private_summarize(mock_service):
     actual_summary = await reducer._summarize(chat_messages)
     assert actual_summary is not None, "We should get a summary message back."
     assert actual_summary.content == "Mock Summary", "We expect the mock summary content."
+
+
+async def test_summarization_preserves_system_message(mock_service):
+    """Verify that the summarization reducer preserves system messages."""
+    messages = [
+        ChatMessageContent(role=AuthorRole.SYSTEM, content="Important system prompt"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says hello"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says even more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds yet again"),
+    ]
+
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0)
+    reducer.messages = messages
+
+    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Summary of conversation.")
+    mock_service.get_chat_message_content.return_value = summary_content
+    mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings()
+
+    result = await reducer.reduce()
+    assert result is not None
+    # System message must be preserved
+    roles = [msg.role for msg in result.messages]
+    assert AuthorRole.SYSTEM in roles, "System message was lost during summarization"
+    # System message should be first
+    assert result.messages[0].role == AuthorRole.SYSTEM
+    assert result.messages[0].content == "Important system prompt"
+
+
+async def test_summarization_does_not_preserve_summary_system_message(mock_service):
+    """A prior summary with SYSTEM role should NOT be treated as the system prompt to preserve."""
+    summary_sys = ChatMessageContent(
+        role=AuthorRole.SYSTEM, content="Previous summary", metadata={SUMMARY_METADATA_KEY: True}
+    )
+    messages = [
+        summary_sys,
+        ChatMessageContent(role=AuthorRole.USER, content="User says hello"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says even more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds yet again"),
+    ]
+
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0)
+    reducer.messages = messages
+
+    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="New summary.")
+    mock_service.get_chat_message_content.return_value = summary_content
+    mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings()
+
+    result = await reducer.reduce()
+    assert result is not None
+    # The old summary-system message should NOT be preserved as a "system prompt"
+    # It should be replaced by the new summary
+    for msg in result.messages:
+        if msg.role == AuthorRole.SYSTEM:
+            assert msg.metadata.get(SUMMARY_METADATA_KEY) is not True or msg is not summary_sys
+
+
+async def test_summarization_preserves_developer_message(mock_service):
+    """Verify that developer messages are preserved during summarization."""
+    messages = [
+        ChatMessageContent(role=AuthorRole.DEVELOPER, content="Developer instructions"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says hello"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says even more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds yet again"),
+    ]
+
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0)
+    reducer.messages = messages
+
+    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Summary of conversation.")
+    mock_service.get_chat_message_content.return_value = summary_content
+    mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings()
+
+    result = await reducer.reduce()
+    assert result is not None
+    # Developer message must be preserved
+    assert result.messages[0].role == AuthorRole.DEVELOPER
+    assert result.messages[0].content == "Developer instructions"