Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def locate_safe_reduction_index(
target_count: int,
threshold_count: int = 0,
offset_count: int = 0,
has_system_message: bool = False,
) -> int | None:
"""Identify the index of the first message at or beyond the specified target_count.

Expand All @@ -83,11 +84,27 @@ def locate_safe_reduction_index(
threshold_count: The threshold beyond target_count required to trigger reduction.
If total messages <= (target_count + threshold_count), no reduction occurs.
offset_count: Optional number of messages to skip at the start (e.g. existing summary messages).
has_system_message: Whether the history contains a system message that will be preserved
separately. When True, the target_count is adjusted to account for the
system message being re-added after reduction.

Returns:
The index that identifies the starting point for a reduced history that does not orphan
sensitive content. Returns None if reduction is not needed.
"""
# Adjust target_count to account for the system message that will be preserved separately.
Comment on lines 92 to +95
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When has_system_message=True and target_count is 1, this decrements target_count to 0 and returns None (no reduction). But the history may contain many messages that should be reduced — the reducer should produce a result containing only the system message. Returning None here causes the truncation reducer to silently skip reduction entirely. Consider handling target_count == 0 as 'truncate everything' (return len(history)) and letting the caller prepend the system message, or handle this edge case in the truncation reducer itself.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When target_count is 1 and has_system_message is True, this permanently prevents any reduction. Consider emitting a warning log so callers can detect this silent no-op, since an ever-growing history could eventually cause OOM or token-limit failures.

Suggested change
# Adjust target_count to account for the system message that will be preserved separately.
target_count -= 1
if target_count <= 0:
import logging
logging.getLogger(__name__).warning(
"target_count after accounting for system message is %d; reduction disabled.", target_count
)
return None # Cannot reduce further; only system message would remain

# This matches the .NET SDK behavior.
if has_system_message:
target_count -= 1
if target_count <= 0:
logger.warning(
"target_count after accounting for system message is %d; reduction will keep only the system message.",
target_count,
)
# Reduce to just the system message — return index past all non-system messages.
# The caller will prepend the system message to the empty/minimal tail.
return len(history)

total_count = len(history)
threshold_index = total_count - (threshold_count or 0) - target_count
if threshold_index <= offset_count:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
locate_safe_reduction_index,
locate_summarization_boundary,
)
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException
from semantic_kernel.utils.feature_stage_decorator import experimental

Expand Down Expand Up @@ -89,19 +90,41 @@ async def reduce(self) -> Self | None:

logger.info("Performing chat history summarization check...")

# Preserve system/developer messages so they are not lost during summarization.
# This matches the .NET SDK behavior and the truncation reducer.
# Only the first system/developer message is preserved; this mirrors .NET semantics.
# Exclude summary messages (which may have SYSTEM role) — they are generated content,
# not original system prompts.
system_message_index = next(
(
i
for i, msg in enumerate(history)
if msg.role in (AuthorRole.SYSTEM, AuthorRole.DEVELOPER) and not msg.metadata.get(SUMMARY_METADATA_KEY)
),
-1,
)
system_message = history[system_message_index] if system_message_index >= 0 else None

# 1. Identify where existing summary messages end
insertion_point = locate_summarization_boundary(history)
if insertion_point == len(history):
# fallback fix: force boundary to something reasonable
logger.warning("All messages are summaries, forcing boundary to 0.")
insertion_point = 0

# Only adjust target_count if the system message would be truncated away.
# If the system message is already in the retained portion, no adjustment needed.
system_would_be_truncated = (
system_message is not None and system_message_index < len(history) - self.target_count
)

# 2. Locate the safe reduction index
truncation_index = locate_safe_reduction_index(
history,
self.target_count,
self.threshold_count,
offset_count=insertion_point,
has_system_message=system_would_be_truncated,
)
if truncation_index is None:
logger.info("No valid truncation index found.")
Expand Down Expand Up @@ -138,7 +161,13 @@ async def reduce(self) -> Self | None:
keep_existing_summaries = history[:insertion_point]

remainder = history[truncation_index:]

# Prepend the system/developer message if it was summarized away.
# Use identity comparison to avoid false matches from value-equal messages.
new_history = [*keep_existing_summaries, summary_msg, *remainder]
if system_message is not None and not any(m is system_message for m in new_history):
new_history = [system_message, *new_history]

self.messages = new_history

return self
Expand All @@ -151,8 +180,6 @@ async def reduce(self) -> Self | None:

async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageContent | None:
"""Use the ChatCompletion service to generate a single summary message."""
from semantic_kernel.contents.utils.author_role import AuthorRole

chat_history = ChatHistory(messages=messages)
execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_from_settings(
PromptExecutionSettings()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@

from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import (
extract_range,
locate_safe_reduction_index,
)
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.utils.feature_stage_decorator import experimental

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -45,15 +45,42 @@ async def reduce(self) -> Self | None:

logger.info("Performing chat history truncation check...")

truncation_index = locate_safe_reduction_index(history, self.target_count, self.threshold_count)
# Preserve system/developer messages so they are not lost during truncation.
# This matches the .NET SDK behavior where system messages are always retained.
# Only the first system/developer message is preserved; this mirrors .NET semantics.
system_message_index = next(
(i for i, msg in enumerate(history) if msg.role in (AuthorRole.SYSTEM, AuthorRole.DEVELOPER)),
-1,
)
system_message = history[system_message_index] if system_message_index >= 0 else None

# Only adjust target_count if the system message would be truncated away
# (i.e., it falls before the naive tail). If the system message is already in the
# retained portion, no adjustment is needed — it naturally occupies a slot.
system_would_be_truncated = (
system_message is not None and system_message_index < len(history) - self.target_count
)

truncation_index = locate_safe_reduction_index(
history,
self.target_count,
self.threshold_count,
has_system_message=system_would_be_truncated,
)
if truncation_index is None:
logger.info(
f"No truncation index found. Target count: {self.target_count}, Threshold: {self.threshold_count}"
)
return None

logger.info(f"Truncating history to {truncation_index} messages.")
truncated_list = extract_range(history, start=truncation_index)
truncated_list = history[truncation_index:]

# Prepend the system/developer message if it was truncated away.
# Use identity comparison (is) to avoid false matches from value-equal messages.
if system_message is not None and all(msg is not system_message for msg in truncated_list):
truncated_list = [system_message, *truncated_list]

self.messages = truncated_list
return self

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,3 +226,89 @@ async def test_summarization_reducer_private_summarize(mock_service):
actual_summary = await reducer._summarize(chat_messages)
assert actual_summary is not None, "We should get a summary message back."
assert actual_summary.content == "Mock Summary", "We expect the mock summary content."


async def test_summarization_preserves_system_message(mock_service):
    """Check that a leading SYSTEM message is still present, and first, after summarization.

    The history holds one system prompt followed by six alternating user/assistant
    turns. The reducer is configured with ``target_count=3`` and ``threshold_count=0``,
    and the mocked service returns a fixed summary message.
    """
    conversation = [
        (AuthorRole.SYSTEM, "Important system prompt"),
        (AuthorRole.USER, "User says hello"),
        (AuthorRole.ASSISTANT, "Assistant responds"),
        (AuthorRole.USER, "User says more"),
        (AuthorRole.ASSISTANT, "Assistant responds again"),
        (AuthorRole.USER, "User says even more"),
        (AuthorRole.ASSISTANT, "Assistant responds yet again"),
    ]
    messages = [ChatMessageContent(role=role, content=text) for role, text in conversation]

    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0)
    reducer.messages = messages

    # Stub out the service so the summarization call yields a canned reply.
    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Summary of conversation.")
    mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings()
    mock_service.get_chat_message_content.return_value = summary_content

    result = await reducer.reduce()
    assert result is not None

    # The system prompt must survive the reduction...
    system_present = any(msg.role == AuthorRole.SYSTEM for msg in result.messages)
    assert system_present, "System message was lost during summarization"

    # ...and it must sit at the head of the reduced history, unchanged.
    head = result.messages[0]
    assert head.role == AuthorRole.SYSTEM
    assert head.content == "Important system prompt"


async def test_summarization_does_not_preserve_summary_system_message(mock_service):
    """Ensure an earlier SYSTEM-role summary is not kept as if it were the system prompt.

    The history starts with a SYSTEM message flagged via ``SUMMARY_METADATA_KEY``,
    followed by six alternating user/assistant turns. After reducing with
    ``target_count=3`` and ``threshold_count=0``, no SYSTEM message in the result
    may be both summary-flagged and the very same object as the original summary.
    """
    summary_sys = ChatMessageContent(
        role=AuthorRole.SYSTEM,
        content="Previous summary",
        metadata={SUMMARY_METADATA_KEY: True},
    )
    turns = [
        (AuthorRole.USER, "User says hello"),
        (AuthorRole.ASSISTANT, "Assistant responds"),
        (AuthorRole.USER, "User says more"),
        (AuthorRole.ASSISTANT, "Assistant responds again"),
        (AuthorRole.USER, "User says even more"),
        (AuthorRole.ASSISTANT, "Assistant responds yet again"),
    ]
    messages = [summary_sys, *(ChatMessageContent(role=role, content=text) for role, text in turns)]

    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0)
    reducer.messages = messages

    # Stub out the service so the summarization call yields a canned reply.
    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="New summary.")
    mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings()
    mock_service.get_chat_message_content.return_value = summary_content

    result = await reducer.reduce()
    assert result is not None

    # The stale summary must not be carried over in the role of a preserved
    # "system prompt"; the freshly generated summary takes its place.
    system_messages = [msg for msg in result.messages if msg.role == AuthorRole.SYSTEM]
    for candidate in system_messages:
        flagged_as_summary = candidate.metadata.get(SUMMARY_METADATA_KEY) is True
        assert not (flagged_as_summary and candidate is summary_sys)


async def test_summarization_preserves_developer_message(mock_service):
    """Check that a leading DEVELOPER message stays first after summarization.

    The history holds one developer message followed by six alternating
    user/assistant turns. The reducer is configured with ``target_count=3`` and
    ``threshold_count=0``, and the mocked service returns a fixed summary message.
    """
    conversation = [
        (AuthorRole.DEVELOPER, "Developer instructions"),
        (AuthorRole.USER, "User says hello"),
        (AuthorRole.ASSISTANT, "Assistant responds"),
        (AuthorRole.USER, "User says more"),
        (AuthorRole.ASSISTANT, "Assistant responds again"),
        (AuthorRole.USER, "User says even more"),
        (AuthorRole.ASSISTANT, "Assistant responds yet again"),
    ]
    messages = [ChatMessageContent(role=role, content=text) for role, text in conversation]

    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0)
    reducer.messages = messages

    # Stub out the service so the summarization call yields a canned reply.
    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Summary of conversation.")
    mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings()
    mock_service.get_chat_message_content.return_value = summary_content

    result = await reducer.reduce()
    assert result is not None

    # The developer message must remain at the head of the reduced history.
    head = result.messages[0]
    assert head.role == AuthorRole.DEVELOPER
    assert head.content == "Developer instructions"
Loading
Loading