
Commit 4b009fb

lilyydu and lilydu authored
[PY] feat: Streaming + Sample (#2141)
## Linked issues closes: #1967, #1970 ## Details - Added streaming support for Python with an associated sample. As ChefBot does not exist in Python, streaming is demonstrated through the ListBot instead. - Added Powered by AI features for the final chunk (feedback loop, citations, sensitivity label, gen by AI label) - Designed a custom `PromptCompletionModelEmitter` class to manage streaming events and handlers - **Feedback Loop:** The flag is passed in slightly differently from JS and C#. This is because the AI class was never being passed in as a method param for `Planner.continue_task `. Adding this change now as an optional param will lead to compiling errors for all developers with custom planners (parent class needs to be updated). This also leads to a couple of cyclical dependencies. Instead of internally piping this flag, it is now exposed as an _option_ in the `ActionPlanner`. This is (a) more obvious to configure, (b) will not introduce a sudden breaking change to non-streaming developers, and (c) provides more control for the developer. Although this flag can deviate from the one set in the `AI` class, it is not possible for both flows to run at once, so there won't be any conflicts. - **Citations and Sensitivity Label**: This will work slightly differently than the previous non-streaming Plan flow. Citations and its respective sensitivity labels are added per each text chunk queued. However, these will only be rendered in the final message (when the full message has been received). Rather than exposing the `SensitivityUsageInfo` object as an override on the `PredictedSayCommand`, the label can now be directly set as `usageInfo` in the `AIEntity` object along with the AIGenerated label and the citations. Additional items for parity: - Added temporary 1.5 second buffer to adhere to 1RPS BE service requirement. Consequently, if the message is small, the message will not look like it is being streamed. Recommended command to visualize the feature is "tell me a story and render this as a large text chunk". - Added reject/catch handling for errors - Added entities metadata to match GA requirements. This will log a few warnings from the Botbuilder side, until it is added on their side, in a few months. **screenshots**: ![image](https://github.com/user-attachments/assets/4e6c1606-dfa0-49f0-a652-66174eb7c9be) ## Attestation Checklist - [x] My code follows the style guidelines of this project - I have checked for/fixed spelling, linting, and other errors - I have commented my code for clarity - I have made corresponding changes to the documentation (updating the doc strings in the code is sufficient) - My changes generate no new warnings - I have added tests that validates my changes, and provides sufficient test coverage. I have tested with: - Local testing - E2E testing in Teams - New and existing unit tests pass locally with my changes --------- Co-authored-by: lilydu <[email protected]>
1 parent 0dbbc8a commit 4b009fb
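The description above leans on the `StreamingResponse` lifecycle that the new `LLMClient` wiring drives: an informative update, queued text chunks (each optionally carrying citations), the Powered by AI labels, and a final `end_stream()` call. The sketch below restates that flow on its own, using only methods that appear in the diffs further down; the wrapper coroutine and its name are illustrative, not part of this commit.

```python
from botbuilder.core import TurnContext

from teams.streaming.streaming_response import StreamingResponse


async def stream_lifecycle_sketch(context: TurnContext) -> None:
    streamer = StreamingResponse(context)

    # Powered by AI features that render on the final chunk
    streamer.set_feedback_loop(True)          # thumbs up / thumbs down buttons
    streamer.set_generated_by_ai_label(True)  # "Generated by AI" label

    # Informative message shown while the response is still streaming
    streamer.queue_informative_update("Loading streaming results...")

    # Citations are attached per queued chunk but only render with the final message
    streamer.queue_text_chunk("First part of the answer...", None)
    streamer.queue_text_chunk("...and the rest.", None)

    # Final chunk: full message plus labels, citations, and any attachments
    await streamer.end_stream()
```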


55 files changed: +5819 -11 lines changed

getting-started/CONCEPTS/STREAMING.md

Lines changed: 45 additions & 2 deletions
````diff
@@ -34,6 +34,7 @@ There are two parts to streaming:
 ## Sample Bots
 - [C# Streaming ChefBot](https://github.com/microsoft/teams-ai/tree/main/dotnet/samples/04.ai.g.teamsChefBot-streaming)
 - [JS Streaming ChefBot](https://github.com/microsoft/teams-ai/tree/main/js/samples/04.ai-apps/i.teamsChefBot-streaming)
+- [Python Streaming ListBot](https://github.com/microsoft/teams-ai/tree/main/python/samples/04.ai.h.chainedActions.listBot-streaming)
 
 ## Streaming Response Class
 The `StreamingResponse` class is the helper class for streaming responses to the client. The class is used to send a series of updates to the client in a single response. If you are using your own custom model, you can directly instantiate and manage this class to stream responses.
@@ -52,7 +53,8 @@ Once `endStream()` is called, the stream is considered ended and no further upda
 ### Current Limitations:
 - Streaming is only available in 1:1 chats.
 - SendActivity requests are restricted to 1 RPS. Our SDK buffers to 1.5 seconds.
-- For Powered by AI features, only the Feedback Loop and Generated by AI Label is currently supported.
+- For Powered by AI features, Citations, Sensitivity Label, Feedback Loop and Generated by AI Label are supported in the final chunk.
+- Citations are set per each text chunk queued.
 - Only rich text can be streamed.
 - Due to future GA protocol changes, the `channelData` metadata must be included in the `entities` object as well.
 - Only one informative message can be set. This is reused for each message.
@@ -74,7 +76,8 @@ You can configure streaming with your bot by following these steps:
 
 #### Optional additions:
 - Set the informative message in the `ActionPlanner` declaration via the `StartStreamingMessage` config.
-- As previously, set the feedback loop toggle in the `AIOptions` object in the `app` declaration and specify a handler.
+- As previously, set the feedback loop toggle in the `AIOptions` object in the `app` declaration and specify a handler.
+  - For *Python* specifically, the toggle also needs to be set in the `ActionPlannerOptions` object.
 - Set attachments in the final chunk via the `EndStreamHandler` in the `ActionPlanner` declaration.
 
 #### C#
@@ -158,6 +161,46 @@ const planner = new ActionPlanner({
 });
 ```
 
+### Python
+
+```python
+model = OpenAIModel(
+    OpenAIModelOptions(api_key=config.OPENAI_KEY, default_model="gpt-4o", stream=True)
+)
+
+def end_stream_handler(
+    context: TurnContext,
+    state: MemoryBase,
+    response: PromptResponse[str],
+    streamer: StreamingResponse,
+):
+    if not streamer:
+        return
+
+    card = CardFactory.adaptive_card(
+        {
+            "$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
+            "version": "1.6",
+            "type": "AdaptiveCard",
+            "body": [{"type": "TextBlock", "wrap": True, "text": streamer.message}],
+        }
+    )
+
+    streamer.set_attachments([card])
+
+planner = ActionPlanner(
+    ActionPlannerOptions(
+        model=model,
+        prompts=prompts,
+        default_prompt="tools",
+        enable_feedback_loop=True,  # Enable the feedback loop
+        start_streaming_message="Loading streaming results...",  # Set the informative message
+        end_stream_handler=end_stream_handler,  # Set the final chunk handler
+    )
+)
+
+```
+
 ---
 
 ## Return to other major section topics:
````
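The "Optional additions" bullets above call out that Python needs the feedback-loop toggle in two places: the `AIOptions` passed to the app and the `ActionPlannerOptions` shown in the new Python sample. A condensed sketch of setting both, assuming `model` and `prompts` are defined as in the sample; the exact `AIOptions` fields used by your bot may differ:

```python
from teams.ai import AIOptions
from teams.ai.planners import ActionPlanner, ActionPlannerOptions

# Python-specific: the toggle also lives on the planner options...
planner = ActionPlanner(
    ActionPlannerOptions(
        model=model,                # an OpenAIModel created with stream=True
        prompts=prompts,            # your PromptManager
        default_prompt="tools",
        enable_feedback_loop=True,  # feedback loop for streamed responses
    )
)

# ...and, as before, on the AIOptions wired into the app declaration.
ai_options = AIOptions(planner=planner, enable_feedback_loop=True)
```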

python/packages/ai/teams/ai/citations/citations.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -22,6 +22,7 @@ class AIEntity(Entity):
         "id_": {"key": "@id", "type": "str"},
         "additional_type": {"key": "additionalType", "type": "[str]"},
         "citation": {"key": "citation", "type": "[ClientCitation]"},
+        "usage_info": {"key": "usageInfo", "type": "SensitivityUsageInfo"},
     }
 
     additional_type: Optional[list[str]]
@@ -30,6 +31,7 @@ class AIEntity(Entity):
     type_: str = "Message"
     context_: str = "https://schema.org"
     id_: str = ""
+    usage_info: Optional[SensitivityUsageInfo] = field(default=None)
 
 
 @dataclass
```

python/packages/ai/teams/ai/clients/llm_client.py

Lines changed: 112 additions & 1 deletion
```diff
@@ -12,7 +12,14 @@
 from botbuilder.core import TurnContext
 
 from ...state import Memory, MemoryBase
-from ..models import PromptCompletionModel, PromptResponse
+from ...streaming.prompt_chunk import PromptChunk
+from ...streaming.streaming_response import StreamingResponse
+from ..models import (
+    PromptCompletionModel,
+    PromptResponse,
+    ResponseReceivedHandler,
+    StreamHandlerTypes,
+)
 from ..prompts import (
     ConversationHistorySection,
     Message,
@@ -68,13 +75,29 @@ class LLMClientOptions:
     Optional. When set the model will log requests
     """
 
+    start_streaming_message: Optional[str] = ""
+    """
+    Optional message to send at the start of a streaming response.
+    """
+
+    end_stream_handler: Optional[ResponseReceivedHandler] = None
+    """
+    Optional handler to run when a stream is about to conclude.
+    """
+
+    enable_feedback_loop: Optional[bool] = False
+    "Optional. Enables the Teams thumbs up or down buttons."
+
 
 class LLMClient:
     """
     LLMClient class that's used to complete prompts.
     """
 
     _options: LLMClientOptions
+    _start_streaming_message: Optional[str] = ""
+    _end_stream_handler: Optional[ResponseReceivedHandler] = None
+    _enable_feedback_loop: Optional[bool] = False
 
     @property
     def options(self) -> LLMClientOptions:
@@ -89,6 +112,9 @@ def __init__(self, options: LLMClientOptions) -> None:
         """
 
         self._options = options
+        self._start_streaming_message = options.start_streaming_message
+        self._end_stream_handler = options.end_stream_handler
+        self._enable_feedback_loop = options.enable_feedback_loop
 
     async def complete_prompt(
         self,
@@ -112,6 +138,70 @@ async def complete_prompt(
 
         remaining_attempts = remaining_attempts or self._options.max_repair_attempts
 
+        # Define event handlers
+        is_streaming = False
+        streamer: Optional[StreamingResponse] = None
+
+        def before_completion(
+            ctx: TurnContext,
+            memory: MemoryBase,
+            functions: PromptFunctions,
+            tokenizer: Tokenizer,
+            template: PromptTemplate,
+            streaming: bool,
+        ) -> None:
+            # pylint: disable=unused-argument
+            # Ignore events for other contexts
+            if context != ctx:
+                return
+
+            # Check for a streaming response
+            if streaming:
+                nonlocal is_streaming
+                is_streaming = True
+
+                nonlocal streamer
+                streamer = StreamingResponse(context)
+                memory.set("temp.streamer", streamer)
+
+                if self._enable_feedback_loop is not None:
+                    streamer.set_feedback_loop(self._enable_feedback_loop)
+
+                streamer.set_generated_by_ai_label(True)
+
+                if self._start_streaming_message:
+                    streamer.queue_informative_update(self._start_streaming_message)
+
+        def chunk_received(
+            ctx: TurnContext,
+            memory: MemoryBase,
+            chunk: PromptChunk,
+        ) -> None:
+            # pylint: disable=unused-argument
+            nonlocal streamer
+            if (context != ctx) or (streamer is None):
+                return
+
+            text = chunk.delta.content if (chunk.delta and chunk.delta.content) else ""
+            citations = (
+                chunk.delta.context.citations if (chunk.delta and chunk.delta.context) else None
+            )
+
+            if len(text) > 0:
+                streamer.queue_text_chunk(text, citations)
+
+        # Subscribe to model events
+        if self._options.model.events is not None:
+            self._options.model.events.subscribe(
+                StreamHandlerTypes.BEFORE_COMPLETION, before_completion
+            )
+            self._options.model.events.subscribe(StreamHandlerTypes.CHUNK_RECEIVED, chunk_received)
+
+            if self._end_stream_handler is not None:
+                self._options.model.events.subscribe(
+                    StreamHandlerTypes.RESPONSE_RECEIVED, self._end_stream_handler
+                )
+
         try:
             if remaining_attempts <= 0:
                 return PromptResponse(
@@ -187,9 +277,30 @@ async def complete_prompt(
 
             self._add_message_to_history(memory, self._options.history_variable, res.input)
             self._add_message_to_history(memory, self._options.history_variable, res.message)
+
+            if is_streaming and res.status == "success":
+                # Delete message from response to avoid sending it twice
+                res.message = None
+
+            if streamer is not None:
+                await streamer.end_stream()
             return res
         except Exception as err: # pylint: disable=broad-except
             return PromptResponse(status="error", error=str(err))
+        finally:
+            # Unsubscribe from model events
+            if self._options.model.events is not None:
+                self._options.model.events.unsubscribe(
+                    StreamHandlerTypes.BEFORE_COMPLETION, before_completion
+                )
+                self._options.model.events.unsubscribe(
+                    StreamHandlerTypes.CHUNK_RECEIVED, chunk_received
+                )
+
+                if self._end_stream_handler is not None:
+                    self._options.model.events.unsubscribe(
+                        StreamHandlerTypes.RESPONSE_RECEIVED, self._end_stream_handler
+                    )
 
     def _add_message_to_history(
         self, memory: MemoryBase, variable: str, messages: Union[Message[Any], List[Message[Any]]]
```
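The subscribe/unsubscribe calls above are the whole integration surface between `LLMClient` and the new `PromptCompletionModelEmitter`, so the same pattern can be used outside the client to observe a streaming model directly. A minimal sketch, assuming an `OpenAIModel` created with `stream=True`; the handler body and helper function are illustrative:

```python
from botbuilder.core import TurnContext

from teams.ai.models import OpenAIModel, StreamHandlerTypes
from teams.state import MemoryBase
from teams.streaming.prompt_chunk import PromptChunk


def attach_chunk_logger(model: OpenAIModel):
    """Subscribe a CHUNK_RECEIVED handler and return a callable that detaches it."""

    def on_chunk(ctx: TurnContext, memory: MemoryBase, chunk: PromptChunk) -> None:
        # Each chunk may carry delta text; citations, if any, ride on chunk.delta.context.
        if chunk.delta and chunk.delta.content:
            print(chunk.delta.content, end="", flush=True)

    if model.events is not None:
        model.events.subscribe(StreamHandlerTypes.CHUNK_RECEIVED, on_chunk)

    def detach() -> None:
        if model.events is not None:
            model.events.unsubscribe(StreamHandlerTypes.CHUNK_RECEIVED, on_chunk)

    return detach
```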

python/packages/ai/teams/ai/models/__init__.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -3,9 +3,16 @@
 Licensed under the MIT License.
 """
 
+from ...streaming import (
+    BeforeCompletionHandler,
+    ChunkReceivedHandler,
+    ResponseReceivedHandler,
+    StreamHandlerTypes,
+)
 from .chat_completion_action import ChatCompletionAction
 from .openai_model import AzureOpenAIModelOptions, OpenAIModel, OpenAIModelOptions
 from .prompt_completion_model import PromptCompletionModel
+from .prompt_completion_model_emitter import PromptCompletionModelEmitter
 from .prompt_response import PromptResponse, PromptResponseStatus
 
 __all__ = [
@@ -16,4 +23,9 @@
     "PromptCompletionModel",
     "PromptResponse",
     "PromptResponseStatus",
+    "PromptCompletionModelEmitter",
+    "BeforeCompletionHandler",
+    "ChunkReceivedHandler",
+    "ResponseReceivedHandler",
+    "StreamHandlerTypes",
 ]
```
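With these re-exports in place, consumers can pull the streaming handler types and the emitter from `teams.ai.models` rather than reaching into the `teams.streaming` submodules directly, for example:

```python
from teams.ai.models import (
    BeforeCompletionHandler,
    ChunkReceivedHandler,
    PromptCompletionModelEmitter,
    ResponseReceivedHandler,
    StreamHandlerTypes,
)
```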
