microsoft · gvanrossum · Jan 19, 2026 · Jan 19, 2026
diff --git a/src/typeagent/emails/email_import.py b/src/typeagent/emails/email_import.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT License.
 
 from email import message_from_string
+from email.header import decode_header, make_header
 from email.message import Message
 from email.utils import parsedate_to_datetime
 from pathlib import Path
@@ -11,6 +12,30 @@
 from .email_message import EmailMessage, EmailMessageMeta
 
 
+def decode_encoded_words(value: str) -> str:
+    """Decode text that may contain RFC 2047 encoded words.
+
+    Args:
+        value: Raw header text, e.g. a Subject or From value.
+
+    Returns:
+        The decoded text, "" when value is empty, or value unchanged when
+        it cannot be decoded.
+    """
+    # Local import: only this function needs the exception type.
+    from email.errors import HeaderParseError
+
+    if not value:
+        return ""
+
+    try:
+        return str(make_header(decode_header(value)))
+    except (HeaderParseError, LookupError, UnicodeError):
+        # Malformed base64, an unknown charset, or bytes invalid in the
+        # declared charset: keep the raw text rather than abort the import.
+        return value
+
+
 def import_emails_from_dir(
     dir_path: str, max_chunk_length: int = 4096
 ) -> Iterable[EmailMessage]:
@@ -78,7 +87,7 @@ def import_email_message(msg: Message, max_chunk_length: int) -> EmailMessage:
         body = get_last_response_in_thread(body)
 
     if email_meta.subject is not None:
-        body = email_meta.subject + "\n\n" + body
+        body = decode_encoded_words(email_meta.subject) + "\n\n" + body
 
     body_chunks = _text_to_chunks(body, max_chunk_length)
     email: EmailMessage = EmailMessage(

diff --git a/tools/ingest_email.py b/tools/ingest_email.py
@@ -23,13 +23,12 @@
 
 import argparse
 import asyncio
-from email.header import decode_header
 from pathlib import Path
 import sys
 import time
 
 from typeagent.aitools import utils
-from typeagent.emails.email_import import import_email_from_file
+from typeagent.emails.email_import import decode_encoded_words, import_email_from_file
 from typeagent.emails.email_memory import EmailMemory
 from typeagent.emails.email_message import EmailMessage
 from typeagent.knowpro.convsettings import ConversationSettings
@@ -91,21 +90,6 @@ def collect_email_files(paths: list[str], verbose: bool) -> list[Path]:
     return email_files
 
 
-def decode_encoded_word(s: str) -> str:
-    """Decode an RFC 2047 encoded string."""
-    if "=?utf-8?" not in s:
-        return s  # Fast path for common case
-    decoded_parts = decode_header(s)
-    return "".join(
-        (
-            part.decode(encoding or "utf-8", errors="replace")
-            if isinstance(part, bytes)
-            else part
-        )
-        for part, encoding in decoded_parts
-    )
-
-
 async def ingest_emails(
     paths: list[str],
     database: str,
@@ -194,17 +178,16 @@ async def ingest_emails(
                     print()
 
             if verbose:
-                print(f"    From: {email.metadata.sender}")
+                print(f"    From: {decode_encoded_words(email.metadata.sender)}")
                 if email.metadata.subject:
                     print(
-                        f"    Subject: {decode_encoded_word(email.metadata.subject).replace('\n', '\\n')}"
+                        f"    Subject: {decode_encoded_words(email.metadata.subject).replace('\n', '\\n')}"
                     )
                 print(f"    Date: {email.timestamp}")
                 print(f"    Body chunks: {len(email.text_chunks)}")
                 for chunk in email.text_chunks:
                     # Show first N chars of each decoded chunk
                     N = 150
-                    chunk = decode_encoded_word(chunk)
                     preview = repr(chunk[: N + 1])[1:-1]
                     if len(preview) > N:
                         preview = preview[: N - 3] + "..."