Skip to content

Commit ed81971

Browse files
RanKKIblurb-it[bot]picnixz
authored
gh-124452: Fix header mismatches when folding/unfolding with email message (#125919)
The header-folder of the new email API has a long standing known buglet where if the first token is longer than max_line_length, it puts that token on the next line. It turns out there is also a *parsing* bug when parsing such a header: the space prefixing that first, non-empty line gets preserved and tacked on to the start of the header value, which is not the expected behavior per the RFCs. The bug arises from the fact that the parser assumed that there would be at least one token on the line with the header, which is going to be true for probably every email producer other than the python email library with its folding buglet. Clearly, though, this is a case that needs to be handled correctly. The fix is simple: strip the blanks off the start of the whole value, not just the first physical line of the value. Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Bénédikt Tran <[email protected]>
1 parent 2313f84 commit ed81971

File tree

4 files changed

+56
-6
lines changed

4 files changed

+56
-6
lines changed

Lib/email/_policybase.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,12 @@ def header_source_parse(self, sourcelines):
302302
"""+
303303
The name is parsed as everything up to the ':' and returned unmodified.
304304
The value is determined by stripping leading whitespace off the
305-
remainder of the first line, joining all subsequent lines together, and
305+
remainder of the first line joined with all subsequent lines, and
306306
stripping any trailing carriage return or linefeed characters.
307307
308308
"""
309309
name, value = sourcelines[0].split(':', 1)
310-
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
310+
value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n')
311311
return (name, value.rstrip('\r\n'))
312312

313313
def header_store_parse(self, name, value):

Lib/email/policy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,13 @@ def header_source_parse(self, sourcelines):
119119
"""+
120120
The name is parsed as everything up to the ':' and returned unmodified.
121121
The value is determined by stripping leading whitespace off the
122-
remainder of the first line, joining all subsequent lines together, and
122+
remainder of the first line joined with all subsequent lines, and
123123
stripping any trailing carriage return or linefeed characters. (This
124124
is the same as Compat32).
125125
126126
"""
127127
name, value = sourcelines[0].split(':', 1)
128-
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
128+
value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n')
129129
return (name, value.rstrip('\r\n'))
130130

131131
def header_store_parse(self, name, value):

Lib/test/test_email/test_message.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import unittest
21
import textwrap
3-
from email import policy, message_from_string
2+
import unittest
3+
from email import message_from_bytes, message_from_string, policy
44
from email.message import EmailMessage, MIMEPart
55
from test.test_email import TestEmailBase, parameterize
66

@@ -958,6 +958,52 @@ def test_folding_with_utf8_encoding_8(self):
958958
b'123456789-123456789\n 123456789 Hello '
959959
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
960960

961+
def test_folding_with_short_nospace_1(self):
962+
# bpo-36520
963+
#
964+
# Round-trip a short header value that contains no whitespace:
965+
# serializing and re-parsing it must return the value unchanged.
966+
967+
m = EmailMessage(policy.default)
968+
m['Message-ID'] = '123456789' * 3
969+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
970+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
971+
972+
def test_folding_with_long_nospace_default_policy_1(self):
973+
# Fixed: https://github.com/python/cpython/issues/124452
974+
#
975+
# When the value is too long, it should be converted back
976+
# to its original form without any modifications.
977+
978+
m = EmailMessage(policy.default)
979+
message = '123456789' * 10
980+
m['Message-ID'] = message
981+
self.assertEqual(m.as_bytes(),
982+
f'Message-ID:\n {message}\n\n'.encode())
983+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
984+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
985+
986+
def test_folding_with_long_nospace_compat32_policy_1(self):
987+
m = EmailMessage(policy.compat32)
988+
message = '123456789' * 10
989+
m['Message-ID'] = message
990+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
991+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
992+
993+
def test_folding_with_long_nospace_smtp_policy_1(self):
994+
m = EmailMessage(policy.SMTP)
995+
message = '123456789' * 10
996+
m['Message-ID'] = message
997+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
998+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
999+
1000+
def test_folding_with_long_nospace_http_policy_1(self):
1001+
m = EmailMessage(policy.HTTP)
1002+
message = '123456789' * 10
1003+
m['Message-ID'] = message
1004+
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
1005+
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
1006+
9611007
def test_get_body_malformed(self):
9621008
"""test for bpo-42892"""
9631009
msg = textwrap.dedent("""\
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix an issue in :meth:`email.policy.EmailPolicy.header_source_parse` and
2+
:meth:`email.policy.Compat32.header_source_parse` that introduced spurious
3+
leading whitespace into header values when the header includes a newline
4+
character after the header name delimiter (``:``) and before the value.

0 commit comments

Comments
 (0)