gh-121284: Fix email address header folding with parsed encoded-word (GH-122754)

Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' characters outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures that a parsed encoded-word containing specials is kept as an encoded-word while the header is refolded.

[Better fix from @bitdancer.]

---------

Co-authored-by: R David Murray <rdmurray@bitdance.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
author: Mike Edmunds <medmunds@gmail.com> 2025-03-18 04:07:17 -0700
committer: GitHub <noreply@github.com> 2025-03-18 12:07:17 +0100
commit: 295b53df2aa18deb625a7da41f7e4babfe6ef34b (patch)
tree: 4937e75b5446e11cdc0b8a677c98bc9f216f4da8 /Lib/email/_header_value_parser.py
parent: ab6333f7f56554bfd6c01eff567ddfb163a3dae6 (diff)
download: cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.tar.gz
cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.zip
1 files changed, 5 insertions, 5 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 3d845c09d41..9a51b943733 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1053,7 +1053,7 @@ def get_fws(value):
     fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
     return fws, newvalue
 
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
     """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 
     """
@@ -1092,7 +1092,7 @@ def get_encoded_word(value):
             ew.append(token)
             continue
         chars, *remainder = _wsp_splitter(text, 1)
-        vtext = ValueTerminal(chars, 'vtext')
+        vtext = ValueTerminal(chars, terminal_type)
         _validate_xtext(vtext)
         ew.append(vtext)
         text = ''.join(remainder)
@@ -1134,7 +1134,7 @@ def get_unstructured(value):
         valid_ew = True
         if value.startswith('=?'):
             try:
-                token, value = get_encoded_word(value)
+                token, value = get_encoded_word(value, 'utext')
             except _InvalidEwError:
                 valid_ew = False
             except errors.HeaderParseError:
@@ -1163,7 +1163,7 @@ def get_unstructured(value):
         # the parser to go in an infinite loop.
         if valid_ew and rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
-        vtext = ValueTerminal(tok, 'vtext')
+        vtext = ValueTerminal(tok, 'utext')
         _validate_xtext(vtext)
         unstructured.append(vtext)
         value = ''.join(remainder)
@@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             continue
         tstr = str(part)
         if not want_encoding:
-            if part.token_type == 'ptext':
+            if part.token_type in ('ptext', 'vtext'):
                 # Encode if tstr contains special characters.
                 want_encoding = not SPECIALSNL.isdisjoint(tstr)
             else:
author	Mike Edmunds <medmunds@gmail.com>	2025-03-18 04:07:17 -0700
committer	GitHub <noreply@github.com>	2025-03-18 12:07:17 +0100
commit	295b53df2aa18deb625a7da41f7e4babfe6ef34b (patch)
tree	4937e75b5446e11cdc0b8a677c98bc9f216f4da8 /Lib/email/_header_value_parser.py
parent	ab6333f7f56554bfd6c01eff567ddfb163a3dae6 (diff)
download	cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.tar.gz cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.zip