aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/email/_header_value_parser.py
diff options
context:
space:
mode:
author: Mike Edmunds <medmunds@gmail.com> 2025-03-18 04:07:17 -0700
committer: GitHub <noreply@github.com> 2025-03-18 12:07:17 +0100
commit: 295b53df2aa18deb625a7da41f7e4babfe6ef34b (patch)
tree: 4937e75b5446e11cdc0b8a677c98bc9f216f4da8 /Lib/email/_header_value_parser.py
parent: ab6333f7f56554bfd6c01eff567ddfb163a3dae6 (diff)
download: cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.tar.gz
cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.zip
gh-121284: Fix email address header folding with parsed encoded-word (GH-122754)
Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' chars outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures a parsed encoded-word that contains specials is kept as an encoded-word while the header is refolded.

[Better fix from @bitdancer.]

---------

Co-authored-by: R David Murray <rdmurray@bitdance.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- Lib/email/_header_value_parser.py | 10
1 files changed, 5 insertions, 5 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 3d845c09d41..9a51b943733 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1053,7 +1053,7 @@ def get_fws(value):
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
return fws, newvalue
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
"""
@@ -1092,7 +1092,7 @@ def get_encoded_word(value):
ew.append(token)
continue
chars, *remainder = _wsp_splitter(text, 1)
- vtext = ValueTerminal(chars, 'vtext')
+ vtext = ValueTerminal(chars, terminal_type)
_validate_xtext(vtext)
ew.append(vtext)
text = ''.join(remainder)
@@ -1134,7 +1134,7 @@ def get_unstructured(value):
valid_ew = True
if value.startswith('=?'):
try:
- token, value = get_encoded_word(value)
+ token, value = get_encoded_word(value, 'utext')
except _InvalidEwError:
valid_ew = False
except errors.HeaderParseError:
@@ -1163,7 +1163,7 @@ def get_unstructured(value):
# the parser to go in an infinite loop.
if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
- vtext = ValueTerminal(tok, 'vtext')
+ vtext = ValueTerminal(tok, 'utext')
_validate_xtext(vtext)
unstructured.append(vtext)
value = ''.join(remainder)
@@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy):
continue
tstr = str(part)
if not want_encoding:
- if part.token_type == 'ptext':
+ if part.token_type in ('ptext', 'vtext'):
# Encode if tstr contains special characters.
want_encoding = not SPECIALSNL.isdisjoint(tstr)
else: