diff options
author | Mike Edmunds <medmunds@gmail.com> | 2025-03-18 04:07:17 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-18 12:07:17 +0100 |
commit | 295b53df2aa18deb625a7da41f7e4babfe6ef34b (patch) | |
tree | 4937e75b5446e11cdc0b8a677c98bc9f216f4da8 /Lib/email/_header_value_parser.py | |
parent | ab6333f7f56554bfd6c01eff567ddfb163a3dae6 (diff) | |
download | cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.tar.gz cpython-295b53df2aa18deb625a7da41f7e4babfe6ef34b.zip |
gh-121284: Fix email address header folding with parsed encoded-word (GH-122754)
Email generators using email.policy.default may convert an RFC 2047
encoded-word to unencoded form during header refolding. In a structured
header, this could allow 'specials' chars outside a quoted-string,
leading to invalid address headers and enabling spoofing. This change
ensures a parsed encoded-word that contains specials is kept as an
encoded-word while the header is refolded.
[Better fix from @bitdancer.]
---------
Co-authored-by: R David Murray <rdmurray@bitdance.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- | Lib/email/_header_value_parser.py | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 3d845c09d41..9a51b943733 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1053,7 +1053,7 @@ def get_fws(value): fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') return fws, newvalue -def get_encoded_word(value): +def get_encoded_word(value, terminal_type='vtext'): """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" """ @@ -1092,7 +1092,7 @@ def get_encoded_word(value): ew.append(token) continue chars, *remainder = _wsp_splitter(text, 1) - vtext = ValueTerminal(chars, 'vtext') + vtext = ValueTerminal(chars, terminal_type) _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) @@ -1134,7 +1134,7 @@ def get_unstructured(value): valid_ew = True if value.startswith('=?'): try: - token, value = get_encoded_word(value) + token, value = get_encoded_word(value, 'utext') except _InvalidEwError: valid_ew = False except errors.HeaderParseError: @@ -1163,7 +1163,7 @@ def get_unstructured(value): # the parser to go in an infinite loop. if valid_ew and rfc2047_matcher.search(tok): tok, *remainder = value.partition('=?') - vtext = ValueTerminal(tok, 'vtext') + vtext = ValueTerminal(tok, 'utext') _validate_xtext(vtext) unstructured.append(vtext) value = ''.join(remainder) @@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy): continue tstr = str(part) if not want_encoding: - if part.token_type == 'ptext': + if part.token_type in ('ptext', 'vtext'): # Encode if tstr contains special characters. want_encoding = not SPECIALSNL.isdisjoint(tstr) else: |