diff options
author | Petr Viktorin <encukou@gmail.com> | 2024-07-31 00:19:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-31 00:19:48 +0200 |
commit | 097633981879b3c9de9a1dd120d3aa585ecc2384 (patch) | |
tree | bd7d04f28e53f8df28dd57213e09619cc1666b9d /Lib/email/_header_value_parser.py | |
parent | 5912487938ac4b517209082ab9e6d2d3d0fb4f4d (diff) | |
download | cpython-097633981879b3c9de9a1dd120d3aa585ecc2384.tar.gz cpython-097633981879b3c9de9a1dd120d3aa585ecc2384.zip |
gh-121650: Encode newlines in headers, and verify headers are sound (GH-122233)
## Encode header parts that contain newlines
Per RFC 2047:
> [...] these encoding schemes allow the
> encoding of arbitrary octet values, mail readers that implement this
> decoding should also ensure that display of the decoded data on the
> recipient's terminal will not cause unwanted side-effects
It seems that the "quoted-word" scheme is a valid way to include
a newline character in a header value, just like we already allow
undecodable bytes or control characters.
They do need to be properly quoted when serialized to text, though.
## Verify that email headers are well-formed
This should fail for custom fold() implementations that aren't careful
about newlines.
Co-authored-by: Bas Bloemsaat <bas@bloemsaat.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- | Lib/email/_header_value_parser.py | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 7da1bbaf8a8..ec2215a5e5f 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -92,6 +92,8 @@ TOKEN_ENDS = TSPECIALS | WSP ASPECIALS = TSPECIALS | set("*'%") ATTRIBUTE_ENDS = ASPECIALS | WSP EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') +NLSET = {'\n', '\r'} +SPECIALSNL = SPECIALS | NLSET def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' @@ -2802,9 +2804,13 @@ def _refold_parse_tree(parse_tree, *, policy): wrap_as_ew_blocked -= 1 continue tstr = str(part) - if part.token_type == 'ptext' and set(tstr) & SPECIALS: - # Encode if tstr contains special characters. - want_encoding = True + if not want_encoding: + if part.token_type == 'ptext': + # Encode if tstr contains special characters. + want_encoding = not SPECIALSNL.isdisjoint(tstr) + else: + # Encode if tstr contains newlines. + want_encoding = not NLSET.isdisjoint(tstr) try: tstr.encode(encoding) charset = encoding |