aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/email/_header_value_parser.py
diff options
context:
space:
mode:
authorPetr Viktorin <encukou@gmail.com>2024-07-31 00:19:48 +0200
committerGitHub <noreply@github.com>2024-07-31 00:19:48 +0200
commit097633981879b3c9de9a1dd120d3aa585ecc2384 (patch)
treebd7d04f28e53f8df28dd57213e09619cc1666b9d /Lib/email/_header_value_parser.py
parent5912487938ac4b517209082ab9e6d2d3d0fb4f4d (diff)
downloadcpython-097633981879b3c9de9a1dd120d3aa585ecc2384.tar.gz
cpython-097633981879b3c9de9a1dd120d3aa585ecc2384.zip
gh-121650: Encode newlines in headers, and verify headers are sound (GH-122233)
## Encode header parts that contain newlines Per RFC 2047: > [...] these encoding schemes allow the > encoding of arbitrary octet values, mail readers that implement this > decoding should also ensure that display of the decoded data on the > recipient's terminal will not cause unwanted side-effects It seems that the "quoted-word" scheme is a valid way to include a newline character in a header value, just like we already allow undecodable bytes or control characters. They do need to be properly quoted when serialized to text, though. ## Verify that email headers are well-formed This should fail for custom fold() implementations that aren't careful about newlines. Co-authored-by: Bas Bloemsaat <bas@bloemsaat.org> Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r--Lib/email/_header_value_parser.py12
1 files changed, 9 insertions, 3 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 7da1bbaf8a8..ec2215a5e5f 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -92,6 +92,8 @@ TOKEN_ENDS = TSPECIALS | WSP
ASPECIALS = TSPECIALS | set("*'%")
ATTRIBUTE_ENDS = ASPECIALS | WSP
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+NLSET = {'\n', '\r'}
+SPECIALSNL = SPECIALS | NLSET
def quote_string(value):
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
@@ -2802,9 +2804,13 @@ def _refold_parse_tree(parse_tree, *, policy):
wrap_as_ew_blocked -= 1
continue
tstr = str(part)
- if part.token_type == 'ptext' and set(tstr) & SPECIALS:
- # Encode if tstr contains special characters.
- want_encoding = True
+ if not want_encoding:
+ if part.token_type == 'ptext':
+ # Encode if tstr contains special characters.
+ want_encoding = not SPECIALSNL.isdisjoint(tstr)
+ else:
+ # Encode if tstr contains newlines.
+ want_encoding = not NLSET.isdisjoint(tstr)
try:
tstr.encode(encoding)
charset = encoding