about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Lib/email/_header_value_parser.py
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2013-07-12 16:00:28 -0400
committer: R David Murray <rdmurray@bitdance.com> 2013-07-12 16:00:28 -0400
commit: 923512f327af6944bbdbc905d2372658a3977489 (patch)
tree: b59a06066813e708621c90e3145753d3e6d5236a /Lib/email/_header_value_parser.py
parent: 65171b28e77f589a490335c8749a24151e1d8817 (diff)
download: cpython-923512f327af6944bbdbc905d2372658a3977489.tar.gz
cpython-923512f327af6944bbdbc905d2372658a3977489.zip
#18431: Decode encoded words in atoms in new email parser.
More remains to be done here to accept the RFC-invalid input that some mailers tolerate, but this change covers the places where the RFCs validly allow encoded words to occur in structured headers.
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- Lib/email/_header_value_parser.py 23
1 files changed, 21 insertions, 2 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index a01d845110f..291437c5867 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1627,6 +1627,7 @@ def get_quoted_string(value):
def get_atom(value):
"""atom = [CFWS] 1*atext [CFWS]
+ An atom could be an rfc2047 encoded word.
"""
atom = Atom()
if value and value[0] in CFWS_LEADER:
@@ -1635,7 +1636,15 @@ def get_atom(value):
if value and value[0] in ATOM_ENDS:
raise errors.HeaderParseError(
"expected atom but found '{}'".format(value))
- token, value = get_atext(value)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_atext(value)
+ else:
+ token, value = get_atext(value)
atom.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
@@ -1664,12 +1673,22 @@ def get_dot_atom_text(value):
def get_dot_atom(value):
""" dot-atom = [CFWS] dot-atom-text [CFWS]
+ Any place we can have a dot atom, we could instead have an rfc2047 encoded
+ word.
"""
dot_atom = DotAtom()
if value[0] in CFWS_LEADER:
token, value = get_cfws(value)
dot_atom.append(token)
- token, value = get_dot_atom_text(value)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_dot_atom_text(value)
+ else:
+ token, value = get_dot_atom_text(value)
dot_atom.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)