From 923512f327af6944bbdbc905d2372658a3977489 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Fri, 12 Jul 2013 16:00:28 -0400 Subject: #18431: Decode encoded words in atoms in new email parser. There is more to be done here in terms of accepting RFC invalid input that some mailers accept, but this covers the valid RFC places where encoded words can occur in structured headers. --- Lib/email/_header_value_parser.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'Lib/email/_header_value_parser.py') diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index a01d845110f..291437c5867 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1627,6 +1627,7 @@ def get_quoted_string(value): def get_atom(value): """atom = [CFWS] 1*atext [CFWS] + An atom could be an rfc2047 encoded word. """ atom = Atom() if value and value[0] in CFWS_LEADER: @@ -1635,7 +1636,15 @@ def get_atom(value): if value and value[0] in ATOM_ENDS: raise errors.HeaderParseError( "expected atom but found '{}'".format(value)) - token, value = get_atext(value) + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + # XXX: need to figure out how to register defects when + # appropriate here. + token, value = get_atext(value) + else: + token, value = get_atext(value) atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) @@ -1664,12 +1673,22 @@ def get_dot_atom_text(value): def get_dot_atom(value): """ dot-atom = [CFWS] dot-atom-text [CFWS] + Any place we can have a dot atom, we could instead have an rfc2047 encoded + word. """ dot_atom = DotAtom() if value[0] in CFWS_LEADER: token, value = get_cfws(value) dot_atom.append(token) - token, value = get_dot_atom_text(value) + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + # XXX: need to figure out how to register defects when + # appropriate here. + token, value = get_dot_atom_text(value) + else: + token, value = get_dot_atom_text(value) dot_atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) -- cgit v1.2.3