diff options
Diffstat (limited to 'Lib/email/utils.py')
-rw-r--r-- | Lib/email/utils.py | 88 |
1 files changed, 35 insertions, 53 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py index c976021e0e0..ac4da3705f3 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -26,8 +26,9 @@ import time import base64 import random import socket -import urllib +import urllib.parse import warnings +from io import StringIO from email._parseaddr import quote from email._parseaddr import AddressList as _AddressList @@ -44,7 +45,7 @@ from email.encoders import _bencode, _qencode COMMASPACE = ', ' EMPTYSTRING = '' -UEMPTYSTRING = u'' +UEMPTYSTRING = '' CRLF = '\r\n' TICK = "'" @@ -52,36 +53,9 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[][\\()"]') - -# Helpers - -def _identity(s): - return s - - -def _bdecode(s): - """Decodes a base64 string. - - This function is equivalent to base64.decodestring and it's retained only - for backward compatibility. It used to remove the last \\n of the decoded - string, if it had any (see issue 7143). - """ - if not s: - return s - return base64.decodestring(s) - - - -def fix_eols(s): - """Replace all line-ending characters with \\r\\n.""" - # Fix newlines with no preceding carriage return - s = re.sub(r'(?<!\r)\n', CRLF, s) - # Fix carriage returns with no following newline - s = re.sub(r'\r(?!\n)', CRLF, s) - return s +# Helpers - def formataddr(pair): """The inverse of parseaddr(), this takes a 2-tuple of the form (realname, email_address) and returns the string value suitable @@ -100,7 +74,7 @@ def formataddr(pair): return address - + def getaddresses(fieldvalues): """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" all = COMMASPACE.join(fieldvalues) @@ -108,7 +82,7 @@ def getaddresses(fieldvalues): return a.addresslist - + ecre = re.compile(r''' =\? # literal =? (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset @@ -120,7 +94,7 @@ ecre = re.compile(r''' ''', re.VERBOSE | re.IGNORECASE) - + def formatdate(timeval=None, localtime=False, usegmt=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -173,14 +147,16 @@ def formatdate(timeval=None, localtime=False, usegmt=False): zone) - -def make_msgid(idstring=None): + +def make_msgid(idstring=None, domain=None): """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: <20020201195627.33539.96671@nightshade.la.mastaler.com> Optional idstring if given is a string used to strengthen the - uniqueness of the message id. + uniqueness of the message id. Optional domain if given provides the + portion of the message id after the '@'. It defaults to the locally + defined hostname. """ timeval = time.time() utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) @@ -190,12 +166,13 @@ def make_msgid(idstring=None): idstring = '' else: idstring = '.' + idstring - idhost = socket.getfqdn() - msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost) + if domain is None: + domain = socket.getfqdn() + msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain) return msgid - + # These functions are in the standalone mimelib version only because they've # subsequently been fixed in the latest Python versions. We use this to worm # around broken older Pythons. @@ -229,7 +206,7 @@ def unquote(str): return str - + # RFC2231-related functions - parameter encoding and decoding def decode_rfc2231(s): """Decode string according to RFC 2231""" @@ -246,8 +223,7 @@ def encode_rfc2231(s, charset=None, language=None): charset is given but not language, the string is encoded using the empty string for language. """ - import urllib - s = urllib.quote(s, safe='') + s = urllib.parse.quote(s, safe='', encoding=charset or 'ascii') if charset is None and language is None: return s if language is None: @@ -255,7 +231,8 @@ def encode_rfc2231(s, charset=None, language=None): return "%s'%s'%s" % (charset, language, s) -rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$') +rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$', + re.ASCII) def decode_params(params): """Decode parameters list according to RFC 2231. @@ -299,7 +276,10 @@ def decode_params(params): # language specifiers at the beginning of the string. for num, s, encoded in continuations: if encoded: - s = urllib.unquote(s) + # Decode as "latin-1", so the characters in s directly + # represent the percent-encoded octet values. + # collapse_rfc2231_value treats this as an octet sequence. + s = urllib.parse.unquote(s, encoding="latin-1") extended = True value.append(s) value = quote(EMPTYSTRING.join(value)) @@ -312,13 +292,15 @@ def decode_params(params): def collapse_rfc2231_value(value, errors='replace', fallback_charset='us-ascii'): - if isinstance(value, tuple): - rawval = unquote(value[2]) - charset = value[0] or 'us-ascii' - try: - return unicode(rawval, charset, errors) - except LookupError: - # XXX charset is unknown to Python. - return unicode(rawval, fallback_charset, errors) - else: + if not isinstance(value, tuple) or len(value) != 3: return unquote(value) + # While value comes to us as a unicode string, we need it to be a bytes + # object. We do not want bytes() normal utf-8 decoder, we want a straight + # interpretation of the string as character bytes. + charset, language, text = value + rawbytes = bytes(text, 'raw-unicode-escape') + try: + return str(rawbytes, charset, errors) + except LookupError: + # charset is not a known codec. + return unquote(text) |