aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/email/generator.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email/generator.py')
-rw-r--r--Lib/email/generator.py263
1 files changed, 180 insertions, 83 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 5626ab91eb5..c6bfb704f53 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -1,9 +1,10 @@
# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Classes to generate plain text from a message object tree."""
-__all__ = ['Generator', 'DecodedGenerator']
+__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
import re
import sys
@@ -11,22 +12,15 @@ import time
import random
import warnings
-from cStringIO import StringIO
+from io import StringIO, BytesIO
from email.header import Header
+from email.message import _has_surrogates
UNDERSCORE = '_'
-NL = '\n'
+NL = '\n' # XXX: no longer used by the code below.
fcre = re.compile(r'^From ', re.MULTILINE)
-def _is8bitstring(s):
- if isinstance(s, str):
- try:
- unicode(s, 'us-ascii')
- except UnicodeError:
- return True
- return False
-
class Generator:
@@ -64,8 +58,8 @@ class Generator:
# Just delegate to the file object
self._fp.write(s)
- def flatten(self, msg, unixfrom=False):
- """Print the message object tree rooted at msg to the output file
+ def flatten(self, msg, unixfrom=False, linesep='\n'):
+ r"""Print the message object tree rooted at msg to the output file
specified when the Generator instance was created.
unixfrom is a flag that forces the printing of a Unix From_ delimiter
@@ -74,12 +68,26 @@ class Generator:
is False to inhibit the printing of any From_ delimiter.
Note that for subobjects, no From_ line is printed.
+
+ linesep specifies the characters used to indicate a new line in
+ the output. The default value is the most useful for typical
+ Python applications, but it can be set to \r\n to produce RFC-compliant
+ line separators when needed.
+
"""
+ # We use the _XXX constants for operating on data that comes directly
+ # from the msg, and _encoded_XXX constants for operating on data that
+ # has already been converted (to bytes in the BytesGenerator) and
+ # inserted into a temporary buffer.
+ self._NL = linesep
+ self._encoded_NL = self._encode(linesep)
+ self._EMPTY = ''
+ self._encoded_EMTPY = self._encode('')
if unixfrom:
ufrom = msg.get_unixfrom()
if not ufrom:
ufrom = 'From nobody ' + time.ctime(time.time())
- print >> self._fp, ufrom
+ self.write(ufrom + self._NL)
self._write(msg)
def clone(self, fp):
@@ -90,6 +98,27 @@ class Generator:
# Protected interface - undocumented ;/
#
+ # Note that we use 'self.write' when what we are writing is coming from
+ # the source, and self._fp.write when what we are writing is coming from a
+ # buffer (because the Bytes subclass has already had a chance to transform
+ # the data in its write method in that case). This is an entirely
+ # pragmatic split determined by experiment; we could be more general by
+ # always using write and having the Bytes subclass write method detect when
+ # it has already transformed the input; but, since this whole thing is a
+ # hack anyway this seems good enough.
+
+ # Similarly, we have _XXX and _encoded_XXX attributes that are used on
+ # source and buffer data, respectively.
+ _encoded_EMPTY = ''
+
+ def _new_buffer(self):
+ # BytesGenerator overrides this to return BytesIO.
+ return StringIO()
+
+ def _encode(self, s):
+ # BytesGenerator overrides this to encode strings to bytes.
+ return s
+
def _write(self, msg):
# We can't write the headers yet because of the following scenario:
# say a multipart message includes the boundary string somewhere in
@@ -98,13 +127,13 @@ class Generator:
# parameter.
#
# The way we do this, so as to make the _handle_*() methods simpler,
- # is to cache any subpart writes into a StringIO. The we write the
- # headers and the StringIO contents. That way, subpart handlers can
+ # is to cache any subpart writes into a buffer. The we write the
+ # headers and the buffer contents. That way, subpart handlers can
# Do The Right Thing, and can still modify the Content-Type: header if
# necessary.
oldfp = self._fp
try:
- self._fp = sfp = StringIO()
+ self._fp = sfp = self._new_buffer()
self._dispatch(msg)
finally:
self._fp = oldfp
@@ -139,31 +168,17 @@ class Generator:
def _write_headers(self, msg):
for h, v in msg.items():
- print >> self._fp, '%s:' % h,
- if self._maxheaderlen == 0:
- # Explicit no-wrapping
- print >> self._fp, v
- elif isinstance(v, Header):
- # Header instances know what to do
- print >> self._fp, v.encode()
- elif _is8bitstring(v):
- # If we have raw 8bit data in a byte string, we have no idea
- # what the encoding is. There is no safe way to split this
- # string. If it's ascii-subset, then we could do a normal
- # ascii split, but if it's multibyte then we could break the
- # string. There's no way to know so the least harm seems to
- # be to not split the string and risk it being too long.
- print >> self._fp, v
+ self.write('%s: ' % h)
+ if isinstance(v, Header):
+ self.write(v.encode(
+ maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
else:
- # Header's got lots of smarts, so use it. Note that this is
- # fundamentally broken though because we lose idempotency when
- # the header string is continued with tabs. It will now be
- # continued with spaces. This was reversedly broken before we
- # fixed bug 1974. Either way, we lose.
- print >> self._fp, Header(
- v, maxlinelen=self._maxheaderlen, header_name=h).encode()
+ # Header's got lots of smarts, so use it.
+ header = Header(v, maxlinelen=self._maxheaderlen,
+ header_name=h)
+ self.write(header.encode(linesep=self._NL)+self._NL)
# A blank line always separates headers from body
- print >> self._fp
+ self.write(self._NL)
#
# Handlers for writing types and subtypes
@@ -173,11 +188,17 @@ class Generator:
payload = msg.get_payload()
if payload is None:
return
- if not isinstance(payload, basestring):
+ if not isinstance(payload, str):
raise TypeError('string payload expected: %s' % type(payload))
+ if _has_surrogates(msg._payload):
+ charset = msg.get_param('charset')
+ if charset is not None:
+ del msg['content-transfer-encoding']
+ msg.set_payload(payload, charset)
+ payload = msg.get_payload()
if self._mangle_from_:
payload = fcre.sub('>From ', payload)
- self._fp.write(payload)
+ self.write(payload)
# Default body handler
_writeBody = _handle_text
@@ -190,25 +211,25 @@ class Generator:
subparts = msg.get_payload()
if subparts is None:
subparts = []
- elif isinstance(subparts, basestring):
+ elif isinstance(subparts, str):
# e.g. a non-strict parse of a message with no starting boundary.
- self._fp.write(subparts)
+ self.write(subparts)
return
elif not isinstance(subparts, list):
# Scalar payload
subparts = [subparts]
for part in subparts:
- s = StringIO()
+ s = self._new_buffer()
g = self.clone(s)
- g.flatten(part, unixfrom=False)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
msgtexts.append(s.getvalue())
# BAW: What about boundaries that are wrapped in double-quotes?
boundary = msg.get_boundary()
if not boundary:
# Create a boundary that doesn't appear in any of the
# message texts.
- alltext = NL.join(msgtexts)
- boundary = _make_boundary(alltext)
+ alltext = self._encoded_NL.join(msgtexts)
+ boundary = self._make_boundary(alltext)
msg.set_boundary(boundary)
# If there's a preamble, write it out, with a trailing CRLF
if msg.preamble is not None:
@@ -216,9 +237,9 @@ class Generator:
preamble = fcre.sub('>From ', msg.preamble)
else:
preamble = msg.preamble
- print >> self._fp, preamble
+ self.write(preamble + self._NL)
# dash-boundary transport-padding CRLF
- print >> self._fp, '--' + boundary
+ self.write('--' + boundary + self._NL)
# body-part
if msgtexts:
self._fp.write(msgtexts.pop(0))
@@ -227,18 +248,18 @@ class Generator:
# --> CRLF body-part
for body_part in msgtexts:
# delimiter transport-padding CRLF
- print >> self._fp, '\n--' + boundary
+ self.write(self._NL + '--' + boundary + self._NL)
# body-part
self._fp.write(body_part)
# close-delimiter transport-padding
- self._fp.write('\n--' + boundary + '--')
+ self.write(self._NL + '--' + boundary + '--')
if msg.epilogue is not None:
- print >> self._fp
+ self.write(self._NL)
if self._mangle_from_:
epilogue = fcre.sub('>From ', msg.epilogue)
else:
epilogue = msg.epilogue
- self._fp.write(epilogue)
+ self.write(epilogue)
def _handle_multipart_signed(self, msg):
# The contents of signed parts has to stay unmodified in order to keep
@@ -257,23 +278,23 @@ class Generator:
# block and the boundary. Sigh.
blocks = []
for part in msg.get_payload():
- s = StringIO()
+ s = self._new_buffer()
g = self.clone(s)
- g.flatten(part, unixfrom=False)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
text = s.getvalue()
- lines = text.split('\n')
+ lines = text.split(self._encoded_NL)
# Strip off the unnecessary trailing empty line
- if lines and lines[-1] == '':
- blocks.append(NL.join(lines[:-1]))
+ if lines and lines[-1] == self._encoded_EMPTY:
+ blocks.append(self._encoded_NL.join(lines[:-1]))
else:
blocks.append(text)
# Now join all the blocks with an empty line. This has the lovely
# effect of separating each block with an empty line, but not adding
# an extra one after the last one.
- self._fp.write(NL.join(blocks))
+ self._fp.write(self._encoded_NL.join(blocks))
def _handle_message(self, msg):
- s = StringIO()
+ s = self._new_buffer()
g = self.clone(s)
# The payload of a message/rfc822 part should be a multipart sequence
# of length 1. The zeroth element of the list should be the Message
@@ -286,10 +307,100 @@ class Generator:
# in that case we just emit the string body.
payload = msg.get_payload()
if isinstance(payload, list):
- g.flatten(msg.get_payload(0), unixfrom=False)
+ g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
payload = s.getvalue()
self._fp.write(payload)
+ # This used to be a module level function; we use a classmethod for this
+ # and _compile_re so we can continue to provide the module level function
+ # for backward compatibility by doing
+ # _make_boudary = Generator._make_boundary
+ # at the end of the module. It *is* internal, so we could drop that...
+ @classmethod
+ def _make_boundary(cls, text=None):
+ # Craft a random boundary. If text is given, ensure that the chosen
+ # boundary doesn't appear in the text.
+ token = random.randrange(sys.maxsize)
+ boundary = ('=' * 15) + (_fmt % token) + '=='
+ if text is None:
+ return boundary
+ b = boundary
+ counter = 0
+ while True:
+ cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+ if not cre.search(text):
+ break
+ b = boundary + '.' + str(counter)
+ counter += 1
+ return b
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s, flags)
+
+
+class BytesGenerator(Generator):
+ """Generates a bytes version of a Message object tree.
+
+ Functionally identical to the base Generator except that the output is
+ bytes and not string. When surrogates were used in the input to encode
+ bytes, these are decoded back to bytes for output.
+
+ The outfp object must accept bytes in its write method.
+ """
+
+ # Bytes versions of this constant for use in manipulating data from
+ # the BytesIO buffer.
+ _encoded_EMPTY = b''
+
+ def write(self, s):
+ self._fp.write(s.encode('ascii', 'surrogateescape'))
+
+ def _new_buffer(self):
+ return BytesIO()
+
+ def _encode(self, s):
+ return s.encode('ascii')
+
+ def _write_headers(self, msg):
+ # This is almost the same as the string version, except for handling
+ # strings with 8bit bytes.
+ for h, v in msg._headers:
+ self.write('%s: ' % h)
+ if isinstance(v, Header):
+ self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
+ elif _has_surrogates(v):
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ self.write(v+NL)
+ else:
+ # Header's got lots of smarts and this string is safe...
+ header = Header(v, maxlinelen=self._maxheaderlen,
+ header_name=h)
+ self.write(header.encode(linesep=self._NL)+self._NL)
+ # A blank line always separates headers from body
+ self.write(self._NL)
+
+ def _handle_text(self, msg):
+ # If the string has surrogates the original source was bytes, so
+ # just write it back out.
+ if msg._payload is None:
+ return
+ if _has_surrogates(msg._payload):
+ if self._mangle_from_:
+ msg._payload = fcre.sub(">From ", msg._payload)
+ self.write(msg._payload)
+ else:
+ super(BytesGenerator,self)._handle_text(msg)
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s.encode('ascii'), flags)
+
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
@@ -332,12 +443,12 @@ class DecodedGenerator(Generator):
for part in msg.walk():
maintype = part.get_content_maintype()
if maintype == 'text':
- print >> self, part.get_payload(decode=True)
+ print(part.get_payload(decode=False), file=self)
elif maintype == 'multipart':
# Just skip this
pass
else:
- print >> self, self._fmt % {
+ print(self._fmt % {
'type' : part.get_content_type(),
'maintype' : part.get_content_maintype(),
'subtype' : part.get_content_subtype(),
@@ -346,27 +457,13 @@ class DecodedGenerator(Generator):
'[no description]'),
'encoding' : part.get('Content-Transfer-Encoding',
'[no encoding]'),
- }
+ }, file=self)
-# Helper
-_width = len(repr(sys.maxint-1))
+# Helper used by Generator._make_boundary
+_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width
-def _make_boundary(text=None):
- # Craft a random boundary. If text is given, ensure that the chosen
- # boundary doesn't appear in the text.
- token = random.randrange(sys.maxint)
- boundary = ('=' * 15) + (_fmt % token) + '=='
- if text is None:
- return boundary
- b = boundary
- counter = 0
- while True:
- cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
- if not cre.search(text):
- break
- b = boundary + '.' + str(counter)
- counter += 1
- return b
+# Backward compatibility
+_make_boundary = Generator._make_boundary