diff options
Diffstat (limited to 'Lib/wsgiref')
-rw-r--r-- | Lib/wsgiref/handlers.py | 181 | ||||
-rw-r--r-- | Lib/wsgiref/headers.py | 39 | ||||
-rw-r--r-- | Lib/wsgiref/simple_server.py | 39 | ||||
-rw-r--r-- | Lib/wsgiref/util.py | 12 | ||||
-rw-r--r-- | Lib/wsgiref/validate.py | 59 |
5 files changed, 227 insertions, 103 deletions
diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index 8cb57e223aa..63d5993eca0 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -1,30 +1,14 @@ """Base classes for server/gateway implementations""" -from types import StringType -from util import FileWrapper, guess_scheme, is_hop_by_hop -from headers import Headers +from .util import FileWrapper, guess_scheme, is_hop_by_hop +from .headers import Headers import sys, os, time -__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler'] - -try: - dict -except NameError: - def dict(items): - d = {} - for k,v in items: - d[k] = v - return d - -# Uncomment for 2.2 compatibility. -#try: -# True -# False -#except NameError: -# True = not None -# False = not True - +__all__ = [ + 'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler', + 'IISCGIHandler', 'read_environ' +] # Weekday and month names for HTTP date/time formatting; always English! _weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] @@ -38,6 +22,74 @@ def format_date_time(timestamp): _weekdayname[wd], day, _monthname[month], year, hh, mm, ss ) +_is_request = { + 'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE', + 'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT', +}.__contains__ + +def _needs_transcode(k): + return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \ + or (k.startswith('REDIRECT_') and _needs_transcode(k[9:])) + +def read_environ(): + """Read environment, fixing HTTP variables""" + enc = sys.getfilesystemencoding() + esc = 'surrogateescape' + try: + ''.encode('utf-8', esc) + except LookupError: + esc = 'replace' + environ = {} + + # Take the basic environment from native-unicode os.environ. Attempt to + # fix up the variables that come from the HTTP request to compensate for + # the bytes->unicode decoding step that will already have taken place. + for k, v in os.environ.items(): + if _needs_transcode(k): + + # On win32, the os.environ is natively Unicode. Different servers + # decode the request bytes using different encodings. + if sys.platform == 'win32': + software = os.environ.get('SERVER_SOFTWARE', '').lower() + + # On IIS, the HTTP request will be decoded as UTF-8 as long + # as the input is a valid UTF-8 sequence. Otherwise it is + # decoded using the system code page (mbcs), with no way to + # detect this has happened. Because UTF-8 is the more likely + # encoding, and mbcs is inherently unreliable (an mbcs string + # that happens to be valid UTF-8 will not be decoded as mbcs) + # always recreate the original bytes as UTF-8. + if software.startswith('microsoft-iis/'): + v = v.encode('utf-8').decode('iso-8859-1') + + # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct + # to the Unicode environ. No modification needed. + elif software.startswith('apache/'): + pass + + # Python 3's http.server.CGIHTTPRequestHandler decodes + # using the urllib.unquote default of UTF-8, amongst other + # issues. + elif ( + software.startswith('simplehttp/') + and 'python/3' in software + ): + v = v.encode('utf-8').decode('iso-8859-1') + + # For other servers, guess that they have written bytes to + # the environ using stdio byte-oriented interfaces, ending up + # with the system code page. + else: + v = v.encode(enc, 'replace').decode('iso-8859-1') + + # Recover bytes from unicode environ, using surrogate escapes + # where available (Python 3.1+). + else: + v = v.encode(enc, esc).decode('iso-8859-1') + + environ[k] = v + return environ + class BaseHandler: """Manage the invocation of a WSGI application""" @@ -55,7 +107,7 @@ class BaseHandler: # os_environ is used to supply configuration from the OS environment: # by default it's a copy of 'os.environ' as of import time, but you can # override this in e.g. your __init__ method. - os_environ = dict(os.environ.items()) + os_environ= read_environ() # Collaborator classes wsgi_file_wrapper = FileWrapper # set to None to disable @@ -65,7 +117,7 @@ class BaseHandler: traceback_limit = None # Print entire traceback to self.get_stderr() error_status = "500 Internal Server Error" error_headers = [('Content-Type','text/plain')] - error_body = "A server error occurred. Please contact the administrator." + error_body = b"A server error occurred. Please contact the administrator." # State variables (don't mess with these) status = result = None @@ -158,50 +210,60 @@ class BaseHandler: self.set_content_length() def start_response(self, status, headers,exc_info=None): - """'start_response()' callable as specified by PEP 333""" + """'start_response()' callable as specified by PEP 3333""" if exc_info: try: if self.headers_sent: # Re-raise original exception if headers sent - raise exc_info[0], exc_info[1], exc_info[2] + raise exc_info[0](exc_info[1]).with_traceback(exc_info[2]) finally: exc_info = None # avoid dangling circular ref elif self.headers is not None: raise AssertionError("Headers already set!") - assert type(status) is StringType,"Status must be a string" + self.status = status + self.headers = self.headers_class(headers) + status = self._convert_string_type(status, "Status") assert len(status)>=4,"Status must be at least 4 characters" assert int(status[:3]),"Status message must begin w/3-digit code" assert status[3]==" ", "Status message must have a space after code" + if __debug__: - for name,val in headers: - assert type(name) is StringType,"Header names must be strings" - assert type(val) is StringType,"Header values must be strings" + for name, val in headers: + name = self._convert_string_type(name, "Header name") + val = self._convert_string_type(val, "Header value") assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed" - self.status = status - self.headers = self.headers_class(headers) + return self.write + def _convert_string_type(self, value, title): + """Convert/check value type.""" + if type(value) is str: + return value + raise AssertionError( + "{0} must be of type str (got {1})".format(title, repr(value)) + ) def send_preamble(self): """Transmit version/status/date/server, via self._write()""" if self.origin_server: if self.client_is_modern(): - self._write('HTTP/%s %s\r\n' % (self.http_version,self.status)) + self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1')) if 'Date' not in self.headers: self._write( - 'Date: %s\r\n' % format_date_time(time.time()) + ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1') ) if self.server_software and 'Server' not in self.headers: - self._write('Server: %s\r\n' % self.server_software) + self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1')) else: - self._write('Status: %s\r\n' % self.status) + self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1')) def write(self, data): - """'write()' callable as specified by PEP 333""" + """'write()' callable as specified by PEP 3333""" - assert type(data) is StringType,"write() argument must be string" + assert type(data) is bytes, \ + "write() argument must be a bytes instance" if not self.status: raise AssertionError("write() before start_response()") @@ -268,7 +330,7 @@ class BaseHandler: self.headers_sent = True if not self.origin_server or self.client_is_modern(): self.send_preamble() - self._write(str(self.headers)) + self._write(bytes(self.headers)) def result_is_file(self): @@ -389,7 +451,6 @@ class SimpleHandler(BaseHandler): def _write(self,data): self.stdout.write(data) - self._write = self.stdout.write def _flush(self): self.stdout.flush() @@ -445,6 +506,42 @@ class CGIHandler(BaseCGIHandler): def __init__(self): BaseCGIHandler.__init__( - self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()), - multithread=False, multiprocess=True + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + read_environ(), multithread=False, multiprocess=True + ) + + +class IISCGIHandler(BaseCGIHandler): + """CGI-based invocation with workaround for IIS path bug + + This handler should be used in preference to CGIHandler when deploying on + Microsoft IIS without having set the config allowPathInfo option (IIS>=7) + or metabase allowPathInfoForScriptMappings (IIS<7). + """ + wsgi_run_once = True + os_environ = {} + + # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at + # the front, causing problems for WSGI applications that wish to implement + # routing. This handler strips any such duplicated path. + + # IIS can be configured to pass the correct PATH_INFO, but this causes + # another bug where PATH_TRANSLATED is wrong. Luckily this variable is + # rarely used and is not guaranteed by WSGI. On IIS<7, though, the + # setting can only be made on a vhost level, affecting all other script + # mappings, many of which break when exposed to the PATH_TRANSLATED bug. + # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7 + # rarely uses it because there is still no UI for it.) + + # There is no way for CGI code to tell whether the option was set, so a + # separate handler class is provided. + def __init__(self): + environ= read_environ() + path = environ.get('PATH_INFO', '') + script = environ.get('SCRIPT_NAME', '') + if (path+'/').startswith(script+'/'): + environ['PATH_INFO'] = path[len(script):] + BaseCGIHandler.__init__( + self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, + environ, multithread=False, multiprocess=True ) diff --git a/Lib/wsgiref/headers.py b/Lib/wsgiref/headers.py index 6c8c60c8913..d93962831ae 100644 --- a/Lib/wsgiref/headers.py +++ b/Lib/wsgiref/headers.py @@ -5,8 +5,6 @@ so portions are Copyright (C) 2001,2002 Python Software Foundation, and were written by Barry Warsaw. """ -from types import ListType, TupleType - # Regular expression that matches `special' characters in parameters, the # existence of which force quoting of the parameter value. import re @@ -32,9 +30,20 @@ class Headers: """Manage a collection of HTTP response headers""" def __init__(self,headers): - if type(headers) is not ListType: + if type(headers) is not list: raise TypeError("Headers must be a list of name/value tuples") self._headers = headers + if __debug__: + for k, v in headers: + self._convert_string_type(k) + self._convert_string_type(v) + + def _convert_string_type(self, value): + """Convert/check value type.""" + if type(value) is str: + return value + raise AssertionError("Header names/values must be" + " of type str (got {0})".format(repr(value))) def __len__(self): """Return the total number of headers, including duplicates.""" @@ -43,14 +52,15 @@ class Headers: def __setitem__(self, name, val): """Set the value of a header.""" del self[name] - self._headers.append((name, val)) + self._headers.append( + (self._convert_string_type(name), self._convert_string_type(val))) def __delitem__(self,name): """Delete all occurrences of a header, if present. Does *not* raise an exception if the header is missing. """ - name = name.lower() + name = self._convert_string_type(name.lower()) self._headers[:] = [kv for kv in self._headers if kv[0].lower() != name] def __getitem__(self,name): @@ -64,12 +74,10 @@ class Headers: """ return self.get(name) - def has_key(self, name): + def __contains__(self, name): """Return true if the message contains the header.""" return self.get(name) is not None - __contains__ = has_key - def get_all(self, name): """Return a list of all the values for the named field. @@ -79,13 +87,13 @@ class Headers: fields deleted and re-inserted are always appended to the header list. If no fields exist with the given name, returns an empty list. """ - name = name.lower() + name = self._convert_string_type(name.lower()) return [kv[1] for kv in self._headers if kv[0].lower()==name] def get(self,name,default=None): """Get the first header value for 'name', or return 'default'""" - name = name.lower() + name = self._convert_string_type(name.lower()) for k,v in self._headers: if k.lower()==name: return v @@ -130,6 +138,9 @@ class Headers: suitable for direct HTTP transmission.""" return '\r\n'.join(["%s: %s" % kv for kv in self._headers]+['','']) + def __bytes__(self): + return str(self).encode('iso-8859-1') + def setdefault(self,name,value): """Return first matching header value for 'name', or 'value' @@ -137,7 +148,8 @@ class Headers: and value 'value'.""" result = self.get(name) if result is None: - self._headers.append((name,value)) + self._headers.append((self._convert_string_type(name), + self._convert_string_type(value))) return value else: return result @@ -160,10 +172,13 @@ class Headers: """ parts = [] if _value is not None: + _value = self._convert_string_type(_value) parts.append(_value) for k, v in _params.items(): + k = self._convert_string_type(k) if v is None: parts.append(k.replace('_', '-')) else: + v = self._convert_string_type(v) parts.append(_formatparam(k.replace('_', '-'), v)) - self._headers.append((_name, "; ".join(parts))) + self._headers.append((self._convert_string_type(_name), "; ".join(parts))) diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py index e6a385b03c7..af82f953c53 100644 --- a/Lib/wsgiref/simple_server.py +++ b/Lib/wsgiref/simple_server.py @@ -1,4 +1,4 @@ -"""BaseHTTPServer that implements the Python WSGI protocol (PEP 333, rev 1.21) +"""BaseHTTPServer that implements the Python WSGI protocol (PEP 3333) This is both an example of how WSGI can be implemented, and a basis for running simple web applications on a local machine, such as might be done when testing @@ -10,11 +10,12 @@ For example usage, see the 'if __name__=="__main__"' block at the end of the module. See also the BaseHTTPServer module docs for other API information. """ -from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -import urllib, sys +from http.server import BaseHTTPRequestHandler, HTTPServer +import sys +import urllib.parse from wsgiref.handlers import SimpleHandler -__version__ = "0.1" +__version__ = "0.2" __all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server'] @@ -73,13 +74,14 @@ class WSGIRequestHandler(BaseHTTPRequestHandler): def get_environ(self): env = self.server.base_environ.copy() env['SERVER_PROTOCOL'] = self.request_version + env['SERVER_SOFTWARE'] = self.server_version env['REQUEST_METHOD'] = self.command if '?' in self.path: path,query = self.path.split('?',1) else: path,query = self.path,'' - env['PATH_INFO'] = urllib.unquote(path) + env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1') env['QUERY_STRING'] = query host = self.address_string() @@ -87,17 +89,16 @@ class WSGIRequestHandler(BaseHTTPRequestHandler): env['REMOTE_HOST'] = host env['REMOTE_ADDR'] = self.client_address[0] - if self.headers.typeheader is None: - env['CONTENT_TYPE'] = self.headers.type + if self.headers.get('content-type') is None: + env['CONTENT_TYPE'] = self.headers.get_content_type() else: - env['CONTENT_TYPE'] = self.headers.typeheader + env['CONTENT_TYPE'] = self.headers['content-type'] - length = self.headers.getheader('content-length') + length = self.headers.get('content-length') if length: env['CONTENT_LENGTH'] = length - for h in self.headers.headers: - k,v = h.split(':',1) + for k, v in self.headers.items(): k=k.replace('-','_').upper(); v=v.strip() if k in env: continue # skip content length, type,etc. @@ -126,15 +127,15 @@ class WSGIRequestHandler(BaseHTTPRequestHandler): def demo_app(environ,start_response): - from StringIO import StringIO + from io import StringIO stdout = StringIO() - print >>stdout, "Hello world!" - print >>stdout - h = environ.items(); h.sort() + print("Hello world!", file=stdout) + print(file=stdout) + h = sorted(environ.items()) for k,v in h: - print >>stdout, k,'=', repr(v) - start_response("200 OK", [('Content-Type','text/plain')]) - return [stdout.getvalue()] + print(k,'=',repr(v), file=stdout) + start_response("200 OK", [('Content-Type','text/plain; charset=utf-8')]) + return [stdout.getvalue().encode("utf-8")] def make_server( @@ -149,7 +150,7 @@ def make_server( if __name__ == '__main__': httpd = make_server('', 8000, demo_app) sa = httpd.socket.getsockname() - print "Serving HTTP on", sa[0], "port", sa[1], "..." + print("Serving HTTP on", sa[0], "port", sa[1], "...") import webbrowser webbrowser.open('http://localhost:8000/xyz?abc') httpd.handle_request() # serve one request, then exit diff --git a/Lib/wsgiref/util.py b/Lib/wsgiref/util.py index 194b187a4dc..1f1e6cce179 100644 --- a/Lib/wsgiref/util.py +++ b/Lib/wsgiref/util.py @@ -26,7 +26,7 @@ class FileWrapper: def __iter__(self): return self - def next(self): + def __next__(self): data = self.filelike.read(self.blksize) if data: return data @@ -43,7 +43,7 @@ def guess_scheme(environ): def application_uri(environ): """Return the application's base URI (no PATH_INFO or QUERY_STRING)""" url = environ['wsgi.url_scheme']+'://' - from urllib import quote + from urllib.parse import quote if environ.get('HTTP_HOST'): url += environ['HTTP_HOST'] @@ -60,10 +60,10 @@ def application_uri(environ): url += quote(environ.get('SCRIPT_NAME') or '/') return url -def request_uri(environ, include_query=1): +def request_uri(environ, include_query=True): """Return the full request URI, optionally including the query string""" url = application_uri(environ) - from urllib import quote + from urllib.parse import quote path_info = quote(environ.get('PATH_INFO',''),safe='/;=,') if not environ.get('SCRIPT_NAME'): url += path_info[1:] @@ -142,8 +142,8 @@ def setup_testing_defaults(environ): environ.setdefault('wsgi.multithread', 0) environ.setdefault('wsgi.multiprocess', 0) - from StringIO import StringIO - environ.setdefault('wsgi.input', StringIO("")) + from io import StringIO, BytesIO + environ.setdefault('wsgi.input', BytesIO()) environ.setdefault('wsgi.errors', StringIO()) environ.setdefault('wsgi.url_scheme',guess_scheme(environ)) diff --git a/Lib/wsgiref/validate.py b/Lib/wsgiref/validate.py index 04a893d7c61..49eaa514cc5 100644 --- a/Lib/wsgiref/validate.py +++ b/Lib/wsgiref/validate.py @@ -98,7 +98,7 @@ Some of the things this checks: - That it is not a string (it should be a list of a single string; a string will work, but perform horribly). - - That .next() returns a string + - That .__next__() returns a string - That the iterator is not iterated over until start_response has been called (that can signal either a server or application @@ -113,7 +113,6 @@ __all__ = ['validator'] import re import sys -from types import DictType, StringType, TupleType, ListType import warnings header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$') @@ -128,6 +127,12 @@ def assert_(cond, *args): if not cond: raise AssertionError(*args) +def check_string_type(value, title): + if type (value) is str: + return value + raise AssertionError( + "{0} must be of type str (got {1})".format(title, repr(value))) + def validator(application): """ @@ -189,22 +194,23 @@ class InputWrapper: self.input = wsgi_input def read(self, *args): - assert_(len(args) <= 1) + assert_(len(args) == 1) v = self.input.read(*args) - assert_(type(v) is type("")) + assert_(type(v) is bytes) return v - def readline(self): - v = self.input.readline() - assert_(type(v) is type("")) + def readline(self, *args): + assert_(len(args) <= 1) + v = self.input.readline(*args) + assert_(type(v) is bytes) return v def readlines(self, *args): assert_(len(args) <= 1) lines = self.input.readlines(*args) - assert_(type(lines) is type([])) + assert_(type(lines) is list) for line in lines: - assert_(type(line) is type("")) + assert_(type(line) is bytes) return lines def __iter__(self): @@ -223,7 +229,7 @@ class ErrorWrapper: self.errors = wsgi_errors def write(self, s): - assert_(type(s) is type("")) + assert_(type(s) is str) self.errors.write(s) def flush(self): @@ -242,7 +248,7 @@ class WriteWrapper: self.writer = wsgi_writer def __call__(self, s): - assert_(type(s) is type("")) + assert_(type(s) is bytes) self.writer(s) class PartialIteratorWrapper: @@ -265,10 +271,12 @@ class IteratorWrapper: def __iter__(self): return self - def next(self): + def __next__(self): assert_(not self.closed, "Iterator read after closed") - v = self.iterator.next() + v = next(self.iterator) + if type(v) is not bytes: + assert_(False, "Iterator yielded non-bytestring (%r)" % (v,)) if self.check_start_response is not None: assert_(self.check_start_response, "The application returns and we started iterating over its body, but start_response has not yet been called") @@ -288,7 +296,7 @@ class IteratorWrapper: "Iterator garbage collected without being closed") def check_environ(environ): - assert_(type(environ) is DictType, + assert_(type(environ) is dict, "Environment is not of the right type: %r (environment: %r)" % (type(environ), environ)) @@ -315,11 +323,11 @@ def check_environ(environ): if '.' in key: # Extension, we don't care about its type continue - assert_(type(environ[key]) is StringType, + assert_(type(environ[key]) is str, "Environmental variable %s is not a string: %r (value: %r)" % (key, type(environ[key]), environ[key])) - assert_(type(environ['wsgi.version']) is TupleType, + assert_(type(environ['wsgi.version']) is tuple, "wsgi.version should be a tuple (%r)" % (environ['wsgi.version'],)) assert_(environ['wsgi.url_scheme'] in ('http', 'https'), "wsgi.url_scheme unknown: %r" % environ['wsgi.url_scheme']) @@ -365,8 +373,7 @@ def check_errors(wsgi_errors): % (wsgi_errors, attr)) def check_status(status): - assert_(type(status) is StringType, - "Status must be a string (not %r)" % status) + status = check_string_type(status, "Status") # Implicitly check that we can turn it into an integer: status_code = status.split(None, 1)[0] assert_(len(status_code) == 3, @@ -380,16 +387,18 @@ def check_status(status): % status, WSGIWarning) def check_headers(headers): - assert_(type(headers) is ListType, + assert_(type(headers) is list, "Headers (%r) must be of type list: %r" % (headers, type(headers))) header_names = {} for item in headers: - assert_(type(item) is TupleType, + assert_(type(item) is tuple, "Individual headers (%r) must be of type tuple: %r" % (item, type(item))) assert_(len(item) == 2) name, value = item + name = check_string_type(name, "Header name") + value = check_string_type(value, "Header value") assert_(name.lower() != 'status', "The Status header cannot be used; it conflicts with CGI " "script, and HTTP status is not given through headers " @@ -405,11 +414,13 @@ def check_headers(headers): % (value, bad_header_value_re.search(value).group(0))) def check_content_type(status, headers): + status = check_string_type(status, "Status") code = int(status.split(None, 1)[0]) # @@: need one more person to verify this interpretation of RFC 2616 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html NO_MESSAGE_BODY = (204, 304) for name, value in headers: + name = check_string_type(name, "Header name") if name.lower() == 'content-type': if code not in NO_MESSAGE_BODY: return @@ -419,14 +430,14 @@ def check_content_type(status, headers): assert_(0, "No Content-Type header found in headers (%s)" % headers) def check_exc_info(exc_info): - assert_(exc_info is None or type(exc_info) is type(()), + assert_(exc_info is None or type(exc_info) is tuple, "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info))) # More exc_info checks? def check_iterator(iterator): - # Technically a string is legal, which is why it's a really bad + # Technically a bytestring is legal, which is why it's a really bad # idea, because it may cause the response to be returned # character-by-character - assert_(not isinstance(iterator, str), + assert_(not isinstance(iterator, (str, bytes)), "You should not return a string as your application iterator, " - "instead return a single-item list containing that string.") + "instead return a single-item list containing a bytestring.") |