diff options
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r-- | Lib/urllib/request.py | 158 |
1 files changed, 111 insertions, 47 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 6b299018647..cf065715875 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -94,6 +94,7 @@ import re import socket import sys import time +import collections from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( @@ -114,11 +115,27 @@ else: __version__ = sys.version[:3] _opener = None -def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): +def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + *, cafile=None, capath=None): global _opener - if _opener is None: - _opener = build_opener() - return _opener.open(url, data, timeout) + if cafile or capath: + if not _have_ssl: + raise ValueError('SSL support not available') + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + if cafile or capath: + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(cafile, capath) + check_hostname = True + else: + check_hostname = False + https_handler = HTTPSHandler(context=context, check_hostname=check_hostname) + opener = build_opener(https_handler) + elif _opener is None: + _opener = opener = build_opener() + else: + opener = _opener + return opener.open(url, data, timeout) def install_opener(opener): global _opener @@ -535,12 +552,11 @@ class HTTPRedirectHandler(BaseHandler): # For security reasons we don't allow redirection to anything other # than http, https or ftp. - if not urlparts.scheme in ('http', 'https', 'ftp'): - raise HTTPError(newurl, code, - msg + - " - Redirection to url '%s' is not allowed" % - newurl, - headers, fp) + if urlparts.scheme not in ('http', 'https', 'ftp', ''): + raise HTTPError( + newurl, code, + "%s - Redirection to url '%s' is not allowed" % (msg, newurl), + headers, fp) if not urlparts.path: urlparts = list(urlparts) @@ -705,7 +721,7 @@ class HTTPPasswordMgr: # uri could be a single URI or a sequence if isinstance(uri, str): uri = [uri] - if not realm in self.passwd: + if realm not in self.passwd: self.passwd[realm] = {} for default_port in True, False: reduced_uri = tuple( @@ -1045,13 +1061,24 @@ class AbstractHTTPHandler(BaseHandler): if request.data is not None: # POST data = request.data + if isinstance(data, str): + raise TypeError("POST data should be bytes" + " or an iterable of bytes. It cannot be str.") if not request.has_header('Content-type'): request.add_unredirected_header( 'Content-type', 'application/x-www-form-urlencoded') if not request.has_header('Content-length'): - request.add_unredirected_header( - 'Content-length', '%d' % len(data)) + try: + mv = memoryview(data) + except TypeError: + if isinstance(data, collections.Iterable): + raise ValueError("Content-Length should be specified " + "for iterable data of type %r %r" % (type(data), + data)) + else: + request.add_unredirected_header( + 'Content-length', '%d' % (len(mv) * mv.itemsize)) sel_host = host if request.has_proxy(): @@ -1066,7 +1093,7 @@ class AbstractHTTPHandler(BaseHandler): return request - def do_open(self, http_class, req): + def do_open(self, http_class, req, **http_conn_args): """Return an HTTPResponse object for the request, using http_class. http_class must implement the HTTPConnection API from http.client. @@ -1075,7 +1102,8 @@ class AbstractHTTPHandler(BaseHandler): if not host: raise URLError('no host given') - h = http_class(host, timeout=req.timeout) # will parse host:port + # will parse host:port + h = http_class(host, timeout=req.timeout, **http_conn_args) headers = dict(req.unredirected_hdrs) headers.update(dict((k, v) for k, v in req.headers.items() @@ -1101,13 +1129,15 @@ class AbstractHTTPHandler(BaseHandler): # Proxy-Authorization should not be sent to origin # server. del headers[proxy_auth_hdr] - h._set_tunnel(req._tunnel_host, headers=tunnel_headers) + h.set_tunnel(req._tunnel_host, headers=tunnel_headers) try: h.request(req.get_method(), req.selector, req.data, headers) - r = h.getresponse() # an HTTPResponse instance - except socket.error as err: + except socket.error as err: # timeout error + h.close() raise URLError(err) + else: + r = h.getresponse() r.url = req.get_full_url() # This line replaces the .msg attribute of the HTTPResponse @@ -1127,10 +1157,18 @@ class HTTPHandler(AbstractHTTPHandler): http_request = AbstractHTTPHandler.do_request_ if hasattr(http.client, 'HTTPSConnection'): + import ssl + class HTTPSHandler(AbstractHTTPHandler): + def __init__(self, debuglevel=0, context=None, check_hostname=None): + AbstractHTTPHandler.__init__(self, debuglevel) + self._context = context + self._check_hostname = check_hostname + def https_open(self, req): - return self.do_open(http.client.HTTPSConnection, req) + return self.do_open(http.client.HTTPSConnection, req, + context=self._context, check_hostname=self._check_hostname) https_request = AbstractHTTPHandler.do_request_ @@ -1216,8 +1254,8 @@ class FileHandler(BaseHandler): url = req.selector if url[:2] == '//' and url[2:3] != '/' and (req.host and req.host != 'localhost'): - req.type = 'ftp' - return self.parent.open(req) + if not req.host is self.get_names(): + raise URLError("file:// scheme is supported only on localhost") else: return self.open_local_file(req) @@ -1323,8 +1361,8 @@ class FTPHandler(BaseHandler): raise exc.with_traceback(sys.exc_info()[2]) def connect_ftp(self, user, passwd, host, port, dirs, timeout): - fw = ftpwrapper(user, passwd, host, port, dirs, timeout) - return fw + return ftpwrapper(user, passwd, host, port, dirs, timeout, + persistent=False) class CacheFTPHandler(FTPHandler): # XXX would be nice to have pluggable cache strategies @@ -1373,14 +1411,19 @@ class CacheFTPHandler(FTPHandler): break self.soonest = min(list(self.timeout.values())) + def clear_cache(self): + for conn in self.cache.values(): + conn.close() + self.cache.clear() + self.timeout.clear() + + # Code move from the old urllib module MAXFTPCACHE = 10 # Trim the ftp cache beyond this size # Helper for non-unix systems -if os.name == 'mac': - from macurl2path import url2pathname, pathname2url -elif os.name == 'nt': +if os.name == 'nt': from nturl2path import url2pathname, pathname2url else: def url2pathname(pathname): @@ -1519,7 +1562,7 @@ class URLopener: try: fp = self.open_local_file(url1) hdrs = fp.info() - del fp + fp.close() return url2pathname(splithost(url1)[1]), hdrs except IOError as msg: pass @@ -1546,9 +1589,9 @@ class URLopener: size = -1 read = 0 blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) if reporthook: - if "content-length" in headers: - size = int(headers["Content-Length"]) reporthook(blocknum, bs, size) while 1: block = fp.read(bs) @@ -1563,8 +1606,6 @@ class URLopener: tfp.close() finally: fp.close() - del fp - del tfp # raise exception if actual size does not match content-length header if size >= 0 and read < size: @@ -1620,13 +1661,13 @@ class URLopener: if not host: raise IOError('http error', 'no host given') if proxy_passwd: - import base64 + proxy_passwd = unquote(proxy_passwd) proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') else: proxy_auth = None if user_passwd: - import base64 + user_passwd = unquote(user_passwd) auth = base64.b64encode(user_passwd.encode()).decode('ascii') else: auth = None @@ -1638,6 +1679,12 @@ class URLopener: headers["Authorization"] = "Basic %s" % auth if realhost: headers["Host"] = realhost + + # Add Connection:close as we don't support persistent connections yet. + # This helps in closing the socket and avoiding ResourceWarning + + headers["Connection"] = "close" + for header, value in self.addheaders: headers[header] = value @@ -1685,7 +1732,6 @@ class URLopener: def http_error_default(self, url, fp, errcode, errmsg, headers): """Default error handler: close the connection and raise IOError.""" - void = fp.read() fp.close() raise HTTPError(url, errcode, errmsg, headers, None) @@ -1704,7 +1750,7 @@ class URLopener: if not isinstance(url, str): raise URLError('file error', 'proxy support for file protocol currently not implemented') if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': - return self.open_ftp(url) + raise ValueError("file:// scheme is supported only on localhost") else: return self.open_local_file(url) @@ -1775,7 +1821,7 @@ class URLopener: del self.ftpcache[k] v.close() try: - if not key in self.ftpcache: + if key not in self.ftpcache: self.ftpcache[key] = \ ftpwrapper(user, passwd, host, port, dirs) if not file: type = 'D' @@ -1825,7 +1871,6 @@ class URLopener: time.gmtime(time.time()))) msg.append('Content-type: %s' % type) if encoding == 'base64': - import base64 # XXX is this encoding/decoding ok? data = base64.decodebytes(data.encode('ascii')).decode('latin1') else: @@ -1876,7 +1921,6 @@ class FancyURLopener(URLopener): newurl = headers['uri'] else: return - void = fp.read() fp.close() # In case the server sent a relative URL, join with original: @@ -1890,7 +1934,7 @@ class FancyURLopener(URLopener): # We are using newer HTTPError with older redirect_internal method # This older method will get deprecated in 3.3 - if not urlparts.scheme in ('http', 'https', 'ftp'): + if urlparts.scheme not in ('http', 'https', 'ftp', ''): raise HTTPError(newurl, errcode, errmsg + " Redirection to url '%s' is not allowed." % newurl, @@ -1917,7 +1961,7 @@ class FancyURLopener(URLopener): retry=False): """Error 401 -- authentication required. This function supports Basic authentication only.""" - if not 'www-authenticate' in headers: + if 'www-authenticate' not in headers: URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) stuff = headers['www-authenticate'] @@ -1943,7 +1987,7 @@ class FancyURLopener(URLopener): retry=False): """Error 407 -- proxy authentication required. This function supports Basic authentication only.""" - if not 'proxy-authenticate' in headers: + if 'proxy-authenticate' not in headers: URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) stuff = headers['proxy-authenticate'] @@ -2068,7 +2112,7 @@ def thishost(): """Return the IP addresses of the current host.""" global _thishost if _thishost is None: - _thishost = tuple(socket.gethostbyname_ex(socket.gethostname()[2])) + _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2]) return _thishost _ftperrors = None @@ -2094,13 +2138,16 @@ def noheaders(): class ftpwrapper: """Class used by open_ftp() for cache of open FTP connections.""" - def __init__(self, user, passwd, host, port, dirs, timeout=None): + def __init__(self, user, passwd, host, port, dirs, timeout=None, + persistent=True): self.user = user self.passwd = passwd self.host = host self.port = port self.dirs = dirs self.timeout = timeout + self.refcount = 0 + self.keepalive = persistent self.init() def init(self): @@ -2127,7 +2174,7 @@ class ftpwrapper: # Try to retrieve as a file try: cmd = 'RETR ' + file - conn = self.ftp.ntransfercmd(cmd) + conn, retrlen = self.ftp.ntransfercmd(cmd) except ftplib.error_perm as reason: if str(reason)[:3] != '550': raise URLError('ftp error', reason).with_traceback( @@ -2148,10 +2195,15 @@ class ftpwrapper: cmd = 'LIST ' + file else: cmd = 'LIST' - conn = self.ftp.ntransfercmd(cmd) + conn, retrlen = self.ftp.ntransfercmd(cmd) self.busy = 1 + + ftpobj = addclosehook(conn.makefile('rb'), self.file_close) + self.refcount += 1 + conn.close() # Pass back both a suitably decorated object and a retrieval length - return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1]) + return (ftpobj, retrlen) + def endtransfer(self): if not self.busy: return @@ -2162,6 +2214,17 @@ class ftpwrapper: pass def close(self): + self.keepalive = False + if self.refcount <= 0: + self.real_close() + + def file_close(self): + self.endtransfer() + self.refcount -= 1 + if self.refcount <= 0 and not self.keepalive: + self.real_close() + + def real_close(self): self.endtransfer() try: self.ftp.close() @@ -2198,7 +2261,8 @@ def proxy_bypass_environment(host): # strip port off host hostonly, port = splitport(host) # check if the host ends with any of the DNS suffixes - for name in no_proxy.split(','): + no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] + for name in no_proxy_list: if name and (hostonly.endswith(name) or host.endswith(name)): return 1 # otherwise, don't bypass |