diff options
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r-- | Lib/urllib/request.py | 313 |
1 files changed, 142 insertions, 171 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 537b765a12c..f769386e0e4 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -18,7 +18,7 @@ urlopen(url, data=None) -- Basic usage is the same as original urllib. pass the url and optionally data to post to an HTTP URL, and get a file-like object back. One difference is that you can also pass a Request instance instead of URL. Raises a URLError (subclass of -IOError); for HTTP errors, raises an HTTPError, which can also be +OSError); for HTTP errors, raises an HTTPError, which can also be treated as a valid response. build_opener -- Function that creates a new OpenerDirector instance. @@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, splittype, splithost, splitport, splituser, splitpasswd, - splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) + splitattr, splitquery, splitvalue, splittag, to_bytes, + unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook # check for SSL @@ -121,7 +122,7 @@ __all__ = [ 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', - 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', 'UnknownHandler', 'HTTPErrorProcessor', # Functions 'urlopen', 'install_opener', 'build_opener', @@ -135,19 +136,23 @@ __version__ = sys.version[:3] _opener = None def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - *, cafile=None, capath=None, cadefault=False): + *, cafile=None, capath=None, cadefault=False, context=None): global _opener if cafile or capath or cadefault: + if context is not None: + raise ValueError( + "You can't pass both context and any of cafile, capath, and " + "cadefault" + ) if not _have_ssl: raise ValueError('SSL support not available') - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - context.options |= ssl.OP_NO_SSLv2 - context.verify_mode = ssl.CERT_REQUIRED - if cafile or capath: - context.load_verify_locations(cafile, capath) - else: - context.set_default_verify_paths() - https_handler = HTTPSHandler(context=context, check_hostname=True) + context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH, + cafile=cafile, + capath=capath) + https_handler = HTTPSHandler(context=context) + opener = build_opener(https_handler) + elif context: + https_handler = HTTPSHandler(context=context) opener = build_opener(https_handler) elif _opener is None: _opener = opener = build_opener() @@ -224,10 +229,11 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): return result def urlcleanup(): + """Clean up temporary files from urlretrieve calls.""" for temp_file in _url_tempfiles: try: os.unlink(temp_file) - except EnvironmentError: + except OSError: pass del _url_tempfiles[:] @@ -258,24 +264,60 @@ class Request: def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False, method=None): - # unwrap('<URL:type://host/path>') --> 'type://host/path' - self.full_url = unwrap(url) - self.full_url, self.fragment = splittag(self.full_url) - self.data = data + self.full_url = url self.headers = {} + self.unredirected_hdrs = {} + self._data = None + self.data = data self._tunnel_host = None for key, value in headers.items(): self.add_header(key, value) - self.unredirected_hdrs = {} if origin_req_host is None: origin_req_host = request_host(self) self.origin_req_host = origin_req_host self.unverifiable = unverifiable - self.method = method + if method: + self.method = method + + @property + def full_url(self): + if self.fragment: + return '{}#{}'.format(self._full_url, self.fragment) + return self._full_url + + @full_url.setter + def full_url(self, url): + # unwrap('<URL:type://host/path>') --> 'type://host/path' + self._full_url = unwrap(url) + self._full_url, self.fragment = splittag(self._full_url) self._parse() + @full_url.deleter + def full_url(self): + self._full_url = None + self.fragment = None + self.selector = '' + + @property + def data(self): + return self._data + + @data.setter + def data(self, data): + if data != self._data: + self._data = data + # issue 16464 + # if we change data we need to remove content-length header + # (cause it's most probably calculated for previous value) + if self.has_header("Content-length"): + self.remove_header("Content-length") + + @data.deleter + def data(self): + self.data = None + def _parse(self): - self.type, rest = splittype(self.full_url) + self.type, rest = splittype(self._full_url) if self.type is None: raise ValueError("unknown url type: %r" % self.full_url) self.host, self.selector = splithost(rest) @@ -284,62 +326,11 @@ class Request: def get_method(self): """Return a string indicating the HTTP request method.""" - if self.method is not None: - return self.method - elif self.data is not None: - return "POST" - else: - return "GET" + default_method = "POST" if self.data is not None else "GET" + return getattr(self, 'method', default_method) def get_full_url(self): - if self.fragment: - return '%s#%s' % (self.full_url, self.fragment) - else: - return self.full_url - - # Begin deprecated methods - - def add_data(self, data): - msg = "Request.add_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - self.data = data - - def has_data(self): - msg = "Request.has_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.data is not None - - def get_data(self): - msg = "Request.get_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.data - - def get_type(self): - msg = "Request.get_type method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.type - - def get_host(self): - msg = "Request.get_host method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.host - - def get_selector(self): - msg = "Request.get_selector method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.selector - - def is_unverifiable(self): - msg = "Request.is_unverifiable method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.unverifiable - - def get_origin_req_host(self): - msg = "Request.get_origin_req_host method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.origin_req_host - - # End deprecated methods + return self.full_url def set_proxy(self, host, type): if self.type == 'https' and not self._tunnel_host: @@ -369,6 +360,10 @@ class Request: header_name, self.unredirected_hdrs.get(header_name, default)) + def remove_header(self, header_name): + self.headers.pop(header_name, None) + self.unredirected_hdrs.pop(header_name, None) + def header_items(self): hdrs = self.unredirected_hdrs.copy() hdrs.update(self.headers) @@ -525,19 +520,17 @@ def build_opener(*handlers): If any of the handlers passed as arguments are subclasses of the default handlers, the default handlers will not be used. """ - def isclass(obj): - return isinstance(obj, type) or hasattr(obj, "__bases__") - opener = OpenerDirector() default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, - FTPHandler, FileHandler, HTTPErrorProcessor] + FTPHandler, FileHandler, HTTPErrorProcessor, + DataHandler] if hasattr(http.client, "HTTPSConnection"): default_classes.append(HTTPSHandler) skip = set() for klass in default_classes: for check in handlers: - if isclass(check): + if isinstance(check, type): if issubclass(check, klass): skip.add(klass) elif isinstance(check, klass): @@ -549,7 +542,7 @@ def build_opener(*handlers): opener.add_handler(klass()) for h in handlers: - if isclass(h): + if isinstance(h, type): h = h() opener.add_handler(h) return opener @@ -703,50 +696,7 @@ def _parse_proxy(proxy): If a URL is supplied, it must have an authority (host:port) component. According to RFC 3986, having an authority component means the URL must - have two slashes after the scheme: - - >>> _parse_proxy('file:/ftp.example.com/') - Traceback (most recent call last): - ValueError: proxy URL with no authority: 'file:/ftp.example.com/' - - The first three items of the returned tuple may be None. - - Examples of authority parsing: - - >>> _parse_proxy('proxy.example.com') - (None, None, None, 'proxy.example.com') - >>> _parse_proxy('proxy.example.com:3128') - (None, None, None, 'proxy.example.com:3128') - - The authority component may optionally include userinfo (assumed to be - username:password): - - >>> _parse_proxy('joe:password@proxy.example.com') - (None, 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('joe:password@proxy.example.com:3128') - (None, 'joe', 'password', 'proxy.example.com:3128') - - Same examples, but with URLs instead: - - >>> _parse_proxy('http://proxy.example.com/') - ('http', None, None, 'proxy.example.com') - >>> _parse_proxy('http://proxy.example.com:3128/') - ('http', None, None, 'proxy.example.com:3128') - >>> _parse_proxy('http://joe:password@proxy.example.com/') - ('http', 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('http://joe:password@proxy.example.com:3128') - ('http', 'joe', 'password', 'proxy.example.com:3128') - - Everything after the authority is ignored: - - >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') - ('ftp', 'joe', 'password', 'proxy.example.com') - - Test for no trailing '/' case: - - >>> _parse_proxy('http://joe:password@proxy.example.com') - ('http', 'joe', 'password', 'proxy.example.com') - + have two slashes after the scheme. """ scheme, r_scheme = splittype(proxy) if not r_scheme.startswith("/"): @@ -905,10 +855,6 @@ class AbstractBasicAuthHandler: password_mgr = HTTPPasswordMgr() self.passwd = password_mgr self.add_password = self.passwd.add_password - self.retried = 0 - - def reset_retry_count(self): - self.retried = 0 def http_error_auth_reqed(self, authreq, host, req, headers): # host may be an authority (without userinfo) or a URL with an @@ -916,13 +862,6 @@ class AbstractBasicAuthHandler: # XXX could be multiple headers authreq = headers.get(authreq, None) - if self.retried > 5: - # retry sending the username:password 5 times before failing. - raise HTTPError(req.get_full_url(), 401, "basic auth failed", - headers, None) - else: - self.retried += 1 - if authreq: scheme = authreq.split()[0] if scheme.lower() != 'basic': @@ -937,17 +876,14 @@ class AbstractBasicAuthHandler: warnings.warn("Basic Auth Realm was unquoted", UserWarning, 2) if scheme.lower() == 'basic': - response = self.retry_http_basic_auth(host, req, realm) - if response and response.code != 401: - self.retried = 0 - return response + return self.retry_http_basic_auth(host, req, realm) def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) if pw is not None: raw = "%s:%s" % (user, pw) auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii") - if req.headers.get(self.auth_header, None) == auth: + if req.get_header(self.auth_header, None) == auth: return None req.add_unredirected_header(self.auth_header, auth) return self.parent.open(req, timeout=req.timeout) @@ -963,7 +899,6 @@ class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): url = req.full_url response = self.http_error_auth_reqed('www-authenticate', url, req, headers) - self.reset_retry_count() return response @@ -979,7 +914,6 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): authority = req.host response = self.http_error_auth_reqed('proxy-authenticate', authority, req, headers) - self.reset_retry_count() return response @@ -1245,18 +1179,21 @@ class AbstractHTTPHandler(BaseHandler): h.set_tunnel(req._tunnel_host, headers=tunnel_headers) try: - h.request(req.get_method(), req.selector, req.data, headers) - except socket.error as err: # timeout error - h.close() - raise URLError(err) - else: + try: + h.request(req.get_method(), req.selector, req.data, headers) + except OSError as err: # timeout error + raise URLError(err) r = h.getresponse() - # If the server does not send us a 'Connection: close' header, - # HTTPConnection assumes the socket should be left open. Manually - # mark the socket to be closed when this response object goes away. - if h.sock: - h.sock.close() - h.sock = None + except: + h.close() + raise + + # If the server does not send us a 'Connection: close' header, + # HTTPConnection assumes the socket should be left open. Manually + # mark the socket to be closed when this response object goes away. + if h.sock: + h.sock.close() + h.sock = None r.url = req.get_full_url() # This line replaces the .msg attribute of the HTTPResponse @@ -1374,7 +1311,7 @@ class FileHandler(BaseHandler): url = req.selector if url[:2] == '//' and url[2:3] != '/' and (req.host and req.host != 'localhost'): - if not req.host is self.get_names(): + if not req.host in self.get_names(): raise URLError("file:// scheme is supported only on localhost") else: return self.open_local_file(req) @@ -1451,7 +1388,7 @@ class FTPHandler(BaseHandler): try: host = socket.gethostbyname(host) - except socket.error as msg: + except OSError as msg: raise URLError(msg) path, attrs = splitattr(req.selector) dirs = path.split('/') @@ -1537,6 +1474,36 @@ class CacheFTPHandler(FTPHandler): self.cache.clear() self.timeout.clear() +class DataHandler(BaseHandler): + def data_open(self, req): + # data URLs as specified in RFC 2397. + # + # ignores POSTed data + # + # syntax: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + url = req.full_url + + scheme, data = url.split(":",1) + mediatype, data = data.split(",",1) + + # even base64 encoded data URLs might be quoted so unquote in any case: + data = unquote_to_bytes(data) + if mediatype.endswith(";base64"): + data = base64.decodebytes(data) + mediatype = mediatype[:-7] + + if not mediatype: + mediatype = "text/plain;charset=US-ASCII" + + headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" % + (mediatype, len(data))) + + return addinfourl(io.BytesIO(data), headers, url) + # Code move from the old urllib module @@ -1660,20 +1627,20 @@ class URLopener: return getattr(self, name)(url) else: return getattr(self, name)(url, data) - except HTTPError: + except (HTTPError, URLError): raise - except socket.error as msg: - raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) + except OSError as msg: + raise OSError('socket error', msg).with_traceback(sys.exc_info()[2]) def open_unknown(self, fullurl, data=None): """Overridable interface to open unknown URL type.""" type, url = splittype(fullurl) - raise IOError('url error', 'unknown url type', type) + raise OSError('url error', 'unknown url type', type) def open_unknown_proxy(self, proxy, fullurl, data=None): """Overridable interface to open unknown URL type.""" type, url = splittype(fullurl) - raise IOError('url error', 'invalid proxy for %s' % type, proxy) + raise OSError('url error', 'invalid proxy for %s' % type, proxy) # External interface def retrieve(self, url, filename=None, reporthook=None, data=None): @@ -1689,7 +1656,7 @@ class URLopener: hdrs = fp.info() fp.close() return url2pathname(splithost(url1)[1]), hdrs - except IOError as msg: + except OSError as msg: pass fp = self.open(url, data) try: @@ -1782,7 +1749,7 @@ class URLopener: if proxy_bypass(realhost): host = realhost - if not host: raise IOError('http error', 'no host given') + if not host: raise OSError('http error', 'no host given') if proxy_passwd: proxy_passwd = unquote(proxy_passwd) @@ -1855,7 +1822,7 @@ class URLopener: return self.http_error_default(url, fp, errcode, errmsg, headers) def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise IOError.""" + """Default error handler: close the connection and raise OSError.""" fp.close() raise HTTPError(url, errcode, errmsg, headers, None) @@ -1940,7 +1907,7 @@ class URLopener: # XXX thread unsafe! if len(self.ftpcache) > MAXFTPCACHE: # Prune the cache, rather arbitrarily - for k in self.ftpcache.keys(): + for k in list(self.ftpcache): if k != key: v = self.ftpcache[k] del self.ftpcache[k] @@ -1982,7 +1949,7 @@ class URLopener: try: [type, data] = url.split(',', 1) except ValueError: - raise IOError('data error', 'bad data URL') + raise OSError('data error', 'bad data URL') if not type: type = 'text/plain;charset=US-ASCII' semi = type.rfind(';') @@ -2274,7 +2241,11 @@ class ftpwrapper: self.timeout = timeout self.refcount = 0 self.keepalive = persistent - self.init() + try: + self.init() + except: + self.close() + raise def init(self): import ftplib @@ -2438,7 +2409,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): try: hostIP = socket.gethostbyname(hostonly) hostIP = ip2num(hostIP) - except socket.error: + except OSError: continue base = ip2num(m.group(1)) @@ -2524,7 +2495,7 @@ elif os.name == 'nt': proxies['https'] = 'https://%s' % proxyServer proxies['ftp'] = 'ftp://%s' % proxyServer internetSettings.Close() - except (WindowsError, ValueError, TypeError): + except (OSError, ValueError, TypeError): # Either registry key not found etc, or the value in an # unexpected format. # proxies already set up to be empty so nothing to do @@ -2554,7 +2525,7 @@ elif os.name == 'nt': proxyOverride = str(winreg.QueryValueEx(internetSettings, 'ProxyOverride')[0]) # ^^^^ Returned as Unicode but problems if not converted to ASCII - except WindowsError: + except OSError: return 0 if not proxyEnable or not proxyOverride: return 0 @@ -2565,13 +2536,13 @@ elif os.name == 'nt': addr = socket.gethostbyname(rawHost) if addr != rawHost: host.append(addr) - except socket.error: + except OSError: pass try: fqdn = socket.getfqdn(rawHost) if fqdn != rawHost: host.append(fqdn) - except socket.error: + except OSError: pass # make a check value list from the registry entry: replace the # '<local>' string by the localhost entry and the corresponding |