aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/urllib/request.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r--Lib/urllib/request.py158
1 files changed, 111 insertions, 47 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 6b299018647..cf065715875 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,7 @@ import re
import socket
import sys
import time
+import collections
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
@@ -114,11 +115,27 @@ else:
__version__ = sys.version[:3]
_opener = None
-def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ *, cafile=None, capath=None):
global _opener
- if _opener is None:
- _opener = build_opener()
- return _opener.open(url, data, timeout)
+ if cafile or capath:
+ if not _have_ssl:
+ raise ValueError('SSL support not available')
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.options |= ssl.OP_NO_SSLv2
+ if cafile or capath:
+ context.verify_mode = ssl.CERT_REQUIRED
+ context.load_verify_locations(cafile, capath)
+ check_hostname = True
+ else:
+ check_hostname = False
+ https_handler = HTTPSHandler(context=context, check_hostname=check_hostname)
+ opener = build_opener(https_handler)
+ elif _opener is None:
+ _opener = opener = build_opener()
+ else:
+ opener = _opener
+ return opener.open(url, data, timeout)
def install_opener(opener):
global _opener
@@ -535,12 +552,11 @@ class HTTPRedirectHandler(BaseHandler):
# For security reasons we don't allow redirection to anything other
# than http, https or ftp.
- if not urlparts.scheme in ('http', 'https', 'ftp'):
- raise HTTPError(newurl, code,
- msg +
- " - Redirection to url '%s' is not allowed" %
- newurl,
- headers, fp)
+ if urlparts.scheme not in ('http', 'https', 'ftp', ''):
+ raise HTTPError(
+ newurl, code,
+ "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
+ headers, fp)
if not urlparts.path:
urlparts = list(urlparts)
@@ -705,7 +721,7 @@ class HTTPPasswordMgr:
# uri could be a single URI or a sequence
if isinstance(uri, str):
uri = [uri]
- if not realm in self.passwd:
+ if realm not in self.passwd:
self.passwd[realm] = {}
for default_port in True, False:
reduced_uri = tuple(
@@ -1045,13 +1061,24 @@ class AbstractHTTPHandler(BaseHandler):
if request.data is not None: # POST
data = request.data
+ if isinstance(data, str):
+ raise TypeError("POST data should be bytes"
+ " or an iterable of bytes. It cannot be str.")
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
if not request.has_header('Content-length'):
- request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
+ try:
+ mv = memoryview(data)
+ except TypeError:
+ if isinstance(data, collections.Iterable):
+ raise ValueError("Content-Length should be specified "
+ "for iterable data of type %r %r" % (type(data),
+ data))
+ else:
+ request.add_unredirected_header(
+ 'Content-length', '%d' % (len(mv) * mv.itemsize))
sel_host = host
if request.has_proxy():
@@ -1066,7 +1093,7 @@ class AbstractHTTPHandler(BaseHandler):
return request
- def do_open(self, http_class, req):
+ def do_open(self, http_class, req, **http_conn_args):
"""Return an HTTPResponse object for the request, using http_class.
http_class must implement the HTTPConnection API from http.client.
@@ -1075,7 +1102,8 @@ class AbstractHTTPHandler(BaseHandler):
if not host:
raise URLError('no host given')
- h = http_class(host, timeout=req.timeout) # will parse host:port
+ # will parse host:port
+ h = http_class(host, timeout=req.timeout, **http_conn_args)
headers = dict(req.unredirected_hdrs)
headers.update(dict((k, v) for k, v in req.headers.items()
@@ -1101,13 +1129,15 @@ class AbstractHTTPHandler(BaseHandler):
# Proxy-Authorization should not be sent to origin
# server.
del headers[proxy_auth_hdr]
- h._set_tunnel(req._tunnel_host, headers=tunnel_headers)
+ h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
try:
h.request(req.get_method(), req.selector, req.data, headers)
- r = h.getresponse() # an HTTPResponse instance
- except socket.error as err:
+ except socket.error as err: # timeout error
+ h.close()
raise URLError(err)
+ else:
+ r = h.getresponse()
r.url = req.get_full_url()
# This line replaces the .msg attribute of the HTTPResponse
@@ -1127,10 +1157,18 @@ class HTTPHandler(AbstractHTTPHandler):
http_request = AbstractHTTPHandler.do_request_
if hasattr(http.client, 'HTTPSConnection'):
+ import ssl
+
class HTTPSHandler(AbstractHTTPHandler):
+ def __init__(self, debuglevel=0, context=None, check_hostname=None):
+ AbstractHTTPHandler.__init__(self, debuglevel)
+ self._context = context
+ self._check_hostname = check_hostname
+
def https_open(self, req):
- return self.do_open(http.client.HTTPSConnection, req)
+ return self.do_open(http.client.HTTPSConnection, req,
+ context=self._context, check_hostname=self._check_hostname)
https_request = AbstractHTTPHandler.do_request_
@@ -1216,8 +1254,8 @@ class FileHandler(BaseHandler):
url = req.selector
if url[:2] == '//' and url[2:3] != '/' and (req.host and
req.host != 'localhost'):
- req.type = 'ftp'
- return self.parent.open(req)
+ if not req.host is self.get_names():
+ raise URLError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(req)
@@ -1323,8 +1361,8 @@ class FTPHandler(BaseHandler):
raise exc.with_traceback(sys.exc_info()[2])
def connect_ftp(self, user, passwd, host, port, dirs, timeout):
- fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
- return fw
+ return ftpwrapper(user, passwd, host, port, dirs, timeout,
+ persistent=False)
class CacheFTPHandler(FTPHandler):
# XXX would be nice to have pluggable cache strategies
@@ -1373,14 +1411,19 @@ class CacheFTPHandler(FTPHandler):
break
self.soonest = min(list(self.timeout.values()))
+ def clear_cache(self):
+ for conn in self.cache.values():
+ conn.close()
+ self.cache.clear()
+ self.timeout.clear()
+
+
# Code move from the old urllib module
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
-if os.name == 'mac':
- from macurl2path import url2pathname, pathname2url
-elif os.name == 'nt':
+if os.name == 'nt':
from nturl2path import url2pathname, pathname2url
else:
def url2pathname(pathname):
@@ -1519,7 +1562,7 @@ class URLopener:
try:
fp = self.open_local_file(url1)
hdrs = fp.info()
- del fp
+ fp.close()
return url2pathname(splithost(url1)[1]), hdrs
except IOError as msg:
pass
@@ -1546,9 +1589,9 @@ class URLopener:
size = -1
read = 0
blocknum = 0
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
if reporthook:
- if "content-length" in headers:
- size = int(headers["Content-Length"])
reporthook(blocknum, bs, size)
while 1:
block = fp.read(bs)
@@ -1563,8 +1606,6 @@ class URLopener:
tfp.close()
finally:
fp.close()
- del fp
- del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
@@ -1620,13 +1661,13 @@ class URLopener:
if not host: raise IOError('http error', 'no host given')
if proxy_passwd:
- import base64
+ proxy_passwd = unquote(proxy_passwd)
proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
else:
proxy_auth = None
if user_passwd:
- import base64
+ user_passwd = unquote(user_passwd)
auth = base64.b64encode(user_passwd.encode()).decode('ascii')
else:
auth = None
@@ -1638,6 +1679,12 @@ class URLopener:
headers["Authorization"] = "Basic %s" % auth
if realhost:
headers["Host"] = realhost
+
+ # Add Connection:close as we don't support persistent connections yet.
+ # This helps in closing the socket and avoiding ResourceWarning
+
+ headers["Connection"] = "close"
+
for header, value in self.addheaders:
headers[header] = value
@@ -1685,7 +1732,6 @@ class URLopener:
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""Default error handler: close the connection and raise IOError."""
- void = fp.read()
fp.close()
raise HTTPError(url, errcode, errmsg, headers, None)
@@ -1704,7 +1750,7 @@ class URLopener:
if not isinstance(url, str):
raise URLError('file error', 'proxy support for file protocol currently not implemented')
if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
- return self.open_ftp(url)
+ raise ValueError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(url)
@@ -1775,7 +1821,7 @@ class URLopener:
del self.ftpcache[k]
v.close()
try:
- if not key in self.ftpcache:
+ if key not in self.ftpcache:
self.ftpcache[key] = \
ftpwrapper(user, passwd, host, port, dirs)
if not file: type = 'D'
@@ -1825,7 +1871,6 @@ class URLopener:
time.gmtime(time.time())))
msg.append('Content-type: %s' % type)
if encoding == 'base64':
- import base64
# XXX is this encoding/decoding ok?
data = base64.decodebytes(data.encode('ascii')).decode('latin1')
else:
@@ -1876,7 +1921,6 @@ class FancyURLopener(URLopener):
newurl = headers['uri']
else:
return
- void = fp.read()
fp.close()
# In case the server sent a relative URL, join with original:
@@ -1890,7 +1934,7 @@ class FancyURLopener(URLopener):
# We are using newer HTTPError with older redirect_internal method
# This older method will get deprecated in 3.3
- if not urlparts.scheme in ('http', 'https', 'ftp'):
+ if urlparts.scheme not in ('http', 'https', 'ftp', ''):
raise HTTPError(newurl, errcode,
errmsg +
" Redirection to url '%s' is not allowed." % newurl,
@@ -1917,7 +1961,7 @@ class FancyURLopener(URLopener):
retry=False):
"""Error 401 -- authentication required.
This function supports Basic authentication only."""
- if not 'www-authenticate' in headers:
+ if 'www-authenticate' not in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['www-authenticate']
@@ -1943,7 +1987,7 @@ class FancyURLopener(URLopener):
retry=False):
"""Error 407 -- proxy authentication required.
This function supports Basic authentication only."""
- if not 'proxy-authenticate' in headers:
+ if 'proxy-authenticate' not in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['proxy-authenticate']
@@ -2068,7 +2112,7 @@ def thishost():
"""Return the IP addresses of the current host."""
global _thishost
if _thishost is None:
- _thishost = tuple(socket.gethostbyname_ex(socket.gethostname()[2]))
+ _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
return _thishost
_ftperrors = None
@@ -2094,13 +2138,16 @@ def noheaders():
class ftpwrapper:
"""Class used by open_ftp() for cache of open FTP connections."""
- def __init__(self, user, passwd, host, port, dirs, timeout=None):
+ def __init__(self, user, passwd, host, port, dirs, timeout=None,
+ persistent=True):
self.user = user
self.passwd = passwd
self.host = host
self.port = port
self.dirs = dirs
self.timeout = timeout
+ self.refcount = 0
+ self.keepalive = persistent
self.init()
def init(self):
@@ -2127,7 +2174,7 @@ class ftpwrapper:
# Try to retrieve as a file
try:
cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
except ftplib.error_perm as reason:
if str(reason)[:3] != '550':
raise URLError('ftp error', reason).with_traceback(
@@ -2148,10 +2195,15 @@ class ftpwrapper:
cmd = 'LIST ' + file
else:
cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
self.busy = 1
+
+ ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
+ self.refcount += 1
+ conn.close()
# Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
+ return (ftpobj, retrlen)
+
def endtransfer(self):
if not self.busy:
return
@@ -2162,6 +2214,17 @@ class ftpwrapper:
pass
def close(self):
+ self.keepalive = False
+ if self.refcount <= 0:
+ self.real_close()
+
+ def file_close(self):
+ self.endtransfer()
+ self.refcount -= 1
+ if self.refcount <= 0 and not self.keepalive:
+ self.real_close()
+
+ def real_close(self):
self.endtransfer()
try:
self.ftp.close()
@@ -2198,7 +2261,8 @@ def proxy_bypass_environment(host):
# strip port off host
hostonly, port = splitport(host)
# check if the host ends with any of the DNS suffixes
- for name in no_proxy.split(','):
+ no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
+ for name in no_proxy_list:
if name and (hostonly.endswith(name) or host.endswith(name)):
return 1
# otherwise, don't bypass