From ccad61e35d240972d14f993507566706fbf419f1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 14 Apr 2025 01:49:02 +0100 Subject: GH-125866: Support complete "file:" URLs in urllib (#132378) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add optional *add_scheme* argument to `urllib.request.pathname2url()`; when set to true, a complete URL is returned. Likewise add optional *require_scheme* argument to `url2pathname()`; when set to true, a complete URL is accepted. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/urllib/request.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'Lib/urllib/request.py') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 84c075ec8b3..2c9c7b6ca53 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1466,17 +1466,16 @@ class FileHandler(BaseHandler): def open_local_file(self, req): import email.utils import mimetypes - filename = _splittype(req.full_url)[1] - localfile = url2pathname(filename) + localfile = url2pathname(req.full_url, require_scheme=True) try: stats = os.stat(localfile) size = stats.st_size modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(filename)[0] + mtype = mimetypes.guess_file_type(localfile)[0] headers = email.message_from_string( 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified)) - origurl = f'file:{pathname2url(localfile)}' + origurl = pathname2url(localfile, add_scheme=True) return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: raise URLError(exp, exp.filename) @@ -1635,9 +1634,16 @@ class DataHandler(BaseHandler): # Code move from the old urllib module -def url2pathname(url): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" +def url2pathname(url, *, require_scheme=False): + """Convert the given file URL to a local file system path. + + The 'file:' scheme prefix must be omitted unless *require_scheme* + is set to true. + """ + if require_scheme: + scheme, url = _splittype(url) + if scheme != 'file': + raise URLError("URL is missing a 'file:' scheme") authority, url = _splithost(url) if os.name == 'nt': if not _is_local_authority(authority): @@ -1661,13 +1667,17 @@ def url2pathname(url): return unquote(url, encoding=encoding, errors=errors) -def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" +def pathname2url(pathname, *, add_scheme=False): + """Convert the given local file system path to a file URL. + + The 'file:' scheme prefix is omitted unless *add_scheme* + is set to true. + """ if os.name == 'nt': pathname = pathname.replace('\\', '/') encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() + scheme = 'file:' if add_scheme else '' drive, root, tail = os.path.splitroot(pathname) if drive: # First, clean up some special forms. We are going to sacrifice the @@ -1689,7 +1699,7 @@ def pathname2url(pathname): # avoids interpreting the path as a URL authority. root = '//' + root tail = quote(tail, encoding=encoding, errors=errors) - return drive + root + tail + return scheme + drive + root + tail # Utility functions -- cgit v1.2.3