diff options
author | Barney Gale <barney.gale@gmail.com> | 2025-03-18 23:37:12 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-18 23:37:12 +0000 |
commit | d783d7b51d31db568de6b3438f4e805acff663da (patch) | |
tree | 67d3396e504207904878b2baefd4513aa3db2f57 | |
parent | 01b5abbc53b2a9ee8d85e0518c98efce27dbd061 (diff) | |
download | cpython-d783d7b51d31db568de6b3438f4e805acff663da.tar.gz cpython-d783d7b51d31db568de6b3438f4e805acff663da.zip |
GH-126367: `url2pathname()`: handle NTFS alternate data streams (#131428)

Adjust `url2pathname()` to decode embedded colon characters in Windows
URIs, rather than bailing out with an `OSError`.
-rw-r--r-- | Doc/library/urllib.request.rst | 4 | ||||
-rw-r--r-- | Lib/nturl2path.py | 23 | ||||
-rw-r--r-- | Lib/test/test_urllib.py | 7 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst | 3 |
4 files changed, 20 insertions, 17 deletions
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 969e7daea71..14785d21e74 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions: 'C:\\Program Files' .. versionchanged:: 3.14 - Windows drive letters are no longer converted to uppercase. + Windows drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an + :exc:`OSError` exception to be raised on Windows. .. function:: getproxies() diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 7e13ae31283..7b5b82068e9 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -14,7 +14,7 @@ def url2pathname(url): # ///C:/foo/bar/spam.foo # become # C:\foo\bar\spam.foo - import string, urllib.parse + import urllib.parse if url[:3] == '///': # URL has an empty authority section, so the path begins on the third # character. @@ -25,19 +25,14 @@ def url2pathname(url): if url[:3] == '///': # Skip past extra slash before UNC drive in URL path. url = url[1:] - # Windows itself uses ":" even in URLs. - url = url.replace(':', '|') - if not '|' in url: - # No drive specifier, just convert slashes - # make sure not to convert quoted slashes :-) - return urllib.parse.unquote(url.replace('/', '\\')) - comp = url.split('|') - if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: - error = 'Bad URL: ' + url - raise OSError(error) - drive = comp[0][-1] - tail = urllib.parse.unquote(comp[1].replace('/', '\\')) - return drive + ':' + tail + else: + if url[:1] == '/' and url[2:3] in (':', '|'): + # Skip past extra slash before DOS drive in URL path. 
+ url = url[1:] + if url[1:2] == '|': + # Older URLs use a pipe after a drive letter + url = url[:1] + ':' + url[2:] + return urllib.parse.unquote(url.replace('/', '\\')) def pathname2url(p): """OS-specific conversion from a file system path to a relative URL diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 4842428d6fd..ed23215c4d0 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1484,6 +1484,7 @@ class Pathname_Tests(unittest.TestCase): 'test specific to Windows pathnames.') def test_url2pathname_win(self): fn = urllib.request.url2pathname + self.assertEqual(fn('/'), '\\') self.assertEqual(fn('/C:/'), 'C:\\') self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C:"), 'C:') @@ -1502,8 +1503,10 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo') - # Non-ASCII drive letter - self.assertRaises(IOError, fn, "///\u00e8|/") + # Colons in URI + self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\') + self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs') + self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs') # UNC paths self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') diff --git a/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst new file mode 100644 index 00000000000..cebfefbda48 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst @@ -0,0 +1,3 @@ +Fix issue where :func:`urllib.request.url2pathname` raised :exc:`OSError` +when given a Windows URI containing a colon character not following a drive +letter, such as before an NTFS alternate data stream. |