about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2025-03-18 23:37:12 +0000
committer: GitHub <noreply@github.com> 2025-03-18 23:37:12 +0000
commit: d783d7b51d31db568de6b3438f4e805acff663da (patch)
tree: 67d3396e504207904878b2baefd4513aa3db2f57
parent: 01b5abbc53b2a9ee8d85e0518c98efce27dbd061 (diff)
download: cpython-d783d7b51d31db568de6b3438f4e805acff663da.tar.gz
download: cpython-d783d7b51d31db568de6b3438f4e805acff663da.zip
GH-126367: `url2pathname()`: handle NTFS alternate data streams (#131428)
Adjust `url2pathname()` to decode embedded colon characters in Windows URIs, rather than bailing out with an `OSError`.
-rw-r--r-- Doc/library/urllib.request.rst 4
-rw-r--r-- Lib/nturl2path.py 23
-rw-r--r-- Lib/test/test_urllib.py 7
-rw-r--r-- Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst 3
4 files changed, 20 insertions, 17 deletions
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 969e7daea71..14785d21e74 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions:
'C:\\Program Files'
.. versionchanged:: 3.14
- Windows drive letters are no longer converted to uppercase.
+ Windows drive letters are no longer converted to uppercase, and ``:``
+ characters not following a drive letter no longer cause an
+ :exc:`OSError` exception to be raised on Windows.
.. function:: getproxies()
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 7e13ae31283..7b5b82068e9 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -14,7 +14,7 @@ def url2pathname(url):
# ///C:/foo/bar/spam.foo
# become
# C:\foo\bar\spam.foo
- import string, urllib.parse
+ import urllib.parse
if url[:3] == '///':
# URL has an empty authority section, so the path begins on the third
# character.
@@ -25,19 +25,14 @@ def url2pathname(url):
if url[:3] == '///':
# Skip past extra slash before UNC drive in URL path.
url = url[1:]
- # Windows itself uses ":" even in URLs.
- url = url.replace(':', '|')
- if not '|' in url:
- # No drive specifier, just convert slashes
- # make sure not to convert quoted slashes :-)
- return urllib.parse.unquote(url.replace('/', '\\'))
- comp = url.split('|')
- if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
- error = 'Bad URL: ' + url
- raise OSError(error)
- drive = comp[0][-1]
- tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
- return drive + ':' + tail
+ else:
+ if url[:1] == '/' and url[2:3] in (':', '|'):
+ # Skip past extra slash before DOS drive in URL path.
+ url = url[1:]
+ if url[1:2] == '|':
+ # Older URLs use a pipe after a drive letter
+ url = url[:1] + ':' + url[2:]
+ return urllib.parse.unquote(url.replace('/', '\\'))
def pathname2url(p):
"""OS-specific conversion from a file system path to a relative URL
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 4842428d6fd..ed23215c4d0 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1484,6 +1484,7 @@ class Pathname_Tests(unittest.TestCase):
'test specific to Windows pathnames.')
def test_url2pathname_win(self):
fn = urllib.request.url2pathname
+ self.assertEqual(fn('/'), '\\')
self.assertEqual(fn('/C:/'), 'C:\\')
self.assertEqual(fn("///C|"), 'C:')
self.assertEqual(fn("///C:"), 'C:')
@@ -1502,8 +1503,10 @@ class Pathname_Tests(unittest.TestCase):
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
- # Non-ASCII drive letter
- self.assertRaises(IOError, fn, "///\u00e8|/")
+ # Colons in URI
+ self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
+ self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
+ self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
# UNC paths
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
diff --git a/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
new file mode 100644
index 00000000000..cebfefbda48
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-03-18-19-52-49.gh-issue-126367.PRxnuu.rst
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.url2pathname` raised :exc:`OSError`
+when given a Windows URI containing a colon character not following a drive
+letter, such as before an NTFS alternate data stream.