diff options
Diffstat (limited to 'Lib/test/test_urlparse.py')
-rw-r--r-- | Lib/test/test_urlparse.py | 398 |
1 files changed, 190 insertions, 208 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index aabc360289a..b2bde5a9b1d 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -2,6 +2,7 @@ import sys import unicodedata import unittest import urllib.parse +from test import support RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" @@ -156,27 +157,25 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(result3.hostname, result.hostname) self.assertEqual(result3.port, result.port) - def test_qsl(self): - for orig, expect in parse_qsl_test_cases: - result = urllib.parse.parse_qsl(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %r" % orig) - expect_without_blanks = [v for v in expect if len(v[1])] - result = urllib.parse.parse_qsl(orig, keep_blank_values=False) - self.assertEqual(result, expect_without_blanks, - "Error parsing %r" % orig) - - def test_qs(self): - for orig, expect in parse_qs_test_cases: - result = urllib.parse.parse_qs(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %r" % orig) - expect_without_blanks = {v: expect[v] - for v in expect if len(expect[v][0])} - result = urllib.parse.parse_qs(orig, keep_blank_values=False) - self.assertEqual(result, expect_without_blanks, - "Error parsing %r" % orig) - - def test_roundtrips(self): - str_cases = [ + @support.subTests('orig,expect', parse_qsl_test_cases) + def test_qsl(self, orig, expect): + result = urllib.parse.parse_qsl(orig, keep_blank_values=True) + self.assertEqual(result, expect) + expect_without_blanks = [v for v in expect if len(v[1])] + result = urllib.parse.parse_qsl(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks) + + @support.subTests('orig,expect', parse_qs_test_cases) + def test_qs(self, orig, expect): + result = urllib.parse.parse_qs(orig, keep_blank_values=True) + self.assertEqual(result, expect) + expect_without_blanks = {v: expect[v] + for v in expect if len(expect[v][0])} + result = urllib.parse.parse_qs(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,parsed,split', [ ('path/to/file', ('', '', 'path/to/file', '', '', ''), ('', '', 'path/to/file', '', '')), @@ -263,23 +262,21 @@ class UrlParseTestCase(unittest.TestCase): ('sch_me:path/to/file', ('', '', 'sch_me:path/to/file', '', '', ''), ('', '', 'sch_me:path/to/file', '', '')), - ] - def _encode(t): - return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) - bytes_cases = [_encode(x) for x in str_cases] - str_cases += [ ('schème:path/to/file', ('', '', 'schème:path/to/file', '', '', ''), ('', '', 'schème:path/to/file', '', '')), - ] - for url, parsed, split in str_cases + bytes_cases: - with self.subTest(url): - self.checkRoundtrips(url, parsed, split) - - def test_roundtrips_normalization(self): - str_cases = [ + ]) + def test_roundtrips(self, bytes, url, parsed, split): + if bytes: + if not url.isascii(): + self.skipTest('non-ASCII bytes') + url = str_encode(url) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + self.checkRoundtrips(url, parsed, split) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,url2,parsed,split', [ ('///path/to/file', '/path/to/file', ('', '', '/path/to/file', '', '', ''), @@ -300,22 +297,18 @@ class UrlParseTestCase(unittest.TestCase): 'https:///tmp/junk.txt', ('https', '', '/tmp/junk.txt', '', '', ''), ('https', '', '/tmp/junk.txt', '', '')), - ] - def _encode(t): - return (t[0].encode('ascii'), - t[1].encode('ascii'), - tuple(x.encode('ascii') for x in t[2]), - tuple(x.encode('ascii') for x in t[3])) - bytes_cases = [_encode(x) for x in str_cases] - for url, url2, parsed, split in str_cases + bytes_cases: - with self.subTest(url): - self.checkRoundtrips(url, parsed, split, url2) - - def test_http_roundtrips(self): - # urllib.parse.urlsplit treats 'http:' as an optimized special case, - # so we test both 'http:' and 'https:' in all the following. - # Three cheers for white box knowledge! - str_cases = [ + ]) + def test_roundtrips_normalization(self, bytes, url, url2, parsed, split): + if bytes: + url = str_encode(url) + url2 = str_encode(url2) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + self.checkRoundtrips(url, parsed, split, url2) + + @support.subTests('bytes', (False, True)) + @support.subTests('scheme', ('http', 'https')) + @support.subTests('url,parsed,split', [ ('://www.python.org', ('www.python.org', '', '', '', ''), ('www.python.org', '', '', '')), @@ -331,23 +324,20 @@ class UrlParseTestCase(unittest.TestCase): ('://a/b/c/d;p?q#f', ('a', '/b/c/d', 'p', 'q', 'f'), ('a', '/b/c/d;p', 'q', 'f')), - ] - def _encode(t): - return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) - bytes_cases = [_encode(x) for x in str_cases] - str_schemes = ('http', 'https') - bytes_schemes = (b'http', b'https') - str_tests = str_schemes, str_cases - bytes_tests = bytes_schemes, bytes_cases - for schemes, test_cases in (str_tests, bytes_tests): - for scheme in schemes: - for url, parsed, split in test_cases: - url = scheme + url - parsed = (scheme,) + parsed - split = (scheme,) + split - self.checkRoundtrips(url, parsed, split) + ]) + def test_http_roundtrips(self, bytes, scheme, url, parsed, split): + # urllib.parse.urlsplit treats 'http:' as an optimized special case, + # so we test both 'http:' and 'https:' in all the following. + # Three cheers for white box knowledge! + if bytes: + scheme = str_encode(scheme) + url = str_encode(url) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected, *, relroundtrip=True): with self.subTest(base=base, relurl=relurl): @@ -363,12 +353,13 @@ class UrlParseTestCase(unittest.TestCase): relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb)) self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) - def test_unparse_parse(self): - str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] - bytes_cases = [x.encode('ascii') for x in str_cases] - for u in str_cases + bytes_cases: - self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) - self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) + @support.subTests('bytes', (False, True)) + @support.subTests('u', ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]) + def test_unparse_parse(self, bytes, u): + if bytes: + u = str_encode(u) + self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) + self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) def test_RFC1808(self): # "normal" cases from RFC 1808: @@ -695,8 +686,8 @@ class UrlParseTestCase(unittest.TestCase): self.checkJoin('///b/c', '///w', '///w') self.checkJoin('///b/c', 'w', '///b/w') - def test_RFC2732(self): - str_cases = [ + @support.subTests('bytes', (False, True)) + @support.subTests('url,hostname,port', [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ('http://[::1]:5432/foo/', '::1', 5432), @@ -727,26 +718,28 @@ class UrlParseTestCase(unittest.TestCase): ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None), ('http://[::ffff:12.34.56.78]:/foo/', '::ffff:12.34.56.78', None), - ] - def _encode(t): - return t[0].encode('ascii'), t[1].encode('ascii'), t[2] - bytes_cases = [_encode(x) for x in str_cases] - for url, hostname, port in str_cases + bytes_cases: - urlparsed = urllib.parse.urlparse(url) - self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) - - str_cases = [ + ]) + def test_RFC2732(self, bytes, url, hostname, port): + if bytes: + url = str_encode(url) + hostname = str_encode(hostname) + urlparsed = urllib.parse.urlparse(url) + self.assertEqual((urlparsed.hostname, urlparsed.port), (hostname, port)) + + @support.subTests('bytes', (False, True)) + @support.subTests('invalid_url', [ 'http://::12.34.56.78]/', 'http://[::1/foo/', 'ftp://[::1/foo/bad]/bad', 'http://[::1/foo/bad]/bad', - 'http://[::ffff:12.34.56.78'] - bytes_cases = [x.encode('ascii') for x in str_cases] - for invalid_url in str_cases + bytes_cases: - self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) - - def test_urldefrag(self): - str_cases = [ + 'http://[::ffff:12.34.56.78']) + def test_RFC2732_invalid(self, bytes, invalid_url): + if bytes: + invalid_url = str_encode(invalid_url) + self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,defrag,frag', [ ('http://python.org#frag', 'http://python.org', 'frag'), ('http://python.org', 'http://python.org', ''), ('http://python.org/#frag', 'http://python.org/', 'frag'), @@ -770,18 +763,18 @@ class UrlParseTestCase(unittest.TestCase): ('http:?q#f', 'http:?q', 'f'), ('//a/b/c;p?q#f', '//a/b/c;p?q', 'f'), ('://a/b/c;p?q#f', '://a/b/c;p?q', 'f'), - ] - def _encode(t): - return type(t)(x.encode('ascii') for x in t) - bytes_cases = [_encode(x) for x in str_cases] - for url, defrag, frag in str_cases + bytes_cases: - with self.subTest(url): - result = urllib.parse.urldefrag(url) - hash = '#' if isinstance(url, str) else b'#' - self.assertEqual(result.geturl(), url.rstrip(hash)) - self.assertEqual(result, (defrag, frag)) - self.assertEqual(result.url, defrag) - self.assertEqual(result.fragment, frag) + ]) + def test_urldefrag(self, bytes, url, defrag, frag): + if bytes: + url = str_encode(url) + defrag = str_encode(defrag) + frag = str_encode(frag) + result = urllib.parse.urldefrag(url) + hash = '#' if isinstance(url, str) else b'#' + self.assertEqual(result.geturl(), url.rstrip(hash)) + self.assertEqual(result, (defrag, frag)) + self.assertEqual(result.url, defrag) + self.assertEqual(result.fragment, frag) def test_urlsplit_scoped_IPv6(self): p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') @@ -981,42 +974,35 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p.scheme, "https") self.assertEqual(p.geturl(), "https://www.python.org/") - def test_attributes_bad_port(self): + @support.subTests('bytes', (False, True)) + @support.subTests('parse', (urllib.parse.urlsplit, urllib.parse.urlparse)) + @support.subTests('port', ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६")) + def test_attributes_bad_port(self, bytes, parse, port): """Check handling of invalid ports.""" - for bytes in (False, True): - for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"): - with self.subTest(bytes=bytes, parse=parse, port=port): - netloc = "www.example.net:" + port - url = "http://" + netloc + "/" - if bytes: - if netloc.isascii() and port.isascii(): - netloc = netloc.encode("ascii") - url = url.encode("ascii") - else: - continue - p = parse(url) - self.assertEqual(p.netloc, netloc) - with self.assertRaises(ValueError): - p.port + netloc = "www.example.net:" + port + url = "http://" + netloc + "/" + if bytes: + if not (netloc.isascii() and port.isascii()): + self.skipTest('non-ASCII bytes') + netloc = str_encode(netloc) + url = str_encode(url) + p = parse(url) + self.assertEqual(p.netloc, netloc) + with self.assertRaises(ValueError): + p.port - def test_attributes_bad_scheme(self): + @support.subTests('bytes', (False, True)) + @support.subTests('parse', (urllib.parse.urlsplit, urllib.parse.urlparse)) + @support.subTests('scheme', (".", "+", "-", "0", "http&", "६http")) + def test_attributes_bad_scheme(self, bytes, parse, scheme): """Check handling of invalid schemes.""" - for bytes in (False, True): - for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for scheme in (".", "+", "-", "0", "http&", "६http"): - with self.subTest(bytes=bytes, parse=parse, scheme=scheme): - url = scheme + "://www.example.net" - if bytes: - if url.isascii(): - url = url.encode("ascii") - else: - continue - p = parse(url) - if bytes: - self.assertEqual(p.scheme, b"") - else: - self.assertEqual(p.scheme, "") + url = scheme + "://www.example.net" + if bytes: + if not url.isascii(): + self.skipTest('non-ASCII bytes') + url = url.encode("ascii") + p = parse(url) + self.assertEqual(p.scheme, b"" if bytes else "") def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it @@ -1128,24 +1114,21 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) - def test_default_scheme(self): + @support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit)) + def test_default_scheme(self, func): # Exercise the scheme parameter of urlparse() and urlsplit() - for func in (urllib.parse.urlparse, urllib.parse.urlsplit): - with self.subTest(function=func): - result = func("http://example.net/", "ftp") - self.assertEqual(result.scheme, "http") - result = func(b"http://example.net/", b"ftp") - self.assertEqual(result.scheme, b"http") - self.assertEqual(func("path", "ftp").scheme, "ftp") - self.assertEqual(func("path", scheme="ftp").scheme, "ftp") - self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") - self.assertEqual(func("path").scheme, "") - self.assertEqual(func(b"path").scheme, b"") - self.assertEqual(func(b"path", "").scheme, b"") - - def test_parse_fragments(self): - # Exercise the allow_fragments parameter of urlparse() and urlsplit() - tests = ( + result = func("http://example.net/", "ftp") + self.assertEqual(result.scheme, "http") + result = func(b"http://example.net/", b"ftp") + self.assertEqual(result.scheme, b"http") + self.assertEqual(func("path", "ftp").scheme, "ftp") + self.assertEqual(func("path", scheme="ftp").scheme, "ftp") + self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") + self.assertEqual(func("path").scheme, "") + self.assertEqual(func(b"path").scheme, b"") + self.assertEqual(func(b"path", "").scheme, b"") + + @support.subTests('url,attr,expected_frag', ( ("http:#frag", "path", "frag"), ("//example.net#frag", "path", "frag"), ("index.html#frag", "path", "frag"), @@ -1156,24 +1139,24 @@ class UrlParseTestCase(unittest.TestCase): ("//abc#@frag", "path", "@frag"), ("//abc:80#@frag", "path", "@frag"), ("//abc#@frag:80", "path", "@frag:80"), - ) - for url, attr, expected_frag in tests: - for func in (urllib.parse.urlparse, urllib.parse.urlsplit): - if attr == "params" and func is urllib.parse.urlsplit: - attr = "path" - with self.subTest(url=url, function=func): - result = func(url, allow_fragments=False) - self.assertEqual(result.fragment, "") - self.assertEndsWith(getattr(result, attr), - "#" + expected_frag) - self.assertEqual(func(url, "", False).fragment, "") - - result = func(url, allow_fragments=True) - self.assertEqual(result.fragment, expected_frag) - self.assertNotEndsWith(getattr(result, attr), expected_frag) - self.assertEqual(func(url, "", True).fragment, - expected_frag) - self.assertEqual(func(url).fragment, expected_frag) + )) + @support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit)) + def test_parse_fragments(self, url, attr, expected_frag, func): + # Exercise the allow_fragments parameter of urlparse() and urlsplit() + if attr == "params" and func is urllib.parse.urlsplit: + attr = "path" + result = func(url, allow_fragments=False) + self.assertEqual(result.fragment, "") + self.assertEndsWith(getattr(result, attr), + "#" + expected_frag) + self.assertEqual(func(url, "", False).fragment, "") + + result = func(url, allow_fragments=True) + self.assertEqual(result.fragment, expected_frag) + self.assertNotEndsWith(getattr(result, attr), expected_frag) + self.assertEqual(func(url, "", True).fragment, + expected_frag) + self.assertEqual(func(url).fragment, expected_frag) def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes @@ -1199,7 +1182,14 @@ class UrlParseTestCase(unittest.TestCase): with self.assertRaisesRegex(TypeError, "Cannot mix str"): urllib.parse.urljoin(b"http://python.org", "http://python.org") - def _check_result_type(self, str_type): + @support.subTests('result_type', [ + urllib.parse.DefragResult, + urllib.parse.SplitResult, + urllib.parse.ParseResult, + ]) + def test_result_pairs(self, result_type): + # Check encoding and decoding between result pairs + str_type = result_type num_args = len(str_type._fields) bytes_type = str_type._encoded_counterpart self.assertIs(bytes_type._decoded_counterpart, str_type) @@ -1224,16 +1214,6 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(str_result.encode(encoding, errors), bytes_args) self.assertEqual(str_result.encode(encoding, errors), bytes_result) - def test_result_pairs(self): - # Check encoding and decoding between result pairs - result_types = [ - urllib.parse.DefragResult, - urllib.parse.SplitResult, - urllib.parse.ParseResult, - ] - for result_type in result_types: - self._check_result_type(result_type) - def test_parse_qs_encoding(self): result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1") self.assertEqual(result, {'key': ['\u0141\xE9']}) @@ -1265,8 +1245,7 @@ class UrlParseTestCase(unittest.TestCase): urllib.parse.parse_qsl('&'.join(['a=a']*11), max_num_fields=10) urllib.parse.parse_qsl('&'.join(['a=a']*10), max_num_fields=10) - def test_parse_qs_separator(self): - parse_qs_semicolon_cases = [ + @support.subTests('orig,expect', [ (";", {}), (";;", {}), (";a=b", {'a': ['b']}), @@ -1277,17 +1256,14 @@ class UrlParseTestCase(unittest.TestCase): (b";a=b", {b'a': [b'b']}), (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=1;a=2", {b'a': [b'1', b'2']}), - ] - for orig, expect in parse_qs_semicolon_cases: - with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): - result = urllib.parse.parse_qs(orig, separator=';') - self.assertEqual(result, expect, "Error parsing %r" % orig) - result_bytes = urllib.parse.parse_qs(orig, separator=b';') - self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) - - - def test_parse_qsl_separator(self): - parse_qsl_semicolon_cases = [ + ]) + def test_parse_qs_separator(self, orig, expect): + result = urllib.parse.parse_qs(orig, separator=';') + self.assertEqual(result, expect) + result_bytes = urllib.parse.parse_qs(orig, separator=b';') + self.assertEqual(result_bytes, expect) + + @support.subTests('orig,expect', [ (";", []), (";;", []), (";a=b", [('a', 'b')]), @@ -1298,13 +1274,12 @@ class UrlParseTestCase(unittest.TestCase): (b";a=b", [(b'a', b'b')]), (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), - ] - for orig, expect in parse_qsl_semicolon_cases: - with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): - result = urllib.parse.parse_qsl(orig, separator=';') - self.assertEqual(result, expect, "Error parsing %r" % orig) - result_bytes = urllib.parse.parse_qsl(orig, separator=b';') - self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) + ]) + def test_parse_qsl_separator(self, orig, expect): + result = urllib.parse.parse_qsl(orig, separator=';') + self.assertEqual(result, expect) + result_bytes = urllib.parse.parse_qsl(orig, separator=b';') + self.assertEqual(result_bytes, expect) def test_parse_qsl_bytes(self): self.assertEqual(urllib.parse.parse_qsl(b'a=b'), [(b'a', b'b')]) @@ -1695,11 +1670,12 @@ class Utility_Tests(unittest.TestCase): self.assertRaises(UnicodeError, urllib.parse._to_bytes, 'http://www.python.org/medi\u00e6val') - def test_unwrap(self): - for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>', - 'URL:scheme://host/path', 'scheme://host/path'): - url = urllib.parse.unwrap(wrapped_url) - self.assertEqual(url, 'scheme://host/path') + @support.subTests('wrapped_url', + ('<URL:scheme://host/path>', '<scheme://host/path>', + 'URL:scheme://host/path', 'scheme://host/path')) + def test_unwrap(self, wrapped_url): + url = urllib.parse.unwrap(wrapped_url) + self.assertEqual(url, 'scheme://host/path') class DeprecationTest(unittest.TestCase): @@ -1780,5 +1756,11 @@ class DeprecationTest(unittest.TestCase): 'urllib.parse.to_bytes() is deprecated as of 3.8') +def str_encode(s): + return s.encode('ascii') + +def tuple_encode(t): + return tuple(str_encode(x) for x in t) + if __name__ == "__main__": unittest.main() |