diff options
author | Filipe Laíns <lains@archlinux.org> | 2020-08-10 15:48:20 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-10 07:48:20 -0700 |
commit | 4ce6faa6c9591de6079347eccc9e61ae4e8d9e31 (patch) | |
tree | 72e6015f5de2f32e283864275cc48b72de8bb969 | |
parent | 39042e00ab01d6521548c1b7cc6554c09f4389ff (diff) | |
download | cpython-4ce6faa6c9591de6079347eccc9e61ae4e8d9e31.tar.gz cpython-4ce6faa6c9591de6079347eccc9e61ae4e8d9e31.zip |
bpo-16995: add support for base32 extended hex (base32hex) (GH-20441)
cc @pganssle
Automerge-Triggered-By: @pganssle
-rw-r--r-- | Doc/library/base64.rst | 23 | ||||
-rw-r--r-- | Doc/whatsnew/3.10.rst | 6 | ||||
-rwxr-xr-x | Lib/base64.py | 86 | ||||
-rw-r--r-- | Lib/test/test_base64.py | 70 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst | 2 |
5 files changed, 155 insertions, 32 deletions
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 1ff22a00d61..2f24bb63912 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -124,7 +124,7 @@ The modern interface provides: whether a lowercase alphabet is acceptable as input. For security purposes, the default is ``False``. - :rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O + :rfc:`4648` allows for optional mapping of the digit 0 (zero) to the letter O (oh), and for optional mapping of the digit 1 (one) to either the letter I (eye) or letter L (el). The optional argument *map01* when not ``None``, specifies which letter the digit 1 should be mapped to (when *map01* is not ``None``, the @@ -136,6 +136,27 @@ The modern interface provides: input. +.. function:: b32hexencode(s) + + Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in + :rfc:`4648`. + + .. versionadded:: 3.10 + + +.. function:: b32hexdecode(s, casefold=False) + + Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in + :rfc:`4648`. + + This version does not allow the digit 0 (zero) to the letter O (oh) and digit + 1 (one) to either the letter I (eye) or letter L (el) mappings, all these + characters are included in the Extended Hex Alphabet and are not + interchangeable. + + .. versionadded:: 3.10 + + .. function:: b16encode(s) Encode the :term:`bytes-like object` *s* using Base16 and return the diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 2af0ea3f4dd..eb5ae01a7c0 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -103,6 +103,12 @@ New Modules Improved Modules ================ +base64 +------ + +Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the +Base32 Encoding with Extended Hex Alphabet. 
+ curses ------ diff --git a/Lib/base64.py b/Lib/base64.py index a28109f8a7f..539ad16f0e8 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -16,7 +16,7 @@ __all__ = [ 'encode', 'decode', 'encodebytes', 'decodebytes', # Generalized interface for other encodings 'b64encode', 'b64decode', 'b32encode', 'b32decode', - 'b16encode', 'b16decode', + 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', # Base85 and Ascii85 encodings 'b85encode', 'b85decode', 'a85encode', 'a85decode', # Standard Base64 encoding @@ -135,19 +135,40 @@ def urlsafe_b64decode(s): # Base32 encoding/decoding must be done in Python +_B32_ENCODE_DOCSTRING = ''' +Encode the bytes-like objects using {encoding} and return a bytes object. +''' +_B32_DECODE_DOCSTRING = ''' +Decode the {encoding} encoded bytes-like object or ASCII string s. + +Optional casefold is a flag specifying whether a lowercase alphabet is +acceptable as input. For security purposes, the default is False. +{extra_args} +The result is returned as a bytes object. A binascii.Error is raised if +the input is incorrectly padded or if there are non-alphabet +characters present in the input. +''' +_B32_DECODE_MAP01_DOCSTRING = ''' +RFC 3548 allows for optional mapping of the digit 0 (zero) to the +letter O (oh), and for optional mapping of the digit 1 (one) to +either the letter I (eye) or letter L (el). The optional argument +map01 when not None, specifies which letter the digit 1 should be +mapped to (when map01 is not None, the digit 0 is always mapped to +the letter O). For security purposes the default is None, so that +0 and 1 are not allowed in the input. +''' _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' -_b32tab2 = None -_b32rev = None +_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' +_b32tab2 = {} +_b32rev = {} -def b32encode(s): - """Encode the bytes-like object s using Base32 and return a bytes object. 
- """ +def _b32encode(alphabet, s): global _b32tab2 # Delay the initialization of the table to not waste memory # if the function is never called - if _b32tab2 is None: - b32tab = [bytes((i,)) for i in _b32alphabet] - _b32tab2 = [a + b for a in b32tab for b in b32tab] + if alphabet not in _b32tab2: + b32tab = [bytes((i,)) for i in alphabet] + _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab] b32tab = None if not isinstance(s, bytes_types): @@ -158,7 +179,7 @@ def b32encode(s): s = s + b'\0' * (5 - leftover) # Don't use += ! encoded = bytearray() from_bytes = int.from_bytes - b32tab2 = _b32tab2 + b32tab2 = _b32tab2[alphabet] for i in range(0, len(s), 5): c = from_bytes(s[i: i + 5], 'big') encoded += (b32tab2[c >> 30] + # bits 1 - 10 @@ -177,29 +198,12 @@ def b32encode(s): encoded[-1:] = b'=' return bytes(encoded) -def b32decode(s, casefold=False, map01=None): - """Decode the Base32 encoded bytes-like object or ASCII string s. - - Optional casefold is a flag specifying whether a lowercase alphabet is - acceptable as input. For security purposes, the default is False. - - RFC 3548 allows for optional mapping of the digit 0 (zero) to the - letter O (oh), and for optional mapping of the digit 1 (one) to - either the letter I (eye) or letter L (el). The optional argument - map01 when not None, specifies which letter the digit 1 should be - mapped to (when map01 is not None, the digit 0 is always mapped to - the letter O). For security purposes the default is None, so that - 0 and 1 are not allowed in the input. - - The result is returned as a bytes object. A binascii.Error is raised if - the input is incorrectly padded or if there are non-alphabet - characters present in the input. 
- """ +def _b32decode(alphabet, s, casefold=False, map01=None): global _b32rev # Delay the initialization of the table to not waste memory # if the function is never called - if _b32rev is None: - _b32rev = {v: k for k, v in enumerate(_b32alphabet)} + if alphabet not in _b32rev: + _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)} s = _bytes_from_decode_data(s) if len(s) % 8: raise binascii.Error('Incorrect padding') @@ -220,7 +224,7 @@ def b32decode(s, casefold=False, map01=None): padchars = l - len(s) # Now decode the full quanta decoded = bytearray() - b32rev = _b32rev + b32rev = _b32rev[alphabet] for i in range(0, len(s), 8): quanta = s[i: i + 8] acc = 0 @@ -241,6 +245,26 @@ def b32decode(s, casefold=False, map01=None): return bytes(decoded) +def b32encode(s): + return _b32encode(_b32alphabet, s) +b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') + +def b32decode(s, casefold=False, map01=None): + return _b32decode(_b32alphabet, s, casefold, map01) +b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', + extra_args=_B32_DECODE_MAP01_DOCSTRING) + +def b32hexencode(s): + return _b32encode(_b32hexalphabet, s) +b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') + +def b32hexdecode(s, casefold=False): + # base32hex does not have the 01 mapping + return _b32decode(_b32hexalphabet, s, casefold) +b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', + extra_args='') + + # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns # lowercase. The RFC also recommends against accepting input case # insensitively. 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 1f67e46cd22..4f62c4115f6 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -351,6 +351,76 @@ class BaseXYTestCase(unittest.TestCase): with self.assertRaises(binascii.Error): base64.b32decode(data.decode('ascii')) + def test_b32hexencode(self): + test_cases = [ + # to_encode, expected + (b'', b''), + (b'\x00', b'00======'), + (b'a', b'C4======'), + (b'ab', b'C5H0===='), + (b'abc', b'C5H66==='), + (b'abcd', b'C5H66P0='), + (b'abcde', b'C5H66P35'), + ] + for to_encode, expected in test_cases: + with self.subTest(to_decode=to_encode): + self.assertEqual(base64.b32hexencode(to_encode), expected) + + def test_b32hexencode_other_types(self): + self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=') + self.check_encode_type_errors(base64.b32hexencode) + + def test_b32hexdecode(self): + test_cases = [ + # to_decode, expected, casefold + (b'', b'', False), + (b'00======', b'\x00', False), + (b'C4======', b'a', False), + (b'C5H0====', b'ab', False), + (b'C5H66===', b'abc', False), + (b'C5H66P0=', b'abcd', False), + (b'C5H66P35', b'abcde', False), + (b'', b'', True), + (b'00======', b'\x00', True), + (b'C4======', b'a', True), + (b'C5H0====', b'ab', True), + (b'C5H66===', b'abc', True), + (b'C5H66P0=', b'abcd', True), + (b'C5H66P35', b'abcde', True), + (b'c4======', b'a', True), + (b'c5h0====', b'ab', True), + (b'c5h66===', b'abc', True), + (b'c5h66p0=', b'abcd', True), + (b'c5h66p35', b'abcde', True), + ] + for to_decode, expected, casefold in test_cases: + with self.subTest(to_decode=to_decode, casefold=casefold): + self.assertEqual(base64.b32hexdecode(to_decode, casefold), + expected) + self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii'), + casefold), expected) + + def test_b32hexdecode_other_types(self): + self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc') + self.check_decode_type_errors(base64.b32hexdecode) + + def test_b32hexdecode_error(self): + 
tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======'] + prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] + for i in range(0, 17): + if i: + tests.append(b'='*i) + for prefix in prefixes: + if len(prefix) + i != 8: + tests.append(prefix + b'='*i) + for data in tests: + with self.subTest(to_decode=data): + with self.assertRaises(binascii.Error): + base64.b32hexdecode(data) + with self.assertRaises(binascii.Error): + base64.b32hexdecode(data.decode('ascii')) + + def test_b16encode(self): eq = self.assertEqual eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF') diff --git a/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst new file mode 100644 index 00000000000..88b95998d08 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst @@ -0,0 +1,2 @@ +Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the +Base32 Encoding with Extended Hex Alphabet. |