aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorFilipe Laíns <lains@archlinux.org>2020-08-10 15:48:20 +0100
committerGitHub <noreply@github.com>2020-08-10 07:48:20 -0700
commit4ce6faa6c9591de6079347eccc9e61ae4e8d9e31 (patch)
tree72e6015f5de2f32e283864275cc48b72de8bb969
parent39042e00ab01d6521548c1b7cc6554c09f4389ff (diff)
downloadcpython-4ce6faa6c9591de6079347eccc9e61ae4e8d9e31.tar.gz
cpython-4ce6faa6c9591de6079347eccc9e61ae4e8d9e31.zip
bpo-16995: add support for base32 extended hex (base32hex) (GH-20441)
cc @pganssle Automerge-Triggered-By: @pganssle
-rw-r--r--Doc/library/base64.rst23
-rw-r--r--Doc/whatsnew/3.10.rst6
-rwxr-xr-xLib/base64.py86
-rw-r--r--Lib/test/test_base64.py70
-rw-r--r--Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst2
5 files changed, 155 insertions, 32 deletions
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 1ff22a00d61..2f24bb63912 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -124,7 +124,7 @@ The modern interface provides:
whether a lowercase alphabet is acceptable as input. For security purposes,
the default is ``False``.
- :rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O
+ :rfc:`4648` allows for optional mapping of the digit 0 (zero) to the letter O
(oh), and for optional mapping of the digit 1 (one) to either the letter I (eye)
or letter L (el). The optional argument *map01* when not ``None``, specifies
which letter the digit 1 should be mapped to (when *map01* is not ``None``, the
@@ -136,6 +136,27 @@ The modern interface provides:
input.
+.. function:: b32hexencode(s)
+
+ Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in
+ :rfc:`4648`.
+
+ .. versionadded:: 3.10
+
+
+.. function:: b32hexdecode(s, casefold=False)
+
+ Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in
+ :rfc:`4648`.
+
+ This version does not allow the digit 0 (zero) to the letter O (oh) and digit
+ 1 (one) to either the letter I (eye) or letter L (el) mappings, because all
+ these characters are included in the Extended Hex Alphabet and are not
+ interchangeable.
+
+ .. versionadded:: 3.10
+
+
.. function:: b16encode(s)
Encode the :term:`bytes-like object` *s* using Base16 and return the
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 2af0ea3f4dd..eb5ae01a7c0 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -103,6 +103,12 @@ New Modules
Improved Modules
================
+base64
+------
+
+Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
+Base32 Encoding with Extended Hex Alphabet.
+
curses
------
diff --git a/Lib/base64.py b/Lib/base64.py
index a28109f8a7f..539ad16f0e8 100755
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -16,7 +16,7 @@ __all__ = [
'encode', 'decode', 'encodebytes', 'decodebytes',
# Generalized interface for other encodings
'b64encode', 'b64decode', 'b32encode', 'b32decode',
- 'b16encode', 'b16decode',
+ 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
# Base85 and Ascii85 encodings
'b85encode', 'b85decode', 'a85encode', 'a85decode',
# Standard Base64 encoding
@@ -135,19 +135,40 @@ def urlsafe_b64decode(s):
# Base32 encoding/decoding must be done in Python
+_B32_ENCODE_DOCSTRING = '''
+Encode the bytes-like objects using {encoding} and return a bytes object.
+'''
+_B32_DECODE_DOCSTRING = '''
+Decode the {encoding} encoded bytes-like object or ASCII string s.
+
+Optional casefold is a flag specifying whether a lowercase alphabet is
+acceptable as input. For security purposes, the default is False.
+{extra_args}
+The result is returned as a bytes object. A binascii.Error is raised if
+the input is incorrectly padded or if there are non-alphabet
+characters present in the input.
+'''
+_B32_DECODE_MAP01_DOCSTRING = '''
+RFC 3548 allows for optional mapping of the digit 0 (zero) to the
+letter O (oh), and for optional mapping of the digit 1 (one) to
+either the letter I (eye) or letter L (el). The optional argument
+map01 when not None, specifies which letter the digit 1 should be
+mapped to (when map01 is not None, the digit 0 is always mapped to
+the letter O). For security purposes the default is None, so that
+0 and 1 are not allowed in the input.
+'''
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
-_b32tab2 = None
-_b32rev = None
+_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
+_b32tab2 = {}
+_b32rev = {}
-def b32encode(s):
- """Encode the bytes-like object s using Base32 and return a bytes object.
- """
+def _b32encode(alphabet, s):
global _b32tab2
# Delay the initialization of the table to not waste memory
# if the function is never called
- if _b32tab2 is None:
- b32tab = [bytes((i,)) for i in _b32alphabet]
- _b32tab2 = [a + b for a in b32tab for b in b32tab]
+ if alphabet not in _b32tab2:
+ b32tab = [bytes((i,)) for i in alphabet]
+ _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
b32tab = None
if not isinstance(s, bytes_types):
@@ -158,7 +179,7 @@ def b32encode(s):
s = s + b'\0' * (5 - leftover) # Don't use += !
encoded = bytearray()
from_bytes = int.from_bytes
- b32tab2 = _b32tab2
+ b32tab2 = _b32tab2[alphabet]
for i in range(0, len(s), 5):
c = from_bytes(s[i: i + 5], 'big')
encoded += (b32tab2[c >> 30] + # bits 1 - 10
@@ -177,29 +198,12 @@ def b32encode(s):
encoded[-1:] = b'='
return bytes(encoded)
-def b32decode(s, casefold=False, map01=None):
- """Decode the Base32 encoded bytes-like object or ASCII string s.
-
- Optional casefold is a flag specifying whether a lowercase alphabet is
- acceptable as input. For security purposes, the default is False.
-
- RFC 3548 allows for optional mapping of the digit 0 (zero) to the
- letter O (oh), and for optional mapping of the digit 1 (one) to
- either the letter I (eye) or letter L (el). The optional argument
- map01 when not None, specifies which letter the digit 1 should be
- mapped to (when map01 is not None, the digit 0 is always mapped to
- the letter O). For security purposes the default is None, so that
- 0 and 1 are not allowed in the input.
-
- The result is returned as a bytes object. A binascii.Error is raised if
- the input is incorrectly padded or if there are non-alphabet
- characters present in the input.
- """
+def _b32decode(alphabet, s, casefold=False, map01=None):
global _b32rev
# Delay the initialization of the table to not waste memory
# if the function is never called
- if _b32rev is None:
- _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
+ if alphabet not in _b32rev:
+ _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
s = _bytes_from_decode_data(s)
if len(s) % 8:
raise binascii.Error('Incorrect padding')
@@ -220,7 +224,7 @@ def b32decode(s, casefold=False, map01=None):
padchars = l - len(s)
# Now decode the full quanta
decoded = bytearray()
- b32rev = _b32rev
+ b32rev = _b32rev[alphabet]
for i in range(0, len(s), 8):
quanta = s[i: i + 8]
acc = 0
@@ -241,6 +245,26 @@ def b32decode(s, casefold=False, map01=None):
return bytes(decoded)
+def b32encode(s):
+ return _b32encode(_b32alphabet, s)
+b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
+
+def b32decode(s, casefold=False, map01=None):
+ return _b32decode(_b32alphabet, s, casefold, map01)
+b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
+ extra_args=_B32_DECODE_MAP01_DOCSTRING)
+
+def b32hexencode(s):
+ return _b32encode(_b32hexalphabet, s)
+b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
+
+def b32hexdecode(s, casefold=False):
+ # base32hex does not have the 01 mapping
+ return _b32decode(_b32hexalphabet, s, casefold)
+b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
+ extra_args='')
+
+
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase. The RFC also recommends against accepting input case
# insensitively.
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 1f67e46cd22..4f62c4115f6 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -351,6 +351,76 @@ class BaseXYTestCase(unittest.TestCase):
with self.assertRaises(binascii.Error):
base64.b32decode(data.decode('ascii'))
+ def test_b32hexencode(self):
+ test_cases = [
+ # to_encode, expected
+ (b'', b''),
+ (b'\x00', b'00======'),
+ (b'a', b'C4======'),
+ (b'ab', b'C5H0===='),
+ (b'abc', b'C5H66==='),
+ (b'abcd', b'C5H66P0='),
+ (b'abcde', b'C5H66P35'),
+ ]
+ for to_encode, expected in test_cases:
+ with self.subTest(to_decode=to_encode):
+ self.assertEqual(base64.b32hexencode(to_encode), expected)
+
+ def test_b32hexencode_other_types(self):
+ self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=')
+ self.check_encode_type_errors(base64.b32hexencode)
+
+ def test_b32hexdecode(self):
+ test_cases = [
+ # to_decode, expected, casefold
+ (b'', b'', False),
+ (b'00======', b'\x00', False),
+ (b'C4======', b'a', False),
+ (b'C5H0====', b'ab', False),
+ (b'C5H66===', b'abc', False),
+ (b'C5H66P0=', b'abcd', False),
+ (b'C5H66P35', b'abcde', False),
+ (b'', b'', True),
+ (b'00======', b'\x00', True),
+ (b'C4======', b'a', True),
+ (b'C5H0====', b'ab', True),
+ (b'C5H66===', b'abc', True),
+ (b'C5H66P0=', b'abcd', True),
+ (b'C5H66P35', b'abcde', True),
+ (b'c4======', b'a', True),
+ (b'c5h0====', b'ab', True),
+ (b'c5h66===', b'abc', True),
+ (b'c5h66p0=', b'abcd', True),
+ (b'c5h66p35', b'abcde', True),
+ ]
+ for to_decode, expected, casefold in test_cases:
+ with self.subTest(to_decode=to_decode, casefold=casefold):
+ self.assertEqual(base64.b32hexdecode(to_decode, casefold),
+ expected)
+ self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii'),
+ casefold), expected)
+
+ def test_b32hexdecode_other_types(self):
+ self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc')
+ self.check_decode_type_errors(base64.b32hexdecode)
+
+ def test_b32hexdecode_error(self):
+ tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======']
+ prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF']
+ for i in range(0, 17):
+ if i:
+ tests.append(b'='*i)
+ for prefix in prefixes:
+ if len(prefix) + i != 8:
+ tests.append(prefix + b'='*i)
+ for data in tests:
+ with self.subTest(to_decode=data):
+ with self.assertRaises(binascii.Error):
+ base64.b32hexdecode(data)
+ with self.assertRaises(binascii.Error):
+ base64.b32hexdecode(data.decode('ascii'))
+
+
def test_b16encode(self):
eq = self.assertEqual
eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
diff --git a/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
new file mode 100644
index 00000000000..88b95998d08
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
@@ -0,0 +1,2 @@
+Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
+Base32 Encoding with Extended Hex Alphabet.