diff options
author | Glenn Moloney <glenn.moloney@gmail.com> | 2024-08-13 08:38:20 +1000 |
---|---|---|
committer | Glenn Moloney <glenn.moloney@gmail.com> | 2024-08-19 14:18:34 +1000 |
commit | 6367099f8365c22b42474818ce5fdb9b35591ade (patch) | |
tree | 9784e255a91ef704201d149194753938598cffa3 | |
parent | e9814e987bcc816fb67e38748a5afce466c45606 (diff) | |
download | micropython-6367099f8365c22b42474818ce5fdb9b35591ade.tar.gz micropython-6367099f8365c22b42474818ce5fdb9b35591ade.zip |
py/objstr: Skip whitespace in bytes.fromhex().
Skip whitespace characters between pairs of hex digits.
This makes `bytes.fromhex()` compatible with CPython.
Includes a simple test in `tests/basics/builtin_str_hex.py`.
Signed-off-by: Glenn Moloney <glenn.moloney@gmail.com>
-rw-r--r-- | py/objstr.c | 24 | ||||
-rw-r--r-- | tests/basics/builtin_str_hex.py | 17 | ||||
-rw-r--r-- | tests/basics/builtin_str_hex.py.exp | 11 |
3 files changed, 36 insertions, 16 deletions
diff --git a/py/objstr.c b/py/objstr.c index 757da827c0..fc0623eb7a 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -2014,27 +2014,21 @@ mp_obj_t mp_obj_bytes_fromhex(mp_obj_t type_in, mp_obj_t data) { mp_buffer_info_t bufinfo; mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ); - if ((bufinfo.len & 1) != 0) { - mp_raise_ValueError(MP_ERROR_TEXT("odd-length string")); - } vstr_t vstr; vstr_init_len(&vstr, bufinfo.len / 2); byte *in = bufinfo.buf, *out = (byte *)vstr.buf; - byte hex_byte = 0; - for (mp_uint_t i = bufinfo.len; i--;) { - byte hex_ch = *in++; - if (unichar_isxdigit(hex_ch)) { - hex_byte += unichar_xdigit_value(hex_ch); - } else { - mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit found")); + byte *in_end = in + bufinfo.len; + mp_uint_t ch1, ch2; + while (in < in_end) { + if (unichar_isspace(ch1 = *in++)) { + continue; // Skip whitespace between hex digit pairs } - if (i & 1) { - hex_byte <<= 4; - } else { - *out++ = hex_byte; - hex_byte = 0; + if (in == in_end || !unichar_isxdigit(ch1) || !unichar_isxdigit(ch2 = *in++)) { + mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit")); } + *out++ = (byte)((unichar_xdigit_value(ch1) << 4) | unichar_xdigit_value(ch2)); } + vstr.len = out - (byte *)vstr.buf; // Length may be shorter due to whitespace in input return mp_obj_new_str_type_from_vstr(MP_OBJ_TO_PTR(type_in), &vstr); } diff --git a/tests/basics/builtin_str_hex.py b/tests/basics/builtin_str_hex.py index 7390c8eaee..9455883012 100644 --- a/tests/basics/builtin_str_hex.py +++ b/tests/basics/builtin_str_hex.py @@ -20,5 +20,20 @@ for x in ( "08090a0b0c0d0e0f", "7f80ff", "313233344142434461626364", + "ab\tcd\n ef ", + "ab cd ef", + "ab cd ef ", + " ab cd ef ", + # Invalid hex strings: + "abcde", # Odd number of hex digits + "ab cd e", + "a b cd ef", # Spaces between hex pairs + "ab cd e f ", + "abga", # Invalid hex digits + "ab_cd", + "ab:cd", ): - print(bytes.fromhex(x)) + try: + print(bytes.fromhex(x)) + except ValueError as e: + print("ValueError:", e) diff 
--git a/tests/basics/builtin_str_hex.py.exp b/tests/basics/builtin_str_hex.py.exp index 990dd85707..0309cad02d 100644 --- a/tests/basics/builtin_str_hex.py.exp +++ b/tests/basics/builtin_str_hex.py.exp @@ -26,3 +26,14 @@ b'\x00\x01\x02\x03\x04\x05\x06\x07' b'\x08\t\n\x0b\x0c\r\x0e\x0f' b'\x7f\x80\xff' b'1234ABCDabcd' +b'\xab\xcd\xef' +b'\xab\xcd\xef' +b'\xab\xcd\xef' +b'\xab\xcd\xef' +ValueError: non-hex digit +ValueError: non-hex digit +ValueError: non-hex digit +ValueError: non-hex digit +ValueError: non-hex digit +ValueError: non-hex digit +ValueError: non-hex digit |