diff options
author | Jeff Epler <jepler@gmail.com> | 2024-01-03 19:31:35 -0600 |
---|---|---|
committer | Damien George <damien@micropython.org> | 2025-01-26 22:54:58 +1100 |
commit | 13b13d1fdd05549d504eeded0b5aa8871d5e5dcf (patch) | |
tree | 0cd66eb9e2e2cb2ca0c6904c0093bf59ed77c1cb /py | |
parent | 7b3f189b1723fe642f122a3b7826d16fe32f801a (diff) | |
download | micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.tar.gz micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.zip |
py/parsenum: Throw an exception for invalid int literals like "01".
This includes making int("01") parse in base 10 like standard Python.
When a base of 0 is specified it means auto-detect based on the prefix, and
literals begining with 0 (except when the literal is all 0's) like "01" are
then invalid and now throw an exception.
The new error message is different from CPython. It says e.g.,
`SyntaxError: invalid syntax for integer with base 0: '09'`
Additional test cases were added to cover the changed & added code.
Co-authored-by: Damien George <damien@micropython.org>
Signed-off-by: Jeff Epler <jepler@gmail.com>
Diffstat (limited to 'py')
-rw-r--r-- | py/objint.c | 2 | ||||
-rw-r--r-- | py/parsenum.c | 4 | ||||
-rw-r--r-- | py/parsenumbase.c | 29 |
3 files changed, 14 insertions, 21 deletions
diff --git a/py/objint.c b/py/objint.c index 773e180343..4be6009a44 100644 --- a/py/objint.c +++ b/py/objint.c @@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, return o; } else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) { // a textual representation, parse it - return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL); + return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL); #if MICROPY_PY_BUILTINS_FLOAT } else if (mp_obj_is_float(args[0])) { return mp_obj_new_int_from_float(mp_obj_float_get(args[0])); diff --git a/py/parsenum.c b/py/parsenum.c index b33ffb6ff2..27d6641198 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -151,13 +151,13 @@ value_error: raise_exc(exc, lex); #elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError, - MP_ERROR_TEXT("invalid syntax for integer with base %d"), base); + MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base); raise_exc(exc, lex); #else vstr_t vstr; mp_print_t print; vstr_init_print(&vstr, 50, &print); - mp_printf(&print, "invalid syntax for integer with base %d: ", base); + mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base); mp_str_print_quoted(&print, str_val_start, top - str_val_start, true); mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError, mp_obj_new_str_from_utf8_vstr(&vstr)); diff --git a/py/parsenumbase.c b/py/parsenumbase.c index 94523a666d..cc3275c456 100644 --- a/py/parsenumbase.c +++ b/py/parsenumbase.c @@ -30,35 +30,28 @@ // find real radix base, and strip preceding '0x', '0o' and '0b' // puts base in *base, and returns number of bytes to skip the prefix +// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a +// ValueError if it's not all-digits-zero. size_t mp_parse_num_base(const char *str, size_t len, int *base) { const byte *p = (const byte *)str; if (len <= 1) { goto no_prefix; } unichar c = *(p++); - if ((*base == 0 || *base == 16) && c == '0') { - c = *(p++); - if ((c | 32) == 'x') { + if (c == '0') { + c = *(p++) | 32; + int b = *base; + if (c == 'x' && !(b & ~16)) { *base = 16; - } else if (*base == 0 && (c | 32) == 'o') { + } else if (c == 'o' && !(b & ~8)) { *base = 8; - } else if (*base == 0 && (c | 32) == 'b') { + } else if (c == 'b' && !(b & ~2)) { *base = 2; } else { - if (*base == 0) { - *base = 10; - } - p -= 2; - } - } else if (*base == 8 && c == '0') { - c = *(p++); - if ((c | 32) != 'o') { - p -= 2; - } - } else if (*base == 2 && c == '0') { - c = *(p++); - if ((c | 32) != 'b') { p -= 2; + if (b == 0) { + *base = 1; + } } } else { p--; |