summaryrefslogtreecommitdiffstatshomepage
path: root/py
diff options
context:
space:
mode:
authorJeff Epler <jepler@gmail.com>2024-01-03 19:31:35 -0600
committerDamien George <damien@micropython.org>2025-01-26 22:54:58 +1100
commit13b13d1fdd05549d504eeded0b5aa8871d5e5dcf (patch)
tree0cd66eb9e2e2cb2ca0c6904c0093bf59ed77c1cb /py
parent7b3f189b1723fe642f122a3b7826d16fe32f801a (diff)
downloadmicropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.tar.gz
micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.zip
py/parsenum: Throw an exception for invalid int literals like "01".
This includes making int("01") parse in base 10 like standard Python. When a base of 0 is specified it means auto-detect based on the prefix, and literals beginning with 0 (except when the literal is all 0's) like "01" are then invalid and now throw an exception. The new error message is different from CPython. It says e.g., `SyntaxError: invalid syntax for integer with base 0: '09'` Additional test cases were added to cover the changed & added code. Co-authored-by: Damien George <damien@micropython.org> Signed-off-by: Jeff Epler <jepler@gmail.com>
Diffstat (limited to 'py')
-rw-r--r-- py/objint.c 2
-rw-r--r-- py/parsenum.c 4
-rw-r--r-- py/parsenumbase.c 29
3 files changed, 14 insertions, 21 deletions
diff --git a/py/objint.c b/py/objint.c
index 773e180343..4be6009a44 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
return o;
} else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) {
// a textual representation, parse it
- return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL);
+ return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL);
#if MICROPY_PY_BUILTINS_FLOAT
} else if (mp_obj_is_float(args[0])) {
return mp_obj_new_int_from_float(mp_obj_float_get(args[0]));
diff --git a/py/parsenum.c b/py/parsenum.c
index b33ffb6ff2..27d6641198 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -151,13 +151,13 @@ value_error:
raise_exc(exc, lex);
#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL
mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError,
- MP_ERROR_TEXT("invalid syntax for integer with base %d"), base);
+ MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base);
raise_exc(exc, lex);
#else
vstr_t vstr;
mp_print_t print;
vstr_init_print(&vstr, 50, &print);
- mp_printf(&print, "invalid syntax for integer with base %d: ", base);
+ mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base);
mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
mp_obj_new_str_from_utf8_vstr(&vstr));
diff --git a/py/parsenumbase.c b/py/parsenumbase.c
index 94523a666d..cc3275c456 100644
--- a/py/parsenumbase.c
+++ b/py/parsenumbase.c
@@ -30,35 +30,28 @@
// find real radix base, and strip preceding '0x', '0o' and '0b'
// puts base in *base, and returns number of bytes to skip the prefix
+// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a
+// ValueError if it's not all-digits-zero.
size_t mp_parse_num_base(const char *str, size_t len, int *base) {
const byte *p = (const byte *)str;
if (len <= 1) {
goto no_prefix;
}
unichar c = *(p++);
- if ((*base == 0 || *base == 16) && c == '0') {
- c = *(p++);
- if ((c | 32) == 'x') {
+ if (c == '0') {
+ c = *(p++) | 32;
+ int b = *base;
+ if (c == 'x' && !(b & ~16)) {
*base = 16;
- } else if (*base == 0 && (c | 32) == 'o') {
+ } else if (c == 'o' && !(b & ~8)) {
*base = 8;
- } else if (*base == 0 && (c | 32) == 'b') {
+ } else if (c == 'b' && !(b & ~2)) {
*base = 2;
} else {
- if (*base == 0) {
- *base = 10;
- }
- p -= 2;
- }
- } else if (*base == 8 && c == '0') {
- c = *(p++);
- if ((c | 32) != 'o') {
- p -= 2;
- }
- } else if (*base == 2 && c == '0') {
- c = *(p++);
- if ((c | 32) != 'b') {
p -= 2;
+ if (b == 0) {
+ *base = 1;
+ }
}
} else {
p--;