py/parsenum: Throw an exception for invalid int literals like "01".

This includes making int("01") parse in base 10 like standard Python. When a base of 0 is specified, it means auto-detect based on the prefix, and literals beginning with 0 (except when the literal is all 0's) like "01" are then invalid and now throw an exception. The new error message is different from CPython. It says e.g., `SyntaxError: invalid syntax for integer with base 0: '09'`. Additional test cases were added to cover the changed & added code. Co-authored-by: Damien George <damien@micropython.org> Signed-off-by: Jeff Epler <jepler@gmail.com>
author: Jeff Epler <jepler@gmail.com> 2024-01-03 19:31:35 -0600
committer: Damien George <damien@micropython.org> 2025-01-26 22:54:58 +1100
commit: 13b13d1fdd05549d504eeded0b5aa8871d5e5dcf (patch)
tree: 0cd66eb9e2e2cb2ca0c6904c0093bf59ed77c1cb /py
parent: 7b3f189b1723fe642f122a3b7826d16fe32f801a (diff)
download: micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.tar.gz
micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.zip
3 files changed, 14 insertions, 21 deletions
diff --git a/py/objint.c b/py/objint.c
index 773e180343..4be6009a44 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
                 return o;
             } else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) {
                 // a textual representation, parse it
-                return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL);
+                return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL);
             #if MICROPY_PY_BUILTINS_FLOAT
             } else if (mp_obj_is_float(args[0])) {
                 return mp_obj_new_int_from_float(mp_obj_float_get(args[0]));
diff --git a/py/parsenum.c b/py/parsenum.c
index b33ffb6ff2..27d6641198 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -151,13 +151,13 @@ value_error:
         raise_exc(exc, lex);
         #elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL
         mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError,
-            MP_ERROR_TEXT("invalid syntax for integer with base %d"), base);
+            MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base);
         raise_exc(exc, lex);
         #else
         vstr_t vstr;
         mp_print_t print;
         vstr_init_print(&vstr, 50, &print);
-        mp_printf(&print, "invalid syntax for integer with base %d: ", base);
+        mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base);
         mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
         mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
             mp_obj_new_str_from_utf8_vstr(&vstr));
diff --git a/py/parsenumbase.c b/py/parsenumbase.c
index 94523a666d..cc3275c456 100644
--- a/py/parsenumbase.c
+++ b/py/parsenumbase.c
@@ -30,35 +30,28 @@
 
 // find real radix base, and strip preceding '0x', '0o' and '0b'
 // puts base in *base, and returns number of bytes to skip the prefix
+// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a
+// ValueError if it's not all-digits-zero.
 size_t mp_parse_num_base(const char *str, size_t len, int *base) {
     const byte *p = (const byte *)str;
     if (len <= 1) {
         goto no_prefix;
     }
     unichar c = *(p++);
-    if ((*base == 0 || *base == 16) && c == '0') {
-        c = *(p++);
-        if ((c | 32) == 'x') {
+    if (c == '0') {
+        c = *(p++) | 32;
+        int b = *base;
+        if (c == 'x' && !(b & ~16)) {
             *base = 16;
-        } else if (*base == 0 && (c | 32) == 'o') {
+        } else if (c == 'o' && !(b & ~8)) {
             *base = 8;
-        } else if (*base == 0 && (c | 32) == 'b') {
+        } else if (c == 'b' && !(b & ~2)) {
             *base = 2;
         } else {
-            if (*base == 0) {
-                *base = 10;
-            }
-            p -= 2;
-        }
-    } else if (*base == 8 && c == '0') {
-        c = *(p++);
-        if ((c | 32) != 'o') {
-            p -= 2;
-        }
-    } else if (*base == 2 && c == '0') {
-        c = *(p++);
-        if ((c | 32) != 'b') {
             p -= 2;
+            if (b == 0) {
+                *base = 1;
+            }
         }
     } else {
         p--;
author	Jeff Epler <jepler@gmail.com>	2024-01-03 19:31:35 -0600
committer	Damien George <damien@micropython.org>	2025-01-26 22:54:58 +1100
commit	13b13d1fdd05549d504eeded0b5aa8871d5e5dcf (patch)
tree	0cd66eb9e2e2cb2ca0c6904c0093bf59ed77c1cb /py
parent	7b3f189b1723fe642f122a3b7826d16fe32f801a (diff)
download	micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.tar.gz micropython-13b13d1fdd05549d504eeded0b5aa8871d5e5dcf.zip