1 files changed, 169 insertions, 91 deletions
diff --git a/py/parsenum.c b/py/parsenum.c
index c9cef5fcd8..7be53897a7 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -9,139 +9,217 @@
 #include "parsenumbase.h"
 #include "parsenum.h"
 
-#if defined(UNIX)
-
-#include <ctype.h>
-#include <errno.h>
+#if MICROPY_ENABLE_FLOAT
+#include <math.h>
+#endif
 
 mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
-    // TODO at the moment we ignore len; we should honour it!
-    // TODO detect integer overflow and return bignum
-
-    int c, neg = 0;
-    const char *p = str;
-    char *num;
-    long found;
+    const char *restrict top = str + len;
+    bool neg = false;
 
     // check radix base
     if ((base != 0 && base < 2) || base > 36) {
         nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36"));
     }
-    // skip surrounded whitespace
-    while (isspace((c = *(p++))));
-    if (c == 0) {
-        goto value_error;
+
+    // skip leading space
+    for (; str < top && unichar_isspace(*str); str++) {
     }
-    // preced sign
-    if (c == '+' || c == '-') {
-        neg = - (c == '-');
-    } else {
-        p--;
+
+    // parse optional sign
+    if (str < top) {
+        if (*str == '+') {
+            str++;
+        } else if (*str == '-') {
+            str++;
+            neg = true;
+        }
     }
 
-    len -= p - str;
-    int skip = mp_parse_num_base(p, len, &base);
-    p += skip;
-    len -= skip;
+    // parse optional base prefix
+    str += mp_parse_num_base(str, top - str, &base);
 
-    errno = 0;
-    found = strtol(p, &num, base);
-    if (errno) {
-        goto value_error;
-    } else if (found && *(num) == 0) {
-        goto done;
-    } else if (found || num != p) {
-        goto check_tail_space;
-    } else {
+    // string should be an integer number
+    machine_int_t int_val = 0;
+    const char *restrict str_val_start = str;
+    for (; str < top; str++) {
+        machine_int_t old_val = int_val;
+        int dig = *str;
+        if (unichar_isdigit(dig) && dig - '0' < base) {
+            // 0-9 digit
+            int_val = base * int_val + dig - '0';
+        } else if (base == 16) {
+            dig |= 0x20;
+            if ('a' <= dig && dig <= 'f') {
+                // a-f hex digit
+                int_val = base * int_val + dig - 'a' + 10;
+            } else {
+                // unknown character
+                break;
+            }
+        } else {
+            // unknown character
+            break;
+        }
+        if (int_val < old_val) {
+            // If new value became less than previous, it's overflow
+            goto overflow;
+        } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
+            // If signed number changed sign - it's overflow
+            goto overflow;
+        }
+    }
+
+    // check we parsed something
+    if (str == str_val_start) {
         goto value_error;
     }
 
-check_tail_space:
-    if (*(num) != 0) {
-        while (isspace((c = *(num++))));
-        if (c != 0) {
-            goto value_error;
-        }
+    // negate value if needed
+    if (neg) {
+        int_val = -int_val;
+    }
+
+    // skip trailing space
+    for (; str < top && unichar_isspace(*str); str++) {
+    }
+
+    // check we reached the end of the string
+    if (str != top) {
+        goto value_error;
     }
 
-done:
-    return MP_OBJ_NEW_SMALL_INT((found ^ neg) - neg);
+    // return the object
+    return MP_OBJ_NEW_SMALL_INT(int_val);
 
 value_error:
     nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
-}
 
-#else /* defined(UNIX) */
-
-mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
-    // TODO port strtol to stm
-    return MP_OBJ_NEW_SMALL_INT(0);
+overflow:
+    // TODO reparse using bignum
+    nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer"));
 }
 
-#endif /* defined(UNIX) */
-
 #define PARSE_DEC_IN_INTG (1)
 #define PARSE_DEC_IN_FRAC (2)
 #define PARSE_DEC_IN_EXP  (3)
 
-mp_obj_t mp_parse_num_decimal(const char *str, uint len) {
+mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex) {
 #if MICROPY_ENABLE_FLOAT
-    int in = PARSE_DEC_IN_INTG;
+    const char *top = str + len;
     mp_float_t dec_val = 0;
-    bool exp_neg = false;
-    int exp_val = 0;
-    int exp_extra = 0;
+    bool dec_neg = false;
     bool imag = false;
-    const char *top = str + len;
-    for (; str < top; str++) {
-        int dig = *str;
-        if ('0' <= dig && dig <= '9') {
-            dig -= '0';
-            if (in == PARSE_DEC_IN_EXP) {
-                exp_val = 10 * exp_val + dig;
-            } else {
-                dec_val = 10 * dec_val + dig;
-                if (in == PARSE_DEC_IN_FRAC) {
-                    exp_extra -= 1;
-                }
+
+    // skip leading space
+    for (; str < top && unichar_isspace(*str); str++) {
+    }
+
+    // parse optional sign
+    if (str < top) {
+        if (*str == '+') {
+            str++;
+        } else if (*str == '-') {
+            str++;
+            dec_neg = true;
+        }
+    }
+
+    // determine what the string is
+    if (str < top && (str[0] | 0x20) == 'i') {
+        // string starts with 'i', should be 'inf' or 'infinity' (case insensitive)
+        if (str + 2 < top && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') {
+            // inf
+            str += 3;
+            dec_val = INFINITY;
+            if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') {
+                // infinity
+                str += 5;
             }
-        } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
-            in = PARSE_DEC_IN_FRAC;
-        } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) {
-            in = PARSE_DEC_IN_EXP;
-            if (str[1] == '+') {
-                str++;
-            } else if (str[1] == '-') {
+        }
+    } else if (str < top && (str[0] | 0x20) == 'n') {
+        // string starts with 'n', should be 'nan' (case insensitive)
+        if (str + 2 < top && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') {
+            // NaN
+            str += 3;
+            dec_val = MICROPY_FLOAT_C_FUN(nan)("");
+        }
+    } else {
+        // string should be a decimal number
+        int in = PARSE_DEC_IN_INTG;
+        bool exp_neg = false;
+        int exp_val = 0;
+        int exp_extra = 0;
+        for (; str < top; str++) {
+            int dig = *str;
+            if ('0' <= dig && dig <= '9') {
+                dig -= '0';
+                if (in == PARSE_DEC_IN_EXP) {
+                    exp_val = 10 * exp_val + dig;
+                } else {
+                    dec_val = 10 * dec_val + dig;
+                    if (in == PARSE_DEC_IN_FRAC) {
+                        exp_extra -= 1;
+                    }
+                }
+            } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
+                in = PARSE_DEC_IN_FRAC;
+            } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
+                in = PARSE_DEC_IN_EXP;
+                if (str[1] == '+') {
+                    str++;
+                } else if (str[1] == '-') {
+                    str++;
+                    exp_neg = true;
+                }
+            } else if (allow_imag && (dig | 0x20) == 'j') {
                 str++;
-                exp_neg = true;
+                imag = true;
+                break;
+            } else {
+                // unknown character
+                break;
             }
-        } else if (dig == 'J' || dig == 'j') {
-            str++;
-            imag = true;
-            break;
-        } else {
-            // unknown character
-            break;
+        }
+
+        // work out the exponent
+        if (exp_neg) {
+            exp_val = -exp_val;
+        }
+        exp_val += exp_extra;
+
+        // apply the exponent
+        for (; exp_val > 0; exp_val--) {
+            dec_val *= 10;
+        }
+        for (; exp_val < 0; exp_val++) {
+            dec_val *= 0.1;
         }
     }
-    if (*str != 0) {
-        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
-    }
-    if (exp_neg) {
-        exp_val = -exp_val;
+
+    // negate value if needed
+    if (dec_neg) {
+        dec_val = -dec_val;
     }
-    exp_val += exp_extra;
-    for (; exp_val > 0; exp_val--) {
-        dec_val *= 10;
+
+    // skip trailing space
+    for (; str < top && unichar_isspace(*str); str++) {
     }
-    for (; exp_val < 0; exp_val++) {
-        dec_val *= 0.1;
+
+    // check we reached the end of the string
+    if (str != top) {
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
     }
+
+    // return the object
     if (imag) {
         return mp_obj_new_complex(0, dec_val);
+    } else if (force_complex) {
+        return mp_obj_new_complex(dec_val, 0);
     } else {
         return mp_obj_new_float(dec_val);
     }
+
 #else
     nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported"));
 #endif