From c06ea7abf249765bf93595fc42656eed585d7a47 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Fri, 21 Mar 2014 10:55:08 +0000
Subject: py: Implement parsing of infinity and nan for floats.

---
 py/parsenum.c | 144 +++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 102 insertions(+), 42 deletions(-)

(limited to 'py/parsenum.c')

diff --git a/py/parsenum.c b/py/parsenum.c
index c9cef5fcd8..b1a70c352d 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -9,6 +9,10 @@
 #include "parsenumbase.h"
 #include "parsenum.h"
 
+#if MICROPY_ENABLE_FLOAT
+#include <math.h>
+#endif
+
 #if defined(UNIX)
 
 #include <ctype.h>
@@ -84,64 +88,120 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
 #define PARSE_DEC_IN_FRAC (2)
 #define PARSE_DEC_IN_EXP  (3)
 
-mp_obj_t mp_parse_num_decimal(const char *str, uint len) {
+mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) {
 #if MICROPY_ENABLE_FLOAT
-    int in = PARSE_DEC_IN_INTG;
+    const char *top = str + len;
     mp_float_t dec_val = 0;
-    bool exp_neg = false;
-    int exp_val = 0;
-    int exp_extra = 0;
+    bool dec_neg = false;
     bool imag = false;
-    const char *top = str + len;
-    for (; str < top; str++) {
-        int dig = *str;
-        if ('0' <= dig && dig <= '9') {
-            dig -= '0';
-            if (in == PARSE_DEC_IN_EXP) {
-                exp_val = 10 * exp_val + dig;
-            } else {
-                dec_val = 10 * dec_val + dig;
-                if (in == PARSE_DEC_IN_FRAC) {
-                    exp_extra -= 1;
-                }
+
+    // skip leading space
+    for (; str < top && isspace(*str); str++) {
+    }
+
+    // get optional sign
+    if (str < top) {
+        if (*str == '+') {
+            str++;
+        } else if (*str == '-') {
+            str++;
+            dec_neg = true;
+        }
+    }
+
+    // determine what the string is
+    if (str < top && (str[0] | 0x20) == 'i') {
+        // string starts with 'i', should be 'inf' or 'infinity' (case insensitive)
+        if (str + 2 < top && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') {
+            // inf
+            str += 3;
+            dec_val = INFINITY;
+            if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') {
+                // infinity
+                str += 5;
             }
-        } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
-            in = PARSE_DEC_IN_FRAC;
-        } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) {
-            in = PARSE_DEC_IN_EXP;
-            if (str[1] == '+') {
-                str++;
-            } else if (str[1] == '-') {
+        }
+    } else if (str < top && (str[0] | 0x20) == 'n') {
+        // string starts with 'n', should be 'nan' (case insensitive)
+        if (str + 2 < top && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') {
+            // NaN
+            str += 3;
+            dec_val = MICROPY_FLOAT_C_FUN(nan)("");
+        }
+    } else {
+        // parse the digits
+        int in = PARSE_DEC_IN_INTG;
+        bool exp_neg = false;
+        int exp_val = 0;
+        int exp_extra = 0;
+        for (; str < top; str++) {
+            int dig = *str;
+            if ('0' <= dig && dig <= '9') {
+                dig -= '0';
+                if (in == PARSE_DEC_IN_EXP) {
+                    exp_val = 10 * exp_val + dig;
+                } else {
+                    dec_val = 10 * dec_val + dig;
+                    if (in == PARSE_DEC_IN_FRAC) {
+                        exp_extra -= 1;
+                    }
+                }
+            } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
+                in = PARSE_DEC_IN_FRAC;
+            } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
+                in = PARSE_DEC_IN_EXP;
+                if (str[1] == '+') {
+                    str++;
+                } else if (str[1] == '-') {
+                    str++;
+                    exp_neg = true;
+                }
+            } else if (allow_imag && (dig | 0x20) == 'j') {
                 str++;
-                exp_neg = true;
+                imag = true;
+                break;
+            } else {
+                // unknown character
+                break;
             }
-        } else if (dig == 'J' || dig == 'j') {
-            str++;
-            imag = true;
-            break;
-        } else {
-            // unknown character
-            break;
+        }
+
+        // work out the exponent
+        if (exp_neg) {
+            exp_val = -exp_val;
+        }
+        exp_val += exp_extra;
+
+        // apply the exponent
+        for (; exp_val > 0; exp_val--) {
+            dec_val *= 10;
+        }
+        for (; exp_val < 0; exp_val++) {
+            dec_val *= 0.1;
         }
     }
-    if (*str != 0) {
-        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
-    }
-    if (exp_neg) {
-        exp_val = -exp_val;
+
+    // negate value if needed
+    if (dec_neg) {
+        dec_val = -dec_val;
     }
-    exp_val += exp_extra;
-    for (; exp_val > 0; exp_val--) {
-        dec_val *= 10;
+
+    // skip trailing space
+    for (; str < top && isspace(*str); str++) {
     }
-    for (; exp_val < 0; exp_val++) {
-        dec_val *= 0.1;
+
+    // check we reached the end of the string
+    if (str != top) {
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
     }
+
+    // return the object
     if (imag) {
         return mp_obj_new_complex(0, dec_val);
     } else {
         return mp_obj_new_float(dec_val);
     }
+
 #else
     nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported"));
 #endif
-- 
cgit v1.2.3


From 6e48f7fa856e4acaf085dfc8876c4e3772d979c2 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Fri, 21 Mar 2014 11:45:46 +0000
Subject: py: Allow 'complex()' to take a string as first argument.

---
 py/objcomplex.c | 14 ++++++++++----
 py/objfloat.c   |  4 +++-
 py/parsenum.c   |  6 ++++--
 py/parsenum.h   |  2 +-
 py/runtime.c    |  2 +-
 5 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'py/parsenum.c')

diff --git a/py/objcomplex.c b/py/objcomplex.c
index 65957cbf60..2ba5226150 100644
--- a/py/objcomplex.c
+++ b/py/objcomplex.c
@@ -6,6 +6,7 @@
 #include "mpconfig.h"
 #include "qstr.h"
 #include "obj.h"
+#include "parsenum.h"
 #include "runtime0.h"
 #include "map.h"
 
@@ -36,15 +37,20 @@ STATIC mp_obj_t complex_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const
             return mp_obj_new_complex(0, 0);
 
         case 1:
-            // TODO allow string as first arg and parse it
-            if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
+            if (MP_OBJ_IS_STR(args[0])) {
+                // a string, parse it
+                uint l;
+                const char *s = mp_obj_str_get_data(args[0], &l);
+                return mp_parse_num_decimal(s, l, true, true);
+            } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
+                // a complex, just return it
                 return args[0];
             } else {
+                // something else, try to cast it to a complex
                 return mp_obj_new_complex(mp_obj_get_float(args[0]), 0);
             }
 
-        case 2:
-        {
+        case 2: {
             mp_float_t real, imag;
             if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
                 mp_obj_complex_get(args[0], &real, &imag);
diff --git a/py/objfloat.c b/py/objfloat.c
index 65dafa607e..c51e13e7a1 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -38,10 +38,12 @@ STATIC mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
                 // a string, parse it
                 uint l;
                 const char *s = mp_obj_str_get_data(args[0], &l);
-                return mp_parse_num_decimal(s, l, false);
+                return mp_parse_num_decimal(s, l, false, false);
             } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_float)) {
+                // a float, just return it
                 return args[0];
             } else {
+                // something else, try to cast it to a float
                 return mp_obj_new_float(mp_obj_get_float(args[0]));
             }
 
diff --git a/py/parsenum.c b/py/parsenum.c
index b1a70c352d..77f00957c6 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -88,7 +88,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
 #define PARSE_DEC_IN_FRAC (2)
 #define PARSE_DEC_IN_EXP  (3)
 
-mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) {
+mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex) {
 #if MICROPY_ENABLE_FLOAT
     const char *top = str + len;
     mp_float_t dec_val = 0;
@@ -129,7 +129,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) {
             dec_val = MICROPY_FLOAT_C_FUN(nan)("");
         }
     } else {
-        // parse the digits
+        // string should be a decimal number
         int in = PARSE_DEC_IN_INTG;
         bool exp_neg = false;
         int exp_val = 0;
@@ -198,6 +198,8 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) {
     // return the object
     if (imag) {
         return mp_obj_new_complex(0, dec_val);
+    } else if (force_complex) {
+        return mp_obj_new_complex(dec_val, 0);
     } else {
         return mp_obj_new_float(dec_val);
     }
diff --git a/py/parsenum.h b/py/parsenum.h
index f87fefbe77..97578423c7 100644
--- a/py/parsenum.h
+++ b/py/parsenum.h
@@ -1,2 +1,2 @@
 mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base);
-mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag);
+mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex);
diff --git a/py/runtime.c b/py/runtime.c
index 5604e1a945..c268fd5464 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -375,7 +375,7 @@ mp_obj_t rt_load_const_dec(qstr qstr) {
     DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
     uint len;
     const byte* data = qstr_data(qstr, &len);
-    return mp_parse_num_decimal((const char*)data, len, true);
+    return mp_parse_num_decimal((const char*)data, len, true, false);
 }
 
 mp_obj_t rt_load_const_str(qstr qstr) {
-- 
cgit v1.2.3


From dfbafabf6ffd230ef7165c8df62c58dd912d41e4 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Fri, 21 Mar 2014 12:15:59 +0000
Subject: py: Improve mp_parse_num_integer; make it self contained.

---
 py/parsenum.c | 119 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 63 insertions(+), 56 deletions(-)

(limited to 'py/parsenum.c')

diff --git a/py/parsenum.c b/py/parsenum.c
index 77f00957c6..6be042fe89 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -13,76 +13,83 @@
 #include <math.h>
 #endif
 
-#if defined(UNIX)
-
-#include <ctype.h>
-#include <errno.h>
-
 mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
-    // TODO at the moment we ignore len; we should honour it!
-    // TODO detect integer overflow and return bignum
-
-    int c, neg = 0;
-    const char *p = str;
-    char *num;
-    long found;
+    const char *restrict top = str + len;
+    bool neg = false;
 
     // check radix base
     if ((base != 0 && base < 2) || base > 36) {
         nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36"));
     }
-    // skip surrounded whitespace
-    while (isspace((c = *(p++))));
-    if (c == 0) {
-        goto value_error;
-    }
-    // preced sign
-    if (c == '+' || c == '-') {
-        neg = - (c == '-');
-    } else {
-        p--;
+
+    // skip leading space
+    for (; str < top && unichar_isspace(*str); str++) {
     }
 
-    len -= p - str;
-    int skip = mp_parse_num_base(p, len, &base);
-    p += skip;
-    len -= skip;
-
-    errno = 0;
-    found = strtol(p, &num, base);
-    if (errno) {
-        goto value_error;
-    } else if (found && *(num) == 0) {
-        goto done;
-    } else if (found || num != p) {
-        goto check_tail_space;
-    } else {
-        goto value_error;
+    // parse optional sign
+    if (str < top) {
+        if (*str == '+') {
+            str++;
+        } else if (*str == '-') {
+            str++;
+            neg = true;
+        }
     }
 
-check_tail_space:
-    if (*(num) != 0) {
-        while (isspace((c = *(num++))));
-        if (c != 0) {
-            goto value_error;
+    // parse optional base prefix
+    str += mp_parse_num_base(str, top - str, &base);
+
+    // string should be an integer number
+    machine_int_t int_val = 0;
+    for (; str < top; str++) {
+        machine_int_t old_val = int_val;
+        int dig = *str;
+        if (unichar_isdigit(dig) && dig - '0' < base) {
+            // 0-9 digit
+            int_val = base * int_val + dig - '0';
+        } else if (base == 16) {
+            dig |= 0x20;
+            if ('a' <= dig && dig <= 'f') {
+                // a-f hex digit
+                int_val = base * int_val + dig - 'a' + 10;
+            } else {
+                // unknown character
+                break;
+            }
+        } else {
+            // unknown character
+            break;
+        }
+        if (int_val < old_val) {
+            // If new value became less than previous, it's overflow
+            goto overflow;
+        } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
+            // If signed number changed sign - it's overflow
+            goto overflow;
         }
     }
 
-done:
-    return MP_OBJ_NEW_SMALL_INT((found ^ neg) - neg);
+    // negate value if needed
+    if (neg) {
+        int_val = -int_val;
+    }
 
-value_error:
-    nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
-}
+    // skip trailing space
+    for (; str < top && unichar_isspace(*str); str++) {
+    }
 
-#else /* defined(UNIX) */
+    // check we reached the end of the string
+    if (str != top) {
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
+    }
 
-mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
-    // TODO port strtol to stm
-    return MP_OBJ_NEW_SMALL_INT(0);
-}
+    // return the object
+    return MP_OBJ_NEW_SMALL_INT(int_val);
 
-#endif /* defined(UNIX) */
+overflow:
+    // TODO reparse using bignum
+    nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer"));
+}
 
 #define PARSE_DEC_IN_INTG (1)
 #define PARSE_DEC_IN_FRAC (2)
@@ -96,10 +103,10 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool f
     bool imag = false;
 
     // skip leading space
-    for (; str < top && isspace(*str); str++) {
+    for (; str < top && unichar_isspace(*str); str++) {
     }
 
-    // get optional sign
+    // parse optional sign
     if (str < top) {
         if (*str == '+') {
             str++;
@@ -187,7 +194,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool f
     }
 
     // skip trailing space
-    for (; str < top && isspace(*str); str++) {
+    for (; str < top && unichar_isspace(*str); str++) {
     }
 
     // check we reached the end of the string
-- 
cgit v1.2.3


From 7b4b78bc33fdb9b0007060877fd7c1ca2392bceb Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Fri, 21 Mar 2014 20:46:38 +0000
Subject: py: Put back proper ValueError for badly parsed integers.

---
 py/parsenum.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'py/parsenum.c')

diff --git a/py/parsenum.c b/py/parsenum.c
index 6be042fe89..7be53897a7 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -41,6 +41,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
 
     // string should be an integer number
     machine_int_t int_val = 0;
+    const char *restrict str_val_start = str;
     for (; str < top; str++) {
         machine_int_t old_val = int_val;
         int dig = *str;
@@ -69,6 +70,11 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
         }
     }
 
+    // check we parsed something
+    if (str == str_val_start) {
+        goto value_error;
+    }
+
     // negate value if needed
     if (neg) {
         int_val = -int_val;
@@ -80,12 +86,15 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
 
     // check we reached the end of the string
     if (str != top) {
-        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
+        goto value_error;
     }
 
     // return the object
     return MP_OBJ_NEW_SMALL_INT(int_val);
 
+value_error:
+    nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
+
 overflow:
     // TODO reparse using bignum
     nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer"));
-- 
cgit v1.2.3