From c06ea7abf249765bf93595fc42656eed585d7a47 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 21 Mar 2014 10:55:08 +0000 Subject: py: Implement parsing of infinity and nan for floats. --- py/parsenum.c | 144 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 42 deletions(-) (limited to 'py/parsenum.c') diff --git a/py/parsenum.c b/py/parsenum.c index c9cef5fcd8..b1a70c352d 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -9,6 +9,10 @@ #include "parsenumbase.h" #include "parsenum.h" +#if MICROPY_ENABLE_FLOAT +#include +#endif + #if defined(UNIX) #include @@ -84,64 +88,120 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { #define PARSE_DEC_IN_FRAC (2) #define PARSE_DEC_IN_EXP (3) -mp_obj_t mp_parse_num_decimal(const char *str, uint len) { +mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) { #if MICROPY_ENABLE_FLOAT - int in = PARSE_DEC_IN_INTG; + const char *top = str + len; mp_float_t dec_val = 0; - bool exp_neg = false; - int exp_val = 0; - int exp_extra = 0; + bool dec_neg = false; bool imag = false; - const char *top = str + len; - for (; str < top; str++) { - int dig = *str; - if ('0' <= dig && dig <= '9') { - dig -= '0'; - if (in == PARSE_DEC_IN_EXP) { - exp_val = 10 * exp_val + dig; - } else { - dec_val = 10 * dec_val + dig; - if (in == PARSE_DEC_IN_FRAC) { - exp_extra -= 1; - } + + // skip leading space + for (; str < top && isspace(*str); str++) { + } + + // get optional sign + if (str < top) { + if (*str == '+') { + str++; + } else if (*str == '-') { + str++; + dec_neg = true; + } + } + + // determine what the string is + if (str < top && (str[0] | 0x20) == 'i') { + // string starts with 'i', should be 'inf' or 'infinity' (case insensitive) + if (str + 2 < top && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') { + // inf + str += 3; + dec_val = INFINITY; + if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') { + // infinity + str += 5; } - } else if (in == PARSE_DEC_IN_INTG && dig == '.') { - in = PARSE_DEC_IN_FRAC; - } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) { - in = PARSE_DEC_IN_EXP; - if (str[1] == '+') { - str++; - } else if (str[1] == '-') { + } + } else if (str < top && (str[0] | 0x20) == 'n') { + // string starts with 'n', should be 'nan' (case insensitive) + if (str + 2 < top && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') { + // NaN + str += 3; + dec_val = MICROPY_FLOAT_C_FUN(nan)(""); + } + } else { + // parse the digits + int in = PARSE_DEC_IN_INTG; + bool exp_neg = false; + int exp_val = 0; + int exp_extra = 0; + for (; str < top; str++) { + int dig = *str; + if ('0' <= dig && dig <= '9') { + dig -= '0'; + if (in == PARSE_DEC_IN_EXP) { + exp_val = 10 * exp_val + dig; + } else { + dec_val = 10 * dec_val + dig; + if (in == PARSE_DEC_IN_FRAC) { + exp_extra -= 1; + } + } + } else if (in == PARSE_DEC_IN_INTG && dig == '.') { + in = PARSE_DEC_IN_FRAC; + } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) { + in = PARSE_DEC_IN_EXP; + if (str[1] == '+') { + str++; + } else if (str[1] == '-') { + str++; + exp_neg = true; + } + } else if (allow_imag && (dig | 0x20) == 'j') { str++; - exp_neg = true; + imag = true; + break; + } else { + // unknown character + break; } - } else if (dig == 'J' || dig == 'j') { - str++; - imag = true; - break; - } else { - // unknown character - break; + } + + // work out the exponent + if (exp_neg) { + exp_val = -exp_val; + } + exp_val += exp_extra; + + // apply the exponent + for (; exp_val > 0; exp_val--) { + dec_val *= 10; + } + for (; exp_val < 0; exp_val++) { + dec_val *= 0.1; } } - if (*str != 0) { - nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); - } - if (exp_neg) { - exp_val = -exp_val; + + // negate value if needed + if (dec_neg) { + dec_val = -dec_val; } - exp_val += exp_extra; - for (; exp_val > 0; exp_val--) { - dec_val *= 10; + + // skip trailing space + for (; str < top && isspace(*str); str++) { } - for (; exp_val < 0; exp_val++) { - dec_val *= 0.1; + + // check we reached the end of the string + if (str != top) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); } + + // return the object if (imag) { return mp_obj_new_complex(0, dec_val); } else { return mp_obj_new_float(dec_val); } + #else nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported")); #endif -- cgit v1.2.3 From 6e48f7fa856e4acaf085dfc8876c4e3772d979c2 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 21 Mar 2014 11:45:46 +0000 Subject: py: Allow 'complex()' to take a string as first argument. --- py/objcomplex.c | 14 ++++++++++---- py/objfloat.c | 4 +++- py/parsenum.c | 6 ++++-- py/parsenum.h | 2 +- py/runtime.c | 2 +- 5 files changed, 19 insertions(+), 9 deletions(-) (limited to 'py/parsenum.c') diff --git a/py/objcomplex.c b/py/objcomplex.c index 65957cbf60..2ba5226150 100644 --- a/py/objcomplex.c +++ b/py/objcomplex.c @@ -6,6 +6,7 @@ #include "mpconfig.h" #include "qstr.h" #include "obj.h" +#include "parsenum.h" #include "runtime0.h" #include "map.h" @@ -36,15 +37,20 @@ STATIC mp_obj_t complex_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const return mp_obj_new_complex(0, 0); case 1: - // TODO allow string as first arg and parse it - if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) { + if (MP_OBJ_IS_STR(args[0])) { + // a string, parse it + uint l; + const char *s = mp_obj_str_get_data(args[0], &l); + return mp_parse_num_decimal(s, l, true, true); + } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) { + // a complex, just return it return args[0]; } else { + // something else, try to cast it to a complex return mp_obj_new_complex(mp_obj_get_float(args[0]), 0); } - case 2: - { + case 2: { mp_float_t real, imag; if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) { mp_obj_complex_get(args[0], &real, &imag); diff --git a/py/objfloat.c b/py/objfloat.c index 65dafa607e..c51e13e7a1 100644 --- a/py/objfloat.c +++ b/py/objfloat.c @@ -38,10 +38,12 @@ STATIC mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m // a string, parse it uint l; const char *s = mp_obj_str_get_data(args[0], &l); - return mp_parse_num_decimal(s, l, false); + return mp_parse_num_decimal(s, l, false, false); } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_float)) { + // a float, just return it return args[0]; } else { + // something else, try to cast it to a float return mp_obj_new_float(mp_obj_get_float(args[0])); } diff --git a/py/parsenum.c b/py/parsenum.c index b1a70c352d..77f00957c6 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -88,7 +88,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { #define PARSE_DEC_IN_FRAC (2) #define PARSE_DEC_IN_EXP (3) -mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) { +mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex) { #if MICROPY_ENABLE_FLOAT const char *top = str + len; mp_float_t dec_val = 0; @@ -129,7 +129,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) { dec_val = MICROPY_FLOAT_C_FUN(nan)(""); } } else { - // parse the digits + // string should be a decimal number int in = PARSE_DEC_IN_INTG; bool exp_neg = false; int exp_val = 0; @@ -198,6 +198,8 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag) { // return the object if (imag) { return mp_obj_new_complex(0, dec_val); + } else if (force_complex) { + return mp_obj_new_complex(dec_val, 0); } else { return mp_obj_new_float(dec_val); } diff --git a/py/parsenum.h b/py/parsenum.h index f87fefbe77..97578423c7 100644 --- a/py/parsenum.h +++ b/py/parsenum.h @@ -1,2 +1,2 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base); -mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag); +mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex); diff --git a/py/runtime.c b/py/runtime.c index 5604e1a945..c268fd5464 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -375,7 +375,7 @@ mp_obj_t rt_load_const_dec(qstr qstr) { DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); uint len; const byte* data = qstr_data(qstr, &len); - return mp_parse_num_decimal((const char*)data, len, true); + return mp_parse_num_decimal((const char*)data, len, true, false); } mp_obj_t rt_load_const_str(qstr qstr) { -- cgit v1.2.3 From dfbafabf6ffd230ef7165c8df62c58dd912d41e4 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 21 Mar 2014 12:15:59 +0000 Subject: py: Improve mp_parse_num_integer; make it self contained. --- py/parsenum.c | 119 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 63 insertions(+), 56 deletions(-) (limited to 'py/parsenum.c') diff --git a/py/parsenum.c b/py/parsenum.c index 77f00957c6..6be042fe89 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -13,76 +13,83 @@ #include #endif -#if defined(UNIX) - -#include -#include - mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { - // TODO at the moment we ignore len; we should honour it! - // TODO detect integer overflow and return bignum - - int c, neg = 0; - const char *p = str; - char *num; - long found; + const char *restrict top = str + len; + bool neg = false; // check radix base if ((base != 0 && base < 2) || base > 36) { nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36")); } - // skip surrounded whitespace - while (isspace((c = *(p++)))); - if (c == 0) { - goto value_error; - } - // preced sign - if (c == '+' || c == '-') { - neg = - (c == '-'); - } else { - p--; + + // skip leading space + for (; str < top && unichar_isspace(*str); str++) { } - len -= p - str; - int skip = mp_parse_num_base(p, len, &base); - p += skip; - len -= skip; - - errno = 0; - found = strtol(p, &num, base); - if (errno) { - goto value_error; - } else if (found && *(num) == 0) { - goto done; - } else if (found || num != p) { - goto check_tail_space; - } else { - goto value_error; + // parse optional sign + if (str < top) { + if (*str == '+') { + str++; + } else if (*str == '-') { + str++; + neg = true; + } } -check_tail_space: - if (*(num) != 0) { - while (isspace((c = *(num++)))); - if (c != 0) { - goto value_error; + // parse optional base prefix + str += mp_parse_num_base(str, top - str, &base); + + // string should be an integer number + machine_int_t int_val = 0; + for (; str < top; str++) { + machine_int_t old_val = int_val; + int dig = *str; + if (unichar_isdigit(dig) && dig - '0' < base) { + // 0-9 digit + int_val = base * int_val + dig - '0'; + } else if (base == 16) { + dig |= 0x20; + if ('a' <= dig && dig <= 'f') { + // a-f hex digit + int_val = base * int_val + dig - 'a' + 10; + } else { + // unknown character + break; + } + } else { + // unknown character + break; + } + if (int_val < old_val) { + // If new value became less than previous, it's overflow + goto overflow; + } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) { + // If signed number changed sign - it's overflow + goto overflow; } } -done: - return MP_OBJ_NEW_SMALL_INT((found ^ neg) - neg); + // negate value if needed + if (neg) { + int_val = -int_val; + } -value_error: - nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str)); -} + // skip trailing space + for (; str < top && unichar_isspace(*str); str++) { + } -#else /* defined(UNIX) */ + // check we reached the end of the string + if (str != top) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); + } -mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { - // TODO port strtol to stm - return MP_OBJ_NEW_SMALL_INT(0); -} + // return the object + return MP_OBJ_NEW_SMALL_INT(int_val); -#endif /* defined(UNIX) */ +overflow: + // TODO reparse using bignum + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer")); +} #define PARSE_DEC_IN_INTG (1) #define PARSE_DEC_IN_FRAC (2) @@ -96,10 +103,10 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool f bool imag = false; // skip leading space - for (; str < top && isspace(*str); str++) { + for (; str < top && unichar_isspace(*str); str++) { } - // get optional sign + // parse optional sign if (str < top) { if (*str == '+') { str++; @@ -187,7 +194,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool f } // skip trailing space - for (; str < top && isspace(*str); str++) { + for (; str < top && unichar_isspace(*str); str++) { } // check we reached the end of the string -- cgit v1.2.3 From 7b4b78bc33fdb9b0007060877fd7c1ca2392bceb Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 21 Mar 2014 20:46:38 +0000 Subject: py: Put back proper ValueError for badly parsed integers. --- py/parsenum.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'py/parsenum.c') diff --git a/py/parsenum.c b/py/parsenum.c index 6be042fe89..7be53897a7 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -41,6 +41,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { // string should be an integer number machine_int_t int_val = 0; + const char *restrict str_val_start = str; for (; str < top; str++) { machine_int_t old_val = int_val; int dig = *str; @@ -69,6 +70,11 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { } } + // check we parsed something + if (str == str_val_start) { + goto value_error; + } + // negate value if needed if (neg) { int_val = -int_val; @@ -80,12 +86,15 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) { // check we reached the end of the string if (str != top) { - nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number")); + goto value_error; } // return the object return MP_OBJ_NEW_SMALL_INT(int_val); +value_error: + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str)); + overflow: // TODO reparse using bignum nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer")); -- cgit v1.2.3