summary | refs | log | tree | commit | diff | stats | homepage
path: root/py/parsenum.c
diff options
context:
space:
mode:
Diffstat (limited to 'py/parsenum.c')
-rw-r--r-- py/parsenum.c 260
1 files changed, 169 insertions, 91 deletions
diff --git a/py/parsenum.c b/py/parsenum.c
index c9cef5fcd8..7be53897a7 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -9,139 +9,217 @@
#include "parsenumbase.h"
#include "parsenum.h"
-#if defined(UNIX)
-
-#include <ctype.h>
-#include <errno.h>
+#if MICROPY_ENABLE_FLOAT
+#include <math.h>
+#endif
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
- // TODO at the moment we ignore len; we should honour it!
- // TODO detect integer overflow and return bignum
-
- int c, neg = 0;
- const char *p = str;
- char *num;
- long found;
+ const char *restrict top = str + len;
+ bool neg = false;
// check radix base
if ((base != 0 && base < 2) || base > 36) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36"));
}
- // skip surrounded whitespace
- while (isspace((c = *(p++))));
- if (c == 0) {
- goto value_error;
+
+ // skip leading space
+ for (; str < top && unichar_isspace(*str); str++) {
}
- // preced sign
- if (c == '+' || c == '-') {
- neg = - (c == '-');
- } else {
- p--;
+
+ // parse optional sign
+ if (str < top) {
+ if (*str == '+') {
+ str++;
+ } else if (*str == '-') {
+ str++;
+ neg = true;
+ }
}
- len -= p - str;
- int skip = mp_parse_num_base(p, len, &base);
- p += skip;
- len -= skip;
+ // parse optional base prefix
+ str += mp_parse_num_base(str, top - str, &base);
- errno = 0;
- found = strtol(p, &num, base);
- if (errno) {
- goto value_error;
- } else if (found && *(num) == 0) {
- goto done;
- } else if (found || num != p) {
- goto check_tail_space;
- } else {
+ // string should be an integer number
+ machine_int_t int_val = 0;
+ const char *restrict str_val_start = str;
+ for (; str < top; str++) {
+ machine_int_t old_val = int_val;
+ int dig = *str;
+ if (unichar_isdigit(dig) && dig - '0' < base) {
+ // 0-9 digit
+ int_val = base * int_val + dig - '0';
+ } else if (base == 16) {
+ dig |= 0x20;
+ if ('a' <= dig && dig <= 'f') {
+ // a-f hex digit
+ int_val = base * int_val + dig - 'a' + 10;
+ } else {
+ // unknown character
+ break;
+ }
+ } else {
+ // unknown character
+ break;
+ }
+ if (int_val < old_val) {
+ // If new value became less than previous, it's overflow
+ goto overflow;
+ } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
+ // If signed number changed sign - it's overflow
+ goto overflow;
+ }
+ }
+
+ // check we parsed something
+ if (str == str_val_start) {
goto value_error;
}
-check_tail_space:
- if (*(num) != 0) {
- while (isspace((c = *(num++))));
- if (c != 0) {
- goto value_error;
- }
+ // negate value if needed
+ if (neg) {
+ int_val = -int_val;
+ }
+
+ // skip trailing space
+ for (; str < top && unichar_isspace(*str); str++) {
+ }
+
+ // check we reached the end of the string
+ if (str != top) {
+ goto value_error;
}
-done:
- return MP_OBJ_NEW_SMALL_INT((found ^ neg) - neg);
+ // return the object
+ return MP_OBJ_NEW_SMALL_INT(int_val);
value_error:
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
-}
-#else /* defined(UNIX) */
-
-mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
- // TODO port strtol to stm
- return MP_OBJ_NEW_SMALL_INT(0);
+overflow:
+ // TODO reparse using bignum
+ nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer"));
}
-#endif /* defined(UNIX) */
-
#define PARSE_DEC_IN_INTG (1)
#define PARSE_DEC_IN_FRAC (2)
#define PARSE_DEC_IN_EXP (3)
-mp_obj_t mp_parse_num_decimal(const char *str, uint len) {
+mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex) {
#if MICROPY_ENABLE_FLOAT
- int in = PARSE_DEC_IN_INTG;
+ const char *top = str + len;
mp_float_t dec_val = 0;
- bool exp_neg = false;
- int exp_val = 0;
- int exp_extra = 0;
+ bool dec_neg = false;
bool imag = false;
- const char *top = str + len;
- for (; str < top; str++) {
- int dig = *str;
- if ('0' <= dig && dig <= '9') {
- dig -= '0';
- if (in == PARSE_DEC_IN_EXP) {
- exp_val = 10 * exp_val + dig;
- } else {
- dec_val = 10 * dec_val + dig;
- if (in == PARSE_DEC_IN_FRAC) {
- exp_extra -= 1;
- }
+
+ // skip leading space
+ for (; str < top && unichar_isspace(*str); str++) {
+ }
+
+ // parse optional sign
+ if (str < top) {
+ if (*str == '+') {
+ str++;
+ } else if (*str == '-') {
+ str++;
+ dec_neg = true;
+ }
+ }
+
+ // determine what the string is
+ if (str < top && (str[0] | 0x20) == 'i') {
+ // string starts with 'i', should be 'inf' or 'infinity' (case insensitive)
+ if (str + 2 < top && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') {
+ // inf
+ str += 3;
+ dec_val = INFINITY;
+ if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') {
+ // infinity
+ str += 5;
}
- } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
- in = PARSE_DEC_IN_FRAC;
- } else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) {
- in = PARSE_DEC_IN_EXP;
- if (str[1] == '+') {
- str++;
- } else if (str[1] == '-') {
+ }
+ } else if (str < top && (str[0] | 0x20) == 'n') {
+ // string starts with 'n', should be 'nan' (case insensitive)
+ if (str + 2 < top && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') {
+ // NaN
+ str += 3;
+ dec_val = MICROPY_FLOAT_C_FUN(nan)("");
+ }
+ } else {
+ // string should be a decimal number
+ int in = PARSE_DEC_IN_INTG;
+ bool exp_neg = false;
+ int exp_val = 0;
+ int exp_extra = 0;
+ for (; str < top; str++) {
+ int dig = *str;
+ if ('0' <= dig && dig <= '9') {
+ dig -= '0';
+ if (in == PARSE_DEC_IN_EXP) {
+ exp_val = 10 * exp_val + dig;
+ } else {
+ dec_val = 10 * dec_val + dig;
+ if (in == PARSE_DEC_IN_FRAC) {
+ exp_extra -= 1;
+ }
+ }
+ } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
+ in = PARSE_DEC_IN_FRAC;
+ } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
+ in = PARSE_DEC_IN_EXP;
+ if (str[1] == '+') {
+ str++;
+ } else if (str[1] == '-') {
+ str++;
+ exp_neg = true;
+ }
+ } else if (allow_imag && (dig | 0x20) == 'j') {
str++;
- exp_neg = true;
+ imag = true;
+ break;
+ } else {
+ // unknown character
+ break;
}
- } else if (dig == 'J' || dig == 'j') {
- str++;
- imag = true;
- break;
- } else {
- // unknown character
- break;
+ }
+
+ // work out the exponent
+ if (exp_neg) {
+ exp_val = -exp_val;
+ }
+ exp_val += exp_extra;
+
+ // apply the exponent
+ for (; exp_val > 0; exp_val--) {
+ dec_val *= 10;
+ }
+ for (; exp_val < 0; exp_val++) {
+ dec_val *= 0.1;
}
}
- if (*str != 0) {
- nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
- }
- if (exp_neg) {
- exp_val = -exp_val;
+
+ // negate value if needed
+ if (dec_neg) {
+ dec_val = -dec_val;
}
- exp_val += exp_extra;
- for (; exp_val > 0; exp_val--) {
- dec_val *= 10;
+
+ // skip trailing space
+ for (; str < top && unichar_isspace(*str); str++) {
}
- for (; exp_val < 0; exp_val++) {
- dec_val *= 0.1;
+
+ // check we reached the end of the string
+ if (str != top) {
+ nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
}
+
+ // return the object
if (imag) {
return mp_obj_new_complex(0, dec_val);
+ } else if (force_complex) {
+ return mp_obj_new_complex(dec_val, 0);
} else {
return mp_obj_new_float(dec_val);
}
+
#else
nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "decimal numbers not supported"));
#endif