summaryrefslogtreecommitdiffstatshomepage
path: root/py
diff options
context:
space:
mode:
Diffstat (limited to 'py')
-rw-r--r--py/compile.c12
-rw-r--r--py/emit.h2
-rw-r--r--py/emitbc.c12
-rw-r--r--py/emitnative.c4
-rw-r--r--py/misc.h2
-rw-r--r--py/mpz.c73
-rw-r--r--py/obj.h8
-rw-r--r--py/objfun.c4
-rw-r--r--py/objgenerator.c3
-rw-r--r--py/objint.c15
-rw-r--r--py/objint_longlong.c23
-rw-r--r--py/objint_mpz.c21
-rw-r--r--py/objstr.c93
-rw-r--r--py/parse.c20
-rw-r--r--py/parse.h7
-rw-r--r--py/parsenum.c49
-rw-r--r--py/qstrdefs.h5
-rw-r--r--py/runtime.c11
-rw-r--r--py/runtime.h1
-rw-r--r--py/runtime0.h4
-rw-r--r--py/smallint.c1
-rw-r--r--py/smallint.h5
-rw-r--r--py/vm.c21
23 files changed, 284 insertions, 112 deletions
diff --git a/py/compile.c b/py/compile.c
index f925c8c1ff..1f0d90570e 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -73,8 +73,8 @@ typedef struct _compiler_t {
uint next_label;
- uint break_label;
- uint continue_label;
+ uint16_t break_label; // highest bit set indicates we are breaking out of a for loop
+ uint16_t continue_label;
int break_continue_except_level;
uint16_t cur_except_level; // increased for SETUP_EXCEPT, SETUP_FINALLY; decreased for POP_BLOCK, POP_EXCEPT
@@ -249,7 +249,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
// shouldn't happen
assert(0);
}
- if (MP_PARSE_FITS_SMALL_INT(arg0)) {
+ if (MP_SMALL_INT_FITS(arg0)) {
//printf("%ld + %ld\n", arg0, arg1);
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0);
}
@@ -264,7 +264,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
// int * int
if (!mp_small_int_mul_overflow(arg0, arg1)) {
arg0 *= arg1;
- if (MP_PARSE_FITS_SMALL_INT(arg0)) {
+ if (MP_SMALL_INT_FITS(arg0)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0);
}
}
@@ -337,7 +337,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
mp_load_method_maybe(elem->value, q_attr, dest);
if (MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == NULL) {
machine_int_t val = MP_OBJ_SMALL_INT_VALUE(dest[0]);
- if (MP_PARSE_FITS_SMALL_INT(val)) {
+ if (MP_SMALL_INT_FITS(val)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, val);
}
}
@@ -1745,6 +1745,7 @@ void compile_while_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
// And, if the loop never runs, the loop variable should never be assigned
void compile_for_stmt_optimised_range(compiler_t *comp, mp_parse_node_t pn_var, mp_parse_node_t pn_start, mp_parse_node_t pn_end, mp_parse_node_t pn_step, mp_parse_node_t pn_body, mp_parse_node_t pn_else) {
START_BREAK_CONTINUE_BLOCK
+ // note that we don't need to pop anything when breaking from an optimised for loop
uint top_label = comp_next_label(comp);
uint entry_label = comp_next_label(comp);
@@ -1843,6 +1844,7 @@ void compile_for_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
#endif
START_BREAK_CONTINUE_BLOCK
+ comp->break_label |= MP_EMIT_BREAK_FROM_FOR;
uint pop_label = comp_next_label(comp);
uint end_label = comp_next_label(comp);
diff --git a/py/emit.h b/py/emit.h
index 5a3b27d839..874ec8819a 100644
--- a/py/emit.h
+++ b/py/emit.h
@@ -44,6 +44,8 @@ typedef enum {
#define MP_EMIT_STAR_FLAG_SINGLE (0x01)
#define MP_EMIT_STAR_FLAG_DOUBLE (0x02)
+#define MP_EMIT_BREAK_FROM_FOR (0x8000)
+
typedef struct _emit_t emit_t;
typedef struct _emit_method_table_t {
diff --git a/py/emitbc.c b/py/emitbc.c
index 06f63b6f6c..cfaea7c88a 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -617,11 +617,15 @@ STATIC void emit_bc_jump_if_false_or_pop(emit_t *emit, uint label) {
STATIC void emit_bc_unwind_jump(emit_t *emit, uint label, int except_depth) {
if (except_depth == 0) {
- emit_bc_jump(emit, label);
- } else {
emit_bc_pre(emit, 0);
- emit_write_bytecode_byte_signed_label(emit, MP_BC_UNWIND_JUMP, label);
- emit_write_bytecode_byte(emit, except_depth);
+ if (label & MP_EMIT_BREAK_FROM_FOR) {
+ // need to pop the iterator if we are breaking out of a for loop
+ emit_write_bytecode_byte(emit, MP_BC_POP_TOP);
+ }
+ emit_write_bytecode_byte_signed_label(emit, MP_BC_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR);
+ } else {
+ emit_write_bytecode_byte_signed_label(emit, MP_BC_UNWIND_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR);
+ emit_write_bytecode_byte(emit, ((label & MP_EMIT_BREAK_FROM_FOR) ? 0x80 : 0) | except_depth);
}
}
diff --git a/py/emitnative.c b/py/emitnative.c
index 261b1a2a51..4dac5ffb09 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -704,7 +704,7 @@ STATIC void emit_native_load_const_int(emit_t *emit, qstr qst) {
DEBUG_printf("load_const_int %s\n", qstr_str(st));
// for viper: load integer, check fits in 32 bits
emit_native_pre(emit);
- emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_INT, mp_obj_new_int_from_qstr, qst, REG_ARG_1);
+ emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_INT, mp_load_const_int, qst, REG_ARG_1);
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
@@ -1043,7 +1043,7 @@ STATIC void emit_native_jump_if_false_or_pop(emit_t *emit, uint label) {
}
STATIC void emit_native_break_loop(emit_t *emit, uint label, int except_depth) {
- emit_native_jump(emit, label); // TODO properly
+ emit_native_jump(emit, label & ~MP_EMIT_BREAK_FROM_FOR); // TODO properly
}
STATIC void emit_native_continue_loop(emit_t *emit, uint label, int except_depth) {
diff --git a/py/misc.h b/py/misc.h
index 013b5f123e..fd54147efd 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -96,6 +96,8 @@ bool unichar_isalpha(unichar c);
bool unichar_isprint(unichar c);
bool unichar_isdigit(unichar c);
bool unichar_isxdigit(unichar c);
+bool unichar_isupper(unichar c);
+bool unichar_islower(unichar c);
unichar unichar_tolower(unichar c);
unichar unichar_toupper(unichar c);
diff --git a/py/mpz.c b/py/mpz.c
index 9e60fc50d0..9c42878ff8 100644
--- a/py/mpz.c
+++ b/py/mpz.c
@@ -218,6 +218,38 @@ STATIC uint mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, uint jlen, const mpz
return idig + 1 - oidig;
}
+/* computes i = j & -k = j & (~k + 1)
+   j and k are digit arrays (least-significant digit first); k is negated
+   on the fly, digit by digit, using two's complement (~k + 1)
+   returns number of digits in i; the result is normalised (no high zero digits)
+   assumes enough memory in i; assumes normalised j, k
+   can have i, j, k pointing to same memory
+*/
+STATIC uint mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, uint jlen, const mpz_dig_t *kdig, uint klen) {
+    mpz_dig_t *oidig = idig;
+    mpz_dbl_dig_t carry = 1; // the "+ 1" of ~k + 1, propagated through the digits
+
+    // digits present in both j and k: and j with the corresponding digit of -k
+    for (; jlen > 0 && klen > 0; --jlen, --klen, ++idig, ++jdig, ++kdig) {
+        carry += *kdig ^ DIG_MASK;
+        *idig = (*jdig & carry) & DIG_MASK;
+        carry >>= DIG_SIZE;
+    }
+
+    // remaining digits of j: k is exhausted, so ~k contributes all-ones digits
+    for (; jlen > 0; --jlen, ++idig, ++jdig) {
+        carry += DIG_MASK;
+        *idig = (*jdig & carry) & DIG_MASK;
+        carry >>= DIG_SIZE;
+    }
+
+    // The result of an and with j can never have more digits than j, so a carry
+    // that is still pending here must NOT be stored as an extra digit.  Storing
+    // it produced wrong answers when jlen < klen and the low digits of k were
+    // zero, eg 1 & -(1 << DIG_SIZE) came out as (1 << DIG_SIZE) instead of 0.
+
+    // remove trailing zeros (without ever stepping before the start of i)
+    while (idig > oidig && idig[-1] == 0) {
+        --idig;
+    }
+
+    return idig - oidig;
+}
+
/* computes i = j | k
returns number of digits in i
assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen
@@ -896,24 +928,35 @@ void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) {
can have dest, lhs, rhs the same
*/
void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) {
- // make sure lhs has the most digits
- if (lhs->len < rhs->len) {
- const mpz_t *temp = lhs;
- lhs = rhs;
- rhs = temp;
- }
-
if (lhs->neg == rhs->neg) {
- mpz_need_dig(dest, rhs->len);
- dest->len = mpn_and(dest->dig, lhs->dig, lhs->len, rhs->dig, rhs->len);
+ if (lhs->neg == 0) {
+ // make sure lhs has the most digits
+ if (lhs->len < rhs->len) {
+ const mpz_t *temp = lhs;
+ lhs = rhs;
+ rhs = temp;
+ }
+ // do the and'ing
+ mpz_need_dig(dest, rhs->len);
+ dest->len = mpn_and(dest->dig, lhs->dig, lhs->len, rhs->dig, rhs->len);
+ dest->neg = 0;
+ } else {
+ // TODO both args are negative
+ assert(0);
+ }
} else {
- mpz_need_dig(dest, lhs->len);
- // TODO
- assert(0);
-// dest->len = mpn_and_neg(dest->dig, lhs->dig, lhs->len, rhs->dig, rhs->len);
+ // args have different sign
+ // make sure lhs is the positive arg
+ if (rhs->neg == 0) {
+ const mpz_t *temp = lhs;
+ lhs = rhs;
+ rhs = temp;
+ }
+ mpz_need_dig(dest, lhs->len + 1);
+ dest->len = mpn_and_neg(dest->dig, lhs->dig, lhs->len, rhs->dig, rhs->len);
+ assert(dest->len <= dest->alloc);
+ dest->neg = 0;
}
-
- dest->neg = lhs->neg;
}
/* computes dest = lhs | rhs
diff --git a/py/obj.h b/py/obj.h
index e4350b424f..185fe14db0 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -72,10 +72,6 @@ typedef struct _mp_obj_base_t mp_obj_base_t;
// These macros check for small int, qstr or object, and access small int and qstr values
-// In SMALL_INT, next-to-highest bits is used as sign, so both must match for value in range
-#define MP_SMALL_INT_MIN ((mp_small_int_t)(((machine_int_t)WORD_MSBIT_HIGH) >> 1))
-#define MP_SMALL_INT_MAX ((mp_small_int_t)(~(MP_SMALL_INT_MIN)))
-#define MP_OBJ_FITS_SMALL_INT(n) ((((n) ^ ((n) << 1)) & WORD_MSBIT_HIGH) == 0)
// these macros have now become inline functions; see below
//#define MP_OBJ_IS_SMALL_INT(o) ((((mp_small_int_t)(o)) & 1) != 0)
//#define MP_OBJ_IS_QSTR(o) ((((mp_small_int_t)(o)) & 3) == 2)
@@ -371,7 +367,7 @@ mp_obj_t mp_obj_new_bool(bool value);
mp_obj_t mp_obj_new_cell(mp_obj_t obj);
mp_obj_t mp_obj_new_int(machine_int_t value);
mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value);
-mp_obj_t mp_obj_new_int_from_qstr(qstr qst);
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base);
mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception)
mp_obj_t mp_obj_new_str(const char* data, uint len, bool make_qstr_if_not_already);
mp_obj_t mp_obj_new_bytes(const byte* data, uint len);
@@ -445,7 +441,7 @@ void mp_obj_cell_set(mp_obj_t self_in, mp_obj_t obj);
// int
// For long int, returns value truncated to machine_int_t
-machine_int_t mp_obj_int_get(mp_obj_t self_in);
+machine_int_t mp_obj_int_get(mp_const_obj_t self_in);
#if MICROPY_ENABLE_FLOAT
mp_float_t mp_obj_int_as_float(mp_obj_t self_in);
#endif
diff --git a/py/objfun.c b/py/objfun.c
index 26dfe0b2e2..8ee5365d5d 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -196,13 +196,13 @@ STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, uint expected, ui
// If it's possible to call a function without allocating new argument array,
// this function returns true, together with pointers to 2 subarrays to be used
-// as arguments. Otherwise, it returns false. It is expected that this fucntion
+// as arguments. Otherwise, it returns false. It is expected that this function
// will be accompanied by another, mp_obj_fun_prepare_full_args(), which will
// instead take pointer to full-length out-array, and will fill it in. Rationale
// being that a caller can try this function and if it succeeds, the function call
// can be made without allocating extra memory. Otherwise, caller can allocate memory
// and try "full" function. These functions are expected to be refactoring of
-// code in fun_bc_call() and evenrually replace it.
+// code in fun_bc_call() and eventually replace it.
bool mp_obj_fun_prepare_simple_args(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args,
uint *out_args1_len, const mp_obj_t **out_args1, uint *out_args2_len, const mp_obj_t **out_args2) {
mp_obj_fun_bc_t *self = self_in;
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 8180185b94..d9825f814f 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -252,7 +252,8 @@ const mp_obj_type_t mp_type_gen_instance = {
.locals_dict = (mp_obj_t)&gen_instance_locals_dict,
};
-mp_obj_t mp_obj_new_gen_instance(mp_obj_dict_t *globals, const byte *bytecode, uint n_args, const mp_obj_t *args,
+mp_obj_t mp_obj_new_gen_instance(mp_obj_dict_t *globals, const byte *bytecode,
+ uint n_args, const mp_obj_t *args,
uint n_args2, const mp_obj_t *args2) {
const byte *code_info = bytecode;
// get code info size, and skip the line number table
diff --git a/py/objint.c b/py/objint.c
index a3b3554008..f631d698f3 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -35,6 +35,7 @@
#include "qstr.h"
#include "obj.h"
#include "parsenum.h"
+#include "smallint.h"
#include "mpz.h"
#include "objint.h"
#include "runtime0.h"
@@ -53,7 +54,10 @@ STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, co
return MP_OBJ_NEW_SMALL_INT(0);
case 1:
- if (MP_OBJ_IS_STR(args[0])) {
+ if (MP_OBJ_IS_INT(args[0])) {
+ // already an int (small or long), just return it
+ return args[0];
+ } else if (MP_OBJ_IS_STR(args[0])) {
// a string, parse it
uint l;
const char *s = mp_obj_str_get_data(args[0], &l);
@@ -63,6 +67,7 @@ STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, co
return MP_OBJ_NEW_SMALL_INT((machine_int_t)(MICROPY_FLOAT_C_FUN(trunc)(mp_obj_float_get(args[0]))));
#endif
} else {
+ // try to convert to small int (eg from bool)
return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0]));
}
@@ -139,7 +144,7 @@ char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_const_ob
} else if (MP_OBJ_IS_TYPE(self_in, &mp_type_int)) {
// Not a small int.
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
- mp_obj_int_t *self = self_in;
+ const mp_obj_int_t *self = self_in;
// Get the value to format; mp_obj_get_int truncates to machine_int_t.
num = self->val;
#else
@@ -225,7 +230,7 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
}
// This is called only with strings whose value doesn't fit in SMALL_INT
-mp_obj_t mp_obj_new_int_from_qstr(qstr qst) {
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) {
nlr_raise(mp_obj_new_exception_msg(&mp_type_OverflowError, "long int not supported in this build"));
return mp_const_none;
}
@@ -247,14 +252,14 @@ mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value) {
}
mp_obj_t mp_obj_new_int(machine_int_t value) {
- if (MP_OBJ_FITS_SMALL_INT(value)) {
+ if (MP_SMALL_INT_FITS(value)) {
return MP_OBJ_NEW_SMALL_INT(value);
}
nlr_raise(mp_obj_new_exception_msg(&mp_type_OverflowError, "small int overflow"));
return mp_const_none;
}
-machine_int_t mp_obj_int_get(mp_obj_t self_in) {
+machine_int_t mp_obj_int_get(mp_const_obj_t self_in) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
index 1e0167b464..82db9e6608 100644
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@@ -34,6 +34,7 @@
#include "misc.h"
#include "qstr.h"
#include "obj.h"
+#include "smallint.h"
#include "mpz.h"
#include "objint.h"
#include "runtime0.h"
@@ -140,7 +141,7 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
}
mp_obj_t mp_obj_new_int(machine_int_t value) {
- if (MP_OBJ_FITS_SMALL_INT(value)) {
+ if (MP_SMALL_INT_FITS(value)) {
return MP_OBJ_NEW_SMALL_INT(value);
}
return mp_obj_new_int_from_ll(value);
@@ -162,26 +163,22 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) {
return o;
}
-mp_obj_t mp_obj_new_int_from_qstr(qstr qst) {
- const char *s = qstr_str(qst);
- long long v;
- char *end;
- // TODO: this doesn't handle Python hacked 0o octal syntax
- v = strtoll(s, &end, 0);
- if (*end != 0) {
- nlr_raise(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
- }
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) {
+ // Parses the digits starting at *str in the given base into a new long-long
+ // int object, and advances *str to the first character strtoll() did not consume.
+ // The sign is passed separately in `neg` (mp_parse_num_integer hands over the
+ // start of the digits together with its own neg flag), so it is applied here.
+ // TODO this does not honor the given length of the string, but in all cases it should anyway be null terminated
+ // TODO check overflow
mp_obj_int_t *o = m_new_obj(mp_obj_int_t);
o->base.type = &mp_type_int;
- o->val = v;
+ char *endptr;
+ long long val = strtoll(*str, &endptr, base);
+ // previously `neg` was ignored and negative literals came out positive
+ o->val = neg ? -val : val;
+ *str = endptr;
return o;
}
-machine_int_t mp_obj_int_get(mp_obj_t self_in) {
+machine_int_t mp_obj_int_get(mp_const_obj_t self_in) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
} else {
- mp_obj_int_t *self = self_in;
+ const mp_obj_int_t *self = self_in;
return self->val;
}
}
diff --git a/py/objint_mpz.c b/py/objint_mpz.c
index 2df3232e9c..9cdbb71688 100644
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@@ -35,6 +35,7 @@
#include "qstr.h"
#include "parsenumbase.h"
#include "obj.h"
+#include "smallint.h"
#include "mpz.h"
#include "objint.h"
#include "runtime0.h"
@@ -239,7 +240,7 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
}
mp_obj_t mp_obj_new_int(machine_int_t value) {
- if (MP_OBJ_FITS_SMALL_INT(value)) {
+ if (MP_SMALL_INT_FITS(value)) {
return MP_OBJ_NEW_SMALL_INT(value);
}
return mp_obj_new_int_from_ll(value);
@@ -260,26 +261,18 @@ mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value) {
return mp_obj_new_int_from_ll(value);
}
-mp_obj_t mp_obj_new_int_from_qstr(qstr qst) {
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) {
mp_obj_int_t *o = mp_obj_int_new_mpz();
- uint len;
- const char* str = (const char*)qstr_data(qst, &len);
- int base = 0;
- int skip = mp_parse_num_base(str, len, &base);
- str += skip;
- len -= skip;
- uint n = mpz_set_from_str(&o->mpz, str, len, false, base);
- if (n != len) {
- nlr_raise(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
- }
+ uint n = mpz_set_from_str(&o->mpz, *str, len, neg, base);
+ *str += n;
return o;
}
-machine_int_t mp_obj_int_get(mp_obj_t self_in) {
+machine_int_t mp_obj_int_get(mp_const_obj_t self_in) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
} else {
- mp_obj_int_t *self = self_in;
+ const mp_obj_int_t *self = self_in;
return mpz_as_int(&self->mpz);
}
}
diff --git a/py/objstr.c b/py/objstr.c
index 83fd002d1e..27f6d9cd6f 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -667,6 +667,7 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
for (machine_uint_t len = orig_str_len; len > 0; len--) {
if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
if (!first_good_char_pos_set) {
+ first_good_char_pos_set = true;
first_good_char_pos = i;
if (type == LSTRIP) {
last_good_char_pos = orig_str_len - 1;
@@ -676,14 +677,13 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
last_good_char_pos = i;
break;
}
- first_good_char_pos_set = true;
}
last_good_char_pos = i;
}
i += delta;
}
- if (first_good_char_pos == 0 && last_good_char_pos == 0) {
+ if (!first_good_char_pos_set) {
// string is all whitespace, return ''
return MP_OBJ_NEW_QSTR(MP_QSTR_);
}
@@ -691,6 +691,12 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
assert(last_good_char_pos >= first_good_char_pos);
//+1 to accomodate the last character
machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
+ if (stripped_len == orig_str_len) {
+ // If nothing was stripped, don't bother to dup original string
+ // TODO: watch out for this case when we'll get to bytearray.strip()
+ assert(first_good_char_pos == 0);
+ return args[0];
+ }
return mp_obj_new_str_of_type(self_type, orig_str + first_good_char_pos, stripped_len);
}
@@ -1496,6 +1502,79 @@ STATIC mp_obj_t str_upper(mp_obj_t self_in) {
return str_caseconv(CASE_UPPER, self_in);
}
+enum { IS_SPACE, IS_ALPHA, IS_DIGIT, IS_UPPER, IS_LOWER };
+
+// Shared implementation of str.isspace/isalpha/isdigit/isupper/islower.
+// `type` is one of the IS_* selectors.  An empty string always gives False.
+// For IS_SPACE, IS_ALPHA and IS_DIGIT every character must satisfy the matching
+// unichar_is* predicate.  For IS_UPPER and IS_LOWER only alphabetic characters
+// are tested, and the string must contain at least one alphabetic character.
+// Raises TypeError if `type` is not a known selector (and the string is non-empty).
+STATIC mp_obj_t str_uni_istype(int type, mp_obj_t self_in) {
+    GET_STR_DATA_LEN(self_in, self_data, self_len);
+
+    if (self_len == 0) {
+        return mp_const_false; // default to False for empty str
+    }
+
+    typedef bool (*check_function)(unichar);
+    check_function pred;
+    bool alpha_only = false; // true for the tests that skip non-alphabetic characters
+
+    switch (type) {
+        case IS_SPACE: pred = &unichar_isspace; break;
+        case IS_ALPHA: pred = &unichar_isalpha; break;
+        case IS_DIGIT: pred = &unichar_isdigit; break;
+        case IS_UPPER: pred = &unichar_isupper; alpha_only = true; break;
+        case IS_LOWER: pred = &unichar_islower; alpha_only = true; break;
+        default:
+            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unknown type provided for str_uni_istype"));
+    }
+
+    if (!alpha_only) {
+        // every single character has to pass the predicate
+        for (int idx = 0; idx < self_len; idx++) {
+            if (!pred(self_data[idx])) {
+                return mp_const_false;
+            }
+        }
+        return mp_const_true;
+    }
+
+    // upper/lower: non-alphabetic characters are ignored
+    bool seen_alpha = false;
+    for (int idx = 0; idx < self_len; idx++) {
+        if (!unichar_isalpha(self_data[idx])) {
+            continue;
+        }
+        seen_alpha = true;
+        if (!pred(self_data[idx])) {
+            return mp_const_false;
+        }
+    }
+
+    if (!seen_alpha) {
+        return mp_const_false;
+    }
+    return mp_const_true;
+}
+
+STATIC mp_obj_t str_isspace(mp_obj_t self_in) { // str.isspace(): delegates to str_uni_istype with IS_SPACE
+ return str_uni_istype(IS_SPACE, self_in);
+}
+
+STATIC mp_obj_t str_isalpha(mp_obj_t self_in) { // str.isalpha(): delegates to str_uni_istype with IS_ALPHA
+ return str_uni_istype(IS_ALPHA, self_in);
+}
+
+STATIC mp_obj_t str_isdigit(mp_obj_t self_in) { // str.isdigit(): delegates to str_uni_istype with IS_DIGIT
+ return str_uni_istype(IS_DIGIT, self_in);
+}
+
+STATIC mp_obj_t str_isupper(mp_obj_t self_in) { // str.isupper(): delegates to str_uni_istype with IS_UPPER
+ return str_uni_istype(IS_UPPER, self_in);
+}
+
+STATIC mp_obj_t str_islower(mp_obj_t self_in) { // str.islower(): delegates to str_uni_istype with IS_LOWER
+ return str_uni_istype(IS_LOWER, self_in);
+}
+
#if MICROPY_CPYTHON_COMPAT
// These methods are superfluous in the presense of str() and bytes()
// constructors.
@@ -1563,6 +1642,11 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower);
STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower);
STATIC const mp_map_elem_t str_locals_dict_table[] = {
#if MICROPY_CPYTHON_COMPAT
@@ -1588,6 +1672,11 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isspace), (mp_obj_t)&str_isspace_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isalpha), (mp_obj_t)&str_isalpha_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isdigit), (mp_obj_t)&str_isdigit_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isupper), (mp_obj_t)&str_isupper_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_islower), (mp_obj_t)&str_islower_obj },
};
STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table);
diff --git a/py/parse.c b/py/parse.c
index 93bced843b..af09c335f2 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -36,6 +36,7 @@
#include "lexer.h"
#include "parsenumbase.h"
#include "parse.h"
+#include "smallint.h"
#define RULE_ACT_KIND_MASK (0xf0)
#define RULE_ACT_ARG_MASK (0x0f)
@@ -311,13 +312,13 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
int i = mp_parse_num_base(str, len, &base);
bool overflow = false;
for (; i < len; i++) {
- machine_int_t old_val = int_val;
+ int dig;
if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
- int_val = base * int_val + str[i] - '0';
+ dig = str[i] - '0';
} else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
- int_val = base * int_val + str[i] - 'a' + 10;
+ dig = str[i] - 'a' + 10;
} else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
- int_val = base * int_val + str[i] - 'A' + 10;
+ dig = str[i] - 'A' + 10;
} else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
dec = true;
break;
@@ -325,17 +326,18 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
small_int = false;
break;
}
- if (int_val < old_val) {
- // If new value became less than previous, it's overflow
+ // add next digit and check for overflow
+ if (mp_small_int_mul_overflow(int_val, base)) {
overflow = true;
- } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
- // If signed number changed sign - it's overflow
+ }
+ int_val = int_val * base + dig;
+ if (!MP_SMALL_INT_FITS(int_val)) {
overflow = true;
}
}
if (dec) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
- } else if (small_int && !overflow && MP_PARSE_FITS_SMALL_INT(int_val)) {
+ } else if (small_int && !overflow && MP_SMALL_INT_FITS(int_val)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
} else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
diff --git a/py/parse.h b/py/parse.h
index 43c74e73b8..6950139e7b 100644
--- a/py/parse.h
+++ b/py/parse.h
@@ -37,13 +37,6 @@ struct _mp_lexer_t;
// - xx...x10010: a string of bytes; bits 5 and above are the qstr holding the value
// - xx...x10110: a token; bits 5 and above are mp_token_kind_t
-// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x)
-// makes sure the top 2 bits of x are all cleared (positive number) or all set (negavite number)
-// these macros can probably go somewhere else because they are used more than just in the parser
-#define MP_UINT_HIGH_2_BITS (~((~((machine_uint_t)0)) >> 2))
-// parser's small ints are different from VM small int
-#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == MP_UINT_HIGH_2_BITS))
-
#define MP_PARSE_NODE_NULL (0)
#define MP_PARSE_NODE_SMALL_INT (0x1)
#define MP_PARSE_NODE_ID (0x02)
diff --git a/py/parsenum.c b/py/parsenum.c
index d3cb821a13..9729ffe64a 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -34,6 +34,7 @@
#include "obj.h"
#include "parsenumbase.h"
#include "parsenum.h"
+#include "smallint.h"
#if MICROPY_ENABLE_FLOAT
#include <math.h>
@@ -42,6 +43,7 @@
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
const char *restrict top = str + len;
bool neg = false;
+ mp_obj_t ret_val;
// check radix base
if ((base != 0 && base < 2) || base > 36) {
@@ -69,16 +71,16 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
machine_int_t int_val = 0;
const char *restrict str_val_start = str;
for (; str < top; str++) {
- machine_int_t old_val = int_val;
+ // get next digit as a value
int dig = *str;
if (unichar_isdigit(dig) && dig - '0' < base) {
// 0-9 digit
- int_val = base * int_val + dig - '0';
+ dig = dig - '0';
} else if (base == 16) {
dig |= 0x20;
if ('a' <= dig && dig <= 'f') {
// a-f hex digit
- int_val = base * int_val + dig - 'a' + 10;
+ dig = dig - 'a' + 10;
} else {
// unknown character
break;
@@ -87,25 +89,31 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
// unknown character
break;
}
- if (int_val < old_val) {
- // If new value became less than previous, it's overflow
+
+ // add next digit and check for overflow
+ if (mp_small_int_mul_overflow(int_val, base)) {
goto overflow;
- } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
- // If signed number changed sign - it's overflow
+ }
+ int_val = int_val * base + dig;
+ if (!MP_SMALL_INT_FITS(int_val)) {
goto overflow;
}
}
- // check we parsed something
- if (str == str_val_start) {
- goto value_error;
- }
-
// negate value if needed
if (neg) {
int_val = -int_val;
}
+ // create the small int
+ ret_val = MP_OBJ_NEW_SMALL_INT(int_val);
+
+have_ret_val:
+ // check we parsed something
+ if (str == str_val_start) {
+ goto value_error;
+ }
+
// skip trailing space
for (; str < top && unichar_isspace(*str); str++) {
}
@@ -116,14 +124,19 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
}
// return the object
- return MP_OBJ_NEW_SMALL_INT(int_val);
-
-value_error:
- nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
+ return ret_val;
overflow:
- // TODO reparse using bignum
- nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer"));
+ // reparse using long int
+ {
+ const char *s2 = str_val_start;
+ ret_val = mp_obj_new_int_from_str_len(&s2, top - str_val_start, neg, base);
+ str = s2;
+ goto have_ret_val;
+ }
+
+value_error:
+ nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid syntax for integer with base %d: '%s'", base, str));
}
#define PARSE_DEC_IN_INTG (1)
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index 2d4ddfea29..4ee56db909 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -246,6 +246,11 @@ Q(partition)
Q(rpartition)
Q(lower)
Q(upper)
+Q(isspace)
+Q(isalpha)
+Q(isdigit)
+Q(isupper)
+Q(islower)
Q(iterable)
Q(start)
diff --git a/py/runtime.c b/py/runtime.c
index ea75280ce4..a9d57460ac 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -98,6 +98,13 @@ void mp_deinit(void) {
#endif
}
+// Create the integer object for the literal whose text is stored under `qstr`.
+// The text is fetched with qstr_data() and parsed by mp_parse_num_integer() with base 0.
+mp_obj_t mp_load_const_int(qstr qstr) {
+    DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
+    uint n_bytes;
+    const char *text = (const char*)qstr_data(qstr, &n_bytes);
+    return mp_parse_num_integer(text, n_bytes, 0);
+}
+
mp_obj_t mp_load_const_dec(qstr qstr) {
DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
uint len;
@@ -406,7 +413,7 @@ mp_obj_t mp_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
goto unsupported_op;
}
// TODO: We just should make mp_obj_new_int() inline and use that
- if (MP_OBJ_FITS_SMALL_INT(lhs_val)) {
+ if (MP_SMALL_INT_FITS(lhs_val)) {
return MP_OBJ_NEW_SMALL_INT(lhs_val);
} else {
return mp_obj_new_int(lhs_val);
@@ -1147,8 +1154,8 @@ void *m_malloc_fail(int num_bytes) {
// these must correspond to the respective enum
void *const mp_fun_table[MP_F_NUMBER_OF] = {
+ mp_load_const_int,
mp_load_const_dec,
- mp_obj_new_int_from_qstr,
mp_load_const_str,
mp_load_name,
mp_load_global,
diff --git a/py/runtime.h b/py/runtime.h
index a5d6743034..3c79b48ed0 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -77,6 +77,7 @@ void mp_delete_global(qstr qstr);
mp_obj_t mp_unary_op(int op, mp_obj_t arg);
mp_obj_t mp_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs);
+mp_obj_t mp_load_const_int(qstr qstr);
mp_obj_t mp_load_const_dec(qstr qstr);
mp_obj_t mp_load_const_str(qstr qstr);
mp_obj_t mp_load_const_bytes(qstr qstr);
diff --git a/py/runtime0.h b/py/runtime0.h
index 425122dbc2..542edf4a62 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -96,8 +96,8 @@ typedef enum {
} mp_binary_op_t;
typedef enum {
- MP_F_LOAD_CONST_DEC = 0,
- MP_F_LOAD_CONST_INT,
+ MP_F_LOAD_CONST_INT = 0,
+ MP_F_LOAD_CONST_DEC,
MP_F_LOAD_CONST_STR,
MP_F_LOAD_NAME,
MP_F_LOAD_GLOBAL,
diff --git a/py/smallint.c b/py/smallint.c
index 186c9c754f..5543f126c3 100644
--- a/py/smallint.c
+++ b/py/smallint.c
@@ -28,6 +28,7 @@
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
+#include "smallint.h"
bool mp_small_int_mul_overflow(machine_int_t x, machine_int_t y) {
// Check for multiply overflow; see CERT INT32-C
diff --git a/py/smallint.h b/py/smallint.h
index 9c1de42ad0..36b55f9197 100644
--- a/py/smallint.h
+++ b/py/smallint.h
@@ -26,6 +26,11 @@
// Functions for small integer arithmetic
+// In SMALL_INT, next-to-highest bits is used as sign, so both must match for value in range
+#define MP_SMALL_INT_MIN ((mp_small_int_t)(((machine_int_t)WORD_MSBIT_HIGH) >> 1))
+#define MP_SMALL_INT_MAX ((mp_small_int_t)(~(MP_SMALL_INT_MIN)))
+#define MP_SMALL_INT_FITS(n) ((((n) ^ ((n) << 1)) & WORD_MSBIT_HIGH) == 0)
+
bool mp_small_int_mul_overflow(machine_int_t x, machine_int_t y);
machine_int_t mp_small_int_modulo(machine_int_t dividend, machine_int_t divisor);
machine_int_t mp_small_int_floor_divide(machine_int_t num, machine_int_t denom);
diff --git a/py/vm.c b/py/vm.c
index f6aa74348d..75093d2401 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -103,7 +103,8 @@ typedef enum {
currently_in_except_block = MP_TAGPTR_TAG(exc_sp->val_sp); /* restore previous state */ \
exc_sp--; /* pop back to previous exception handler */
-mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args, uint n_args, const mp_obj_t *args2, uint n_args2, mp_obj_t *ret) {
+mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args, uint n_args,
+ const mp_obj_t *args2, uint n_args2, mp_obj_t *ret) {
const byte *ip = code;
// get code info size, and skip line number table
@@ -157,6 +158,13 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
mp_vm_return_kind_t vm_return_kind = mp_execute_bytecode2(code, &ip, &state[n_state - 1], &sp, exc_stack, &exc_sp, MP_OBJ_NULL);
#if DETECT_VM_STACK_OVERFLOW
+ if (vm_return_kind == MP_VM_RETURN_NORMAL) {
+ if (sp < state) {
+ printf("VM stack underflow: " INT_FMT "\n", sp - state);
+ assert(0);
+ }
+ }
+
// We can't check the case when an exception is returned in state[n_state - 1]
// and there are no arguments, because in this case our detection slot may have
// been overwritten by the returned exception (which is allowed).
@@ -170,7 +178,7 @@ mp_vm_return_kind_t mp_execute_bytecode(const byte *code, const mp_obj_t *args,
}
}
if (overflow) {
- printf("VM stack overflow state=%p n_state+1=%u\n", state, n_state);
+ printf("VM stack overflow state=%p n_state+1=" UINT_FMT "\n", state, n_state);
assert(0);
}
}
@@ -312,7 +320,7 @@ dispatch_loop:
ENTRY(MP_BC_LOAD_CONST_INT): {
DECODE_QSTR;
- PUSH(mp_obj_new_int_from_qstr(qst));
+ PUSH(mp_load_const_int(qst));
DISPATCH();
}
@@ -618,10 +626,10 @@ dispatch_loop:
ENTRY(MP_BC_UNWIND_JUMP):
DECODE_SLABEL;
PUSH((void*)(ip + unum)); // push destination ip for jump
- PUSH((void*)(machine_uint_t)(*ip)); // push number of exception handlers to unwind
+ PUSH((void*)(machine_uint_t)(*ip)); // push number of exception handlers to unwind (0x80 bit set if we also need to pop stack)
unwind_jump:
unum = (machine_uint_t)POP(); // get number of exception handlers to unwind
- while (unum > 0) {
+ while ((unum & 0x7f) > 0) {
unum -= 1;
assert(exc_sp >= exc_stack);
if (exc_sp->opcode == MP_BC_SETUP_FINALLY || exc_sp->opcode == MP_BC_SETUP_WITH) {
@@ -638,6 +646,9 @@ unwind_jump:
exc_sp--;
}
ip = (const byte*)POP(); // pop destination ip for jump
+ if (unum != 0) {
+ sp--;
+ }
DISPATCH();
// matched against: POP_BLOCK or POP_EXCEPT (anything else?)