diff options
Diffstat (limited to 'py')
-rw-r--r-- | py/asmthumb.c | 2 | ||||
-rw-r--r-- | py/asmx64.c | 2 | ||||
-rw-r--r-- | py/binary.c | 50 | ||||
-rw-r--r-- | py/binary.h | 1 | ||||
-rw-r--r-- | py/builtin.c | 54 | ||||
-rw-r--r-- | py/builtintables.c | 16 | ||||
-rw-r--r-- | py/compile.c | 10 | ||||
-rw-r--r-- | py/emit.h | 5 | ||||
-rw-r--r-- | py/emitbc.c | 13 | ||||
-rw-r--r-- | py/emitcommon.c | 2 | ||||
-rw-r--r-- | py/emitcpy.c | 13 | ||||
-rw-r--r-- | py/emitglue.c | 2 | ||||
-rw-r--r-- | py/emitinlinethumb.c | 6 | ||||
-rw-r--r-- | py/emitnative.c | 119 | ||||
-rw-r--r-- | py/emitpass1.c | 6 | ||||
-rw-r--r-- | py/gc.c | 12 | ||||
-rw-r--r-- | py/gc.h | 1 | ||||
-rw-r--r-- | py/lexer.c | 37 | ||||
-rw-r--r-- | py/lexerstr.c | 2 | ||||
-rw-r--r-- | py/lexerunix.c | 2 | ||||
-rw-r--r-- | py/malloc.c | 2 | ||||
-rw-r--r-- | py/map.c | 2 | ||||
-rw-r--r-- | py/misc.h | 20 | ||||
-rw-r--r-- | py/mkrules.mk | 10 | ||||
-rw-r--r-- | py/modarray.c | 10 | ||||
-rw-r--r-- | py/modcmath.c | 6 | ||||
-rw-r--r-- | py/modcollections.c | 6 | ||||
-rw-r--r-- | py/modgc.c | 22 | ||||
-rw-r--r-- | py/modio.c | 6 | ||||
-rw-r--r-- | py/modmath.c | 6 | ||||
-rw-r--r-- | py/modmicropython.c | 6 | ||||
-rw-r--r-- | py/modstruct.c | 6 | ||||
-rw-r--r-- | py/modsys.c | 6 | ||||
-rw-r--r-- | py/mpconfig.h | 32 | ||||
-rw-r--r-- | py/mpz.c | 2 | ||||
-rw-r--r-- | py/nlr.h | 3 | ||||
-rw-r--r-- | py/nlrthumb.S | 12 | ||||
-rw-r--r-- | py/obj.c | 12 | ||||
-rw-r--r-- | py/objarray.c | 4 | ||||
-rw-r--r-- | py/objcomplex.c | 2 | ||||
-rw-r--r-- | py/objenumerate.c | 4 | ||||
-rw-r--r-- | py/objexcept.c | 8 | ||||
-rw-r--r-- | py/objfloat.c | 5 | ||||
-rw-r--r-- | py/objfun.c | 6 | ||||
-rw-r--r-- | py/objint_mpz.c | 2 | ||||
-rw-r--r-- | py/objstr.c | 112 | ||||
-rw-r--r-- | py/objstr.h | 48 | ||||
-rw-r--r-- | py/objstrunicode.c | 359 | ||||
-rw-r--r-- | py/parse.c | 2 | ||||
-rw-r--r-- | py/parsehelper.c | 2 | ||||
-rw-r--r-- | py/parsenum.c | 8 | ||||
-rw-r--r-- | py/parsenumbase.c | 2 | ||||
-rw-r--r-- | py/pfenv.c | 2 | ||||
-rw-r--r-- | py/py.mk | 2 | ||||
-rw-r--r-- | py/qstr.c | 2 | ||||
-rw-r--r-- | py/qstrdefs.h | 2 | ||||
-rw-r--r-- | py/repl.c | 2 | ||||
-rw-r--r-- | py/runtime.c | 5 | ||||
-rw-r--r-- | py/scope.c | 2 | ||||
-rw-r--r-- | py/smallint.c | 2 | ||||
-rw-r--r-- | py/stackctrl.c | 63 | ||||
-rw-r--r-- | py/stackctrl.h | 41 | ||||
-rw-r--r-- | py/stream.c | 11 | ||||
-rw-r--r-- | py/unicode.c | 53 | ||||
-rw-r--r-- | py/unicode.h | 1 | ||||
-rw-r--r-- | py/vstr.c | 40 |
66 files changed, 1123 insertions, 193 deletions
diff --git a/py/asmthumb.c b/py/asmthumb.c index 891947567b..03752ed938 100644 --- a/py/asmthumb.c +++ b/py/asmthumb.c @@ -28,8 +28,8 @@ #include <assert.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "asmthumb.h" // wrapper around everything in this file diff --git a/py/asmx64.c b/py/asmx64.c index 6c22ea25de..4695bdc731 100644 --- a/py/asmx64.c +++ b/py/asmx64.c @@ -29,8 +29,8 @@ #include <assert.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" // wrapper around everything in this file #if MICROPY_EMIT_X64 diff --git a/py/binary.c b/py/binary.c index 833d9c85ad..d755bc86e0 100644 --- a/py/binary.c +++ b/py/binary.c @@ -29,8 +29,8 @@ #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "binary.h" @@ -125,24 +125,9 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, int index) { return MP_OBJ_NEW_SMALL_INT(val); } -#define is_signed(typecode) (typecode > 'Z') -mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr) { - byte *p = *ptr; - uint align; - - int size = mp_binary_get_size(struct_type, val_type, &align); - if (struct_type == '@') { - // Make pointer aligned - p = (byte*)(((machine_uint_t)p + align - 1) & ~(align - 1)); - #if MP_ENDIANNESS_LITTLE - struct_type = '<'; - #else - struct_type = '>'; - #endif - } - +machine_int_t mp_binary_get_int(uint size, bool is_signed, bool big_endian, byte *p) { int delta; - if (struct_type == '<') { + if (!big_endian) { delta = -1; p += size - 1; } else { @@ -150,7 +135,7 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr) { } machine_int_t val = 0; - if (is_signed(val_type) && *p & 0x80) { + if (is_signed && *p & 0x80) { val = -1; } for (uint i = 0; i < size; i++) { @@ -159,7 +144,28 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr) { p += delta; } - *ptr += size; + return val; +} + 
+#define is_signed(typecode) (typecode > 'Z') +mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr) { + byte *p = *ptr; + uint align; + + int size = mp_binary_get_size(struct_type, val_type, &align); + if (struct_type == '@') { + // Make pointer aligned + p = (byte*)(((machine_uint_t)p + align - 1) & ~((machine_uint_t)align - 1)); + #if MP_ENDIANNESS_LITTLE + struct_type = '<'; + #else + struct_type = '>'; + #endif + } + *ptr = p + size; + + machine_int_t val = mp_binary_get_int(size, is_signed(val_type), (struct_type == '>'), p); + if (val_type == 'O') { return (mp_obj_t)val; } else if (val_type == 'S') { @@ -178,13 +184,14 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte ** int size = mp_binary_get_size(struct_type, val_type, &align); if (struct_type == '@') { // Make pointer aligned - p = (byte*)(((machine_uint_t)p + align - 1) & ~(align - 1)); + p = (byte*)(((machine_uint_t)p + align - 1) & ~((machine_uint_t)align - 1)); #if MP_ENDIANNESS_LITTLE struct_type = '<'; #else struct_type = '>'; #endif } + *ptr = p + size; #if MP_ENDIANNESS_BIG #error Not implemented @@ -215,7 +222,6 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte ** in += in_delta; } - *ptr += size; } void mp_binary_set_val_array(char typecode, void *p, int index, mp_obj_t val_in) { diff --git a/py/binary.h b/py/binary.h index f15a2fd7fb..63ea5d741e 100644 --- a/py/binary.h +++ b/py/binary.h @@ -34,3 +34,4 @@ void mp_binary_set_val_array(char typecode, void *p, int index, mp_obj_t val_in) void mp_binary_set_val_array_from_int(char typecode, void *p, int index, machine_int_t val); mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr); void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte **ptr); +machine_int_t mp_binary_get_int(uint size, bool is_signed, bool big_endian, byte *p); diff --git a/py/builtin.c b/py/builtin.c index 834108f1b5..f4bbe0e237 100644 --- a/py/builtin.c 
+++ b/py/builtin.c @@ -113,11 +113,13 @@ mp_obj_t mp_builtin_abs(mp_obj_t o_in) { } else { return o_in; } +#if MICROPY_PY_BUILTINS_COMPLEX } else if (MP_OBJ_IS_TYPE(o_in, &mp_type_complex)) { mp_float_t real, imag; mp_obj_complex_get(o_in, &real, &imag); return mp_obj_new_float(MICROPY_FLOAT_C_FUN(sqrt)(real*real + imag*imag)); #endif +#endif } else { assert(0); return mp_const_none; @@ -154,7 +156,7 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_any_obj, mp_builtin_any); STATIC mp_obj_t mp_builtin_bin(mp_obj_t o_in) { mp_obj_t args[] = { MP_OBJ_NEW_QSTR(MP_QSTR__brace_open__colon__hash_b_brace_close_), o_in }; - return mp_obj_str_format(ARRAY_SIZE(args), args); + return mp_obj_str_format(MP_ARRAY_SIZE(args), args); } MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_bin_obj, mp_builtin_bin); @@ -170,13 +172,40 @@ STATIC mp_obj_t mp_builtin_callable(mp_obj_t o_in) { MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_callable_obj, mp_builtin_callable); STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) { - int ord = mp_obj_get_int(o_in); + #if MICROPY_PY_BUILTINS_STR_UNICODE + machine_int_t c = mp_obj_get_int(o_in); + char str[4]; + int len = 0; + if (c < 0x80) { + *str = c; len = 1; + } else if (c < 0x800) { + str[0] = (c >> 6) | 0xC0; + str[1] = (c & 0x3F) | 0x80; + len = 2; + } else if (c < 0x10000) { + str[0] = (c >> 12) | 0xE0; + str[1] = ((c >> 6) & 0x3F) | 0x80; + str[2] = (c & 0x3F) | 0x80; + len = 3; + } else if (c < 0x110000) { + str[0] = (c >> 18) | 0xF0; + str[1] = ((c >> 12) & 0x3F) | 0x80; + str[2] = ((c >> 6) & 0x3F) | 0x80; + str[3] = (c & 0x3F) | 0x80; + len = 4; + } else { + nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)")); + } + return mp_obj_new_str(str, len, true); + #else + machine_int_t ord = mp_obj_get_int(o_in); if (0 <= ord && ord <= 0x10ffff) { char str[1] = {ord}; return mp_obj_new_str(str, 1, true); } else { nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)")); } + #endif } 
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_chr_obj, mp_builtin_chr); @@ -342,13 +371,32 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct); STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) { uint len; const char *str = mp_obj_str_get_data(o_in, &len); + #if MICROPY_PY_BUILTINS_STR_UNICODE + machine_uint_t charlen = unichar_charlen(str, len); + if (charlen == 1) { + if (MP_OBJ_IS_STR(o_in) && UTF8_IS_NONASCII(*str)) { + machine_int_t ord = *str++ & 0x7F; + for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) { + ord &= ~mask; + } + while (UTF8_IS_CONT(*str)) { + ord = (ord << 6) | (*str++ & 0x3F); + } + return mp_obj_new_int(ord); + } else { + return mp_obj_new_int(((const byte*)str)[0]); + } + } else { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "ord() expected a character, but string of length %d found", charlen)); + } + #else if (len == 1) { // don't sign extend when converting to ord - // TODO unicode return mp_obj_new_int(((const byte*)str)[0]); } else { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "ord() expected a character, but string of length %d found", len)); } + #endif } MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_ord_obj, mp_builtin_ord); diff --git a/py/builtintables.c b/py/builtintables.c index 857a581de4..c42cdf89bb 100644 --- a/py/builtintables.c +++ b/py/builtintables.c @@ -26,8 +26,8 @@ #include <stdlib.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -43,8 +43,10 @@ STATIC const mp_map_elem_t mp_builtin_object_table[] = { // built-in types { MP_OBJ_NEW_QSTR(MP_QSTR_bool), (mp_obj_t)&mp_type_bool }, { MP_OBJ_NEW_QSTR(MP_QSTR_bytes), (mp_obj_t)&mp_type_bytes }, +#if MICROPY_PY_BUILTINS_BYTEARRAY { MP_OBJ_NEW_QSTR(MP_QSTR_bytearray), (mp_obj_t)&mp_type_bytearray }, -#if MICROPY_PY_BUILTINS_FLOAT +#endif +#if MICROPY_PY_BUILTINS_COMPLEX { MP_OBJ_NEW_QSTR(MP_QSTR_complex), (mp_obj_t)&mp_type_complex }, #endif { MP_OBJ_NEW_QSTR(MP_QSTR_dict), 
(mp_obj_t)&mp_type_dict }, @@ -150,8 +152,8 @@ const mp_obj_dict_t mp_builtin_object_dict_obj = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_builtin_object_table), - .alloc = ARRAY_SIZE(mp_builtin_object_table), + .used = MP_ARRAY_SIZE(mp_builtin_object_table), + .alloc = MP_ARRAY_SIZE(mp_builtin_object_table), .table = (mp_map_elem_t*)mp_builtin_object_table, }, }; @@ -160,7 +162,9 @@ STATIC const mp_map_elem_t mp_builtin_module_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR___main__), (mp_obj_t)&mp_module___main__ }, { MP_OBJ_NEW_QSTR(MP_QSTR_micropython), (mp_obj_t)&mp_module_micropython }, +#if MICROPY_PY_ARRAY { MP_OBJ_NEW_QSTR(MP_QSTR_array), (mp_obj_t)&mp_module_array }, +#endif #if MICROPY_PY_IO { MP_OBJ_NEW_QSTR(MP_QSTR__io), (mp_obj_t)&mp_module_io }, #endif @@ -195,8 +199,8 @@ const mp_obj_dict_t mp_builtin_module_dict_obj = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_builtin_module_table), - .alloc = ARRAY_SIZE(mp_builtin_module_table), + .used = MP_ARRAY_SIZE(mp_builtin_module_table), + .alloc = MP_ARRAY_SIZE(mp_builtin_module_table), .table = (mp_map_elem_t*)mp_builtin_module_table, }, }; diff --git a/py/compile.c b/py/compile.c index 946c8924b2..f2a108074f 100644 --- a/py/compile.c +++ b/py/compile.c @@ -31,8 +31,8 @@ #include <assert.h> #include <math.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" @@ -111,8 +111,8 @@ STATIC const mp_map_elem_t mp_constants_table[] = { STATIC const mp_map_t mp_constants_map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_constants_table), - .alloc = ARRAY_SIZE(mp_constants_table), + .used = MP_ARRAY_SIZE(mp_constants_table), + .alloc = MP_ARRAY_SIZE(mp_constants_table), .table = (mp_map_elem_t*)mp_constants_table, }; @@ -1894,7 +1894,7 @@ void compile_try_except(compiler_t *comp, mp_parse_node_t pn_body, int n_except, EMIT_ARG(jump, 
success_label); // jump over exception handler EMIT_ARG(label_assign, l1); // start of exception handler - EMIT_ARG(adjust_stack_size, 6); // stack adjust for the 3 exception items, +3 for possible UNWIND_JUMP state + EMIT(start_except_handler); uint l2 = comp_next_label(comp); @@ -1966,7 +1966,7 @@ void compile_try_except(compiler_t *comp, mp_parse_node_t pn_body, int n_except, compile_decrease_except_level(comp); EMIT(end_finally); - EMIT_ARG(adjust_stack_size, -5); // stack adjust + EMIT(end_except_handler); EMIT_ARG(label_assign, success_label); compile_node(comp, pn_else); // else block, can be null @@ -134,6 +134,11 @@ typedef struct _emit_method_table_t { void (*yield_value)(emit_t *emit); void (*yield_from)(emit_t *emit); + // these methods are used to control entry to/exit from an exception handler + // they may or may not emit code + void (*start_except_handler)(emit_t *emit); + void (*end_except_handler)(emit_t *emit); + #if MICROPY_EMIT_CPYTHON // these methods are only needed for emitcpy void (*load_const_verbatim_str)(emit_t *emit, const char *str); diff --git a/py/emitbc.c b/py/emitbc.c index 841dd4aabb..f9fbed4aaf 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -30,8 +30,8 @@ #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" @@ -849,6 +849,14 @@ STATIC void emit_bc_yield_from(emit_t *emit) { emit_write_bytecode_byte(emit, MP_BC_YIELD_FROM); } +STATIC void emit_bc_start_except_handler(emit_t *emit) { + emit_bc_adjust_stack_size(emit, 6); // stack adjust for the 3 exception items, +3 for possible UNWIND_JUMP state +} + +STATIC void emit_bc_end_except_handler(emit_t *emit) { + emit_bc_adjust_stack_size(emit, -5); // stack adjust +} + const emit_method_table_t emit_bc_method_table = { emit_bc_set_native_types, emit_bc_start_pass, @@ -934,6 +942,9 @@ const emit_method_table_t emit_bc_method_table = { emit_bc_raise_varargs, emit_bc_yield_value, 
emit_bc_yield_from, + + emit_bc_start_except_handler, + emit_bc_end_except_handler, }; #endif // !MICROPY_EMIT_CPYTHON diff --git a/py/emitcommon.c b/py/emitcommon.c index ea65183623..4649793134 100644 --- a/py/emitcommon.c +++ b/py/emitcommon.c @@ -28,8 +28,8 @@ #include <stdint.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" diff --git a/py/emitcpy.c b/py/emitcpy.c index a8a6265b8c..4ff99866a0 100644 --- a/py/emitcpy.c +++ b/py/emitcpy.c @@ -30,8 +30,8 @@ #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" @@ -792,6 +792,14 @@ STATIC void emit_cpy_yield_from(emit_t *emit) { } } +STATIC void emit_cpy_start_except_handler(emit_t *emit) { + emit_cpy_adjust_stack_size(emit, 3); // stack adjust for the 3 exception items +} + +STATIC void emit_cpy_end_except_handler(emit_t *emit) { + emit_cpy_adjust_stack_size(emit, -5); // stack adjust +} + STATIC void emit_cpy_load_const_verbatim_str(emit_t *emit, const char *str) { emit_pre(emit, 1, 3); if (emit->pass == MP_PASS_EMIT) { @@ -899,6 +907,9 @@ const emit_method_table_t emit_cpython_method_table = { emit_cpy_yield_value, emit_cpy_yield_from, + emit_cpy_start_except_handler, + emit_cpy_end_except_handler, + // emitcpy specific functions emit_cpy_load_const_verbatim_str, emit_cpy_load_closure, diff --git a/py/emitglue.c b/py/emitglue.c index f9b9460837..17dc8f867c 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -30,8 +30,8 @@ #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "runtime0.h" diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c index 79ed1c4a02..435e1b64d9 100644 --- a/py/emitinlinethumb.c +++ b/py/emitinlinethumb.c @@ -30,8 +30,8 @@ #include <stdarg.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" 
+#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" @@ -167,7 +167,7 @@ STATIC uint get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t if (MP_PARSE_NODE_IS_ID(pn)) { qstr reg_qstr = MP_PARSE_NODE_LEAF_ARG(pn); const char *reg_str = qstr_str(reg_qstr); - for (uint i = 0; i < ARRAY_SIZE(reg_name_table); i++) { + for (uint i = 0; i < MP_ARRAY_SIZE(reg_name_table); i++) { const reg_name_t *r = &reg_name_table[i]; if (reg_str[0] == r->name[0] && reg_str[1] == r->name[1] && reg_str[2] == r->name[2] && (reg_str[2] == '\0' || reg_str[3] == '\0')) { if (r->reg > max_reg) { @@ -286,7 +286,7 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, int n_args, m asm_thumb_b_n(emit->as, label_num); } else if (op_str[0] == 'b' && op_len == 3) { uint cc = -1; - for (uint i = 0; i < ARRAY_SIZE(cc_name_table); i++) { + for (uint i = 0; i < MP_ARRAY_SIZE(cc_name_table); i++) { if (op_str[1] == cc_name_table[i].name[0] && op_str[2] == cc_name_table[i].name[1]) { cc = cc_name_table[i].cc; } diff --git a/py/emitnative.c b/py/emitnative.c index 4dac5ffb09..4cab3f4697 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -48,8 +48,9 @@ #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "nlr.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" @@ -723,7 +724,11 @@ STATIC void emit_native_load_const_str(emit_t *emit, qstr qstr, bool bytes) { assert(0); emit_post_push_imm(emit, VTYPE_PTR, (machine_uint_t)qstr_str(qstr)); } else { - emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_STR, mp_load_const_str, qstr, REG_ARG_1); + if (bytes) { + emit_call_with_imm_arg(emit, 0, mp_load_const_bytes, qstr, REG_ARG_1); // TODO need to add function to runtime table + } else { + emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_STR, mp_load_const_str, qstr, REG_ARG_1); + } emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); } } @@ -917,8 +922,11 @@ STATIC void emit_native_delete_global(emit_t *emit, 
qstr qstr) { } STATIC void emit_native_delete_attr(emit_t *emit, qstr qstr) { - // not supported - assert(0); + vtype_kind_t vtype_base; + emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base + assert(vtype_base == VTYPE_PYOBJ); + emit_call_with_2_imm_args(emit, MP_F_STORE_ATTR, mp_store_attr, qstr, REG_ARG_2, (machine_uint_t)MP_OBJ_NULL, REG_ARG_3); // arg2 = attribute name, arg3 = value (null for delete) + emit_post(emit); } STATIC void emit_native_delete_subscr(emit_t *emit) { @@ -1054,17 +1062,33 @@ STATIC void emit_native_setup_with(emit_t *emit, uint label) { // not supported, or could be with runtime call assert(0); } + STATIC void emit_native_with_cleanup(emit_t *emit) { assert(0); } + STATIC void emit_native_setup_except(emit_t *emit, uint label) { - assert(0); + emit_native_pre(emit); + // need to commit stack because we may jump elsewhere + need_stack_settled(emit); + emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_1, sizeof(nlr_buf_t) / sizeof(machine_uint_t)); // arg1 = pointer to nlr buf + emit_call(emit, 0, nlr_push); // TODO need to add function to runtime table +#if N_X64 + asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET); + asm_x64_jcc_label(emit->as, JCC_JNZ, label); +#elif N_THUMB + asm_thumb_cmp_rlo_i8(emit->as, REG_RET, 0); + asm_thumb_bcc_label(emit->as, THUMB_CC_NE, label); +#endif + emit_post(emit); } + STATIC void emit_native_setup_finally(emit_t *emit, uint label) { assert(0); } + STATIC void emit_native_end_finally(emit_t *emit) { - assert(0); + //assert(0); } STATIC void emit_native_get_iter(emit_t *emit) { @@ -1104,19 +1128,31 @@ STATIC void emit_native_for_iter_end(emit_t *emit) { STATIC void emit_native_pop_block(emit_t *emit) { emit_native_pre(emit); + emit_call(emit, 0, nlr_pop); // TODO need to add function to runtime table + adjust_stack(emit, -(machine_int_t)(sizeof(nlr_buf_t) / sizeof(machine_uint_t))); emit_post(emit); } STATIC void emit_native_pop_except(emit_t *emit) { - assert(0); + /* + 
emit_native_pre(emit); + emit_call(emit, 0, nlr_pop); // TODO need to add function to runtime table + adjust_stack(emit, -(machine_int_t)(sizeof(nlr_buf_t) / sizeof(machine_uint_t))); + emit_post(emit); + */ } STATIC void emit_native_unary_op(emit_t *emit, mp_unary_op_t op) { - vtype_kind_t vtype; - emit_pre_pop_reg(emit, &vtype, REG_ARG_2); - assert(vtype == VTYPE_PYOBJ); - emit_call_with_imm_arg(emit, MP_F_UNARY_OP, mp_unary_op, op, REG_ARG_1); - emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); + if (op == MP_UNARY_OP_NOT) { + // we need to synthesise this operation + assert(0); + } else { + vtype_kind_t vtype; + emit_pre_pop_reg(emit, &vtype, REG_ARG_2); + assert(vtype == VTYPE_PYOBJ); + emit_call_with_imm_arg(emit, MP_F_UNARY_OP, mp_unary_op, op, REG_ARG_1); + emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); + } } STATIC void emit_native_binary_op(emit_t *emit, mp_binary_op_t op) { @@ -1230,17 +1266,26 @@ STATIC void emit_native_set_add(emit_t *emit, int set_index) { STATIC void emit_native_build_slice(emit_t *emit, int n_args) { DEBUG_printf("build_slice %d\n", n_args); - assert(n_args == 2); - vtype_kind_t vtype_start, vtype_stop; - emit_pre_pop_reg_reg(emit, &vtype_stop, REG_ARG_2, &vtype_start, REG_ARG_1); // arg1 = start, arg2 = stop - assert(vtype_start == VTYPE_PYOBJ); - assert(vtype_stop == VTYPE_PYOBJ); - emit_call_with_imm_arg(emit, MP_F_NEW_SLICE, mp_obj_new_slice, (machine_uint_t)MP_OBJ_NULL, REG_ARG_3); // arg3 = step - emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); + if (n_args == 2) { + vtype_kind_t vtype_start, vtype_stop; + emit_pre_pop_reg_reg(emit, &vtype_stop, REG_ARG_2, &vtype_start, REG_ARG_1); // arg1 = start, arg2 = stop + assert(vtype_start == VTYPE_PYOBJ); + assert(vtype_stop == VTYPE_PYOBJ); + emit_call_with_imm_arg(emit, MP_F_NEW_SLICE, mp_obj_new_slice, (machine_uint_t)mp_const_none, REG_ARG_3); // arg3 = step + emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); + } else { + assert(n_args == 3); + vtype_kind_t vtype_start, vtype_stop, 
vtype_step; + emit_pre_pop_reg_reg_reg(emit, &vtype_step, REG_ARG_3, &vtype_stop, REG_ARG_2, &vtype_start, REG_ARG_1); // arg1 = start, arg2 = stop, arg3 = step + assert(vtype_start == VTYPE_PYOBJ); + assert(vtype_stop == VTYPE_PYOBJ); + assert(vtype_step == VTYPE_PYOBJ); + emit_call(emit, MP_F_NEW_SLICE, mp_obj_new_slice); + emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); + } } STATIC void emit_native_unpack_sequence(emit_t *emit, int n_args) { - // TODO this is untested DEBUG_printf("unpack_sequence %d\n", n_args); vtype_kind_t vtype_base; emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = seq @@ -1250,13 +1295,12 @@ STATIC void emit_native_unpack_sequence(emit_t *emit, int n_args) { } STATIC void emit_native_unpack_ex(emit_t *emit, int n_left, int n_right) { - // TODO this is untested DEBUG_printf("unpack_ex %d %d\n", n_left, n_right); vtype_kind_t vtype_base; emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = seq assert(vtype_base == VTYPE_PYOBJ); - emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, n_left + n_right); // arg3 = dest ptr - emit_call_with_imm_arg(emit, MP_F_UNPACK_EX, mp_unpack_ex, n_left + n_right, REG_ARG_2); // arg2 = n_left + n_right + emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, n_left + n_right + 1); // arg3 = dest ptr + emit_call_with_imm_arg(emit, MP_F_UNPACK_EX, mp_unpack_ex, n_left | (n_right << 8), REG_ARG_2); // arg2 = n_left + n_right } STATIC void emit_native_make_function(emit_t *emit, scope_t *scope, uint n_pos_defaults, uint n_kw_defaults) { @@ -1365,9 +1409,16 @@ STATIC void emit_native_return_value(emit_t *emit) { } STATIC void emit_native_raise_varargs(emit_t *emit, int n_args) { - // call runtime - assert(0); + assert(n_args == 1); + vtype_kind_t vtype_err; + emit_pre_pop_reg(emit, &vtype_err, REG_ARG_1); // arg1 = object to raise + assert(vtype_err == VTYPE_PYOBJ); + emit_call(emit, 0, mp_make_raise_obj); // TODO need to add function to runtime table + emit_post_push_reg(emit, VTYPE_PYOBJ, 
REG_RET); + emit_pre_pop_reg(emit, &vtype_err, REG_ARG_1); + emit_call(emit, 0, nlr_jump); // TODO need to add function to runtime table } + STATIC void emit_native_yield_value(emit_t *emit) { // not supported (for now) assert(0); @@ -1377,6 +1428,21 @@ STATIC void emit_native_yield_from(emit_t *emit) { assert(0); } +STATIC void emit_native_start_except_handler(emit_t *emit) { + // This instruction follows an nlr_pop, so the stack counter is back to zero, when really + // it should be up by a whole nlr_buf_t. We then want to pop the nlr_buf_t here, but save + // the first 2 elements, so we can get the thrown value. + adjust_stack(emit, 2); + vtype_kind_t vtype_nlr; + emit_pre_pop_reg(emit, &vtype_nlr, REG_ARG_1); // get the thrown value + emit_pre_pop_discard(emit, &vtype_nlr); // discard the linked-list pointer in the nlr_buf + emit_post_push_reg_reg_reg(emit, VTYPE_PYOBJ, REG_ARG_1, VTYPE_PYOBJ, REG_ARG_1, VTYPE_PYOBJ, REG_ARG_1); // push the 3 exception items +} + +STATIC void emit_native_end_except_handler(emit_t *emit) { + adjust_stack(emit, -3); // stack adjust (not sure why it's this much...) 
+} + const emit_method_table_t EXPORT_FUN(method_table) = { emit_native_set_viper_types, emit_native_start_pass, @@ -1462,6 +1528,9 @@ const emit_method_table_t EXPORT_FUN(method_table) = { emit_native_raise_varargs, emit_native_yield_value, emit_native_yield_from, + + emit_native_start_except_handler, + emit_native_end_except_handler, }; #endif // (MICROPY_EMIT_X64 && N_X64) || (MICROPY_EMIT_THUMB && N_THUMB) diff --git a/py/emitpass1.c b/py/emitpass1.c index 2e76420a21..b39597318a 100644 --- a/py/emitpass1.c +++ b/py/emitpass1.c @@ -28,8 +28,8 @@ #include <stdint.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" @@ -214,6 +214,10 @@ const emit_method_table_t emit_pass1_method_table = { (void*)emit_pass1_dummy, (void*)emit_pass1_dummy, (void*)emit_pass1_dummy, + + (void*)emit_pass1_dummy, + (void*)emit_pass1_dummy, + #if MICROPY_EMIT_CPYTHON (void*)emit_pass1_dummy, (void*)emit_pass1_dummy, @@ -33,7 +33,6 @@ #include "misc.h" #include "gc.h" -#include "misc.h" #include "qstr.h" #include "obj.h" #include "runtime.h" @@ -113,7 +112,7 @@ STATIC machine_uint_t gc_lock_depth; void gc_init(void *start, void *end) { // align end pointer on block boundary end = (void*)((machine_uint_t)end & (~(BYTES_PER_BLOCK - 1))); - DEBUG_printf("Initializing GC heap: %p..%p = " UINT_FMT " bytes\n", start, end, end - start); + DEBUG_printf("Initializing GC heap: %p..%p = " UINT_FMT " bytes\n", start, end, (byte*)end - (byte*)start); // calculate parameters for GC (T=total, A=alloc table, F=finaliser table, P=pool; all in bytes): // T = A + F + P @@ -173,6 +172,10 @@ void gc_unlock(void) { gc_lock_depth--; } +bool gc_is_locked(void) { + return gc_lock_depth != 0; +} + #define VERIFY_PTR(ptr) ( \ (ptr & (BYTES_PER_BLOCK - 1)) == 0 /* must be aligned on a block */ \ && ptr >= (machine_uint_t)gc_pool_start /* must be above start of pool */ \ @@ -268,6 +271,7 @@ STATIC void gc_sweep(void) { case 
AT_TAIL: if (free_tail) { + DEBUG_printf("gc_sweep(%p)\n",PTR_FROM_BLOCK(block)); ATB_ANY_TO_FREE(block); } break; @@ -401,6 +405,7 @@ found: // get pointer to first block void *ret_ptr = (void*)(gc_pool_start + start_block * WORDS_PER_BLOCK); + DEBUG_printf("gc_alloc(%p)\n", ret_ptr); // zero out the additional bytes of the newly allocated blocks // This is needed because the blocks may have previously held pointers @@ -439,6 +444,7 @@ void gc_free(void *ptr_in) { } machine_uint_t ptr = (machine_uint_t)ptr_in; + DEBUG_printf("gc_free(%p)\n", ptr); if (VERIFY_PTR(ptr)) { machine_uint_t block = BLOCK_FROM_PTR(ptr); @@ -590,7 +596,7 @@ void *gc_realloc(void *ptr_in, machine_uint_t n_bytes) { return NULL; } - DEBUG_printf("gc_realloc: allocating new block\n"); + DEBUG_printf("gc_realloc(%p -> %p)\n", ptr_in, ptr_out); memcpy(ptr_out, ptr_in, n_blocks * BYTES_PER_BLOCK); gc_free(ptr_in); return ptr_out; @@ -30,6 +30,7 @@ void gc_init(void *start, void *end); // They can be used to prevent the GC from allocating/freeing. void gc_lock(void); void gc_unlock(void); +bool gc_is_locked(void); // A given port must implement gc_collect by using the other collect functions. void gc_collect(void); diff --git a/py/lexer.c b/py/lexer.c index a65df54ba6..8732d64362 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -32,8 +32,8 @@ #include <stdio.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" @@ -502,19 +502,32 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs case 'v': c = 0x0b; break; case 'f': c = 0x0c; break; case 'r': c = 0x0d; break; + case 'u': + case 'U': + if (is_bytes) { + // b'\u1234' == b'\\u1234' + vstr_add_char(&lex->vstr, '\\'); + break; + } + // Otherwise fall through. case 'x': { uint num = 0; - if (!get_hex(lex, 2, &num)) { + if (!get_hex(lex, (c == 'x' ? 2 : c == 'u' ? 
4 : 8), &num)) { // TODO error message assert(0); } c = num; break; } - case 'N': break; // TODO \N{name} only in strings - case 'u': break; // TODO \uxxxx only in strings - case 'U': break; // TODO \Uxxxxxxxx only in strings + case 'N': + // Supporting '\N{LATIN SMALL LETTER A}' == 'a' would require keeping the + // entire Unicode name table in the core. As of Unicode 6.3.0, that's nearly + // 3MB of text; even gzip-compressed and with minimal structure, it'll take + // roughly half a meg of storage. This form of Unicode escape may be added + // later on, but it's definitely not a priority right now. -- CJA 20140607 + assert(!"Unicode name escapes not supported"); + break; default: if (c >= '0' && c <= '7') { // Octal sequence, 1-3 chars @@ -533,7 +546,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } } if (c != MP_LEXER_CHAR_EOF) { - vstr_add_char(&lex->vstr, c); + if (c < 0x110000 && !is_bytes) { + vstr_add_char(&lex->vstr, c); + } else if (c < 0x100 && is_bytes) { + vstr_add_byte(&lex->vstr, c); + } else { + assert(!"TODO: Throw an error, invalid escape code probably"); + } } } else { vstr_add_char(&lex->vstr, CUR_CHAR(lex)); @@ -694,10 +713,10 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs // need to check for this special token in many places in the compiler. // TODO improve speed of these string comparisons //for (int i = 0; tok_kw[i] != NULL; i++) { - for (int i = 0; i < ARRAY_SIZE(tok_kw); i++) { + for (int i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) { if (str_strn_equal(tok_kw[i], tok->str, tok->len)) { - if (i == ARRAY_SIZE(tok_kw) - 1) { - // tok_kw[ARRAY_SIZE(tok_kw) - 1] == "__debug__" + if (i == MP_ARRAY_SIZE(tok_kw) - 1) { + // tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__" tok->kind = (mp_optimise_value == 0 ? 
MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE); } else { tok->kind = MP_TOKEN_KW_FALSE + i; diff --git a/py/lexerstr.c b/py/lexerstr.c index 76e90671be..666dbfa37c 100644 --- a/py/lexerstr.c +++ b/py/lexerstr.c @@ -24,8 +24,8 @@ * THE SOFTWARE. */ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" diff --git a/py/lexerunix.c b/py/lexerunix.c index 89dc80b004..51bc915b22 100644 --- a/py/lexerunix.c +++ b/py/lexerunix.c @@ -24,8 +24,8 @@ * THE SOFTWARE. */ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #if MICROPY_HELPER_LEXER_UNIX diff --git a/py/malloc.c b/py/malloc.c index b180ddf6b5..8e90849e93 100644 --- a/py/malloc.c +++ b/py/malloc.c @@ -28,8 +28,8 @@ #include <stdlib.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #if 0 // print debugging info #define DEBUG_printf DEBUG_printf @@ -27,8 +27,8 @@ #include <stdlib.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "runtime0.h" @@ -82,7 +82,7 @@ int m_get_peak_bytes_allocated(void); /** array helpers ***********************************************/ // get the number of elements in a fixed-size array -#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#define MP_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) /** unichar / UTF-8 *********************************************/ @@ -100,7 +100,9 @@ bool unichar_isupper(unichar c); bool unichar_islower(unichar c); unichar unichar_tolower(unichar c); unichar unichar_toupper(unichar c); -#define unichar_charlen(s, bytelen) (bytelen) +machine_uint_t unichar_charlen(const char *str, machine_uint_t len); +#define UTF8_IS_NONASCII(ch) ((ch) & 0x80) +#define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80) /** variable string *********************************************/ @@ -164,4 +166,18 @@ int DEBUG_printf(const char *fmt, ...); extern uint mp_verbose_flag; +// This is useful for unicode handling. 
Some CPU archs have +// special instructions for efficient implementation of this +// function (e.g. CLZ on ARM). +// NOTE: this function is unused at the moment +#ifndef count_lead_ones +static inline uint count_lead_ones(byte val) { + uint c = 0; + for (byte mask = 0x80; val & mask; mask >>= 1) { + c++; + } + return c; +} +#endif + #endif // _INCLUDED_MINILIB_H diff --git a/py/mkrules.mk b/py/mkrules.mk index 9592d6c590..6127ece8fe 100644 --- a/py/mkrules.mk +++ b/py/mkrules.mk @@ -73,9 +73,9 @@ all: $(PROG) $(PROG): $(OBJ) $(ECHO) "LINK $@" - $(Q)$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS) + $(Q)$(CC) $(COPT) -o $@ $(OBJ) $(LIB) $(LDFLAGS) ifndef DEBUG - $(Q)$(STRIP) $(PROG) + $(Q)$(STRIP) $(STRIPFLAGS_EXTRA) $(PROG) endif $(Q)$(SIZE) $(PROG) @@ -97,4 +97,10 @@ print-cfg: $(ECHO) "OBJ = $(OBJ)" .PHONY: print-cfg +print-def: + @$(ECHO) "The following defines are built into the $(CC) compiler" + touch __empty__.c + @$(CC) -E -Wp,-dM __empty__.c + @$(RM) -f __empty__.c + -include $(OBJ:.o=.P) diff --git a/py/modarray.c b/py/modarray.c index a741a0ecb4..c0fe331643 100644 --- a/py/modarray.c +++ b/py/modarray.c @@ -24,12 +24,14 @@ * THE SOFTWARE. 
*/ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" +#if MICROPY_PY_ARRAY + STATIC const mp_map_elem_t mp_module_array_globals_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_array) }, { MP_OBJ_NEW_QSTR(MP_QSTR_array), (mp_obj_t)&mp_type_array }, @@ -40,8 +42,8 @@ STATIC const mp_obj_dict_t mp_module_array_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_array_globals_table), - .alloc = ARRAY_SIZE(mp_module_array_globals_table), + .used = MP_ARRAY_SIZE(mp_module_array_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_array_globals_table), .table = (mp_map_elem_t*)mp_module_array_globals_table, }, }; @@ -51,3 +53,5 @@ const mp_obj_module_t mp_module_array = { .name = MP_QSTR_array, .globals = (mp_obj_dict_t*)&mp_module_array_globals, }; + +#endif diff --git a/py/modcmath.c b/py/modcmath.c index 3bc3055dc8..ddd8abf71e 100644 --- a/py/modcmath.c +++ b/py/modcmath.c @@ -26,8 +26,8 @@ #include <math.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -142,8 +142,8 @@ STATIC const mp_obj_dict_t mp_module_cmath_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_cmath_globals_table), - .alloc = ARRAY_SIZE(mp_module_cmath_globals_table), + .used = MP_ARRAY_SIZE(mp_module_cmath_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_cmath_globals_table), .table = (mp_map_elem_t*)mp_module_cmath_globals_table, }, }; diff --git a/py/modcollections.c b/py/modcollections.c index 9e3da7e666..5cd0b317a1 100644 --- a/py/modcollections.c +++ b/py/modcollections.c @@ -24,8 +24,8 @@ * THE SOFTWARE. 
*/ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -42,8 +42,8 @@ STATIC const mp_obj_dict_t mp_module_collections_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_collections_globals_table), - .alloc = ARRAY_SIZE(mp_module_collections_globals_table), + .used = MP_ARRAY_SIZE(mp_module_collections_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_collections_globals_table), .table = (mp_map_elem_t*)mp_module_collections_globals_table, }, }; diff --git a/py/modgc.c b/py/modgc.c index c53eed235f..4ffdc2be68 100644 --- a/py/modgc.c +++ b/py/modgc.c @@ -24,8 +24,8 @@ * THE SOFTWARE. */ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -61,11 +61,27 @@ STATIC mp_obj_t gc_enable(void) { } MP_DEFINE_CONST_FUN_OBJ_0(gc_enable_obj, gc_enable); +STATIC mp_obj_t gc_mem_free(void) { + gc_info_t info; + gc_info(&info); + return MP_OBJ_NEW_SMALL_INT((machine_uint_t)info.free); +} +MP_DEFINE_CONST_FUN_OBJ_0(gc_mem_free_obj, gc_mem_free); + +STATIC mp_obj_t gc_mem_alloc(void) { + gc_info_t info; + gc_info(&info); + return MP_OBJ_NEW_SMALL_INT((machine_uint_t)info.used); +} +MP_DEFINE_CONST_FUN_OBJ_0(gc_mem_alloc_obj, gc_mem_alloc); + STATIC const mp_map_elem_t mp_module_gc_globals_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_gc) }, { MP_OBJ_NEW_QSTR(MP_QSTR_collect), (mp_obj_t)&gc_collect_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_disable), (mp_obj_t)&gc_disable_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_enable), (mp_obj_t)&gc_enable_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_mem_free), (mp_obj_t)&gc_mem_free_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_mem_alloc), (mp_obj_t)&gc_mem_alloc_obj }, }; STATIC const mp_obj_dict_t mp_module_gc_globals = { @@ -73,8 +89,8 @@ STATIC const mp_obj_dict_t mp_module_gc_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = 
ARRAY_SIZE(mp_module_gc_globals_table), - .alloc = ARRAY_SIZE(mp_module_gc_globals_table), + .used = MP_ARRAY_SIZE(mp_module_gc_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_gc_globals_table), .table = (mp_map_elem_t*)mp_module_gc_globals_table, }, }; diff --git a/py/modio.c b/py/modio.c index 08c6c59dd9..ef3b29b53f 100644 --- a/py/modio.c +++ b/py/modio.c @@ -24,8 +24,8 @@ * THE SOFTWARE. */ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -57,8 +57,8 @@ STATIC const mp_obj_dict_t mp_module_io_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_io_globals_table), - .alloc = ARRAY_SIZE(mp_module_io_globals_table), + .used = MP_ARRAY_SIZE(mp_module_io_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_io_globals_table), .table = (mp_map_elem_t*)mp_module_io_globals_table, }, }; diff --git a/py/modmath.c b/py/modmath.c index 0fd583c2ff..0d0d13b4e2 100644 --- a/py/modmath.c +++ b/py/modmath.c @@ -26,8 +26,8 @@ #include <math.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -172,8 +172,8 @@ STATIC const mp_obj_dict_t mp_module_math_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_math_globals_table), - .alloc = ARRAY_SIZE(mp_module_math_globals_table), + .used = MP_ARRAY_SIZE(mp_module_math_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_math_globals_table), .table = (mp_map_elem_t*)mp_module_math_globals_table, }, }; diff --git a/py/modmicropython.c b/py/modmicropython.c index 40d749da2d..bbb315189b 100644 --- a/py/modmicropython.c +++ b/py/modmicropython.c @@ -24,8 +24,8 @@ * THE SOFTWARE. 
*/ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -65,8 +65,8 @@ STATIC const mp_obj_dict_t mp_module_micropython_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_micropython_globals_table), - .alloc = ARRAY_SIZE(mp_module_micropython_globals_table), + .used = MP_ARRAY_SIZE(mp_module_micropython_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_micropython_globals_table), .table = (mp_map_elem_t*)mp_module_micropython_globals_table, }, }; diff --git a/py/modstruct.c b/py/modstruct.c index a45181852c..2e40264e8d 100644 --- a/py/modstruct.c +++ b/py/modstruct.c @@ -27,8 +27,8 @@ #include <assert.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -210,8 +210,8 @@ STATIC const mp_obj_dict_t mp_module_struct_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_struct_globals_table), - .alloc = ARRAY_SIZE(mp_module_struct_globals_table), + .used = MP_ARRAY_SIZE(mp_module_struct_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_struct_globals_table), .table = (mp_map_elem_t*)mp_module_struct_globals_table, }, }; diff --git a/py/modsys.c b/py/modsys.c index a99db1b7f8..1e7f7eff7f 100644 --- a/py/modsys.c +++ b/py/modsys.c @@ -24,8 +24,8 @@ * THE SOFTWARE. 
*/ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "builtin.h" @@ -87,8 +87,8 @@ STATIC const mp_obj_dict_t mp_module_sys_globals = { .map = { .all_keys_are_qstrs = 1, .table_is_fixed_array = 1, - .used = ARRAY_SIZE(mp_module_sys_globals_table), - .alloc = ARRAY_SIZE(mp_module_sys_globals_table), + .used = MP_ARRAY_SIZE(mp_module_sys_globals_table), + .alloc = MP_ARRAY_SIZE(mp_module_sys_globals_table), .table = (mp_map_elem_t*)mp_module_sys_globals_table, }, }; diff --git a/py/mpconfig.h b/py/mpconfig.h index 93e98c25b6..3a9d342ea3 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -157,6 +157,12 @@ #define MICROPY_ENABLE_GC_FINALISER (0) #endif +// Whether to check C stack usage. C stack used for calling Python functions, +// etc. Not checking means segfault on overflow. +#ifndef MICROPY_STACK_CHECK +#define MICROPY_STACK_CHECK (1) +#endif + // Whether to include REPL helper function #ifndef MICROPY_HELPER_REPL #define MICROPY_HELPER_REPL (0) @@ -223,6 +229,10 @@ typedef double mp_float_t; #define MICROPY_PY_BUILTINS_FLOAT (0) #endif +#ifndef MICROPY_PY_BUILTINS_COMPLEX +#define MICROPY_PY_BUILTINS_COMPLEX (MICROPY_PY_BUILTINS_FLOAT) +#endif + // Enable features which improve CPython compatibility // but may lead to more code size/memory usage. 
// TODO: Originally intended as generic category to not @@ -239,6 +249,16 @@ typedef double mp_float_t; /*****************************************************************************/ /* Fine control over Python builtins, classes, modules, etc */ +// Whether str object is proper unicode +#ifndef MICROPY_PY_BUILTINS_STR_UNICODE +#define MICROPY_PY_BUILTINS_STR_UNICODE (0) +#endif + +// Whether to support bytearray object +#ifndef MICROPY_PY_BUILTINS_BYTEARRAY +#define MICROPY_PY_BUILTINS_BYTEARRAY (1) +#endif + // Whether to support set object #ifndef MICROPY_PY_BUILTINS_SET #define MICROPY_PY_BUILTINS_SET (1) @@ -259,6 +279,13 @@ typedef double mp_float_t; #define MICROPY_PY_BUILTINS_PROPERTY (1) #endif +// Whether to provide "array" module. Note that large chunk of the +// underlying code is shared with "bytearray" builtin type, so to +// get real savings, it should be disabled too. +#ifndef MICROPY_PY_ARRAY +#define MICROPY_PY_ARRAY (1) +#endif + // Whether to provide "collections" module #ifndef MICROPY_PY_COLLECTIONS #define MICROPY_PY_COLLECTIONS (1) @@ -377,3 +404,8 @@ typedef double mp_float_t; #ifndef NORETURN #define NORETURN __attribute__((noreturn)) #endif + +// Modifier for weak functions +#ifndef MP_WEAK +#define MP_WEAK __attribute__((weak)) +#endif @@ -30,8 +30,8 @@ #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "mpz.h" #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ @@ -29,6 +29,7 @@ #include <limits.h> #include <setjmp.h> +#include <assert.h> typedef struct _nlr_buf_t nlr_buf_t; struct _nlr_buf_t { @@ -44,7 +45,7 @@ struct _nlr_buf_t { #else void *regs[8]; #endif -#elif defined(__thumb2__) +#elif defined(__thumb2__) || defined(__thumb__) || defined(__arm__) void *regs[10]; #else #define MICROPY_NLR_SETJMP (1) diff --git a/py/nlrthumb.S b/py/nlrthumb.S index b306c01753..dabf57cf85 100644 --- a/py/nlrthumb.S +++ b/py/nlrthumb.S @@ -24,19 +24,21 @@ * THE SOFTWARE. 
*/ -#if defined(__thumb2__) && !MICROPY_NLR_SETJMP -/* thumb callee save: bx, bp, sp, r12, r14, r14, r15 */ +#if !MICROPY_NLR_SETJMP && (defined(__thumb2__) || defined(__thumb__) || defined(__arm__)) +/* arm callee save: bx, bp, sp, r12, r14, r14, r15 */ .syntax unified /*.cpu cortex-m4*/ - .thumb + /*.thumb*/ .text .align 2 /* uint nlr_push(r0=nlr_buf_t *nlr) */ .global nlr_push +#if defined(__thumb2__) .thumb .thumb_func +#endif .type nlr_push, %function nlr_push: str lr, [r0, #8] @ store lr into nlr_buf @@ -64,8 +66,10 @@ nlr_push: @ void nlr_pop() .global nlr_pop +#if defined(__thumb2__) .thumb .thumb_func +#endif .type nlr_pop, %function nlr_pop: ldr r3, .L5 @ load addr of nlr_top @@ -80,8 +84,10 @@ nlr_pop: /* void nlr_jump(r0=uint val) */ .global nlr_jump +#if defined(__thumb2__) .thumb .thumb_func +#endif .type nlr_jump, %function nlr_jump: ldr r3, .L2 @ load addr of nlr_top @@ -35,6 +35,7 @@ #include "obj.h" #include "runtime0.h" #include "runtime.h" +#include "stackctrl.h" mp_obj_type_t *mp_obj_get_type(mp_const_obj_t o_in) { if (MP_OBJ_IS_SMALL_INT(o_in)) { @@ -59,6 +60,8 @@ void printf_wrapper(void *env, const char *fmt, ...) 
{ } void mp_obj_print_helper(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in, mp_print_kind_t kind) { + // There can be data structures nested too deep, or just recursive + MP_STACK_CHECK(); #if !NDEBUG if (o_in == NULL) { print(env, "(nil)"); @@ -274,6 +277,7 @@ mp_float_t mp_obj_get_float(mp_obj_t arg) { } } +#if MICROPY_PY_BUILTINS_COMPLEX void mp_obj_get_complex(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) { if (arg == mp_const_false) { *real = 0; @@ -297,6 +301,7 @@ void mp_obj_get_complex(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) { } } #endif +#endif void mp_obj_get_array(mp_obj_t o, uint *len, mp_obj_t **items) { if (MP_OBJ_IS_TYPE(o, &mp_type_tuple)) { @@ -352,7 +357,12 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index, // may return MP_OBJ_NULL mp_obj_t mp_obj_len_maybe(mp_obj_t o_in) { - if (MP_OBJ_IS_STR(o_in) || MP_OBJ_IS_TYPE(o_in, &mp_type_bytes)) { + if ( +#if !MICROPY_PY_BUILTINS_STR_UNICODE + // It's simple - unicode is slow, non-unicode is fast + MP_OBJ_IS_STR(o_in) || +#endif + MP_OBJ_IS_TYPE(o_in, &mp_type_bytes)) { return MP_OBJ_NEW_SMALL_INT((machine_int_t)mp_obj_str_get_len(o_in)); } else { mp_obj_type_t *type = mp_obj_get_type(o_in); diff --git a/py/objarray.c b/py/objarray.c index 05821e8de4..b13df2bdba 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -37,6 +37,8 @@ #include "runtime.h" #include "binary.h" +#if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY + typedef struct _mp_obj_array_t { mp_obj_base_t base; machine_uint_t typecode : 8; @@ -310,3 +312,5 @@ STATIC mp_obj_t array_iterator_new(mp_obj_t array_in) { o->cur = 0; return o; } + +#endif // MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY diff --git a/py/objcomplex.c b/py/objcomplex.c index d58b53463c..20e7c97d37 100644 --- a/py/objcomplex.c +++ b/py/objcomplex.c @@ -36,7 +36,7 @@ #include "runtime0.h" #include "runtime.h" -#if MICROPY_PY_BUILTINS_FLOAT +#if MICROPY_PY_BUILTINS_COMPLEX #include <math.h> 
diff --git a/py/objenumerate.c b/py/objenumerate.c index 7d9ea9915a..37414464de 100644 --- a/py/objenumerate.c +++ b/py/objenumerate.c @@ -27,8 +27,8 @@ #include <stdlib.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "runtime.h" @@ -45,7 +45,7 @@ STATIC const mp_arg_t enumerate_make_new_args[] = { { MP_QSTR_iterable, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_obj = MP_OBJ_NULL} }, { MP_QSTR_start, MP_ARG_INT, {.u_int = 0} }, }; -#define ENUMERATE_MAKE_NEW_NUM_ARGS ARRAY_SIZE(enumerate_make_new_args) +#define ENUMERATE_MAKE_NEW_NUM_ARGS MP_ARRAY_SIZE(enumerate_make_new_args) STATIC mp_obj_t enumerate_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { #if MICROPY_CPYTHON_COMPAT diff --git a/py/objexcept.c b/py/objexcept.c index 9f421373bb..ad66bb50fe 100644 --- a/py/objexcept.c +++ b/py/objexcept.c @@ -37,6 +37,7 @@ #include "objtype.h" #include "runtime.h" #include "runtime0.h" +#include "gc.h" typedef struct _mp_obj_exception_t { mp_obj_base_t base; @@ -335,6 +336,13 @@ void mp_obj_exception_clear_traceback(mp_obj_t self_in) { } void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, machine_uint_t line, qstr block) { + #if MICROPY_ENABLE_GC + if (gc_is_locked()) { + // We can't allocate memory, so don't bother to try + return; + } + #endif + GET_NATIVE_EXCEPTION(self, self_in); // for traceback, we are just using the list object for convenience, it's not really a list of Python objects diff --git a/py/objfloat.c b/py/objfloat.c index b608b1a3d7..e3fefad8db 100644 --- a/py/objfloat.c +++ b/py/objfloat.c @@ -102,9 +102,12 @@ STATIC mp_obj_t float_unary_op(int op, mp_obj_t o_in) { STATIC mp_obj_t float_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { mp_obj_float_t *lhs = lhs_in; +#if MICROPY_PY_BUILTINS_COMPLEX if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_complex)) { return mp_obj_complex_binary_op(op, lhs->value, 0, rhs_in); - } else { + } else +#endif + { return 
mp_obj_float_binary_op(op, lhs->value, rhs_in); } } diff --git a/py/objfun.c b/py/objfun.c index 29363129b2..74e959f9d3 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -39,6 +39,7 @@ #include "runtime0.h" #include "runtime.h" #include "bc.h" +#include "stackctrl.h" #if 0 // print debugging info #define DEBUG_PRINT (1) @@ -204,6 +205,8 @@ STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, uint expected, ui // code_state should have ->ip filled in (pointing past code info block), // as well as ->n_state. void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) { + // This function is pretty complicated. Its main aim is to be efficient in speed and RAM + // usage for the common case of positional only args. mp_obj_fun_bc_t *self = self_in; machine_uint_t n_state = code_state->n_state; const byte *ip = code_state->ip; @@ -353,8 +356,7 @@ continue2:; STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) { - // This function is pretty complicated. It's main aim is to be efficient in speed and RAM - // usage for the common case of positional only args. 
+ MP_STACK_CHECK(); DEBUG_printf("Input n_args: %d, n_kw: %d\n", n_args, n_kw); DEBUG_printf("Input pos args: "); diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 516fb52746..cf7896f9e1 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -121,9 +121,11 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { #if MICROPY_PY_BUILTINS_FLOAT } else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_float)) { return mp_obj_float_binary_op(op, mpz_as_float(zlhs), rhs_in); +#if MICROPY_PY_BUILTINS_COMPLEX } else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_complex)) { return mp_obj_complex_binary_op(op, mpz_as_float(zlhs), 0, rhs_in); #endif +#endif } else { // delegate to generic function to check for extra cases return mp_obj_int_binary_op_extra_cases(op, lhs_in, rhs_in); diff --git a/py/objstr.c b/py/objstr.c index c84d7c900d..b13517b63d 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -32,6 +32,7 @@ #include "mpconfig.h" #include "nlr.h" #include "misc.h" +#include "unicode.h" #include "qstr.h" #include "obj.h" #include "runtime0.h" @@ -43,16 +44,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args, mp_obj_t dict); const mp_obj_t mp_const_empty_bytes; -// use this macro to extract the string hash -#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; } - -// use this macro to extract the string length -#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; } - -// use this macro to extract the string data and length -#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = 
((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } - -STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); +mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str); STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in); STATIC NORETURN void arg_type_mixup(); @@ -259,7 +251,7 @@ STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, cons return NULL; } -STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { +mp_obj_t mp_obj_str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len); mp_obj_type_t *lhs_type = mp_obj_get_type(lhs_in); mp_obj_type_t *rhs_type = mp_obj_get_type(rhs_in); @@ -352,11 +344,14 @@ uncomparable: return MP_OBJ_NULL; // op not supported } +#if !MICROPY_PY_BUILTINS_STR_UNICODE +// objstrunicode defines own version const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, uint self_len, mp_obj_t index, bool is_slice) { machine_uint_t index_val = mp_get_index(type, self_len, index, is_slice); return self_data + index_val; } +#endif STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { mp_obj_type_t *type = mp_obj_get_type(self_in); @@ -571,7 +566,6 @@ STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) { return res; } - STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) { const mp_obj_type_t *self_type = mp_obj_get_type(args[0]); assert(2 <= n_args && n_args <= 4); @@ -600,6 +594,11 @@ STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t dire } } else { // found + #if MICROPY_PY_BUILTINS_STR_UNICODE + if (self_type == &mp_type_str) { + return MP_OBJ_NEW_SMALL_INT(utf8_ptr_to_index(haystack, p)); + } + #endif return MP_OBJ_NEW_SMALL_INT(p - haystack); } } @@ -1610,7 +1609,7 @@ STATIC mp_obj_t str_encode(uint n_args, const mp_obj_t *args) { } #endif 
-STATIC machine_int_t str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, int flags) { +machine_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, int flags) { if (flags == MP_BUFFER_READ) { GET_STR_DATA_LEN(self_in, str_data, str_len); bufinfo->buf = (void*)str_data; @@ -1627,38 +1626,45 @@ STATIC machine_int_t str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, } #if MICROPY_CPYTHON_COMPAT -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(bytes_decode_obj, 1, 3, bytes_decode); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_encode_obj, 1, 3, str_encode); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(bytes_decode_obj, 1, 3, bytes_decode); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_encode_obj, 1, 3, str_encode); #endif -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex); -STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rstrip_obj, 1, 2, str_rstrip); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, mp_obj_str_format); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace); -STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count); -STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition); -STATIC 
MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper); -STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex); +MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rstrip_obj, 1, 2, str_rstrip); +MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, mp_obj_str_format); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace); +MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count); +MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition); +MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition); +MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower); +MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper); +MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace); +MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha); +MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit); 
+MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper); +MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower); STATIC const mp_map_elem_t str_locals_dict_table[] = { #if MICROPY_CPYTHON_COMPAT { MP_OBJ_NEW_QSTR(MP_QSTR_decode), (mp_obj_t)&bytes_decode_obj }, + #if !MICROPY_PY_BUILTINS_STR_UNICODE + // If we have separate unicode type, then here we have methods only + // for bytes type, and it should not have encode() methods. Otherwise, + // we have non-compliant-but-practical bytestring type, which shares + // method table with bytes, so they both have encode() and decode() + // methods (which should do type checking at runtime). { MP_OBJ_NEW_QSTR(MP_QSTR_encode), (mp_obj_t)&str_encode_obj }, + #endif #endif { MP_OBJ_NEW_QSTR(MP_QSTR_find), (mp_obj_t)&str_find_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_rfind), (mp_obj_t)&str_rfind_obj }, @@ -1688,17 +1694,19 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = { STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table); +#if !MICROPY_PY_BUILTINS_STR_UNICODE const mp_obj_type_t mp_type_str = { { &mp_type_type }, .name = MP_QSTR_str, .print = str_print, .make_new = str_make_new, - .binary_op = str_binary_op, + .binary_op = mp_obj_str_binary_op, .subscr = str_subscr, .getiter = mp_obj_new_str_iterator, - .buffer_p = { .get_buffer = str_get_buffer }, + .buffer_p = { .get_buffer = mp_obj_str_get_buffer }, .locals_dict = (mp_obj_t)&str_locals_dict, }; +#endif // Reuses most of methods from str const mp_obj_type_t mp_type_bytes = { @@ -1706,10 +1714,10 @@ const mp_obj_type_t mp_type_bytes = { .name = MP_QSTR_bytes, .print = str_print, .make_new = bytes_make_new, - .binary_op = str_binary_op, + .binary_op = mp_obj_str_binary_op, .subscr = str_subscr, .getiter = mp_obj_new_bytes_iterator, - .buffer_p = { .get_buffer = str_get_buffer }, + .buffer_p = { .get_buffer = mp_obj_str_get_buffer }, .locals_dict = (mp_obj_t)&str_locals_dict, }; @@ -1866,6 +1874,7 @@ typedef struct _mp_obj_str_it_t { machine_uint_t cur; } 
mp_obj_str_it_t; +#if !MICROPY_PY_BUILTINS_STR_UNICODE STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) { mp_obj_str_it_t *self = self_in; GET_STR_DATA_LEN(self->str, str, len); @@ -1885,6 +1894,15 @@ STATIC const mp_obj_type_t mp_type_str_it = { .iternext = str_it_iternext, }; +mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) { + mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); + o->base.type = &mp_type_str_it; + o->str = str; + o->cur = 0; + return o; +} +#endif + STATIC mp_obj_t bytes_it_iternext(mp_obj_t self_in) { mp_obj_str_it_t *self = self_in; GET_STR_DATA_LEN(self->str, str, len); @@ -1904,14 +1922,6 @@ STATIC const mp_obj_type_t mp_type_bytes_it = { .iternext = bytes_it_iternext, }; -mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) { - mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); - o->base.type = &mp_type_str_it; - o->str = str; - o->cur = 0; - return o; -} - mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) { mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); o->base.type = &mp_type_bytes_it; diff --git a/py/objstr.h b/py/objstr.h index 5be137d36d..515890c6e1 100644 --- a/py/objstr.h +++ b/py/objstr.h @@ -35,5 +35,53 @@ typedef struct _mp_obj_str_t { #define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte*)str}; +// use this macro to extract the string hash +#define GET_STR_HASH(str_obj_in, str_hash) \ + uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) \ + { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; } + +// use this macro to extract the string length +#define GET_STR_LEN(str_obj_in, str_len) \ + uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) \ + { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; } + +// use this macro to extract the string data and length +#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \ + const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) \ 
+ { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } \ + else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } + mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args); mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len); + +mp_obj_t mp_obj_str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in); +machine_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, int flags); + +const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, uint self_len, + mp_obj_t index, bool is_slice); + +MP_DECLARE_CONST_FUN_OBJ(str_encode_obj); +MP_DECLARE_CONST_FUN_OBJ(str_find_obj); +MP_DECLARE_CONST_FUN_OBJ(str_rfind_obj); +MP_DECLARE_CONST_FUN_OBJ(str_index_obj); +MP_DECLARE_CONST_FUN_OBJ(str_rindex_obj); +MP_DECLARE_CONST_FUN_OBJ(str_join_obj); +MP_DECLARE_CONST_FUN_OBJ(str_split_obj); +MP_DECLARE_CONST_FUN_OBJ(str_rsplit_obj); +MP_DECLARE_CONST_FUN_OBJ(str_startswith_obj); +MP_DECLARE_CONST_FUN_OBJ(str_endswith_obj); +MP_DECLARE_CONST_FUN_OBJ(str_strip_obj); +MP_DECLARE_CONST_FUN_OBJ(str_lstrip_obj); +MP_DECLARE_CONST_FUN_OBJ(str_rstrip_obj); +MP_DECLARE_CONST_FUN_OBJ(str_format_obj); +MP_DECLARE_CONST_FUN_OBJ(str_replace_obj); +MP_DECLARE_CONST_FUN_OBJ(str_count_obj); +MP_DECLARE_CONST_FUN_OBJ(str_partition_obj); +MP_DECLARE_CONST_FUN_OBJ(str_rpartition_obj); +MP_DECLARE_CONST_FUN_OBJ(str_lower_obj); +MP_DECLARE_CONST_FUN_OBJ(str_upper_obj); +MP_DECLARE_CONST_FUN_OBJ(str_isspace_obj); +MP_DECLARE_CONST_FUN_OBJ(str_isalpha_obj); +MP_DECLARE_CONST_FUN_OBJ(str_isdigit_obj); +MP_DECLARE_CONST_FUN_OBJ(str_isupper_obj); +MP_DECLARE_CONST_FUN_OBJ(str_islower_obj); diff --git a/py/objstrunicode.c b/py/objstrunicode.c new file mode 100644 index 0000000000..d96ce0a552 --- /dev/null +++ b/py/objstrunicode.c @@ -0,0 +1,359 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright 
(c) 2013, 2014 Damien P. George + * Copyright (c) 2014 Paul Sokolovsky + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <stdbool.h> +#include <string.h> +#include <assert.h> + +#include "mpconfig.h" +#include "nlr.h" +#include "misc.h" +#include "qstr.h" +#include "obj.h" +#include "runtime0.h" +#include "runtime.h" +#include "pfenv.h" +#include "objstr.h" +#include "objlist.h" + +#if MICROPY_PY_BUILTINS_STR_UNICODE + +STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); + +/******************************************************************************/ +/* str */ + +STATIC void uni_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) { + // this escapes characters, but it will be very slow to print (calling print many times) + bool has_single_quote = false; + bool has_double_quote = false; + for (const byte *s = str_data, *top = str_data + str_len; !has_double_quote && s < top; s++) { + if (*s == '\'') { + has_single_quote = true; + } else if (*s == '"') { + has_double_quote = true; + } + } + int quote_char = '\''; + if (has_single_quote && !has_double_quote) { + quote_char = '"'; + } + print(env, "%c", quote_char); + const byte *s = str_data, *top = str_data + str_len; + while (s < top) { + unichar ch; + ch = utf8_get_char(s); + s = utf8_next_char(s); + if (ch == quote_char) { + print(env, "\\%c", quote_char); + } else if (ch == '\\') { + print(env, "\\\\"); + } else if (32 <= ch && ch <= 126) { + print(env, "%c", ch); + } else if (ch == '\n') { + print(env, "\\n"); + } else if (ch == '\r') { + print(env, "\\r"); + } else if (ch == '\t') { + print(env, "\\t"); + } else if (ch < 0x100) { + print(env, "\\x%02x", ch); + } else if (ch < 0x10000) { + print(env, "\\u%04x", ch); + } else { + print(env, "\\U%08x", ch); + } + } + print(env, "%c", quote_char); +} + +STATIC void uni_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { + GET_STR_DATA_LEN(self_in, str_data, str_len); + if (kind == PRINT_STR) { + print(env, "%.*s", str_len, str_data); + } else { + 
uni_print_quoted(print, env, str_data, str_len); + } +} + +STATIC mp_obj_t uni_unary_op(int op, mp_obj_t self_in) { + GET_STR_DATA_LEN(self_in, str_data, str_len); + switch (op) { + case MP_UNARY_OP_BOOL: + return MP_BOOL(str_len != 0); + case MP_UNARY_OP_LEN: + return MP_OBJ_NEW_SMALL_INT(unichar_charlen((const char *)str_data, str_len)); + default: + return MP_OBJ_NULL; // op not supported + } +} + +STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { +#if MICROPY_CPYTHON_COMPAT + if (n_kw != 0) { + mp_arg_error_unimpl_kw(); + } +#endif + + switch (n_args) { + case 0: + return MP_OBJ_NEW_QSTR(MP_QSTR_); + + case 1: + { + vstr_t *vstr = vstr_new(); + mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR); + mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false); + vstr_free(vstr); + return s; + } + + case 2: + case 3: + { + // TODO: validate 2nd/3rd args + if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected")); + } + GET_STR_DATA_LEN(args[0], str_data, str_len); + GET_STR_HASH(args[0], str_hash); + mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len); + o->data = str_data; + o->hash = str_hash; + return o; + } + + default: + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments")); + } +} + +// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or +// be capped to the first/last character of the string, depending on is_slice. +const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, uint self_len, + mp_obj_t index, bool is_slice) { + machine_int_t i; + // Copied from mp_get_index; I don't want bounds checking, just give me + // the integer as-is. (I can't bounds-check without scanning the whole + // string; an out-of-bounds index will be caught in the loops below.) 
+ if (MP_OBJ_IS_SMALL_INT(index)) { + i = MP_OBJ_SMALL_INT_VALUE(index); + } else if (!mp_obj_get_int_maybe(index, &i)) { + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "string indices must be integers, not %s", mp_obj_get_type_str(index))); + } + const byte *s, *top = self_data + self_len; + if (i < 0) + { + // Negative indexing is performed by counting from the end of the string. + for (s = top - 1; i; --s) { + if (s < self_data) { + if (is_slice) { + return self_data; + } + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range")); + } + if (!UTF8_IS_CONT(*s)) { + ++i; + } + } + ++s; + } else if (!i) { + return self_data; // Shortcut - str[0] is its base pointer + } else { + // Positive indexing, correspondingly, counts from the start of the string. + // It's assumed that negative indexing will generally be used with small + // absolute values (eg str[-1], not str[-1000000]), which means it'll be + // more efficient this way. + for (s = self_data; true; ++s) { + if (s >= top) { + if (is_slice) { + return top; + } + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range")); + } + while (UTF8_IS_CONT(*s)) { + ++s; + } + if (!i--) { + return s; + } + } + } + return s; +} + +STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { + mp_obj_type_t *type = mp_obj_get_type(self_in); + GET_STR_DATA_LEN(self_in, self_data, self_len); + if (value == MP_OBJ_SENTINEL) { + // load +#if MICROPY_PY_BUILTINS_SLICE + if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) { + mp_obj_t ostart, ostop, ostep; + mp_obj_slice_get(index, &ostart, &ostop, &ostep); + if (ostep != mp_const_none && ostep != MP_OBJ_NEW_SMALL_INT(1)) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_NotImplementedError, + "only slices with step=1 (aka None) are supported")); + } + + if (type == &mp_type_bytes) { + machine_int_t start = 0, stop = self_len; + if (ostart != mp_const_none) { + start = 
MP_OBJ_SMALL_INT_VALUE(ostart); + if (start < 0) { + start = self_len + start; + } + } + if (ostop != mp_const_none) { + stop = MP_OBJ_SMALL_INT_VALUE(ostop); + if (stop < 0) { + stop = self_len + stop; + } + } + return mp_obj_new_str_of_type(type, self_data + start, stop - start); + } + const byte *pstart, *pstop; + if (ostart != mp_const_none) { + pstart = str_index_to_ptr(type, self_data, self_len, ostart, true); + } else { + pstart = self_data; + } + if (ostop != mp_const_none) { + // pstop will point just after the stop character. This depends on + // the \0 at the end of the string. + pstop = str_index_to_ptr(type, self_data, self_len, ostop, true); + } else { + pstop = self_data + self_len; + } + if (pstop < pstart) { + return MP_OBJ_NEW_QSTR(MP_QSTR_); + } + return mp_obj_new_str_of_type(type, (const byte *)pstart, pstop - pstart); + } +#endif + if (type == &mp_type_bytes) { + uint index_val = mp_get_index(type, self_len, index, false); + return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]); + } + const byte *s = str_index_to_ptr(type, self_data, self_len, index, false); + int len = 1; + if (UTF8_IS_NONASCII(*s)) { + // Count the number of 1 bits (after the first) + for (char mask = 0x40; *s & mask; mask >>= 1) { + ++len; + } + } + return mp_obj_new_str((const char*)s, len, true); // This will create a one-character string + } else { + return MP_OBJ_NULL; // op not supported + } +} + +STATIC const mp_map_elem_t str_locals_dict_table[] = { +#if MICROPY_CPYTHON_COMPAT + { MP_OBJ_NEW_QSTR(MP_QSTR_encode), (mp_obj_t)&str_encode_obj }, +#endif + { MP_OBJ_NEW_QSTR(MP_QSTR_find), (mp_obj_t)&str_find_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_rfind), (mp_obj_t)&str_rfind_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_index), (mp_obj_t)&str_index_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj }, + { 
MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_lstrip), (mp_obj_t)&str_lstrip_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_rstrip), (mp_obj_t)&str_rstrip_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_format), (mp_obj_t)&str_format_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_replace), (mp_obj_t)&str_replace_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_count), (mp_obj_t)&str_count_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_partition), (mp_obj_t)&str_partition_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isspace), (mp_obj_t)&str_isspace_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isalpha), (mp_obj_t)&str_isalpha_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isdigit), (mp_obj_t)&str_isdigit_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isupper), (mp_obj_t)&str_isupper_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_islower), (mp_obj_t)&str_islower_obj }, +}; + +STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table); + +const mp_obj_type_t mp_type_str = { + { &mp_type_type }, + .name = MP_QSTR_str, + .print = uni_print, + .make_new = str_make_new, + .unary_op = uni_unary_op, + .binary_op = mp_obj_str_binary_op, + .subscr = str_subscr, + .getiter = mp_obj_new_str_iterator, + .buffer_p = { .get_buffer = mp_obj_str_get_buffer }, + .locals_dict = (mp_obj_t)&str_locals_dict, +}; + +/******************************************************************************/ +/* str iterator */ + +typedef struct _mp_obj_str_it_t { + mp_obj_base_t base; + mp_obj_t str; + machine_uint_t cur; +} mp_obj_str_it_t; + +STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) { + mp_obj_str_it_t *self = self_in; + GET_STR_DATA_LEN(self->str, str, 
len); + if (self->cur < len) { + const byte *cur = str + self->cur; + const byte *end = utf8_next_char(str + self->cur); + mp_obj_t o_out = mp_obj_new_str((const char*)cur, end - cur, true); + self->cur += end - cur; + return o_out; + } else { + return MP_OBJ_STOP_ITERATION; + } +} + +STATIC const mp_obj_type_t mp_type_str_it = { + { &mp_type_type }, + .name = MP_QSTR_iterator, + .getiter = mp_identity, + .iternext = str_it_iternext, +}; + +mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) { + mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); + o->base.type = &mp_type_str_it; + o->str = str; + o->cur = 0; + return o; +} + +#endif // MICROPY_PY_BUILTINS_STR_UNICODE diff --git a/py/parse.c b/py/parse.c index af09c335f2..492c1678b5 100644 --- a/py/parse.c +++ b/py/parse.c @@ -30,8 +30,8 @@ #include <assert.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parsenumbase.h" diff --git a/py/parsehelper.c b/py/parsehelper.c index 3ead5a3031..105afe711e 100644 --- a/py/parsehelper.c +++ b/py/parsehelper.c @@ -29,8 +29,8 @@ #include <stdint.h> #include <stdio.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "lexer.h" #include "parse.h" diff --git a/py/parsenum.c b/py/parsenum.c index 1c1868ae0a..b9801ab6a1 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -27,14 +27,15 @@ #include <stdbool.h> #include <stdlib.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "nlr.h" #include "obj.h" #include "parsenumbase.h" #include "parsenum.h" #include "smallint.h" +#include "runtime.h" #if MICROPY_PY_BUILTINS_FLOAT #include <math.h> @@ -252,10 +253,15 @@ mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool f } // return the object +#if MICROPY_PY_BUILTINS_COMPLEX if (imag) { return mp_obj_new_complex(0, dec_val); } else if (force_complex) { return mp_obj_new_complex(dec_val, 0); +#else + if (imag || 
force_complex) { + mp_not_implemented("complex values not supported"); +#endif } else { return mp_obj_new_float(dec_val); } diff --git a/py/parsenumbase.c b/py/parsenumbase.c index ce140655bd..4fddac9c3d 100644 --- a/py/parsenumbase.c +++ b/py/parsenumbase.c @@ -24,8 +24,8 @@ * THE SOFTWARE. */ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "parsenumbase.h" // find real radix base, and strip preceding '0x', '0o' and '0b' diff --git a/py/pfenv.c b/py/pfenv.c index e631f8654a..ca1e3e919b 100644 --- a/py/pfenv.c +++ b/py/pfenv.c @@ -27,8 +27,8 @@ #include <stdint.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "mpz.h" @@ -43,6 +43,7 @@ PY_O_BASENAME = \ parsenum.o \ emitglue.o \ runtime.o \ + stackctrl.o \ argcheck.o \ map.o \ obj.o \ @@ -74,6 +75,7 @@ PY_O_BASENAME = \ objset.o \ objslice.o \ objstr.o \ + objstrunicode.o \ objstringio.o \ objtuple.o \ objtype.o \ @@ -27,8 +27,8 @@ #include <assert.h> #include <string.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" // NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) diff --git a/py/qstrdefs.h b/py/qstrdefs.h index 856853fa55..4ff9ca87c8 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -377,6 +377,8 @@ Q(gc) Q(collect) Q(disable) Q(enable) +Q(mem_free) +Q(mem_alloc) #endif #if MICROPY_PY_BUILTINS_PROPERTY @@ -24,8 +24,8 @@ * THE SOFTWARE. 
*/ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "repl.h" #if MICROPY_HELPER_REPL diff --git a/py/runtime.c b/py/runtime.c index d57bb686d1..5490bcbac5 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -45,6 +45,7 @@ #include "smallint.h" #include "objgenerator.h" #include "lexer.h" +#include "stackctrl.h" #if 0 // print debugging info #define DEBUG_PRINT (1) @@ -69,6 +70,8 @@ const mp_obj_module_t mp_module___main__ = { }; void mp_init(void) { + mp_stack_ctrl_init(); + // call port specific initialization if any #ifdef MICROPY_PORT_INIT_FUNC MICROPY_PORT_INIT_FUNC; @@ -426,6 +429,7 @@ mp_obj_t mp_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) { } else { return res; } +#if MICROPY_PY_BUILTINS_COMPLEX } else if (MP_OBJ_IS_TYPE(rhs, &mp_type_complex)) { mp_obj_t res = mp_obj_complex_binary_op(op, lhs_val, 0, rhs); if (res == MP_OBJ_NULL) { @@ -434,6 +438,7 @@ mp_obj_t mp_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) { return res; } #endif +#endif } } diff --git a/py/scope.c b/py/scope.c index 839e8216c1..83c2b6e07c 100644 --- a/py/scope.c +++ b/py/scope.c @@ -29,8 +29,8 @@ #include <stdio.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "parse.h" diff --git a/py/smallint.c b/py/smallint.c index 5543f126c3..c57f364e36 100644 --- a/py/smallint.c +++ b/py/smallint.c @@ -24,8 +24,8 @@ * THE SOFTWARE. 
*/ -#include "misc.h" #include "mpconfig.h" +#include "misc.h" #include "qstr.h" #include "obj.h" #include "smallint.h" diff --git a/py/stackctrl.c b/py/stackctrl.c new file mode 100644 index 0000000000..724d54a1be --- /dev/null +++ b/py/stackctrl.c @@ -0,0 +1,63 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2014 Paul Sokolovsky + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "mpconfig.h" +#include "misc.h" +#include "nlr.h" +#include "qstr.h" +#include "obj.h" +#include "runtime.h" +#include "stackctrl.h" + +// Stack top at the start of program +char *stack_top; + +void mp_stack_ctrl_init() { + volatile int stack_dummy; + stack_top = (char*)&stack_dummy; +} + +uint mp_stack_usage() { + // Assumes descending stack + volatile int stack_dummy; + return stack_top - (char*)&stack_dummy; +} + +#if MICROPY_STACK_CHECK + +static uint stack_limit = 10240; + +void mp_stack_set_limit(uint limit) { + stack_limit = limit; +} + +void mp_stack_check() { + if (mp_stack_usage() >= stack_limit) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_RuntimeError, "maximum recursion depth exceeded")); + } +} + +#endif // MICROPY_STACK_CHECK diff --git a/py/stackctrl.h b/py/stackctrl.h new file mode 100644 index 0000000000..92de882bfa --- /dev/null +++ b/py/stackctrl.h @@ -0,0 +1,41 @@ +/* + * This file is part of the Micro Python project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2014 Paul Sokolovsky + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +void mp_stack_ctrl_init(); +uint mp_stack_usage(); + +#if MICROPY_STACK_CHECK + +void mp_stack_set_limit(uint limit); +void mp_stack_check(); +#define MP_STACK_CHECK() mp_stack_check() + +#else + +#define mp_stack_set_limit(limit) +#define MP_STACK_CHECK() + +#endif diff --git a/py/stream.c b/py/stream.c index 07a79248ab..cfdea15cca 100644 --- a/py/stream.c +++ b/py/stream.c @@ -33,9 +33,13 @@ #include "qstr.h" #include "obj.h" #include "objstr.h" +#include "runtime.h" #include "stream.h" #if MICROPY_STREAMS_NON_BLOCK #include <errno.h> +#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR) +#define EWOULDBLOCK 140 +#endif #endif // This file defines generic Python stream read/write methods which @@ -67,6 +71,13 @@ STATIC mp_obj_t stream_read(uint n_args, const mp_obj_t *args) { if (n_args == 1 || ((sz = mp_obj_get_int(args[1])) == -1)) { return stream_readall(args[0]); } + + #if MICROPY_PY_BUILTINS_STR_UNICODE + if (!o->type->stream_p->is_bytes) { + mp_not_implemented("Reading from unicode text streams by character count"); + } + #endif + byte *buf = m_new(byte, sz); int error; machine_int_t out_sz = o->type->stream_p->read(o, buf, sz, &error); diff --git a/py/unicode.c b/py/unicode.c index c8faa57009..d69e81c8e0 100644 --- a/py/unicode.c +++ b/py/unicode.c @@ -26,8 +26,8 @@ #include <stdint.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" // attribute flags #define FL_PRINT (0x01) @@ -65,14 +65,65 @@ STATIC const uint8_t attr[] = { AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0 }; +// TODO: Rename to str_get_char unichar utf8_get_char(const byte *s) { +#if MICROPY_PY_BUILTINS_STR_UNICODE + unichar ord = *s++; + if (!UTF8_IS_NONASCII(ord)) return ord; + ord &= 
0x7F; + for (unichar mask = 0x40; ord & mask; mask >>= 1) { + ord &= ~mask; + } + while (UTF8_IS_CONT(*s)) { + ord = (ord << 6) | (*s++ & 0x3F); + } + return ord; +#else return *s; +#endif } +// TODO: Rename to str_next_char const byte *utf8_next_char(const byte *s) { +#if MICROPY_PY_BUILTINS_STR_UNICODE + ++s; + while (UTF8_IS_CONT(*s)) { + ++s; + } + return s; +#else return s + 1; +#endif +} + +machine_uint_t utf8_ptr_to_index(const char *s, const char *ptr) { + machine_uint_t i = 0; + while (ptr > s) { + if (!UTF8_IS_CONT(*--ptr)) { + i++; + } + } + + return i; +} + +// TODO: Rename to str_charlen +machine_uint_t unichar_charlen(const char *str, machine_uint_t len) +{ +#if MICROPY_PY_BUILTINS_STR_UNICODE + machine_uint_t charlen = 0; + for (const char *top = str + len; str < top; ++str) { + if (!UTF8_IS_CONT(*str)) { + ++charlen; + } + } + return charlen; +#else + return len; +#endif } +// Be aware: These unichar_is* functions are actually ASCII-only! bool unichar_isspace(unichar c) { return c < 128 && (attr[c] & FL_SPACE) != 0; } diff --git a/py/unicode.h b/py/unicode.h new file mode 100644 index 0000000000..2468b2fecf --- /dev/null +++ b/py/unicode.h @@ -0,0 +1 @@ +machine_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr); @@ -29,8 +29,8 @@ #include <stdarg.h> #include <string.h> #include <assert.h> -#include "misc.h" #include "mpconfig.h" +#include "misc.h" // returned value is always at least 1 greater than argument #define ROUND_ALLOC(a) (((a) & ((~0) - 7)) + 8) @@ -199,12 +199,48 @@ void vstr_add_byte(vstr_t *vstr, byte b) { } void vstr_add_char(vstr_t *vstr, unichar c) { - // TODO UNICODE +#if MICROPY_PY_BUILTINS_STR_UNICODE + // TODO: Can this be simplified and deduplicated? + // Is it worth just calling vstr_add_len(vstr, 4)? 
+ if (c < 0x80) { + byte *buf = (byte*)vstr_add_len(vstr, 1); + if (buf == NULL) { + return; + } + *buf = (byte)c; + } else if (c < 0x800) { + byte *buf = (byte*)vstr_add_len(vstr, 2); + if (buf == NULL) { + return; + } + buf[0] = (c >> 6) | 0xC0; + buf[1] = (c & 0x3F) | 0x80; + } else if (c < 0x10000) { + byte *buf = (byte*)vstr_add_len(vstr, 3); + if (buf == NULL) { + return; + } + buf[0] = (c >> 12) | 0xE0; + buf[1] = ((c >> 6) & 0x3F) | 0x80; + buf[2] = (c & 0x3F) | 0x80; + } else { + assert(c < 0x110000); + byte *buf = (byte*)vstr_add_len(vstr, 4); + if (buf == NULL) { + return; + } + buf[0] = (c >> 18) | 0xF0; + buf[1] = ((c >> 12) & 0x3F) | 0x80; + buf[2] = ((c >> 6) & 0x3F) | 0x80; + buf[3] = (c & 0x3F) | 0x80; + } +#else byte *buf = (byte*)vstr_add_len(vstr, 1); if (buf == NULL) { return; } buf[0] = c; +#endif } void vstr_add_str(vstr_t *vstr, const char *str) { |