diff options
Diffstat (limited to 'py')
-rw-r--r-- | py/builtineval.c | 5 | ||||
-rw-r--r-- | py/builtinimport.c | 7 | ||||
-rw-r--r-- | py/emitbc.c | 26 | ||||
-rw-r--r-- | py/emitcpy.c | 31 | ||||
-rw-r--r-- | py/lexer.c | 8 | ||||
-rw-r--r-- | py/lexer.h | 4 | ||||
-rw-r--r-- | py/lexerstr.c | 2 | ||||
-rw-r--r-- | py/lexerunix.c | 2 | ||||
-rw-r--r-- | py/obj.h | 1 | ||||
-rw-r--r-- | py/objstr.c | 62 | ||||
-rw-r--r-- | py/parse.c | 38 | ||||
-rw-r--r-- | py/parse.h | 1 | ||||
-rw-r--r-- | py/qstrdefs.h | 1 | ||||
-rw-r--r-- | py/vm.c | 9 |
14 files changed, 129 insertions, 68 deletions
diff --git a/py/builtineval.c b/py/builtineval.c index 0e8f9e31d2..49d2bf16a2 100644 --- a/py/builtineval.c +++ b/py/builtineval.c @@ -24,13 +24,13 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) { const byte *str = mp_obj_str_get_data(o_in, &str_len); // create the lexer - mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", (const char*)str, str_len, 0); + mp_lexer_t *lex = mp_lexer_new_from_str_len(MP_QSTR__lt_string_gt_, (const char*)str, str_len, 0); + qstr source_name = mp_lexer_source_name(lex); // parse the string qstr parse_exc_id; const char *parse_exc_msg; mp_parse_node_t pn = mp_parse(lex, MP_PARSE_EVAL_INPUT, &parse_exc_id, &parse_exc_msg); - qstr source_name = mp_lexer_source_name(lex); mp_lexer_free(lex); if (pn == MP_PARSE_NODE_NULL) { @@ -40,6 +40,7 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) { // compile the string mp_obj_t module_fun = mp_compile(pn, source_name, false); + mp_parse_node_free(pn); if (module_fun == mp_const_none) { // TODO handle compile error correctly diff --git a/py/builtinimport.c b/py/builtinimport.c index 3cfd64e887..35e7dcbb88 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -29,9 +29,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { } */ - uint mod_name_l; - const byte *mod_name_s = mp_obj_str_get_data(args[0], &mod_name_l); - qstr mod_name = qstr_from_strn((const char*)mod_name_s, mod_name_l); + qstr mod_name = mp_obj_str_get_qstr(args[0]); mp_obj_t loaded = mp_obj_module_get(mod_name); if (loaded != MP_OBJ_NULL) { @@ -44,6 +42,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { // TODO handle lexer error correctly return mp_const_none; } + qstr source_name = mp_lexer_source_name(lex); // create a new module object mp_obj_t module_obj = mp_obj_new_module(mod_name); @@ -60,7 +59,6 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { qstr parse_exc_id; const char *parse_exc_msg; mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg); - qstr source_name = mp_lexer_source_name(lex); mp_lexer_free(lex); if (pn == MP_PARSE_NODE_NULL) { @@ -72,6 +70,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { // compile the imported script mp_obj_t module_fun = mp_compile(pn, source_name, false); + mp_parse_node_free(pn); if (module_fun == mp_const_none) { // TODO handle compile error correctly diff --git a/py/emitbc.c b/py/emitbc.c index 10a95fbcfa..9fa2880ecb 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -71,10 +71,14 @@ static void emit_write_code_info_qstr(emit_t* emit, qstr qstr) { c[3] = (qstr >> 24) & 0xff; } -static void emit_write_code_info_byte_byte(emit_t* emit, byte b1, uint b2) { - byte* c = emit_get_cur_to_write_code_info(emit, 2); - c[0] = b1; - c[1] = b2; +static void emit_write_code_info_bytes_lines(emit_t* emit, uint bytes_to_skip, uint lines_to_skip) { + for (; bytes_to_skip > 31; bytes_to_skip -= 31) { + *emit_get_cur_to_write_code_info(emit, 1) = 31; + } + for (; lines_to_skip > 7; lines_to_skip -= 7) { + *emit_get_cur_to_write_code_info(emit, 1) = 7 << 5; + } + *emit_get_cur_to_write_code_info(emit, 1) = bytes_to_skip | (lines_to_skip << 5); } // all functions must go through this one to emit byte code @@ -218,7 +222,7 @@ static void emit_bc_end_pass(emit_t *emit) { printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); } - emit_write_code_info_byte_byte(emit, 0, 0); // end of line number info + emit_write_code_info_bytes_lines(emit, 0, 0); // end of line number info if (emit->pass == PASS_2) { // calculate size of code in bytes @@ -246,15 +250,9 @@ static void emit_bc_set_stack_size(emit_t *emit, int size) { static void emit_bc_set_source_line(emit_t *emit, int source_line) { //printf("source: line %d -> %d offset %d -> %d\n", emit->last_source_line, source_line, emit->last_source_line_offset, emit->byte_code_offset); if (source_line > emit->last_source_line) { - int bytes_to_skip = emit->byte_code_offset - emit->last_source_line_offset; - for (; bytes_to_skip > 255; bytes_to_skip -= 255) { - emit_write_code_info_byte_byte(emit, 255, 0); - } - int lines_to_skip = source_line - emit->last_source_line; - for (; lines_to_skip > 255; lines_to_skip -= 255) { - emit_write_code_info_byte_byte(emit, 0, 255); - } - emit_write_code_info_byte_byte(emit, bytes_to_skip, lines_to_skip); + uint bytes_to_skip = emit->byte_code_offset - emit->last_source_line_offset; + uint lines_to_skip = source_line - emit->last_source_line; + emit_write_code_info_bytes_lines(emit, bytes_to_skip, lines_to_skip); //printf(" %d %d\n", bytes_to_skip, lines_to_skip); emit->last_source_line_offset = emit->byte_code_offset; emit->last_source_line = source_line; diff --git a/py/emitcpy.c b/py/emitcpy.c index de2a5784db..71861c918d 100644 --- a/py/emitcpy.c +++ b/py/emitcpy.c @@ -192,29 +192,26 @@ static void print_quoted_str(qstr qstr, bool bytes) { if (bytes) { printf("b"); } - bool quote_single = false; + int quote_char = '\''; if (has_single_quote && !has_double_quote) { - printf("\""); - } else { - quote_single = true; - printf("'"); + quote_char = '"'; } - for (int i = 0; i < len; i++) { - if (str[i] == '\n') { - printf("\\n"); - } else if (str[i] == '\\') { + printf("%c", quote_char); + for (const char *s = str, *top = str + len; s < top; s++) { + if (*s == quote_char) { + printf("\\%c", quote_char); + } else if (*s == '\\') { printf("\\\\"); - } else if (str[i] == '\'' && quote_single) { - printf("\\'"); + } else if (32 <= *s && *s <= 126) { + printf("%c", *s); + } else if (*s == '\n') { + printf("\\n"); + // TODO add more escape codes here } else { - printf("%c", str[i]); + printf("\\x%02x", (*s) & 0xff); } } - if (has_single_quote && !has_double_quote) { - printf("\""); - } else { - printf("'"); - } + printf("%c", quote_char); } static void emit_cpy_load_const_str(emit_t *emit, qstr qstr, bool bytes) { diff --git a/py/lexer.c b/py/lexer.c index 9911da33d9..f71e355476 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -493,8 +493,8 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } c = num; } else { - // TODO error message - assert(0); + // unrecognised escape character; CPython lets this through verbatim as '\' and then the character + vstr_add_char(&lex->vstr, '\\'); } break; } @@ -644,10 +644,10 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } } -mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) { +mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) { mp_lexer_t *lex = m_new(mp_lexer_t, 1); - lex->source_name = qstr_from_str(src_name); + lex->source_name = src_name; lex->stream_data = stream_data; lex->stream_next_char = stream_next_char; lex->stream_close = stream_close; diff --git a/py/lexer.h b/py/lexer.h index 69e97329b6..13fbfb5d33 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -124,8 +124,8 @@ typedef struct _mp_lexer_t mp_lexer_t; void mp_token_show(const mp_token_t *tok); -mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close); -mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len); +mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close); +mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len); void mp_lexer_free(mp_lexer_t *lex); qstr mp_lexer_source_name(mp_lexer_t *lex); diff --git a/py/lexerstr.c b/py/lexerstr.c index 1e105d8645..d53a47d0c9 100644 --- a/py/lexerstr.c +++ b/py/lexerstr.c @@ -28,7 +28,7 @@ static void str_buf_free(mp_lexer_str_buf_t *sb) { m_del_obj(mp_lexer_str_buf_t, sb); } -mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len) { +mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len) { mp_lexer_str_buf_t *sb = m_new_obj(mp_lexer_str_buf_t); sb->free_len = free_len; sb->src_beg = str; diff --git a/py/lexerunix.c b/py/lexerunix.c index 7846120a4a..5d96c468f8 100644 --- a/py/lexerunix.c +++ b/py/lexerunix.c @@ -28,7 +28,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) { return NULL; } - return mp_lexer_new_from_str_len(filename, data, size, size); + return mp_lexer_new_from_str_len(qstr_from_str(filename), data, size, size); } /******************************************************************************/ @@ -287,6 +287,7 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in); bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2); uint mp_obj_str_get_hash(mp_obj_t self_in); uint mp_obj_str_get_len(mp_obj_t self_in); +qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len); diff --git a/py/objstr.c b/py/objstr.c index 3a4d69cfcc..84ac74bab9 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -40,11 +40,39 @@ void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj if (kind == PRINT_STR && !is_bytes) { print(env, "%.*s", str_len, str_data); } else { + // this escapes characters, but it will be very slow to print (calling print many times) + bool has_single_quote = false; + bool has_double_quote = false; + for (const byte *s = str_data, *top = str_data + str_len; (!has_single_quote || !has_double_quote) && s < top; s++) { + if (*s == '\'') { + has_single_quote = true; + } else if (*s == '"') { + has_double_quote = true; + } + } if (is_bytes) { print(env, "b"); } - // TODO need to escape chars etc - print(env, "'%.*s'", str_len, str_data); + int quote_char = '\''; + if (has_single_quote && !has_double_quote) { + quote_char = '"'; + } + print(env, "%c", quote_char); + for (const byte *s = str_data, *top = str_data + str_len; s < top; s++) { + if (*s == quote_char) { + print(env, "\\%c", quote_char); + } else if (*s == '\\') { + print(env, "\\\\"); + } else if (32 <= *s && *s <= 126) { + print(env, "%c", *s); + } else if (*s == '\n') { + print(env, "\\n"); + // TODO add more escape codes here if we want to match CPython + } else { + print(env, "\\x%02x", *s); + } + } + print(env, "%c", quote_char); } } @@ -474,13 +502,17 @@ bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { } } +void bad_implicit_conversion(mp_obj_t self_in) __attribute__((noreturn)); +void bad_implicit_conversion(mp_obj_t self_in) { + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(self_in))); +} + uint mp_obj_str_get_hash(mp_obj_t self_in) { if (MP_OBJ_IS_STR(self_in)) { GET_STR_HASH(self_in, h); return h; } else { - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", - mp_obj_get_type_str(self_in))); + bad_implicit_conversion(self_in); } } @@ -489,8 +521,20 @@ uint mp_obj_str_get_len(mp_obj_t self_in) { GET_STR_LEN(self_in, l); return l; } else { - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", - mp_obj_get_type_str(self_in))); + bad_implicit_conversion(self_in); + } +} + +// use this if you will anyway convert the string to a qstr +// will be more efficient for the case where it's already a qstr +qstr mp_obj_str_get_qstr(mp_obj_t self_in) { + if (MP_OBJ_IS_QSTR(self_in)) { + return MP_OBJ_QSTR_VALUE(self_in); + } else if (MP_OBJ_IS_TYPE(self_in, &str_type)) { + mp_obj_str_t *self = self_in; + return qstr_from_strn((char*)self->data, self->len); + } else { + bad_implicit_conversion(self_in); } } @@ -502,8 +546,7 @@ const char *mp_obj_str_get_str(mp_obj_t self_in) { (void)l; // len unused return (const char*)s; } else { - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", - mp_obj_get_type_str(self_in))); + bad_implicit_conversion(self_in); } } @@ -513,8 +556,7 @@ const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len) { *len = l; return s; } else { - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", - mp_obj_get_type_str(self_in))); + bad_implicit_conversion(self_in); } } diff --git a/py/parse.c b/py/parse.c index 3cf909d752..d9969d6785 100644 --- a/py/parse.c +++ b/py/parse.c @@ -26,6 +26,8 @@ #define RULE_ARG_OPT_TOK (0x3000) #define RULE_ARG_OPT_RULE (0x4000) +#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond) + // (un)comment to use rule names; for debugging //#define USE_RULE_NAME (1) @@ -135,6 +137,28 @@ mp_parse_node_struct_t *parse_node_new_struct(int src_line, int rule_id, int num return pn; } +uint mp_parse_node_free(mp_parse_node_t pn) { + uint cnt = 0; + if (MP_PARSE_NODE_IS_STRUCT(pn)) { + mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; + uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); + uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns); + bool adjust = ADD_BLANK_NODE(rule_id); + if (adjust) { + n--; + } + for (uint i = 0; i < n; i++) { + cnt += mp_parse_node_free(pns->nodes[i]); + } + if (adjust) { + n++; + } + m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns); + cnt++; + } + return cnt; +} + #if MICROPY_DEBUG_PRINTERS void mp_parse_node_print(mp_parse_node_t pn, int indent) { if (MP_PARSE_NODE_IS_STRUCT(pn)) { @@ -160,15 +184,15 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) { default: assert(0); } } else { - mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t*)pn; - int n = pns2->kind_num_nodes >> 8; + mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; + uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); #ifdef USE_RULE_NAME - printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns2), n); + printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n); #else - printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns2), n); + printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n); #endif - for (int i = 0; i < n; i++) { - mp_parse_node_print(pns2->nodes[i], indent + 2); + for (uint i = 0; i < n; i++) { + mp_parse_node_print(pns->nodes[i], indent + 2); } } } @@ -458,7 +482,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr } // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data) - if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) { + if (ADD_BLANK_NODE(rule->rule_id)) { emit_rule = true; push_result_node(parser, MP_PARSE_NODE_NULL); i += 1; diff --git a/py/parse.h b/py/parse.h index 2801f414ee..9797873d1b 100644 --- a/py/parse.h +++ b/py/parse.h @@ -53,6 +53,7 @@ typedef struct _mp_parse_node_struct_t { #define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8) mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg); +uint mp_parse_node_free(mp_parse_node_t pn); void mp_parse_node_print(mp_parse_node_t pn, int indent); diff --git a/py/qstrdefs.h b/py/qstrdefs.h index e76efaf0e0..81706841cd 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -91,4 +91,5 @@ Q(<listcomp>) Q(<dictcomp>) Q(<setcomp>) Q(<genexpr>) +Q(<string>) Q(<stdin>) @@ -550,12 +550,9 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob machine_uint_t source_line = 1; machine_uint_t bc = save_ip - code_info - code_info_size; //printf("find %lu %d %d\n", bc, code_info[12], code_info[13]); - for (const byte* ci = code_info + 12; bc >= ci[0]; ci += 2) { - bc -= ci[0]; - source_line += ci[1]; - if (ci[0] == 0 && ci[1] == 0) { - break; - } + for (const byte* ci = code_info + 12; *ci && bc >= ((*ci) & 31); ci++) { + bc -= *ci & 31; + source_line += *ci >> 5; } mp_obj_exception_add_traceback(nlr.ret_val, source_file, source_line, block_name); } |