diff options
author | Damien George <damien.p.george@gmail.com> | 2015-10-08 14:58:15 +0100 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2015-10-12 12:58:45 +0100 |
commit | 64f2b213bb5be33f06ee7ab755d9344cd7e5958c (patch) | |
tree | 8be9542ce68d3a91503e75669ba547740b3b65e1 /py/parse.c | |
parent | 91fc075a33a6f0bf8e1a07b797f6fd01207a5e17 (diff) | |
download | micropython-64f2b213bb5be33f06ee7ab755d9344cd7e5958c.tar.gz micropython-64f2b213bb5be33f06ee7ab755d9344cd7e5958c.zip |
py: Move constant folding from compiler to parser.
It makes much more sense to do constant folding in the parser while the
parse tree is being built. This eliminates the need to create parse
nodes that will just be folded away. The code is slightly simpler and a
bit smaller as well.
Constant folding now has a configuration option,
MICROPY_COMP_CONST_FOLDING, which is enabled by default.
Diffstat (limited to 'py/parse.c')
-rw-r--r-- | py/parse.c | 304 |
1 files changed, 281 insertions, 23 deletions
diff --git a/py/parse.c b/py/parse.c index 64acaec441..759c1c3643 100644 --- a/py/parse.c +++ b/py/parse.c @@ -35,6 +35,8 @@ #include "py/parse.h" #include "py/parsenum.h" #include "py/smallint.h" +#include "py/runtime.h" +#include "py/builtin.h" #define RULE_ACT_ARG_MASK (0x0f) #define RULE_ACT_KIND_MASK (0x30) @@ -121,8 +123,14 @@ typedef struct _mp_parse_chunk_t { byte data[]; } mp_parse_chunk_t; +typedef enum { + PARSE_ERROR_NONE = 0, + PARSE_ERROR_MEMORY, + PARSE_ERROR_CONST, +} parse_error_t; + typedef struct _parser_t { - bool had_memory_error; + parse_error_t parse_error; mp_uint_t rule_stack_alloc; mp_uint_t rule_stack_top; @@ -136,11 +144,11 @@ typedef struct _parser_t { mp_parse_tree_t tree; mp_parse_chunk_t *cur_chunk; -} parser_t; -STATIC inline void memory_error(parser_t *parser) { - parser->had_memory_error = true; -} + #if MICROPY_COMP_CONST + mp_map_t consts; + #endif +} parser_t; STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) { // use a custom memory allocator to store parse nodes sequentially in large chunks @@ -184,13 +192,13 @@ STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) { } STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) { - if (parser->had_memory_error) { + if (parser->parse_error) { return; } if (parser->rule_stack_top >= parser->rule_stack_alloc) { rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true); if (rs == NULL) { - memory_error(parser); + parser->parse_error = PARSE_ERROR_MEMORY; return; } parser->rule_stack = rs; @@ -210,7 +218,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) { } STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) { - assert(!parser->had_memory_error); + assert(!parser->parse_error); parser->rule_stack_top -= 1; *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id]; *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i; @@ -301,7 +309,7 @@ STATIC void result_stack_show(parser_t *parser) { */ STATIC mp_parse_node_t pop_result(parser_t *parser) { - if (parser->had_memory_error) { + if (parser->parse_error) { return MP_PARSE_NODE_NULL; } assert(parser->result_stack_top > 0); @@ -309,7 +317,7 @@ STATIC mp_parse_node_t pop_result(parser_t *parser) { } STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) { - if (parser->had_memory_error) { + if (parser->parse_error) { return MP_PARSE_NODE_NULL; } assert(parser->result_stack_top > pos); @@ -317,13 +325,13 @@ STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) { } STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) { - if (parser->had_memory_error) { + if (parser->parse_error) { return; } if (parser->result_stack_top >= parser->result_stack_alloc) { mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true); if (stack == NULL) { - memory_error(parser); + parser->parse_error = PARSE_ERROR_MEMORY; return; } parser->result_stack = stack; @@ -335,7 +343,7 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) { STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) { mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2); if (pn == NULL) { - memory_error(parser); + parser->parse_error = PARSE_ERROR_MEMORY; return MP_PARSE_NODE_NULL; } pn->source_line = src_line; @@ -350,7 +358,7 @@ STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_li STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) { mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t)); if (pn == NULL) { - memory_error(parser); + parser->parse_error = PARSE_ERROR_MEMORY; return MP_PARSE_NODE_NULL; } pn->source_line = src_line; @@ -363,7 +371,17 @@ STATIC void push_result_token(parser_t *parser) { mp_parse_node_t pn; mp_lexer_t *lex = parser->lexer; if (lex->tok_kind == MP_TOKEN_NAME) { - pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len)); + qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len); + #if MICROPY_COMP_CONST + // lookup identifier in table of dynamic constants + mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP); + if (elem != NULL) { + pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(elem->value)); + } else + #endif + { + pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id); + } } else if (lex->tok_kind == MP_TOKEN_INTEGER) { mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex); if (MP_OBJ_IS_SMALL_INT(o)) { @@ -398,10 +416,234 @@ STATIC void push_result_token(parser_t *parser) { push_result_node(parser, pn); } +#if MICROPY_COMP_MODULE_CONST +STATIC const mp_map_elem_t mp_constants_table[] = { + #if MICROPY_PY_UCTYPES + { MP_OBJ_NEW_QSTR(MP_QSTR_uctypes), (mp_obj_t)&mp_module_uctypes }, + #endif + // Extra constants as defined by a port + MICROPY_PORT_CONSTANTS +}; +STATIC MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table); +#endif + +#if MICROPY_COMP_CONST_FOLDING +STATIC bool fold_constants(parser_t *parser, const rule_t *rule, mp_uint_t num_args) { + // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4 + // it does not do partial folding, eg 1 + 2 + x -> 3 + x + + mp_int_t arg0; + if (rule->rule_id == RULE_expr + || rule->rule_id == RULE_xor_expr + || rule->rule_id == RULE_and_expr) { + // folding for binary ops: | ^ & + mp_parse_node_t pn = peek_result(parser, num_args - 1); + if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { + return false; + } + arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + for (mp_int_t i = num_args - 2; i >= 0; --i) { + pn = peek_result(parser, i); + if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { + return false; + } + mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + if (rule->rule_id == RULE_expr) { + // int | int + arg0 |= arg1; + } else if (rule->rule_id == RULE_xor_expr) { + // int ^ int + arg0 ^= arg1; + } else if (rule->rule_id == RULE_and_expr) { + // int & int + arg0 &= arg1; + } + } + } else if (rule->rule_id == RULE_shift_expr + || rule->rule_id == RULE_arith_expr + || rule->rule_id == RULE_term) { + // folding for binary ops: << >> + - * / % // + mp_parse_node_t pn = peek_result(parser, num_args - 1); + if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { + return false; + } + arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + for (mp_int_t i = num_args - 2; i >= 1; i -= 2) { + pn = peek_result(parser, i - 1); + if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { + return false; + } + mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i)); + if (tok == MP_TOKEN_OP_DBL_LESS) { + // int << int + if (arg1 >= (mp_int_t)BITS_PER_WORD + || arg0 > (MP_SMALL_INT_MAX >> arg1) + || arg0 < (MP_SMALL_INT_MIN >> arg1)) { + return false; + } + arg0 <<= arg1; + } else if (tok == MP_TOKEN_OP_DBL_MORE) { + // int >> int + if (arg1 >= (mp_int_t)BITS_PER_WORD) { + // Shifting to big amounts is underfined behavior + // in C and is CPU-dependent; propagate sign bit. + arg1 = BITS_PER_WORD - 1; + } + arg0 >>= arg1; + } else if (tok == MP_TOKEN_OP_PLUS) { + // int + int + arg0 += arg1; + } else if (tok == MP_TOKEN_OP_MINUS) { + // int - int + arg0 -= arg1; + } else if (tok == MP_TOKEN_OP_STAR) { + // int * int + if (mp_small_int_mul_overflow(arg0, arg1)) { + return false; + } + arg0 *= arg1; + } else if (tok == MP_TOKEN_OP_SLASH) { + // int / int + return false; + } else if (tok == MP_TOKEN_OP_PERCENT) { + // int % int + if (arg1 == 0) { + return false; + } + arg0 = mp_small_int_modulo(arg0, arg1); + } else { + assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be + // int // int + if (arg1 == 0) { + return false; + } + arg0 = mp_small_int_floor_divide(arg0, arg1); + } + if (!MP_SMALL_INT_FITS(arg0)) { + return false; + } + } + } else if (rule->rule_id == RULE_factor_2) { + // folding for unary ops: + - ~ + mp_parse_node_t pn = peek_result(parser, 0); + if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { + return false; + } + arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1)); + if (tok == MP_TOKEN_OP_PLUS) { + // +int + } else if (tok == MP_TOKEN_OP_MINUS) { + // -int + arg0 = -arg0; + if (!MP_SMALL_INT_FITS(arg0)) { + return false; + } + } else { + assert(tok == MP_TOKEN_OP_TILDE); // should be + // ~int + arg0 = ~arg0; + } + + #if MICROPY_COMP_CONST + } else if (rule->rule_id == RULE_expr_stmt) { + mp_parse_node_t pn1 = peek_result(parser, 0); + if (!MP_PARSE_NODE_IS_NULL(pn1) + && !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign) + || MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) { + // this node is of the form <x> = <y> + mp_parse_node_t pn0 = peek_result(parser, 1); + if (MP_PARSE_NODE_IS_ID(pn0) + && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_power) + && MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0]) + && MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const + && MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren) + && MP_PARSE_NODE_IS_NULL(((mp_parse_node_struct_t*)pn1)->nodes[2]) + ) { + // code to assign dynamic constants: id = const(value) + + // get the id + qstr id = MP_PARSE_NODE_LEAF_ARG(pn0); + + // get the value + mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0]; + if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) { + parser->parse_error = PARSE_ERROR_CONST; + return false; + } + mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value); + + // store the value in the table of dynamic constants + mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND); + assert(elem->value == MP_OBJ_NULL); + elem->value = MP_OBJ_NEW_SMALL_INT(value); + + // replace const(value) with value + pop_result(parser); + push_result_node(parser, pn_value); + + // finished folding this assignment, but we still want it to be part of the tree + return false; + } + } + return false; + #endif + + #if MICROPY_COMP_MODULE_CONST + } else if (rule->rule_id == RULE_power) { + mp_parse_node_t pn0 = peek_result(parser, 2); + mp_parse_node_t pn1 = peek_result(parser, 1); + mp_parse_node_t pn2 = peek_result(parser, 0); + if (!(MP_PARSE_NODE_IS_ID(pn0) + && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period) + && MP_PARSE_NODE_IS_NULL(pn2))) { + return false; + } + // id1.id2 + // look it up in constant table, see if it can be replaced with an integer + mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1; + assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0])); + qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0); + qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]); + mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP); + if (elem == NULL) { + return false; + } + mp_obj_t dest[2]; + mp_load_method_maybe(elem->value, q_attr, dest); + if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == NULL)) { + return false; + } + arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]); + #endif + + } else { + return false; + } + + // success folding this rule + + for (mp_uint_t i = num_args; i > 0; i--) { + pop_result(parser); + } + push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0)); + + return true; +} +#endif + STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) { + #if MICROPY_COMP_CONST_FOLDING + if (fold_constants(parser, rule, num_args)) { + // we folded this rule so return straight away + return; + } + #endif + mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args); if (pn == NULL) { - memory_error(parser); + parser->parse_error = PARSE_ERROR_MEMORY; return; } pn->source_line = src_line; @@ -418,7 +660,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { parser_t parser; - parser.had_memory_error = false; + parser.parse_error = PARSE_ERROR_NONE; parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT; parser.rule_stack_top = 0; @@ -433,6 +675,10 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { parser.tree.chunk = NULL; parser.cur_chunk = NULL; + #if MICROPY_COMP_CONST + mp_map_init(&parser.consts, 0); + #endif + // check if we could allocate the stacks if (parser.rule_stack == NULL || parser.result_stack == NULL) { goto memory_error; @@ -456,7 +702,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { for (;;) { next_rule: - if (parser.rule_stack_top == 0 || parser.had_memory_error) { + if (parser.rule_stack_top == 0 || parser.parse_error) { break; } @@ -737,6 +983,10 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { } } + #if MICROPY_COMP_CONST + mp_map_deinit(&parser.consts); + #endif + // truncate final chunk and link into chain of chunks if (parser.cur_chunk != NULL) { (void)m_renew(byte, parser.cur_chunk, @@ -749,11 +999,19 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { mp_obj_t exc; - // check if we had a memory error - if (parser.had_memory_error) { -memory_error: - exc = mp_obj_new_exception_msg(&mp_type_MemoryError, - "parser could not allocate enough memory"); + if (parser.parse_error) { + #if MICROPY_COMP_CONST + if (parser.parse_error == PARSE_ERROR_CONST) { + exc = mp_obj_new_exception_msg(&mp_type_SyntaxError, + "constant must be an integer"); + } else + #endif + { + assert(parser.parse_error == PARSE_ERROR_MEMORY); + memory_error: + exc = mp_obj_new_exception_msg(&mp_type_MemoryError, + "parser could not allocate enough memory"); + } parser.tree.root = MP_PARSE_NODE_NULL; goto finished; } |