py: Optimise lexer by exposing lexer type.

The mp_lexer_t type is exposed, the mp_token_t type is removed, and simple lexer functions (like checking the current token kind) are now inlined. This saves 784 bytes of ROM on 32-bit unix, 348 bytes on stmhal, and 460 bytes on bare-arm. It also saves a tiny bit of RAM since mp_lexer_t is a bit smaller. The code will also run a bit more efficiently.
author: Damien George <damien.p.george@gmail.com> 2014-12-05 19:35:18 +0000
committer: Damien George <damien.p.george@gmail.com> 2014-12-05 19:35:18 +0000
commit: a4c52c5a3d19b5527023fedfaae96cb717d03802 (patch)
tree: 2974180c7270bd13df2e5f080cf951a22c559baa /py/parse.c
parent: 41c07d5b8063d752d2b3e41056bdee3615b54635 (diff)
download: micropython-a4c52c5a3d19b5527023fedfaae96cb717d03802.tar.gz
micropython-a4c52c5a3d19b5527023fedfaae96cb717d03802.zip
1 files changed, 29 insertions, 29 deletions
diff --git a/py/parse.c b/py/parse.c
index 092f6e65f3..6912920e3e 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -155,7 +155,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
     assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
     mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK;
     assert(rule_id < RULE_maximum_number_of);
-    push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0);
+    push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
 }
 
 STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
@@ -298,17 +298,17 @@ STATIC void push_result_string(parser_t *parser, mp_uint_t src_line, const char
     push_result_node(parser, (mp_parse_node_t)pn);
 }
 
-STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
-    const mp_token_t *tok = mp_lexer_cur(lex);
+STATIC void push_result_token(parser_t *parser) {
     mp_parse_node_t pn;
-    if (tok->kind == MP_TOKEN_NAME) {
-        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
-    } else if (tok->kind == MP_TOKEN_NUMBER) {
+    mp_lexer_t *lex = parser->lexer;
+    if (lex->tok_kind == MP_TOKEN_NAME) {
+        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
+    } else if (lex->tok_kind == MP_TOKEN_NUMBER) {
         bool dec = false;
         bool small_int = true;
         mp_int_t int_val = 0;
-        mp_uint_t len = tok->len;
-        const char *str = tok->str;
+        mp_uint_t len = lex->vstr.len;
+        const char *str = lex->vstr.buf;
         mp_uint_t base = 0;
         mp_uint_t i = mp_parse_num_base(str, len, &base);
         bool overflow = false;
@@ -343,29 +343,29 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
         } else {
             pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
         }
-    } else if (tok->kind == MP_TOKEN_STRING) {
+    } else if (lex->tok_kind == MP_TOKEN_STRING) {
         // Don't automatically intern all strings.  doc strings (which are usually large)
         // will be discarded by the compiler, and so we shouldn't intern them.
         qstr qst = MP_QSTR_NULL;
-        if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
+        if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
             // intern short strings
-            qst = qstr_from_strn(tok->str, tok->len);
+            qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
         } else {
             // check if this string is already interned
-            qst = qstr_find_strn(tok->str, tok->len);
+            qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
         }
         if (qst != MP_QSTR_NULL) {
             // qstr exists, make a leaf node
             pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
         } else {
             // not interned, make a node holding a pointer to the string data
-            push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
+            push_result_string(parser, lex->tok_line, lex->vstr.buf, lex->vstr.len);
             return;
         }
-    } else if (tok->kind == MP_TOKEN_BYTES) {
-        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
+    } else if (lex->tok_kind == MP_TOKEN_BYTES) {
+        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
     } else {
-        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
+        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
     }
     push_result_node(parser, pn);
 }
@@ -414,7 +414,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
         case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
         default: top_level_rule = RULE_file_input;
     }
-    push_rule(&parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0);
+    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
 
     // parse!
 
@@ -454,8 +454,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
                 for (; i < n - 1; ++i) {
                     switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                         case RULE_ARG_TOK:
-                            if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
-                                push_result_token(&parser, lex);
+                            if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
+                                push_result_token(&parser);
                                 mp_lexer_to_next(lex);
                                 goto next_rule;
                             }
@@ -469,8 +469,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
                     }
                 }
                 if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
-                    if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
-                        push_result_token(&parser, lex);
+                    if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
+                        push_result_token(&parser);
                         mp_lexer_to_next(lex);
                     } else {
                         backtrack = true;
@@ -507,10 +507,10 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
                         case RULE_ARG_TOK:
                             // need to match a token
                             tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
-                            if (mp_lexer_is_kind(lex, tok_kind)) {
+                            if (lex->tok_kind == tok_kind) {
                                 // matched token
                                 if (tok_kind == MP_TOKEN_NAME) {
-                                    push_result_token(&parser, lex);
+                                    push_result_token(&parser);
                                 }
                                 mp_lexer_to_next(lex);
                             } else {
@@ -657,11 +657,11 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
                         mp_uint_t arg = rule->arg[i & 1 & n];
                         switch (arg & RULE_ARG_KIND_MASK) {
                             case RULE_ARG_TOK:
-                                if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
+                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
                                     if (i & 1 & n) {
                                         // separators which are tokens are not pushed to result stack
                                     } else {
-                                        push_result_token(&parser, lex);
+                                        push_result_token(&parser);
                                     }
                                     mp_lexer_to_next(lex);
                                     // got element of list, so continue parsing list
@@ -722,7 +722,7 @@ memory_error:
     }
 
     // check we are at the end of the token stream
-    if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
+    if (lex->tok_kind != MP_TOKEN_END) {
         goto syntax_error;
     }
 
@@ -745,9 +745,9 @@ finished:
     return result;
 
 syntax_error:
-    if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
+    if (lex->tok_kind == MP_TOKEN_INDENT) {
         *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
-    } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
+    } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
         *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
     } else {
         *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
@@ -755,7 +755,7 @@ syntax_error:
         // debugging: print the rule name that failed and the token
         printf("rule: %s\n", rule->rule_name);
 #if MICROPY_DEBUG_PRINTERS
-        mp_token_show(mp_lexer_cur(lex));
+        mp_token_show(lex);
 #endif
 #endif
     }
author	Damien George <damien.p.george@gmail.com>	2014-12-05 19:35:18 +0000
committer	Damien George <damien.p.george@gmail.com>	2014-12-05 19:35:18 +0000
commit	a4c52c5a3d19b5527023fedfaae96cb717d03802 (patch)
tree	2974180c7270bd13df2e5f080cf951a22c559baa /py/parse.c
parent	41c07d5b8063d752d2b3e41056bdee3615b54635 (diff)
download	micropython-a4c52c5a3d19b5527023fedfaae96cb717d03802.tar.gz micropython-a4c52c5a3d19b5527023fedfaae96cb717d03802.zip