From 0c1de1cdeea98e55710d27f7926da717086ac0fc Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Thu, 14 Apr 2016 13:23:50 +0100
Subject: py: Simplify "and" action within parser by making ident-rules
 explicit.

Most grammar rules can optimise to the identity if they only have a single
argument, saving a lot of RAM building the parse tree.  Previous to this
patch, whether a given grammar rule could be optimised was defined (mostly
implicitly) by a complicated set of logic rules.  With this patch the
definition is always specified explicitly by using "and_ident" in the rule
definition in the grammar.  This simplifies the logic of the parser,
making it a bit smaller and faster.  RAM usage in unaffected.
---
 py/parse.c | 81 +++++++++++++++++++++++++-------------------------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

(limited to 'py/parse.c')

diff --git a/py/parse.c b/py/parse.c
index ce2dcb62cd..3daa5ff83e 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -56,8 +56,6 @@
 #define RULE_ARG_RULE           (0x2000)
 #define RULE_ARG_OPT_RULE       (0x3000)
 
-#define ADD_BLANK_NODE(rule) ((rule->act & RULE_ACT_ADD_BLANK) != 0)
-
 // (un)comment to use rule names; for debugging
 //#define USE_RULE_NAME (1)
 
@@ -80,10 +78,10 @@ enum {
     RULE_const_object, // special node for a constant, generic Python object
 };
 
-#define ident                   (RULE_ACT_ALLOW_IDENT)
-#define blank                   (RULE_ACT_ADD_BLANK)
 #define or(n)                   (RULE_ACT_OR | n)
 #define and(n)                  (RULE_ACT_AND | n)
+#define and_ident(n)            (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
+#define and_blank(n)            (RULE_ACT_AND | n | RULE_ACT_ADD_BLANK)
 #define one_or_more             (RULE_ACT_LIST | 2)
 #define list                    (RULE_ACT_LIST | 1)
 #define list_with_end           (RULE_ACT_LIST | 3)
@@ -830,25 +828,6 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
 
                 // matched the rule, so now build the corresponding parse_node
 
-                // count number of arguments for the parse_node
-                i = 0;
-                bool emit_rule = false;
-                for (size_t x = 0; x < n; ++x) {
-                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
-                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
-                        if (tok_kind >= MP_TOKEN_NAME) {
-                            emit_rule = true;
-                        }
-                        if (tok_kind == MP_TOKEN_NAME) {
-                            // only tokens which were names are pushed to stack
-                            i += 1;
-                        }
-                    } else {
-                        // rules are always pushed
-                        i += 1;
-                    }
-                }
-
                 #if !MICROPY_ENABLE_DOC_STRING
                 // this code discards lonely statements, such as doc strings
                 if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
@@ -868,35 +847,29 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                 }
                 #endif
 
-                // always emit these rules, even if they have only 1 argument
-                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
-                    emit_rule = true;
-                }
-
-                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
-                // rule should not be emitted if it has only 1 argument
-                if (rule->act & RULE_ACT_ALLOW_IDENT) {
-                    emit_rule = false;
-                }
-
-                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
-                if (ADD_BLANK_NODE(rule)) {
-                    emit_rule = true;
-                    push_result_node(&parser, MP_PARSE_NODE_NULL);
-                    i += 1;
-                }
-
+                // count number of arguments for the parse node
+                i = 0;
                 size_t num_not_nil = 0;
-                for (size_t x = 0; x < i; ++x) {
-                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
-                        num_not_nil += 1;
+                for (size_t x = n; x > 0;) {
+                    --x;
+                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
+                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
+                        if (tok_kind == MP_TOKEN_NAME) {
+                            // only tokens which were names are pushed to stack
+                            i += 1;
+                            num_not_nil += 1;
+                        }
+                    } else {
+                        // rules are always pushed
+                        if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) {
+                            num_not_nil += 1;
+                        }
+                        i += 1;
                     }
                 }
-                if (emit_rule || num_not_nil != 1) {
-                    // need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
-                    push_result_rule(&parser, rule_src_line, rule, i);
-                } else {
-                    // single result, leave it on stack
+
+                if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) {
+                    // this rule has only 1 argument and should not be emitted
                     mp_parse_node_t pn = MP_PARSE_NODE_NULL;
                     for (size_t x = 0; x < i; ++x) {
                         mp_parse_node_t pn2 = pop_result(&parser);
@@ -905,6 +878,16 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                         }
                     }
                     push_result_node(&parser, pn);
+                } else {
+                    // this rule must be emitted
+
+                    if (rule->act & RULE_ACT_ADD_BLANK) {
+                        // and add an extra blank node at the end (used by the compiler to store data)
+                        push_result_node(&parser, MP_PARSE_NODE_NULL);
+                        i += 1;
+                    }
+
+                    push_result_rule(&parser, rule_src_line, rule, i);
                 }
                 break;
             }
-- 
cgit v1.2.3