summary | refs | log | tree | commit | diff | stats | homepage
path: root/py/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'py/lexer.c')
-rw-r--r-- py/lexer.c 82
1 files changed, 28 insertions, 54 deletions
diff --git a/py/lexer.c b/py/lexer.c
index a91f5c9c8b..05651abecf 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -176,7 +176,6 @@ STATIC void indent_pop(mp_lexer_t *lex) {
// some tricky operator encoding:
// <op> = begin with <op>, if this opchar matches then begin here
// e<op> = end with <op>, if this opchar matches then end
-// E<op> = mandatory end with <op>, this opchar must match, then end
// c<op> = continue with <op>, if this opchar matches then continue matching
// this means if the start of two ops are the same then they are equal til the last char
@@ -193,7 +192,7 @@ STATIC const char *const tok_enc =
"%e=" // % %=
"^e=" // ^ ^=
"=e=" // = ==
- "!E="; // !=
+ "!."; // start of special cases: != . ...
// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
STATIC const uint8_t tok_enc_kind[] = {
@@ -213,7 +212,6 @@ STATIC const uint8_t tok_enc_kind[] = {
MP_TOKEN_OP_PERCENT, MP_TOKEN_DEL_PERCENT_EQUAL,
MP_TOKEN_OP_CARET, MP_TOKEN_DEL_CARET_EQUAL,
MP_TOKEN_DEL_EQUAL, MP_TOKEN_OP_DBL_EQUAL,
- MP_TOKEN_OP_NOT_EQUAL,
};
// must have the same order as enum in lexer.h
@@ -603,20 +601,6 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
}
}
- } else if (is_char(lex, '.')) {
- // special handling for . and ... operators, because .. is not a valid operator
-
- // get first char
- next_char(lex);
-
- if (is_char_and(lex, '.', '.')) {
- next_char(lex);
- next_char(lex);
- lex->tok_kind = MP_TOKEN_ELLIPSIS;
- } else {
- lex->tok_kind = MP_TOKEN_DEL_PERIOD;
- }
-
} else {
// search for encoded delimiter or operator
@@ -625,9 +609,6 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
for (; *t != 0 && !is_char(lex, *t); t += 1) {
if (*t == 'e' || *t == 'c') {
t += 1;
- } else if (*t == 'E') {
- tok_enc_index -= 1;
- t += 1;
}
tok_enc_index += 1;
}
@@ -638,55 +619,48 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
// didn't match any delimiter or operator characters
lex->tok_kind = MP_TOKEN_INVALID;
+ } else if (*t == '!') {
+ // "!=" is a special case because "!" is not a valid operator
+ if (is_char(lex, '=')) {
+ next_char(lex);
+ lex->tok_kind = MP_TOKEN_OP_NOT_EQUAL;
+ } else {
+ lex->tok_kind = MP_TOKEN_INVALID;
+ }
+
+ } else if (*t == '.') {
+ // "." and "..." are special cases because ".." is not a valid operator
+ if (is_char_and(lex, '.', '.')) {
+ next_char(lex);
+ next_char(lex);
+ lex->tok_kind = MP_TOKEN_ELLIPSIS;
+ } else {
+ lex->tok_kind = MP_TOKEN_DEL_PERIOD;
+ }
+
} else {
// matched a delimiter or operator character
// get the maximum characters for a valid token
t += 1;
size_t t_index = tok_enc_index;
- for (;;) {
- for (; *t == 'e'; t += 1) {
- t += 1;
- t_index += 1;
- if (is_char(lex, *t)) {
- next_char(lex);
- tok_enc_index = t_index;
- break;
- }
- }
-
- if (*t == 'E') {
- t += 1;
- if (is_char(lex, *t)) {
- next_char(lex);
- tok_enc_index = t_index;
- } else {
- lex->tok_kind = MP_TOKEN_INVALID;
- goto tok_enc_no_match;
- }
- break;
- }
-
- if (*t == 'c') {
- t += 1;
- t_index += 1;
- if (is_char(lex, *t)) {
- next_char(lex);
- tok_enc_index = t_index;
- t += 1;
- } else {
+ while (*t == 'c' || *t == 'e') {
+ t_index += 1;
+ if (is_char(lex, t[1])) {
+ next_char(lex);
+ tok_enc_index = t_index;
+ if (*t == 'e') {
break;
}
- } else {
+ } else if (*t == 'c') {
break;
}
+ t += 2;
}
// set token kind
lex->tok_kind = tok_enc_kind[tok_enc_index];
- tok_enc_no_match:
-
// compute bracket level for implicit line joining
if (lex->tok_kind == MP_TOKEN_DEL_PAREN_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACKET_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACE_OPEN) {
lex->nested_bracket_level += 1;