Diffstat (limited to 'py/lexer.c')
-rw-r--r--  py/lexer.c | 184 ++++++++++++++++++++++++++++----------------------------
1 file changed, 92 insertions(+), 92 deletions(-)
diff --git a/py/lexer.c b/py/lexer.c
index cd2e05ece0..cf9eae5531 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -13,11 +13,11 @@
// TODO seems that CPython allows NULL byte in the input stream
// don't know if that's intentional or not, but we don't allow it
-struct _py_lexer_t {
+struct _mp_lexer_t {
const char *name; // name of source
void *stream_data; // data for stream
- py_lexer_stream_next_char_t stream_next_char; // stream callback to get next char
- py_lexer_stream_close_t stream_close; // stream callback to free
+ mp_lexer_stream_next_char_t stream_next_char; // stream callback to get next char
+ mp_lexer_stream_close_t stream_close; // stream callback to free
unichar chr0, chr1, chr2; // current cached characters from source
@@ -32,7 +32,7 @@ struct _py_lexer_t {
uint16_t *indent_level;
vstr_t vstr;
- py_token_t tok_cur;
+ mp_token_t tok_cur;
};
bool str_strn_equal(const char *str, const char *strn, int len) {
@@ -47,7 +47,7 @@ bool str_strn_equal(const char *str, const char *strn, int len) {
return i == len && *str == 0;
}
-void py_token_show(const py_token_t *tok) {
+void mp_token_show(const mp_token_t *tok) {
printf("(%s:%d:%d) kind:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len);
if (tok->str != NULL && tok->len > 0) {
const char *i = tok->str;
@@ -66,83 +66,83 @@ void py_token_show(const py_token_t *tok) {
printf("\n");
}
-void py_token_show_error_prefix(const py_token_t *tok) {
+void mp_token_show_error_prefix(const mp_token_t *tok) {
printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column);
}
-bool py_token_show_error(const py_token_t *tok, const char *msg) {
+bool mp_token_show_error(const mp_token_t *tok, const char *msg) {
printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg);
return false;
}
#define CUR_CHAR(lex) ((lex)->chr0)
-static bool is_end(py_lexer_t *lex) {
- return lex->chr0 == PY_LEXER_CHAR_EOF;
+static bool is_end(mp_lexer_t *lex) {
+ return lex->chr0 == MP_LEXER_CHAR_EOF;
}
-static bool is_physical_newline(py_lexer_t *lex) {
+static bool is_physical_newline(mp_lexer_t *lex) {
return lex->chr0 == '\n' || lex->chr0 == '\r';
}
-static bool is_char(py_lexer_t *lex, char c) {
+static bool is_char(mp_lexer_t *lex, char c) {
return lex->chr0 == c;
}
-static bool is_char_or(py_lexer_t *lex, char c1, char c2) {
+static bool is_char_or(mp_lexer_t *lex, char c1, char c2) {
return lex->chr0 == c1 || lex->chr0 == c2;
}
-static bool is_char_or3(py_lexer_t *lex, char c1, char c2, char c3) {
+static bool is_char_or3(mp_lexer_t *lex, char c1, char c2, char c3) {
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
}
/*
-static bool is_char_following(py_lexer_t *lex, char c) {
+static bool is_char_following(mp_lexer_t *lex, char c) {
return lex->chr1 == c;
}
*/
-static bool is_char_following_or(py_lexer_t *lex, char c1, char c2) {
+static bool is_char_following_or(mp_lexer_t *lex, char c1, char c2) {
return lex->chr1 == c1 || lex->chr1 == c2;
}
-static bool is_char_following_following_or(py_lexer_t *lex, char c1, char c2) {
+static bool is_char_following_following_or(mp_lexer_t *lex, char c1, char c2) {
return lex->chr2 == c1 || lex->chr2 == c2;
}
-static bool is_char_and(py_lexer_t *lex, char c1, char c2) {
+static bool is_char_and(mp_lexer_t *lex, char c1, char c2) {
return lex->chr0 == c1 && lex->chr1 == c2;
}
-static bool is_whitespace(py_lexer_t *lex) {
+static bool is_whitespace(mp_lexer_t *lex) {
return g_unichar_isspace(lex->chr0);
}
-static bool is_letter(py_lexer_t *lex) {
+static bool is_letter(mp_lexer_t *lex) {
return g_unichar_isalpha(lex->chr0);
}
-static bool is_digit(py_lexer_t *lex) {
+static bool is_digit(mp_lexer_t *lex) {
return g_unichar_isdigit(lex->chr0);
}
-static bool is_following_digit(py_lexer_t *lex) {
+static bool is_following_digit(mp_lexer_t *lex) {
return g_unichar_isdigit(lex->chr1);
}
// TODO UNICODE include unicode characters in definition of identifiers
-static bool is_head_of_identifier(py_lexer_t *lex) {
+static bool is_head_of_identifier(mp_lexer_t *lex) {
return is_letter(lex) || lex->chr0 == '_';
}
// TODO UNICODE include unicode characters in definition of identifiers
-static bool is_tail_of_identifier(py_lexer_t *lex) {
+static bool is_tail_of_identifier(mp_lexer_t *lex) {
return is_head_of_identifier(lex) || is_digit(lex);
}
-static void next_char(py_lexer_t *lex) {
- if (lex->chr0 == PY_LEXER_CHAR_EOF) {
+static void next_char(mp_lexer_t *lex) {
+ if (lex->chr0 == MP_LEXER_CHAR_EOF) {
return;
}
@@ -172,16 +172,16 @@ static void next_char(py_lexer_t *lex) {
lex->chr0 = lex->chr1;
lex->chr1 = lex->chr2;
lex->chr2 = lex->stream_next_char(lex->stream_data);
- if (lex->chr2 == PY_LEXER_CHAR_EOF) {
+ if (lex->chr2 == MP_LEXER_CHAR_EOF) {
// EOF
- if (lex->chr1 != PY_LEXER_CHAR_EOF && lex->chr1 != '\n' && lex->chr1 != '\r') {
+ if (lex->chr1 != MP_LEXER_CHAR_EOF && lex->chr1 != '\n' && lex->chr1 != '\r') {
lex->chr2 = '\n'; // insert newline at end of file
}
}
}
}
-void indent_push(py_lexer_t *lex, uint indent) {
+void indent_push(mp_lexer_t *lex, uint indent) {
if (lex->num_indent_level >= lex->alloc_indent_level) {
lex->alloc_indent_level *= 2;
lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level);
@@ -189,11 +189,11 @@ void indent_push(py_lexer_t *lex, uint indent) {
lex->indent_level[lex->num_indent_level++] = indent;
}
-uint indent_top(py_lexer_t *lex) {
+uint indent_top(mp_lexer_t *lex) {
return lex->indent_level[lex->num_indent_level - 1];
}
-void indent_pop(py_lexer_t *lex) {
+void indent_pop(mp_lexer_t *lex) {
lex->num_indent_level -= 1;
}
@@ -222,24 +222,24 @@ static const char *tok_enc =
// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
static const uint8_t tok_enc_kind[] = {
- PY_TOKEN_DEL_PAREN_OPEN, PY_TOKEN_DEL_PAREN_CLOSE,
- PY_TOKEN_DEL_BRACKET_OPEN, PY_TOKEN_DEL_BRACKET_CLOSE,
- PY_TOKEN_DEL_BRACE_OPEN, PY_TOKEN_DEL_BRACE_CLOSE,
- PY_TOKEN_DEL_COMMA, PY_TOKEN_DEL_COLON, PY_TOKEN_DEL_SEMICOLON, PY_TOKEN_DEL_AT, PY_TOKEN_OP_TILDE,
-
- PY_TOKEN_OP_LESS, PY_TOKEN_OP_LESS_EQUAL, PY_TOKEN_OP_DBL_LESS, PY_TOKEN_DEL_DBL_LESS_EQUAL,
- PY_TOKEN_OP_MORE, PY_TOKEN_OP_MORE_EQUAL, PY_TOKEN_OP_DBL_MORE, PY_TOKEN_DEL_DBL_MORE_EQUAL,
- PY_TOKEN_OP_STAR, PY_TOKEN_DEL_STAR_EQUAL, PY_TOKEN_OP_DBL_STAR, PY_TOKEN_DEL_DBL_STAR_EQUAL,
- PY_TOKEN_OP_PLUS, PY_TOKEN_DEL_PLUS_EQUAL,
- PY_TOKEN_OP_MINUS, PY_TOKEN_DEL_MINUS_EQUAL, PY_TOKEN_DEL_MINUS_MORE,
- PY_TOKEN_OP_AMPERSAND, PY_TOKEN_DEL_AMPERSAND_EQUAL,
- PY_TOKEN_OP_PIPE, PY_TOKEN_DEL_PIPE_EQUAL,
- PY_TOKEN_OP_SLASH, PY_TOKEN_DEL_SLASH_EQUAL, PY_TOKEN_OP_DBL_SLASH, PY_TOKEN_DEL_DBL_SLASH_EQUAL,
- PY_TOKEN_OP_PERCENT, PY_TOKEN_DEL_PERCENT_EQUAL,
- PY_TOKEN_OP_CARET, PY_TOKEN_DEL_CARET_EQUAL,
- PY_TOKEN_DEL_EQUAL, PY_TOKEN_OP_DBL_EQUAL,
- PY_TOKEN_OP_NOT_EQUAL,
- PY_TOKEN_DEL_PERIOD, PY_TOKEN_ELLIPSES,
+ MP_TOKEN_DEL_PAREN_OPEN, MP_TOKEN_DEL_PAREN_CLOSE,
+ MP_TOKEN_DEL_BRACKET_OPEN, MP_TOKEN_DEL_BRACKET_CLOSE,
+ MP_TOKEN_DEL_BRACE_OPEN, MP_TOKEN_DEL_BRACE_CLOSE,
+ MP_TOKEN_DEL_COMMA, MP_TOKEN_DEL_COLON, MP_TOKEN_DEL_SEMICOLON, MP_TOKEN_DEL_AT, MP_TOKEN_OP_TILDE,
+
+ MP_TOKEN_OP_LESS, MP_TOKEN_OP_LESS_EQUAL, MP_TOKEN_OP_DBL_LESS, MP_TOKEN_DEL_DBL_LESS_EQUAL,
+ MP_TOKEN_OP_MORE, MP_TOKEN_OP_MORE_EQUAL, MP_TOKEN_OP_DBL_MORE, MP_TOKEN_DEL_DBL_MORE_EQUAL,
+ MP_TOKEN_OP_STAR, MP_TOKEN_DEL_STAR_EQUAL, MP_TOKEN_OP_DBL_STAR, MP_TOKEN_DEL_DBL_STAR_EQUAL,
+ MP_TOKEN_OP_PLUS, MP_TOKEN_DEL_PLUS_EQUAL,
+ MP_TOKEN_OP_MINUS, MP_TOKEN_DEL_MINUS_EQUAL, MP_TOKEN_DEL_MINUS_MORE,
+ MP_TOKEN_OP_AMPERSAND, MP_TOKEN_DEL_AMPERSAND_EQUAL,
+ MP_TOKEN_OP_PIPE, MP_TOKEN_DEL_PIPE_EQUAL,
+ MP_TOKEN_OP_SLASH, MP_TOKEN_DEL_SLASH_EQUAL, MP_TOKEN_OP_DBL_SLASH, MP_TOKEN_DEL_DBL_SLASH_EQUAL,
+ MP_TOKEN_OP_PERCENT, MP_TOKEN_DEL_PERCENT_EQUAL,
+ MP_TOKEN_OP_CARET, MP_TOKEN_DEL_CARET_EQUAL,
+ MP_TOKEN_DEL_EQUAL, MP_TOKEN_OP_DBL_EQUAL,
+ MP_TOKEN_OP_NOT_EQUAL,
+ MP_TOKEN_DEL_PERIOD, MP_TOKEN_ELLIPSES,
};
// must have the same order as enum in lexer.h
@@ -280,7 +280,7 @@ static const char *tok_kw[] = {
NULL,
};
-static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool first_token) {
+static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) {
// skip white space and comments
bool had_physical_newline = false;
while (!is_end(lex)) {
@@ -322,18 +322,18 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
// if first token is not on first line, we get a physical newline and
// this check is done as part of normal indent/dedent checking below
// (done to get equivalence with CPython)
- tok->kind = PY_TOKEN_INDENT;
+ tok->kind = MP_TOKEN_INDENT;
} else if (lex->emit_dent < 0) {
- tok->kind = PY_TOKEN_DEDENT;
+ tok->kind = MP_TOKEN_DEDENT;
lex->emit_dent += 1;
} else if (lex->emit_dent > 0) {
- tok->kind = PY_TOKEN_INDENT;
+ tok->kind = MP_TOKEN_INDENT;
lex->emit_dent -= 1;
} else if (had_physical_newline && lex->nested_bracket_level == 0) {
- tok->kind = PY_TOKEN_NEWLINE;
+ tok->kind = MP_TOKEN_NEWLINE;
uint num_spaces = lex->column - 1;
lex->emit_dent = 0;
@@ -347,20 +347,20 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
lex->emit_dent -= 1;
}
if (num_spaces != indent_top(lex)) {
- tok->kind = PY_TOKEN_DEDENT_MISMATCH;
+ tok->kind = MP_TOKEN_DEDENT_MISMATCH;
}
}
} else if (is_end(lex)) {
if (indent_top(lex) > 0) {
- tok->kind = PY_TOKEN_NEWLINE;
+ tok->kind = MP_TOKEN_NEWLINE;
lex->emit_dent = 0;
while (indent_top(lex) > 0) {
indent_pop(lex);
lex->emit_dent -= 1;
}
} else {
- tok->kind = PY_TOKEN_END;
+ tok->kind = MP_TOKEN_END;
}
} else if (is_char_or(lex, '\'', '\"')
@@ -391,9 +391,9 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
// set token kind
if (is_bytes) {
- tok->kind = PY_TOKEN_BYTES;
+ tok->kind = MP_TOKEN_BYTES;
} else {
- tok->kind = PY_TOKEN_STRING;
+ tok->kind = MP_TOKEN_STRING;
}
// get first quoting character
@@ -427,8 +427,8 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
next_char(lex);
unichar c = CUR_CHAR(lex);
switch (c) {
- case PY_LEXER_CHAR_EOF: break; // TODO a proper error message?
- case '\n': c = PY_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it
+ case MP_LEXER_CHAR_EOF: break; // TODO a proper error message?
+ case '\n': c = MP_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it
case '\\': break;
case '\'': break;
case '"': break;
@@ -446,7 +446,7 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
case 'U': // TODO \Uxxxxxxxx only in strings
default: break; // TODO error message
}
- if (c != PY_LEXER_CHAR_EOF) {
+ if (c != MP_LEXER_CHAR_EOF) {
vstr_add_char(&lex->vstr, c);
}
} else {
@@ -458,14 +458,14 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
// check we got the required end quotes
if (n_closing < num_quotes) {
- tok->kind = PY_TOKEN_LONELY_STRING_OPEN;
+ tok->kind = MP_TOKEN_LONELY_STRING_OPEN;
}
// cut off the end quotes from the token text
vstr_cut_tail(&lex->vstr, n_closing);
} else if (is_head_of_identifier(lex)) {
- tok->kind = PY_TOKEN_NAME;
+ tok->kind = MP_TOKEN_NAME;
// get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
@@ -478,7 +478,7 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
}
} else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
- tok->kind = PY_TOKEN_NUMBER;
+ tok->kind = MP_TOKEN_NUMBER;
// get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
@@ -520,7 +520,7 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
if (*t == 0) {
// didn't match any delimiter or operator characters
- tok->kind = PY_TOKEN_INVALID;
+ tok->kind = MP_TOKEN_INVALID;
} else {
// matched a delimiter or operator character
@@ -545,7 +545,7 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
next_char(lex);
tok_enc_index = t_index;
} else {
- tok->kind = PY_TOKEN_INVALID;
+ tok->kind = MP_TOKEN_INVALID;
}
break;
}
@@ -569,9 +569,9 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
tok->kind = tok_enc_kind[tok_enc_index];
// compute bracket level for implicit line joining
- if (tok->kind == PY_TOKEN_DEL_PAREN_OPEN || tok->kind == PY_TOKEN_DEL_BRACKET_OPEN || tok->kind == PY_TOKEN_DEL_BRACE_OPEN) {
+ if (tok->kind == MP_TOKEN_DEL_PAREN_OPEN || tok->kind == MP_TOKEN_DEL_BRACKET_OPEN || tok->kind == MP_TOKEN_DEL_BRACE_OPEN) {
lex->nested_bracket_level += 1;
- } else if (tok->kind == PY_TOKEN_DEL_PAREN_CLOSE || tok->kind == PY_TOKEN_DEL_BRACKET_CLOSE || tok->kind == PY_TOKEN_DEL_BRACE_CLOSE) {
+ } else if (tok->kind == MP_TOKEN_DEL_PAREN_CLOSE || tok->kind == MP_TOKEN_DEL_BRACKET_CLOSE || tok->kind == MP_TOKEN_DEL_BRACE_CLOSE) {
lex->nested_bracket_level -= 1;
}
}
@@ -582,18 +582,18 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool firs
tok->len = vstr_len(&lex->vstr);
// check for keywords
- if (tok->kind == PY_TOKEN_NAME) {
+ if (tok->kind == MP_TOKEN_NAME) {
for (int i = 0; tok_kw[i] != NULL; i++) {
if (str_strn_equal(tok_kw[i], tok->str, tok->len)) {
- tok->kind = PY_TOKEN_KW_FALSE + i;
+ tok->kind = MP_TOKEN_KW_FALSE + i;
break;
}
}
}
}
-py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close) {
- py_lexer_t *lex = m_new(py_lexer_t, 1);
+mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
+ mp_lexer_t *lex = m_new(mp_lexer_t, 1);
lex->name = src_name; // TODO do we need to strdup this?
lex->stream_data = stream_data;
@@ -615,25 +615,25 @@ py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_strea
lex->chr2 = stream_next_char(stream_data);
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
- if (lex->chr0 == PY_LEXER_CHAR_EOF) {
+ if (lex->chr0 == MP_LEXER_CHAR_EOF) {
lex->chr0 = '\n';
- } else if (lex->chr1 == PY_LEXER_CHAR_EOF) {
+ } else if (lex->chr1 == MP_LEXER_CHAR_EOF) {
if (lex->chr0 != '\n' && lex->chr0 != '\r') {
lex->chr1 = '\n';
}
- } else if (lex->chr2 == PY_LEXER_CHAR_EOF) {
+ } else if (lex->chr2 == MP_LEXER_CHAR_EOF) {
if (lex->chr1 != '\n' && lex->chr1 != '\r') {
lex->chr2 = '\n';
}
}
// preload first token
- py_lexer_next_token_into(lex, &lex->tok_cur, true);
+ mp_lexer_next_token_into(lex, &lex->tok_cur, true);
return lex;
}
-void py_lexer_free(py_lexer_t *lex) {
+void mp_lexer_free(mp_lexer_t *lex) {
if (lex) {
if (lex->stream_close) {
lex->stream_close(lex->stream_data);
@@ -643,45 +643,45 @@ void py_lexer_free(py_lexer_t *lex) {
}
}
-void py_lexer_to_next(py_lexer_t *lex) {
- py_lexer_next_token_into(lex, &lex->tok_cur, false);
+void mp_lexer_to_next(mp_lexer_t *lex) {
+ mp_lexer_next_token_into(lex, &lex->tok_cur, false);
}
-const py_token_t *py_lexer_cur(const py_lexer_t *lex) {
+const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex) {
return &lex->tok_cur;
}
-bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind) {
+bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind) {
return lex->tok_cur.kind == kind;
}
/*
-bool py_lexer_is_str(py_lexer_t *lex, const char *str) {
- return py_token_is_str(&lex->tok_cur, str);
+bool mp_lexer_is_str(mp_lexer_t *lex, const char *str) {
+ return mp_token_is_str(&lex->tok_cur, str);
}
-bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) {
- if (py_lexer_is_kind(lex, kind)) {
- py_lexer_to_next(lex);
+bool mp_lexer_opt_kind(mp_lexer_t *lex, mp_token_kind_t kind) {
+ if (mp_lexer_is_kind(lex, kind)) {
+ mp_lexer_to_next(lex);
return true;
}
return false;
}
-bool py_lexer_opt_str(py_lexer_t *lex, const char *str) {
- if (py_lexer_is_str(lex, str)) {
- py_lexer_to_next(lex);
+bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str) {
+ if (mp_lexer_is_str(lex, str)) {
+ mp_lexer_to_next(lex);
return true;
}
return false;
}
*/
-bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
- return py_token_show_error(&lex->tok_cur, msg);
+bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg) {
+ return mp_token_show_error(&lex->tok_cur, msg);
}
-bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg) {
+bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg) {
printf(" File \"%s\", line %d column %d\n%s\n", lex->tok_cur.src_name, lex->tok_cur.src_line, lex->tok_cur.src_column, msg);
return false;
}