diff options
Diffstat (limited to 'py/lexer.h')
-rw-r--r-- | py/lexer.h | 262 |
1 files changed, 128 insertions, 134 deletions
diff --git a/py/lexer.h b/py/lexer.h index 3472370604..f58a38e92b 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -1,146 +1,140 @@ -/* lexer.h -- simple tokeniser for Python implementation +/* lexer.h -- simple tokeniser for Micro Python + * + * Uses (byte) length instead of null termination. + * Tokens are the same - UTF-8 with (byte) length. */ -#ifndef INCLUDED_LEXER_H -#define INCLUDED_LEXER_H - -/* uses (byte) length instead of null termination - * tokens are the same - UTF-8 with (byte) length - */ - -typedef enum _py_token_kind_t { - PY_TOKEN_END, // 0 - - PY_TOKEN_INVALID, - PY_TOKEN_DEDENT_MISMATCH, - PY_TOKEN_LONELY_STRING_OPEN, - - PY_TOKEN_NEWLINE, // 4 - PY_TOKEN_INDENT, // 5 - PY_TOKEN_DEDENT, // 6 - - PY_TOKEN_NAME, // 7 - PY_TOKEN_NUMBER, - PY_TOKEN_STRING, - PY_TOKEN_BYTES, - - PY_TOKEN_ELLIPSES, - - PY_TOKEN_KW_FALSE, // 12 - PY_TOKEN_KW_NONE, - PY_TOKEN_KW_TRUE, - PY_TOKEN_KW_AND, - PY_TOKEN_KW_AS, - PY_TOKEN_KW_ASSERT, - PY_TOKEN_KW_BREAK, - PY_TOKEN_KW_CLASS, - PY_TOKEN_KW_CONTINUE, - PY_TOKEN_KW_DEF, // 21 - PY_TOKEN_KW_DEL, - PY_TOKEN_KW_ELIF, - PY_TOKEN_KW_ELSE, - PY_TOKEN_KW_EXCEPT, - PY_TOKEN_KW_FINALLY, - PY_TOKEN_KW_FOR, - PY_TOKEN_KW_FROM, - PY_TOKEN_KW_GLOBAL, - PY_TOKEN_KW_IF, - PY_TOKEN_KW_IMPORT, // 31 - PY_TOKEN_KW_IN, - PY_TOKEN_KW_IS, - PY_TOKEN_KW_LAMBDA, - PY_TOKEN_KW_NONLOCAL, - PY_TOKEN_KW_NOT, - PY_TOKEN_KW_OR, - PY_TOKEN_KW_PASS, - PY_TOKEN_KW_RAISE, - PY_TOKEN_KW_RETURN, - PY_TOKEN_KW_TRY, // 41 - PY_TOKEN_KW_WHILE, - PY_TOKEN_KW_WITH, - PY_TOKEN_KW_YIELD, - - PY_TOKEN_OP_PLUS, // 45 - PY_TOKEN_OP_MINUS, - PY_TOKEN_OP_STAR, - PY_TOKEN_OP_DBL_STAR, - PY_TOKEN_OP_SLASH, - PY_TOKEN_OP_DBL_SLASH, - PY_TOKEN_OP_PERCENT, - PY_TOKEN_OP_LESS, - PY_TOKEN_OP_DBL_LESS, - PY_TOKEN_OP_MORE, - PY_TOKEN_OP_DBL_MORE, // 55 - PY_TOKEN_OP_AMPERSAND, - PY_TOKEN_OP_PIPE, - PY_TOKEN_OP_CARET, - PY_TOKEN_OP_TILDE, - PY_TOKEN_OP_LESS_EQUAL, - PY_TOKEN_OP_MORE_EQUAL, - PY_TOKEN_OP_DBL_EQUAL, - PY_TOKEN_OP_NOT_EQUAL, - - PY_TOKEN_DEL_PAREN_OPEN, // 64 - PY_TOKEN_DEL_PAREN_CLOSE, - PY_TOKEN_DEL_BRACKET_OPEN, - PY_TOKEN_DEL_BRACKET_CLOSE, - PY_TOKEN_DEL_BRACE_OPEN, - PY_TOKEN_DEL_BRACE_CLOSE, - PY_TOKEN_DEL_COMMA, - PY_TOKEN_DEL_COLON, - PY_TOKEN_DEL_PERIOD, - PY_TOKEN_DEL_SEMICOLON, - PY_TOKEN_DEL_AT, // 74 - PY_TOKEN_DEL_EQUAL, - PY_TOKEN_DEL_PLUS_EQUAL, - PY_TOKEN_DEL_MINUS_EQUAL, - PY_TOKEN_DEL_STAR_EQUAL, - PY_TOKEN_DEL_SLASH_EQUAL, - PY_TOKEN_DEL_DBL_SLASH_EQUAL, - PY_TOKEN_DEL_PERCENT_EQUAL, - PY_TOKEN_DEL_AMPERSAND_EQUAL, - PY_TOKEN_DEL_PIPE_EQUAL, - PY_TOKEN_DEL_CARET_EQUAL, // 84 - PY_TOKEN_DEL_DBL_MORE_EQUAL, - PY_TOKEN_DEL_DBL_LESS_EQUAL, - PY_TOKEN_DEL_DBL_STAR_EQUAL, - PY_TOKEN_DEL_MINUS_MORE, -} py_token_kind_t; - -typedef struct _py_token_t { +typedef enum _mp_token_kind_t { + MP_TOKEN_END, // 0 + + MP_TOKEN_INVALID, + MP_TOKEN_DEDENT_MISMATCH, + MP_TOKEN_LONELY_STRING_OPEN, + + MP_TOKEN_NEWLINE, // 4 + MP_TOKEN_INDENT, // 5 + MP_TOKEN_DEDENT, // 6 + + MP_TOKEN_NAME, // 7 + MP_TOKEN_NUMBER, + MP_TOKEN_STRING, + MP_TOKEN_BYTES, + + MP_TOKEN_ELLIPSES, + + MP_TOKEN_KW_FALSE, // 12 + MP_TOKEN_KW_NONE, + MP_TOKEN_KW_TRUE, + MP_TOKEN_KW_AND, + MP_TOKEN_KW_AS, + MP_TOKEN_KW_ASSERT, + MP_TOKEN_KW_BREAK, + MP_TOKEN_KW_CLASS, + MP_TOKEN_KW_CONTINUE, + MP_TOKEN_KW_DEF, // 21 + MP_TOKEN_KW_DEL, + MP_TOKEN_KW_ELIF, + MP_TOKEN_KW_ELSE, + MP_TOKEN_KW_EXCEPT, + MP_TOKEN_KW_FINALLY, + MP_TOKEN_KW_FOR, + MP_TOKEN_KW_FROM, + MP_TOKEN_KW_GLOBAL, + MP_TOKEN_KW_IF, + MP_TOKEN_KW_IMPORT, // 31 + MP_TOKEN_KW_IN, + MP_TOKEN_KW_IS, + MP_TOKEN_KW_LAMBDA, + MP_TOKEN_KW_NONLOCAL, + MP_TOKEN_KW_NOT, + MP_TOKEN_KW_OR, + MP_TOKEN_KW_PASS, + MP_TOKEN_KW_RAISE, + MP_TOKEN_KW_RETURN, + MP_TOKEN_KW_TRY, // 41 + MP_TOKEN_KW_WHILE, + MP_TOKEN_KW_WITH, + MP_TOKEN_KW_YIELD, + + MP_TOKEN_OP_PLUS, // 45 + MP_TOKEN_OP_MINUS, + MP_TOKEN_OP_STAR, + MP_TOKEN_OP_DBL_STAR, + MP_TOKEN_OP_SLASH, + MP_TOKEN_OP_DBL_SLASH, + MP_TOKEN_OP_PERCENT, + MP_TOKEN_OP_LESS, + MP_TOKEN_OP_DBL_LESS, + MP_TOKEN_OP_MORE, + MP_TOKEN_OP_DBL_MORE, // 55 + MP_TOKEN_OP_AMPERSAND, + MP_TOKEN_OP_PIPE, + MP_TOKEN_OP_CARET, + MP_TOKEN_OP_TILDE, + MP_TOKEN_OP_LESS_EQUAL, + MP_TOKEN_OP_MORE_EQUAL, + MP_TOKEN_OP_DBL_EQUAL, + MP_TOKEN_OP_NOT_EQUAL, + + MP_TOKEN_DEL_PAREN_OPEN, // 64 + MP_TOKEN_DEL_PAREN_CLOSE, + MP_TOKEN_DEL_BRACKET_OPEN, + MP_TOKEN_DEL_BRACKET_CLOSE, + MP_TOKEN_DEL_BRACE_OPEN, + MP_TOKEN_DEL_BRACE_CLOSE, + MP_TOKEN_DEL_COMMA, + MP_TOKEN_DEL_COLON, + MP_TOKEN_DEL_PERIOD, + MP_TOKEN_DEL_SEMICOLON, + MP_TOKEN_DEL_AT, // 74 + MP_TOKEN_DEL_EQUAL, + MP_TOKEN_DEL_PLUS_EQUAL, + MP_TOKEN_DEL_MINUS_EQUAL, + MP_TOKEN_DEL_STAR_EQUAL, + MP_TOKEN_DEL_SLASH_EQUAL, + MP_TOKEN_DEL_DBL_SLASH_EQUAL, + MP_TOKEN_DEL_PERCENT_EQUAL, + MP_TOKEN_DEL_AMPERSAND_EQUAL, + MP_TOKEN_DEL_PIPE_EQUAL, + MP_TOKEN_DEL_CARET_EQUAL, // 84 + MP_TOKEN_DEL_DBL_MORE_EQUAL, + MP_TOKEN_DEL_DBL_LESS_EQUAL, + MP_TOKEN_DEL_DBL_STAR_EQUAL, + MP_TOKEN_DEL_MINUS_MORE, +} mp_token_kind_t; + +typedef struct _mp_token_t { const char *src_name; // name of source uint src_line; // source line uint src_column; // source column - py_token_kind_t kind; // kind of token + mp_token_kind_t kind; // kind of token const char *str; // string of token (valid only while this token is current token) uint len; // (byte) length of string of token -} py_token_t; +} mp_token_t; // the next-char function must return the next character in the stream -// it must return PY_LEXER_CHAR_EOF if end of stream -// it can be called again after returning PY_LEXER_CHAR_EOF, and in that case must return PY_LEXER_CHAR_EOF -#define PY_LEXER_CHAR_EOF (-1) -typedef unichar (*py_lexer_stream_next_char_t)(void*); -typedef void (*py_lexer_stream_close_t)(void*); - -typedef struct _py_lexer_t py_lexer_t; - -void py_token_show(const py_token_t *tok); -void py_token_show_error_prefix(const py_token_t *tok); -bool py_token_show_error(const py_token_t *tok, const char *msg); - -py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close); -void py_lexer_free(py_lexer_t *lex); -void py_lexer_to_next(py_lexer_t *lex); -const py_token_t *py_lexer_cur(const py_lexer_t *lex); -bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind); +// it must return MP_LEXER_CHAR_EOF if end of stream +// it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF +#define MP_LEXER_CHAR_EOF (-1) +typedef unichar (*mp_lexer_stream_next_char_t)(void*); +typedef void (*mp_lexer_stream_close_t)(void*); + +typedef struct _mp_lexer_t mp_lexer_t; + +void mp_token_show(const mp_token_t *tok); +void mp_token_show_error_prefix(const mp_token_t *tok); +bool mp_token_show_error(const mp_token_t *tok, const char *msg); + +mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close); +void mp_lexer_free(mp_lexer_t *lex); +void mp_lexer_to_next(mp_lexer_t *lex); +const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex); +bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind); /* unused -bool py_lexer_is_str(py_lexer_t *lex, const char *str); -bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind); -bool py_lexer_opt_str(py_lexer_t *lex, const char *str); +bool mp_lexer_is_str(mp_lexer_t *lex, const char *str); +bool mp_lexer_opt_kind(mp_lexer_t *lex, mp_token_kind_t kind); +bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str); */ -bool py_lexer_show_error(py_lexer_t *lex, const char *msg); -bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg); - -#endif /* INCLUDED_LEXER_H */ +bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg); +bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg); |