1 files changed, 141 insertions, 0 deletions
diff --git a/py/lexer.h b/py/lexer.h
new file mode 100644
index 0000000000..32ab48a084
--- /dev/null
+++ b/py/lexer.h
@@ -0,0 +1,141 @@
+/* lexer.h -- simple tokeniser for Python implementation
+ */
+
+#ifndef INCLUDED_LEXER_H
+#define INCLUDED_LEXER_H
+
+/* uses (byte) length instead of null termination
+ * tokens are the same - UTF-8 with (byte) length
+ */
+
+typedef enum _py_token_kind_t {
+    PY_TOKEN_END,                   // 0
+
+    PY_TOKEN_INVALID,
+    PY_TOKEN_LONELY_STRING_OPEN,
+
+    PY_TOKEN_NEWLINE,               // 3
+    PY_TOKEN_INDENT,                // 4
+    PY_TOKEN_DEDENT,                // 5
+
+    PY_TOKEN_NAME,                  // 6
+    PY_TOKEN_NUMBER,
+    PY_TOKEN_STRING,
+    PY_TOKEN_BYTES,
+
+    PY_TOKEN_ELLIPSES,
+
+    PY_TOKEN_KW_FALSE,              // 11
+    PY_TOKEN_KW_NONE,
+    PY_TOKEN_KW_TRUE,
+    PY_TOKEN_KW_AND,
+    PY_TOKEN_KW_AS,
+    PY_TOKEN_KW_ASSERT,
+    PY_TOKEN_KW_BREAK,
+    PY_TOKEN_KW_CLASS,
+    PY_TOKEN_KW_CONTINUE,
+    PY_TOKEN_KW_DEF,                // 20
+    PY_TOKEN_KW_DEL,
+    PY_TOKEN_KW_ELIF,
+    PY_TOKEN_KW_ELSE,
+    PY_TOKEN_KW_EXCEPT,
+    PY_TOKEN_KW_FINALLY,
+    PY_TOKEN_KW_FOR,
+    PY_TOKEN_KW_FROM,
+    PY_TOKEN_KW_GLOBAL,
+    PY_TOKEN_KW_IF,
+    PY_TOKEN_KW_IMPORT,             // 30
+    PY_TOKEN_KW_IN,
+    PY_TOKEN_KW_IS,
+    PY_TOKEN_KW_LAMBDA,
+    PY_TOKEN_KW_NONLOCAL,
+    PY_TOKEN_KW_NOT,
+    PY_TOKEN_KW_OR,
+    PY_TOKEN_KW_PASS,
+    PY_TOKEN_KW_RAISE,
+    PY_TOKEN_KW_RETURN,
+    PY_TOKEN_KW_TRY,                // 40
+    PY_TOKEN_KW_WHILE,
+    PY_TOKEN_KW_WITH,
+    PY_TOKEN_KW_YIELD,
+
+    PY_TOKEN_OP_PLUS,               // 44
+    PY_TOKEN_OP_MINUS,
+    PY_TOKEN_OP_STAR,
+    PY_TOKEN_OP_DBL_STAR,
+    PY_TOKEN_OP_SLASH,
+    PY_TOKEN_OP_DBL_SLASH,
+    PY_TOKEN_OP_PERCENT,
+    PY_TOKEN_OP_LESS,
+    PY_TOKEN_OP_DBL_LESS,
+    PY_TOKEN_OP_MORE,
+    PY_TOKEN_OP_DBL_MORE,           // 54
+    PY_TOKEN_OP_AMPERSAND,
+    PY_TOKEN_OP_PIPE,
+    PY_TOKEN_OP_CARET,
+    PY_TOKEN_OP_TILDE,
+    PY_TOKEN_OP_LESS_EQUAL,
+    PY_TOKEN_OP_MORE_EQUAL,
+    PY_TOKEN_OP_DBL_EQUAL,
+    PY_TOKEN_OP_NOT_EQUAL,
+
+    PY_TOKEN_DEL_PAREN_OPEN,        // 63
+    PY_TOKEN_DEL_PAREN_CLOSE,
+    PY_TOKEN_DEL_BRACKET_OPEN,
+    PY_TOKEN_DEL_BRACKET_CLOSE,
+    PY_TOKEN_DEL_BRACE_OPEN,
+    PY_TOKEN_DEL_BRACE_CLOSE,
+    PY_TOKEN_DEL_COMMA,
+    PY_TOKEN_DEL_COLON,
+    PY_TOKEN_DEL_PERIOD,
+    PY_TOKEN_DEL_SEMICOLON,
+    PY_TOKEN_DEL_AT,                // 73
+    PY_TOKEN_DEL_EQUAL,
+    PY_TOKEN_DEL_PLUS_EQUAL,
+    PY_TOKEN_DEL_MINUS_EQUAL,
+    PY_TOKEN_DEL_STAR_EQUAL,
+    PY_TOKEN_DEL_SLASH_EQUAL,
+    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
+    PY_TOKEN_DEL_PERCENT_EQUAL,
+    PY_TOKEN_DEL_AMPERSAND_EQUAL,
+    PY_TOKEN_DEL_PIPE_EQUAL,
+    PY_TOKEN_DEL_CARET_EQUAL,       // 83
+    PY_TOKEN_DEL_DBL_MORE_EQUAL,
+    PY_TOKEN_DEL_DBL_LESS_EQUAL,
+    PY_TOKEN_DEL_DBL_STAR_EQUAL,
+    PY_TOKEN_DEL_MINUS_MORE,
+} py_token_kind_t;
+
+typedef struct _py_token_t {
+    const char *src_name;       // (file) name of source
+    uint src_line;              // actual source line
+    uint src_column;            // actual source column
+
+    py_token_kind_t kind;       // kind of token
+    uint cont_line;             // token belongs to this line in a continued line
+    const char *str;            // string of token
+    uint len;                   // (byte) length of string of token
+} py_token_t;
+
+typedef struct _py_lexer_t py_lexer_t;
+
+void py_token_show(const py_token_t *tok);
+void py_token_show_error_prefix(const py_token_t *tok);
+bool py_token_show_error(const py_token_t *tok, const char *msg);
+
+py_lexer_t *py_lexer_from_file(const char *filename);
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
+void py_lexer_free(py_lexer_t *lex);
+void py_lexer_to_next(py_lexer_t *lex);
+const py_token_t *py_lexer_cur(const py_lexer_t *lex);
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
+/* unused
+bool py_lexer_is_str(py_lexer_t *lex, const char *str);
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str);
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
+*/
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
+
+#endif /* INCLUDED_LEXER_H */