/* lexer.h -- simple tokeniser for the Python implementation
 */
#ifndef INCLUDED_LEXER_H
#define INCLUDED_LEXER_H
/* The lexer uses an explicit (byte) length instead of NUL termination.
 * Token strings are the same: UTF-8 with an explicit (byte) length.
 */
/*
 * Kind tag carried by every token (see the `kind` member of py_token_t).
 *
 * Enumerators are laid out in groups that follow their name prefixes:
 * special/layout kinds, names and literals, keywords (KW_), operators (OP_)
 * and delimiters (DEL_).  The first enumerator of each group is pinned to an
 * explicit value; everything else continues by implicit +1 numbering, so the
 * numeric values are dense and run from 0 to 88.
 */
typedef enum _py_token_kind_t {
    /* ---- special kinds: values 0..3 ---- */
    PY_TOKEN_END = 0,
    PY_TOKEN_INVALID,
    PY_TOKEN_DEDENT_MISMATCH,
    PY_TOKEN_LONELY_STRING_OPEN,

    /* ---- line structure: values 4..6 ---- */
    PY_TOKEN_NEWLINE = 4,
    PY_TOKEN_INDENT,
    PY_TOKEN_DEDENT,

    /* ---- names and literals: values 7..11 ---- */
    PY_TOKEN_NAME = 7,
    PY_TOKEN_NUMBER,
    PY_TOKEN_STRING,
    PY_TOKEN_BYTES,
    PY_TOKEN_ELLIPSES,

    /* ---- keywords: values 12..44 ---- */
    PY_TOKEN_KW_FALSE = 12,
    PY_TOKEN_KW_NONE,
    PY_TOKEN_KW_TRUE,
    PY_TOKEN_KW_AND,
    PY_TOKEN_KW_AS,
    PY_TOKEN_KW_ASSERT,
    PY_TOKEN_KW_BREAK,
    PY_TOKEN_KW_CLASS,
    PY_TOKEN_KW_CONTINUE,
    PY_TOKEN_KW_DEF,                /* 21 */
    PY_TOKEN_KW_DEL,
    PY_TOKEN_KW_ELIF,
    PY_TOKEN_KW_ELSE,
    PY_TOKEN_KW_EXCEPT,
    PY_TOKEN_KW_FINALLY,
    PY_TOKEN_KW_FOR,
    PY_TOKEN_KW_FROM,
    PY_TOKEN_KW_GLOBAL,
    PY_TOKEN_KW_IF,
    PY_TOKEN_KW_IMPORT,             /* 31 */
    PY_TOKEN_KW_IN,
    PY_TOKEN_KW_IS,
    PY_TOKEN_KW_LAMBDA,
    PY_TOKEN_KW_NONLOCAL,
    PY_TOKEN_KW_NOT,
    PY_TOKEN_KW_OR,
    PY_TOKEN_KW_PASS,
    PY_TOKEN_KW_RAISE,
    PY_TOKEN_KW_RETURN,
    PY_TOKEN_KW_TRY,                /* 41 */
    PY_TOKEN_KW_WHILE,
    PY_TOKEN_KW_WITH,
    PY_TOKEN_KW_YIELD,

    /* ---- operators: values 45..63 ---- */
    PY_TOKEN_OP_PLUS = 45,
    PY_TOKEN_OP_MINUS,
    PY_TOKEN_OP_STAR,
    PY_TOKEN_OP_DBL_STAR,
    PY_TOKEN_OP_SLASH,
    PY_TOKEN_OP_DBL_SLASH,
    PY_TOKEN_OP_PERCENT,
    PY_TOKEN_OP_LESS,
    PY_TOKEN_OP_DBL_LESS,
    PY_TOKEN_OP_MORE,
    PY_TOKEN_OP_DBL_MORE,           /* 55 */
    PY_TOKEN_OP_AMPERSAND,
    PY_TOKEN_OP_PIPE,
    PY_TOKEN_OP_CARET,
    PY_TOKEN_OP_TILDE,
    PY_TOKEN_OP_LESS_EQUAL,
    PY_TOKEN_OP_MORE_EQUAL,
    PY_TOKEN_OP_DBL_EQUAL,
    PY_TOKEN_OP_NOT_EQUAL,

    /* ---- delimiters: values 64..88 ---- */
    PY_TOKEN_DEL_PAREN_OPEN = 64,
    PY_TOKEN_DEL_PAREN_CLOSE,
    PY_TOKEN_DEL_BRACKET_OPEN,
    PY_TOKEN_DEL_BRACKET_CLOSE,
    PY_TOKEN_DEL_BRACE_OPEN,
    PY_TOKEN_DEL_BRACE_CLOSE,
    PY_TOKEN_DEL_COMMA,
    PY_TOKEN_DEL_COLON,
    PY_TOKEN_DEL_PERIOD,
    PY_TOKEN_DEL_SEMICOLON,
    PY_TOKEN_DEL_AT,                /* 74 */
    PY_TOKEN_DEL_EQUAL,
    PY_TOKEN_DEL_PLUS_EQUAL,
    PY_TOKEN_DEL_MINUS_EQUAL,
    PY_TOKEN_DEL_STAR_EQUAL,
    PY_TOKEN_DEL_SLASH_EQUAL,
    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
    PY_TOKEN_DEL_PERCENT_EQUAL,
    PY_TOKEN_DEL_AMPERSAND_EQUAL,
    PY_TOKEN_DEL_PIPE_EQUAL,
    PY_TOKEN_DEL_CARET_EQUAL,       /* 84 */
    PY_TOKEN_DEL_DBL_MORE_EQUAL,
    PY_TOKEN_DEL_DBL_LESS_EQUAL,
    PY_TOKEN_DEL_DBL_STAR_EQUAL,
    PY_TOKEN_DEL_MINUS_MORE,        /* 88 */
} py_token_kind_t;
/*
 * A single token together with the source position it is attributed to.
 *
 * The token text is described by the (str, len) pair; per the note at the
 * top of this header, token strings are UTF-8 with a byte length rather than
 * NUL termination, so read exactly `len` bytes from `str`.
 */
typedef struct _py_token_t {
const char *src_name; // name of the source this token belongs to
uint src_line; // source line of the token
uint src_column; // source column of the token
py_token_kind_t kind; // kind of token (one of the PY_TOKEN_* values)
const char *str; // text of the token; valid only while this token is the current token
uint len; // length of `str` in bytes (not characters)
} py_token_t;
// Character stream interface consumed by the lexer.
//
// The next-char function must return the next character in the stream.
// At end of stream it must return PY_LEXER_CHAR_EOF, and it must keep
// returning PY_LEXER_CHAR_EOF on every call made after that.
// NOTE(review): the sentinel is -1 while the callback returns `unichar`, whose
// definition is not visible in this header -- confirm that comparing a
// unichar against PY_LEXER_CHAR_EOF behaves as intended for that type.
#define PY_LEXER_CHAR_EOF (-1)
// callback returning the next character; its void* argument is presumably
// the stream_data given to py_lexer_new -- confirm against the implementation
typedef unichar (*py_lexer_stream_next_char_t)(void*);
// callback for closing the stream; takes the same kind of opaque void* argument
typedef void (*py_lexer_stream_close_t)(void*);
// opaque lexer state: only declared here, the struct is defined elsewhere
typedef struct _py_lexer_t py_lexer_t;
// Token helpers.  The implementations are not part of this header; judging by
// the names these display a token or an error message for it -- the output
// destination and the meaning of the bool result should be confirmed against
// the implementation.
void py_token_show(const py_token_t *tok);
void py_token_show_error_prefix(const py_token_t *tok);
bool py_token_show_error(const py_token_t *tok, const char *msg);
// Create a lexer for the source called src_name.  Characters are obtained via
// stream_next_char; stream_close is the matching close callback.  stream_data
// is an opaque pointer, presumably passed back to both callbacks -- confirm.
// NOTE(review): whether NULL can be returned on failure is not visible here.
py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close);
// Release a lexer obtained from py_lexer_new.
void py_lexer_free(py_lexer_t *lex);
// Advance the lexer to the next token.
void py_lexer_to_next(py_lexer_t *lex);
// Access the current token; the lexer is not modified (const parameter).
// See py_token_t: the token's str is only valid while it is the current token.
const py_token_t *py_lexer_cur(const py_lexer_t *lex);
// Test the lexer's token against `kind` (presumably the current token).
// NOTE(review): takes a non-const lexer, unlike py_lexer_cur; it could likely
// be const-qualified, but that must change together with the definition.
bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
/* unused
bool py_lexer_is_str(py_lexer_t *lex, const char *str);
bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
*/
// Error reporting at the lexer's position with the message `msg`.  The
// difference between the plain and "pythonic" variants, and the meaning of
// the bool result, are defined by the implementation -- not visible here.
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg);
#endif /* INCLUDED_LEXER_H */
|