diff options
author | Tom Collins <tom.collins@digi.com> | 2017-05-04 16:31:08 -0700 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2017-05-09 14:43:23 +1000 |
commit | 2998647c4e2c765cba72e390d467e4ba9dcf3262 (patch) | |
tree | d611da509d6bc03d8a14fef7ab73372eaa81a9db /py/lexer.c | |
parent | e711e2d44a24024fcd7d7f5b39f285f979b66a77 (diff) | |
download | micropython-2998647c4e2c765cba72e390d467e4ba9dcf3262.tar.gz micropython-2998647c4e2c765cba72e390d467e4ba9dcf3262.zip |
py/lexer: Simplify lexer startup by using dummy bytes and next_char().
Lexer startup now consistently uses the EOL processing ("\r" and "\r\n" convert to "\n")
and EOF processing (ensure "\n" before EOF) provided by next_char().
In particular, the lexer can now correctly handle input that starts with CR.
Diffstat (limited to 'py/lexer.c')
-rw-r--r-- | py/lexer.c | 29 |
1 files changed, 7 insertions, 22 deletions
diff --git a/py/lexer.c b/py/lexer.c
index 05651abecf..4cbf31d904 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -677,7 +677,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
     lex->source_name = src_name;
     lex->reader = reader;
     lex->line = 1;
-    lex->column = 1;
+    lex->column = -2; // account for 3 dummy bytes
     lex->emit_dent = 0;
     lex->nested_bracket_level = 0;
     lex->alloc_indent_level = MICROPY_ALLOC_LEXER_INDENT_INIT;
@@ -688,27 +688,12 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
     // store sentinel for first indentation level
     lex->indent_level[0] = 0;
 
-    // preload characters
-    lex->chr0 = reader.readbyte(reader.data);
-    lex->chr1 = reader.readbyte(reader.data);
-    lex->chr2 = reader.readbyte(reader.data);
-
-    // if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
-    if (lex->chr0 == MP_LEXER_EOF) {
-        lex->chr0 = '\n';
-    } else if (lex->chr1 == MP_LEXER_EOF) {
-        if (lex->chr0 == '\r') {
-            lex->chr0 = '\n';
-        } else if (lex->chr0 != '\n') {
-            lex->chr1 = '\n';
-        }
-    } else if (lex->chr2 == MP_LEXER_EOF) {
-        if (lex->chr1 == '\r') {
-            lex->chr1 = '\n';
-        } else if (lex->chr1 != '\n') {
-            lex->chr2 = '\n';
-        }
-    }
+    // load lexer with start of file, advancing lex->column to 1
+    // start with dummy bytes and use next_char() for proper EOL/EOF handling
+    lex->chr0 = lex->chr1 = lex->chr2 = 0;
+    next_char(lex);
+    next_char(lex);
+    next_char(lex);
 
     // preload first token
     mp_lexer_to_next(lex);