summary refs log tree commit diff stats homepage
path: root/py/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'py/lexer.c')
-rw-r--r-- py/lexer.c 29
1 files changed, 7 insertions, 22 deletions
diff --git a/py/lexer.c b/py/lexer.c
index 05651abecf..4cbf31d904 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -677,7 +677,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
lex->source_name = src_name;
lex->reader = reader;
lex->line = 1;
- lex->column = 1;
+ lex->column = -2; // account for 3 dummy bytes
lex->emit_dent = 0;
lex->nested_bracket_level = 0;
lex->alloc_indent_level = MICROPY_ALLOC_LEXER_INDENT_INIT;
@@ -688,27 +688,12 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
// store sentinel for first indentation level
lex->indent_level[0] = 0;
- // preload characters
- lex->chr0 = reader.readbyte(reader.data);
- lex->chr1 = reader.readbyte(reader.data);
- lex->chr2 = reader.readbyte(reader.data);
-
- // if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
- if (lex->chr0 == MP_LEXER_EOF) {
- lex->chr0 = '\n';
- } else if (lex->chr1 == MP_LEXER_EOF) {
- if (lex->chr0 == '\r') {
- lex->chr0 = '\n';
- } else if (lex->chr0 != '\n') {
- lex->chr1 = '\n';
- }
- } else if (lex->chr2 == MP_LEXER_EOF) {
- if (lex->chr1 == '\r') {
- lex->chr1 = '\n';
- } else if (lex->chr1 != '\n') {
- lex->chr2 = '\n';
- }
- }
+ // load lexer with start of file, advancing lex->column to 1
+ // start with dummy bytes and use next_char() for proper EOL/EOF handling
+ lex->chr0 = lex->chr1 = lex->chr2 = 0;
+ next_char(lex);
+ next_char(lex);
+ next_char(lex);
// preload first token
mp_lexer_to_next(lex);