py: Support Unicode (UTF-8 encoded) identifiers in Python source.

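Enabled simply by making the identifier lexing code 8-bit clean: any byte with the high bit set (>= 0x80) is now accepted as part of an identifier, and identifier bytes are appended with vstr_add_byte instead of vstr_add_char.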
author: Damien George <damien.p.george@gmail.com> 2015-06-09 10:58:07 +0000
committer: Damien George <damien.p.george@gmail.com> 2015-06-09 10:58:07 +0000
commit: 7ed58cb66379f4d87e3e8fbb68baada19048ac18 (patch)
tree: 4ccd045b1680a2a4132025542f450ee337c5b3ff /py
parent: 6e56bb623c66007cd3fe6e0a48c3af9f1e0814fc (diff)
download: micropython-7ed58cb66379f4d87e3e8fbb68baada19048ac18.tar.gz
micropython-7ed58cb66379f4d87e3e8fbb68baada19048ac18.zip
1 file changed, 5 insertions, 6 deletions
diff --git a/py/lexer.c b/py/lexer.c
index 12cb5ae5b2..97c84cf118 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -112,12 +112,11 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {
     return lex->chr1 >= '0' && lex->chr1 <= '7';
 }
 
-// TODO UNICODE include unicode characters in definition of identifiers
+// to easily parse utf-8 identifiers we allow any raw byte with high bit set
 STATIC bool is_head_of_identifier(mp_lexer_t *lex) {
-    return is_letter(lex) || lex->chr0 == '_';
+    return is_letter(lex) || lex->chr0 == '_' || lex->chr0 >= 0x80;
 }
 
-// TODO UNICODE include unicode characters in definition of identifiers
 STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
     return is_head_of_identifier(lex) || is_digit(lex);
 }
@@ -523,13 +522,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
     } else if (is_head_of_identifier(lex)) {
         lex->tok_kind = MP_TOKEN_NAME;
 
-        // get first char
-        vstr_add_char(&lex->vstr, CUR_CHAR(lex));
+        // get first char (add as byte to remain 8-bit clean and support utf-8)
+        vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
         next_char(lex);
 
         // get tail chars
         while (!is_end(lex) && is_tail_of_identifier(lex)) {
-            vstr_add_char(&lex->vstr, CUR_CHAR(lex));
+            vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
             next_char(lex);
         }
author	Damien George <damien.p.george@gmail.com>	2015-06-09 10:58:07 +0000
committer	Damien George <damien.p.george@gmail.com>	2015-06-09 10:58:07 +0000
commit	7ed58cb66379f4d87e3e8fbb68baada19048ac18 (patch)
tree	4ccd045b1680a2a4132025542f450ee337c5b3ff /py
parent	6e56bb623c66007cd3fe6e0a48c3af9f1e0814fc (diff)
download	micropython-7ed58cb66379f4d87e3e8fbb68baada19048ac18.tar.gz micropython-7ed58cb66379f4d87e3e8fbb68baada19048ac18.zip