py: Improve encoding scheme for line-number to bytecode map.

Reduces the amount of RAM needed to store the line-number to bytecode map in the bytecode prelude by about a factor of 10 on average. Using CPython 3.4's stdlib for statistics: previously, an average of 13 bytes was used per (bytecode offset, line-number offset) pair; with this improvement, that is down to 1.3 bytes on average. The large RAM usage before was due to some very large steps in line numbers, both from the initial step to the first line of a function located far down in the file, and from functions that contain big comments and/or big strings (both cases were significant). Although the savings are large on average for the CPython stdlib, this change won't have such a big effect on the small scripts used in embedded programming. Addresses issue #648.
author: Damien George <damien.p.george@gmail.com> 2014-07-31 16:12:01 +0000
committer: Damien George <damien.p.george@gmail.com> 2014-07-31 16:12:01 +0000
commit: 4747becc6423e8161519ad6850e09137f14742a8 (patch)
tree: 347d25bcac7fdf71c2ac60c837b74b92ea0abd58 /py
parent: 8cc2018d47bc15a1b10295965fc0ccd27c0dcbba (diff)
download: micropython-4747becc6423e8161519ad6850e09137f14742a8.tar.gz
micropython-4747becc6423e8161519ad6850e09137f14742a8.zip
3 files changed, 48 insertions, 10 deletions
diff --git a/py/emitbc.c b/py/emitbc.c
index 365ec458a2..161e8c7f6d 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -115,12 +115,24 @@ STATIC void emit_write_code_info_qstr(emit_t* emit, qstr qstr) {
 #if MICROPY_ENABLE_SOURCE_LINE
 STATIC void emit_write_code_info_bytes_lines(emit_t* emit, uint bytes_to_skip, uint lines_to_skip) {
     assert(bytes_to_skip > 0 || lines_to_skip > 0);
+    //printf("  %d %d\n", bytes_to_skip, lines_to_skip);
     while (bytes_to_skip > 0 || lines_to_skip > 0) {
-        uint b = MIN(bytes_to_skip, 31);
-        uint l = MIN(lines_to_skip, 7);
+        mp_uint_t b, l;
+        if (lines_to_skip <= 6) {
+            // use 0b0LLBBBBB encoding
+            b = MIN(bytes_to_skip, 0x1f);
+            l = MIN(lines_to_skip, 0x3);
+            *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5);
+        } else {
+            // use 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
+            b = MIN(bytes_to_skip, 0xf);
+            l = MIN(lines_to_skip, 0x7ff);
+            byte *ci = emit_get_cur_to_write_code_info(emit, 2);
+            ci[0] = 0x80 | b | ((l >> 4) & 0x70);
+            ci[1] = l;
+        }
         bytes_to_skip -= b;
         lines_to_skip -= l;
-        *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5);
     }
 }
 #endif
@@ -363,7 +375,6 @@ STATIC void emit_bc_set_source_line(emit_t *emit, int source_line) {
         uint bytes_to_skip = emit->bytecode_offset - emit->last_source_line_offset;
         uint lines_to_skip = source_line - emit->last_source_line;
         emit_write_code_info_bytes_lines(emit, bytes_to_skip, lines_to_skip);
-        //printf("  %d %d\n", bytes_to_skip, lines_to_skip);
         emit->last_source_line_offset = emit->bytecode_offset;
         emit->last_source_line = source_line;
     }
diff --git a/py/showbc.c b/py/showbc.c
index 12400fa7c2..6c10333c9f 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -95,9 +95,18 @@ void mp_bytecode_print(const void *descr, const byte *ip, int len) {
         mp_int_t bc = (code_info + code_info_size) - ip;
         mp_uint_t source_line = 1;
         printf("  bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
-        for (const byte* ci = code_info + 12; *ci; ci++) {
-            bc += *ci & 31;
-            source_line += *ci >> 5;
+        for (const byte* ci = code_info + 12; *ci;) {
+            if ((ci[0] & 0x80) == 0) {
+                // 0b0LLBBBBB encoding
+                bc += ci[0] & 0x1f;
+                source_line += ci[0] >> 5;
+                ci += 1;
+            } else {
+                // 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
+                bc += ci[0] & 0xf;
+                source_line += ((ci[0] << 4) & 0x700) | ci[1];
+                ci += 2;
+            }
             printf("  bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
         }
     }
diff --git a/py/vm.c b/py/vm.c
index ade2ee2b5a..c0116bbeb9 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -931,9 +931,27 @@ exception_handler:
                 const byte* ci = code_info + 12;
                 if (*ci) {
                     source_line = 1;
-                    for (; *ci && bc >= ((*ci) & 31); ci++) {
-                        bc -= *ci & 31;
-                        source_line += *ci >> 5;
+                    mp_uint_t c;
+                    while ((c = *ci)) {
+                        mp_uint_t b, l;
+                        if ((c & 0x80) == 0) {
+                            // 0b0LLBBBBB encoding
+                            b = c & 0x1f;
+                            l = c >> 5;
+                            ci += 1;
+                        } else {
+                            // 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
+                            b = c & 0xf;
+                            l = ((c << 4) & 0x700) | ci[1];
+                            ci += 2;
+                        }
+                        if (bc >= b) {
+                            bc -= b;
+                            source_line += l;
+                        } else {
+                            // found source line corresponding to bytecode offset
+                            break;
+                        }
                     }
                 }
                 mp_obj_exception_add_traceback(nlr.ret_val, source_file, source_line, block_name);
author	Damien George <damien.p.george@gmail.com>	2014-07-31 16:12:01 +0000
committer	Damien George <damien.p.george@gmail.com>	2014-07-31 16:12:01 +0000
commit	4747becc6423e8161519ad6850e09137f14742a8 (patch)
tree	347d25bcac7fdf71c2ac60c837b74b92ea0abd58 /py
parent	8cc2018d47bc15a1b10295965fc0ccd27c0dcbba (diff)
download	micropython-4747becc6423e8161519ad6850e09137f14742a8.tar.gz micropython-4747becc6423e8161519ad6850e09137f14742a8.zip