diff options
author | Damien George <damien.p.george@gmail.com> | 2014-07-31 16:12:01 +0000 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2014-07-31 16:12:01 +0000 |
commit | 4747becc6423e8161519ad6850e09137f14742a8 (patch) | |
tree | 347d25bcac7fdf71c2ac60c837b74b92ea0abd58 /py | |
parent | 8cc2018d47bc15a1b10295965fc0ccd27c0dcbba (diff) | |
download | micropython-4747becc6423e8161519ad6850e09137f14742a8.tar.gz micropython-4747becc6423e8161519ad6850e09137f14742a8.zip |
py: Improve encoding scheme for line-number to bytecode map.
Reduces the amount of RAM needed to store the line-number to bytecode map
in the bytecode prelude by about a factor of 10 on average.
Using CPython 3.4's stdlib for statistics: previously, an average of
13 bytes were used per (bytecode offset, line-number offset) pair; with
this improvement, that is down to 1.3 bytes on average.
The large RAM usage before was due to some very large steps in line numbers:
both the initial step from line 1 to the first line of a function located far
down in the file, and steps within functions that have big comments and/or
big strings in them (both cases were significant).
Although the savings are large on average for the CPython stdlib, it
won't have such a big effect for small scripts used in embedded
programming.
Addresses issue #648.
Diffstat (limited to 'py')
-rw-r--r-- | py/emitbc.c | 19 | ||||
-rw-r--r-- | py/showbc.c | 15 | ||||
-rw-r--r-- | py/vm.c | 24 |
3 files changed, 48 insertions, 10 deletions
diff --git a/py/emitbc.c b/py/emitbc.c index 365ec458a2..161e8c7f6d 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -115,12 +115,24 @@ STATIC void emit_write_code_info_qstr(emit_t* emit, qstr qstr) { #if MICROPY_ENABLE_SOURCE_LINE STATIC void emit_write_code_info_bytes_lines(emit_t* emit, uint bytes_to_skip, uint lines_to_skip) { assert(bytes_to_skip > 0 || lines_to_skip > 0); + //printf(" %d %d\n", bytes_to_skip, lines_to_skip); while (bytes_to_skip > 0 || lines_to_skip > 0) { - uint b = MIN(bytes_to_skip, 31); - uint l = MIN(lines_to_skip, 7); + mp_uint_t b, l; + if (lines_to_skip <= 6) { + // use 0b0LLBBBBB encoding + b = MIN(bytes_to_skip, 0x1f); + l = MIN(lines_to_skip, 0x3); + *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5); + } else { + // use 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte) + b = MIN(bytes_to_skip, 0xf); + l = MIN(lines_to_skip, 0x7ff); + byte *ci = emit_get_cur_to_write_code_info(emit, 2); + ci[0] = 0x80 | b | ((l >> 4) & 0x70); + ci[1] = l; + } bytes_to_skip -= b; lines_to_skip -= l; - *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5); } } #endif @@ -363,7 +375,6 @@ STATIC void emit_bc_set_source_line(emit_t *emit, int source_line) { uint bytes_to_skip = emit->bytecode_offset - emit->last_source_line_offset; uint lines_to_skip = source_line - emit->last_source_line; emit_write_code_info_bytes_lines(emit, bytes_to_skip, lines_to_skip); - //printf(" %d %d\n", bytes_to_skip, lines_to_skip); emit->last_source_line_offset = emit->bytecode_offset; emit->last_source_line = source_line; } diff --git a/py/showbc.c b/py/showbc.c index 12400fa7c2..6c10333c9f 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -95,9 +95,18 @@ void mp_bytecode_print(const void *descr, const byte *ip, int len) { mp_int_t bc = (code_info + code_info_size) - ip; mp_uint_t source_line = 1; printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line); - for (const byte* ci = code_info + 12; *ci; ci++) { - bc += *ci & 31; - source_line += *ci >> 5; + for 
(const byte* ci = code_info + 12; *ci;) { + if ((ci[0] & 0x80) == 0) { + // 0b0LLBBBBB encoding + bc += ci[0] & 0x1f; + source_line += ci[0] >> 5; + ci += 1; + } else { + // 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte) + bc += ci[0] & 0xf; + source_line += ((ci[0] << 4) & 0x700) | ci[1]; + ci += 2; + } printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line); } } @@ -931,9 +931,27 @@ exception_handler: const byte* ci = code_info + 12; if (*ci) { source_line = 1; - for (; *ci && bc >= ((*ci) & 31); ci++) { - bc -= *ci & 31; - source_line += *ci >> 5; + mp_uint_t c; + while ((c = *ci)) { + mp_uint_t b, l; + if ((c & 0x80) == 0) { + // 0b0LLBBBBB encoding + b = c & 0x1f; + l = c >> 5; + ci += 1; + } else { + // 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte) + b = c & 0xf; + l = ((c << 4) & 0x700) | ci[1]; + ci += 2; + } + if (bc >= b) { + bc -= b; + source_line += l; + } else { + // found source line corresponding to bytecode offset + break; + } } } mp_obj_exception_add_traceback(nlr.ret_val, source_file, source_line, block_name); |