diff options
Diffstat (limited to 'tools/mpy-tool.py')
-rwxr-xr-x | tools/mpy-tool.py | 1308 |
1 files changed, 930 insertions, 378 deletions
diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 58a4a75c91..da6352dab7 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -29,27 +29,49 @@ from __future__ import print_function import platform if platform.python_version_tuple()[0] == "2": - str_cons = lambda val, enc=None: val + from binascii import hexlify as hexlify_py2 + + str_cons = lambda val, enc=None: str(val) bytes_cons = lambda val, enc=None: bytearray(val) is_str_type = lambda o: type(o) is str is_bytes_type = lambda o: type(o) is bytearray is_int_type = lambda o: type(o) is int or type(o) is long + + def hexlify_to_str(b): + x = hexlify_py2(b) + return ":".join(x[i : i + 2] for i in range(0, len(x), 2)) + else: + from binascii import hexlify + str_cons = str bytes_cons = bytes is_str_type = lambda o: type(o) is str is_bytes_type = lambda o: type(o) is bytes is_int_type = lambda o: type(o) is int + + def hexlify_to_str(b): + return str(hexlify(b, ":"), "ascii") + + # end compatibility code import sys import struct -from collections import namedtuple sys.path.append(sys.path[0] + "/../py") import makeqstrdata as qstrutil +class MPYReadError(Exception): + def __init__(self, filename, msg): + self.filename = filename + self.msg = msg + + def __str__(self): + return "%s: %s" % (self.filename, self.msg) + + class FreezeError(Exception): def __init__(self, rawcode, msg): self.rawcode = rawcode @@ -60,7 +82,7 @@ class FreezeError(Exception): class Config: - MPY_VERSION = 5 + MPY_VERSION = 6 MICROPY_LONGINT_IMPL_NONE = 0 MICROPY_LONGINT_IMPL_LONGLONG = 1 MICROPY_LONGINT_IMPL_MPZ = 2 @@ -82,20 +104,6 @@ for n in qstrutil.static_qstr_list: global_qstrs.append(QStrType(n)) -class QStrWindow: - def __init__(self, size): - self.window = [] - self.size = size - - def push(self, val): - self.window = [val] + self.window[: self.size - 1] - - def access(self, idx): - val = self.window[idx] - self.window = [val] + self.window[:idx] + self.window[idx + 1 :] - return val - - MP_CODE_BYTECODE = 2 MP_CODE_NATIVE_PY = 3 MP_CODE_NATIVE_VIPER = 4 @@ -113,6 +121,10 @@ MP_NATIVE_ARCH_ARMV7EMDP = 8 MP_NATIVE_ARCH_XTENSA = 9 MP_NATIVE_ARCH_XTENSAWIN = 10 +MP_SCOPE_FLAG_VIPERRELOC = 0x10 +MP_SCOPE_FLAG_VIPERRODATA = 0x20 +MP_SCOPE_FLAG_VIPERBSS = 0x40 + MP_BC_MASK_EXTRA_BYTE = 0x9E MP_BC_FORMAT_BYTE = 0 @@ -120,11 +132,180 @@ MP_BC_FORMAT_QSTR = 1 MP_BC_FORMAT_VAR_UINT = 2 MP_BC_FORMAT_OFFSET = 3 -# extra byte if caching enabled: -MP_BC_LOAD_NAME = 0x11 -MP_BC_LOAD_GLOBAL = 0x12 -MP_BC_LOAD_ATTR = 0x13 -MP_BC_STORE_ATTR = 0x18 +mp_unary_op_method_name = ( + "__pos__", + "__neg__", + "__invert__", + "<not>", +) + +mp_binary_op_method_name = ( + "__lt__", + "__gt__", + "__eq__", + "__le__", + "__ge__", + "__ne__", + "<in>", + "<is>", + "<exception match>", + "__ior__", + "__ixor__", + "__iand__", + "__ilshift__", + "__irshift__", + "__iadd__", + "__isub__", + "__imul__", + "__imatmul__", + "__ifloordiv__", + "__itruediv__", + "__imod__", + "__ipow__", + "__or__", + "__xor__", + "__and__", + "__lshift__", + "__rshift__", + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__floordiv__", + "__truediv__", + "__mod__", + "__pow__", +) + + +class Opcodes: + # fmt: off + # Load, Store, Delete, Import, Make, Build, Unpack, Call, Jump, Exception, For, sTack, Return, Yield, Op + MP_BC_BASE_RESERVED = (0x00) # ---------------- + MP_BC_BASE_QSTR_O = (0x10) # LLLLLLSSSDDII--- + MP_BC_BASE_VINT_E = (0x20) # MMLLLLSSDDBBBBBB + MP_BC_BASE_VINT_O = (0x30) # UUMMCCCC-------- + MP_BC_BASE_JUMP_E = (0x40) # J-JJJJJEEEEF---- + MP_BC_BASE_BYTE_O = (0x50) # LLLLSSDTTTTTEEFF + MP_BC_BASE_BYTE_E = (0x60) # --BREEEYYI------ + MP_BC_LOAD_CONST_SMALL_INT_MULTI = (0x70) # LLLLLLLLLLLLLLLL + # = (0x80) # LLLLLLLLLLLLLLLL + # = (0x90) # LLLLLLLLLLLLLLLL + # = (0xa0) # LLLLLLLLLLLLLLLL + MP_BC_LOAD_FAST_MULTI = (0xb0) # LLLLLLLLLLLLLLLL + MP_BC_STORE_FAST_MULTI = (0xc0) # SSSSSSSSSSSSSSSS + MP_BC_UNARY_OP_MULTI = (0xd0) # OOOOOOO + MP_BC_BINARY_OP_MULTI = (0xd7) # OOOOOOOOO + # = (0xe0) # OOOOOOOOOOOOOOOO + # = (0xf0) # OOOOOOOOOO------ + + MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM = 64 + MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS = 16 + MP_BC_LOAD_FAST_MULTI_NUM = 16 + MP_BC_STORE_FAST_MULTI_NUM = 16 + MP_BC_UNARY_OP_MULTI_NUM = 4 # MP_UNARY_OP_NUM_BYTECODE + MP_BC_BINARY_OP_MULTI_NUM = 35 # MP_BINARY_OP_NUM_BYTECODE + + MP_BC_LOAD_CONST_FALSE = (MP_BC_BASE_BYTE_O + 0x00) + MP_BC_LOAD_CONST_NONE = (MP_BC_BASE_BYTE_O + 0x01) + MP_BC_LOAD_CONST_TRUE = (MP_BC_BASE_BYTE_O + 0x02) + MP_BC_LOAD_CONST_SMALL_INT = (MP_BC_BASE_VINT_E + 0x02) # signed var-int + MP_BC_LOAD_CONST_STRING = (MP_BC_BASE_QSTR_O + 0x00) # qstr + MP_BC_LOAD_CONST_OBJ = (MP_BC_BASE_VINT_E + 0x03) # ptr + MP_BC_LOAD_NULL = (MP_BC_BASE_BYTE_O + 0x03) + + MP_BC_LOAD_FAST_N = (MP_BC_BASE_VINT_E + 0x04) # uint + MP_BC_LOAD_DEREF = (MP_BC_BASE_VINT_E + 0x05) # uint + MP_BC_LOAD_NAME = (MP_BC_BASE_QSTR_O + 0x01) # qstr + MP_BC_LOAD_GLOBAL = (MP_BC_BASE_QSTR_O + 0x02) # qstr + MP_BC_LOAD_ATTR = (MP_BC_BASE_QSTR_O + 0x03) # qstr + MP_BC_LOAD_METHOD = (MP_BC_BASE_QSTR_O + 0x04) # qstr + MP_BC_LOAD_SUPER_METHOD = (MP_BC_BASE_QSTR_O + 0x05) # qstr + MP_BC_LOAD_BUILD_CLASS = (MP_BC_BASE_BYTE_O + 0x04) + MP_BC_LOAD_SUBSCR = (MP_BC_BASE_BYTE_O + 0x05) + + MP_BC_STORE_FAST_N = (MP_BC_BASE_VINT_E + 0x06) # uint + MP_BC_STORE_DEREF = (MP_BC_BASE_VINT_E + 0x07) # uint + MP_BC_STORE_NAME = (MP_BC_BASE_QSTR_O + 0x06) # qstr + MP_BC_STORE_GLOBAL = (MP_BC_BASE_QSTR_O + 0x07) # qstr + MP_BC_STORE_ATTR = (MP_BC_BASE_QSTR_O + 0x08) # qstr + MP_BC_STORE_SUBSCR = (MP_BC_BASE_BYTE_O + 0x06) + + MP_BC_DELETE_FAST = (MP_BC_BASE_VINT_E + 0x08) # uint + MP_BC_DELETE_DEREF = (MP_BC_BASE_VINT_E + 0x09) # uint + MP_BC_DELETE_NAME = (MP_BC_BASE_QSTR_O + 0x09) # qstr + MP_BC_DELETE_GLOBAL = (MP_BC_BASE_QSTR_O + 0x0a) # qstr + + MP_BC_DUP_TOP = (MP_BC_BASE_BYTE_O + 0x07) + MP_BC_DUP_TOP_TWO = (MP_BC_BASE_BYTE_O + 0x08) + MP_BC_POP_TOP = (MP_BC_BASE_BYTE_O + 0x09) + MP_BC_ROT_TWO = (MP_BC_BASE_BYTE_O + 0x0a) + MP_BC_ROT_THREE = (MP_BC_BASE_BYTE_O + 0x0b) + + MP_BC_JUMP = (MP_BC_BASE_JUMP_E + 0x02) # rel byte code offset, 16-bit signed, in excess + MP_BC_POP_JUMP_IF_TRUE = (MP_BC_BASE_JUMP_E + 0x03) # rel byte code offset, 16-bit signed, in excess + MP_BC_POP_JUMP_IF_FALSE = (MP_BC_BASE_JUMP_E + 0x04) # rel byte code offset, 16-bit signed, in excess + MP_BC_JUMP_IF_TRUE_OR_POP = (MP_BC_BASE_JUMP_E + 0x05) # rel byte code offset, 16-bit signed, in excess + MP_BC_JUMP_IF_FALSE_OR_POP = (MP_BC_BASE_JUMP_E + 0x06) # rel byte code offset, 16-bit signed, in excess + MP_BC_UNWIND_JUMP = (MP_BC_BASE_JUMP_E + 0x00) # rel byte code offset, 16-bit signed, in excess; then a byte + MP_BC_SETUP_WITH = (MP_BC_BASE_JUMP_E + 0x07) # rel byte code offset, 16-bit unsigned + MP_BC_SETUP_EXCEPT = (MP_BC_BASE_JUMP_E + 0x08) # rel byte code offset, 16-bit unsigned + MP_BC_SETUP_FINALLY = (MP_BC_BASE_JUMP_E + 0x09) # rel byte code offset, 16-bit unsigned + MP_BC_POP_EXCEPT_JUMP = (MP_BC_BASE_JUMP_E + 0x0a) # rel byte code offset, 16-bit unsigned + MP_BC_FOR_ITER = (MP_BC_BASE_JUMP_E + 0x0b) # rel byte code offset, 16-bit unsigned + MP_BC_WITH_CLEANUP = (MP_BC_BASE_BYTE_O + 0x0c) + MP_BC_END_FINALLY = (MP_BC_BASE_BYTE_O + 0x0d) + MP_BC_GET_ITER = (MP_BC_BASE_BYTE_O + 0x0e) + MP_BC_GET_ITER_STACK = (MP_BC_BASE_BYTE_O + 0x0f) + + MP_BC_BUILD_TUPLE = (MP_BC_BASE_VINT_E + 0x0a) # uint + MP_BC_BUILD_LIST = (MP_BC_BASE_VINT_E + 0x0b) # uint + MP_BC_BUILD_MAP = (MP_BC_BASE_VINT_E + 0x0c) # uint + MP_BC_STORE_MAP = (MP_BC_BASE_BYTE_E + 0x02) + MP_BC_BUILD_SET = (MP_BC_BASE_VINT_E + 0x0d) # uint + MP_BC_BUILD_SLICE = (MP_BC_BASE_VINT_E + 0x0e) # uint + MP_BC_STORE_COMP = (MP_BC_BASE_VINT_E + 0x0f) # uint + MP_BC_UNPACK_SEQUENCE = (MP_BC_BASE_VINT_O + 0x00) # uint + MP_BC_UNPACK_EX = (MP_BC_BASE_VINT_O + 0x01) # uint + + MP_BC_RETURN_VALUE = (MP_BC_BASE_BYTE_E + 0x03) + MP_BC_RAISE_LAST = (MP_BC_BASE_BYTE_E + 0x04) + MP_BC_RAISE_OBJ = (MP_BC_BASE_BYTE_E + 0x05) + MP_BC_RAISE_FROM = (MP_BC_BASE_BYTE_E + 0x06) + MP_BC_YIELD_VALUE = (MP_BC_BASE_BYTE_E + 0x07) + MP_BC_YIELD_FROM = (MP_BC_BASE_BYTE_E + 0x08) + + MP_BC_MAKE_FUNCTION = (MP_BC_BASE_VINT_O + 0x02) # uint + MP_BC_MAKE_FUNCTION_DEFARGS = (MP_BC_BASE_VINT_O + 0x03) # uint + MP_BC_MAKE_CLOSURE = (MP_BC_BASE_VINT_E + 0x00) # uint; extra byte + MP_BC_MAKE_CLOSURE_DEFARGS = (MP_BC_BASE_VINT_E + 0x01) # uint; extra byte + MP_BC_CALL_FUNCTION = (MP_BC_BASE_VINT_O + 0x04) # uint + MP_BC_CALL_FUNCTION_VAR_KW = (MP_BC_BASE_VINT_O + 0x05) # uint + MP_BC_CALL_METHOD = (MP_BC_BASE_VINT_O + 0x06) # uint + MP_BC_CALL_METHOD_VAR_KW = (MP_BC_BASE_VINT_O + 0x07) # uint + + MP_BC_IMPORT_NAME = (MP_BC_BASE_QSTR_O + 0x0b) # qstr + MP_BC_IMPORT_FROM = (MP_BC_BASE_QSTR_O + 0x0c) # qstr + MP_BC_IMPORT_STAR = (MP_BC_BASE_BYTE_E + 0x09) + # fmt: on + + # Create a dict mapping opcode value to opcode name. + mapping = ["unknown" for _ in range(256)] + for op_name in list(locals()): + if op_name.startswith("MP_BC_"): + mapping[locals()[op_name]] = op_name[len("MP_BC_") :] + for i in range(MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM): + name = "LOAD_CONST_SMALL_INT %d" % (i - MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS) + mapping[MP_BC_LOAD_CONST_SMALL_INT_MULTI + i] = name + for i in range(MP_BC_LOAD_FAST_MULTI_NUM): + mapping[MP_BC_LOAD_FAST_MULTI + i] = "LOAD_FAST %d" % i + for i in range(MP_BC_STORE_FAST_MULTI_NUM): + mapping[MP_BC_STORE_FAST_MULTI + i] = "STORE_FAST %d" % i + for i in range(MP_BC_UNARY_OP_MULTI_NUM): + mapping[MP_BC_UNARY_OP_MULTI + i] = "UNARY_OP %d %s" % (i, mp_unary_op_method_name[i]) + for i in range(MP_BC_BINARY_OP_MULTI_NUM): + mapping[MP_BC_BINARY_OP_MULTI + i] = "BINARY_OP %d %s" % (i, mp_binary_op_method_name[i]) + # this function mirrors that in py/bc.c def mp_opcode_format(bytecode, ip, count_var_uint): @@ -147,6 +328,26 @@ def mp_opcode_format(bytecode, ip, count_var_uint): return f, ip - ip_start +def mp_opcode_decode(bytecode, ip): + opcode = bytecode[ip] + ip_start = ip + f = (0x000003A4 >> (2 * ((opcode) >> 4))) & 3 + extra_byte = (opcode & MP_BC_MASK_EXTRA_BYTE) == 0 + ip += 1 + arg = 0 + if f in (MP_BC_FORMAT_QSTR, MP_BC_FORMAT_VAR_UINT): + arg = bytecode[ip] & 0x7F + while bytecode[ip] & 0x80 != 0: + ip += 1 + arg = arg << 7 | bytecode[ip] & 0x7F + ip += 1 + elif f == MP_BC_FORMAT_OFFSET: + arg = bytecode[ip] | bytecode[ip + 1] << 8 + ip += 2 + ip += extra_byte + return f, ip - ip_start, arg + + def read_prelude_sig(read_byte): z = read_byte() # xSSSSEAA @@ -201,6 +402,7 @@ def extract_prelude(bytecode, ip): n_kwonly_args, n_def_pos_args, ) = read_prelude_sig(local_read_byte) + n_info, n_cell = read_prelude_size(local_read_byte) ip = ip_ref[0] @@ -208,74 +410,186 @@ def extract_prelude(bytecode, ip): ip = ip2 + n_info + n_cell # ip now points to first opcode # ip2 points to simple_name qstr - return ip, ip2, (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args) - -class MPFunTable: - pass + # Extract simple_name and argument qstrs (var uints). + args = [] + for arg_num in range(1 + n_pos_args + n_kwonly_args): + value = 0 + while True: + b = local_read_byte() + value = (value << 7) | (b & 0x7F) + if b & 0x80 == 0: + break + args.append(value) + + return ( + ip2, + ip, + ip_ref[0], + (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args), + args, + ) -class RawCode(object): - # a set of all escaped names, to make sure they are unique - escaped_names = set() +class MPFunTable: + def __repr__(self): + return "mp_fun_table" - # convert code kind number to string - code_kind_str = { - MP_CODE_BYTECODE: "MP_CODE_BYTECODE", - MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY", - MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER", - MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM", - } - def __init__(self, code_kind, bytecode, prelude_offset, qstrs, objs, raw_codes): - # set core variables - self.code_kind = code_kind - self.bytecode = bytecode - self.prelude_offset = prelude_offset - self.qstrs = qstrs - self.objs = objs - self.raw_codes = raw_codes - - if self.prelude_offset is None: - # no prelude, assign a dummy simple_name - self.prelude_offset = 0 - self.simple_name = global_qstrs[1] - else: - # extract prelude - self.ip, self.ip2, self.prelude = extract_prelude(self.bytecode, self.prelude_offset) - self.simple_name = self._unpack_qstr(self.ip2) - self.source_file = self._unpack_qstr(self.ip2 + 2) - self.line_info_offset = self.ip2 + 4 +class CompiledModule: + def __init__( + self, + mpy_source_file, + mpy_segments, + header, + qstr_table, + obj_table, + raw_code, + raw_code_file_offset, + escaped_name, + ): + self.mpy_source_file = mpy_source_file + self.mpy_segments = mpy_segments + self.source_file = qstr_table[0] + self.header = header + self.qstr_table = qstr_table + self.obj_table = obj_table + self.raw_code_file_offset = raw_code_file_offset + self.raw_code = raw_code + self.escaped_name = escaped_name def _unpack_qstr(self, ip): qst = self.bytecode[ip] | self.bytecode[ip + 1] << 8 return global_qstrs[qst] - def dump(self): - # dump children first - for rc in self.raw_codes: - rc.freeze("") - # TODO + def hexdump(self): + with open(self.mpy_source_file, "rb") as f: + WIDTH = 16 + COL_OFF = "\033[0m" + COL_TABLE = ( + ("", ""), # META + ("\033[0;31m", "\033[0;91m"), # QSTR + ("\033[0;32m", "\033[0;92m"), # OBJ + ("\033[0;34m", "\033[0;94m"), # CODE + ) + cur_col = "" + cur_col_index = 0 + offset = 0 + segment_index = 0 + while True: + data = bytes_cons(f.read(WIDTH)) + if not data: + break + + # Print out the hex dump of this line of data. + line_hex = cur_col + line_chr = cur_col + line_comment = "" + for i in range(len(data)): + # Determine the colour of the data, if any, and the line comment. + while segment_index < len(self.mpy_segments): + if offset + i == self.mpy_segments[segment_index].start: + cur_col = COL_TABLE[self.mpy_segments[segment_index].kind][ + cur_col_index + ] + cur_col_index = 1 - cur_col_index + line_hex += cur_col + line_chr += cur_col + line_comment += " %s%s%s" % ( + cur_col, + self.mpy_segments[segment_index].name, + COL_OFF, + ) + if offset + i == self.mpy_segments[segment_index].end: + cur_col = "" + line_hex += COL_OFF + line_chr += COL_OFF + segment_index += 1 + else: + break + + # Add to the hex part of the line. + if i % 2 == 0: + line_hex += " " + line_hex += "%02x" % data[i] + + # Add to the characters part of the line. + if 0x20 <= data[i] <= 0x7E: + line_chr += "%s" % chr(data[i]) + else: + line_chr += "." + + # Print out this line. + if cur_col: + line_hex += COL_OFF + line_chr += COL_OFF + pad = " " * ((WIDTH - len(data)) * 5 // 2) + print("%08x:%s%s %s %s" % (offset, line_hex, pad, line_chr, line_comment)) + offset += WIDTH + + def disassemble(self): + print("mpy_source_file:", self.mpy_source_file) + print("source_file:", self.source_file.str) + print("header:", hexlify_to_str(self.header)) + print("qstr_table[%u]:" % len(self.qstr_table)) + for q in self.qstr_table: + print(" %s" % q.str) + print("obj_table:", self.obj_table) + self.raw_code.disassemble() + + def freeze(self, compiled_module_index): + print() + print("/" * 80) + print("// frozen module %s" % self.escaped_name) + print("// - original source file: %s" % self.mpy_source_file) + print("// - frozen file name: %s" % self.source_file.str) + print("// - .mpy header: %s" % ":".join("%02x" % b for b in self.header)) + print() - def freeze_children(self, parent_name): - self.escaped_name = parent_name + self.simple_name.qstr_esc + self.raw_code.freeze() + print() - # make sure the escaped name is unique - i = 2 - while self.escaped_name in RawCode.escaped_names: - self.escaped_name = parent_name + self.simple_name.qstr_esc + str(i) - i += 1 - RawCode.escaped_names.add(self.escaped_name) + self.freeze_constants() - # emit children first - for rc in self.raw_codes: - rc.freeze(self.escaped_name + "_") + print() + print("static const mp_frozen_module_t frozen_module_%s = {" % self.escaped_name) + print(" .constants = {") + if len(self.qstr_table): + print( + " .qstr_table = (qstr_short_t *)&const_qstr_table_data_%s," + % self.escaped_name + ) + else: + print(" .qstr_table = NULL,") + if len(self.obj_table): + print(" .obj_table = (mp_obj_t *)&const_obj_table_data_%s," % self.escaped_name) + else: + print(" .obj_table = NULL,") + print(" },") + print(" .rc = &raw_code_%s," % self.raw_code.escaped_name) + print("};") def freeze_constants(self): + global const_str_content, const_int_content, const_obj_content + + if len(self.qstr_table): + print( + "static const qstr_short_t const_qstr_table_data_%s[%u] = {" + % (self.escaped_name, len(self.qstr_table)) + ) + for q in self.qstr_table: + print(" %s," % q.qstr_id) + print("};") + + if not len(self.obj_table): + return + # generate constant objects - for i, obj in enumerate(self.objs): + print() + print("// constants") + for i, obj in enumerate(self.obj_table): obj_name = "const_obj_%s_%u" % (self.escaped_name, i) - if obj is MPFunTable: + if isinstance(obj, MPFunTable): pass elif obj is Ellipsis: print("#define %s mp_const_ellipsis_obj" % obj_name) @@ -286,7 +600,7 @@ class RawCode(object): else: obj_type = "mp_type_bytes" print( - 'STATIC const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};' + 'static const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};' % ( obj_name, obj_type, @@ -295,6 +609,8 @@ class RawCode(object): "".join(("\\x%02x" % b) for b in obj), ) ) + const_str_content += len(obj) + const_obj_content += 4 * 4 elif is_int_type(obj): if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE: # TODO check if we can actually fit this long-int into a small-int @@ -316,76 +632,139 @@ class RawCode(object): ndigs = len(digs) digs = ",".join(("%#x" % d) for d in digs) print( - "STATIC const mp_obj_int_t %s = {{&mp_type_int}, " + "static const mp_obj_int_t %s = {{&mp_type_int}, " "{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t*)(const uint%u_t[]){%s}}};" % (obj_name, neg, ndigs, ndigs, bits_per_dig, bits_per_dig, digs) ) + const_int_content += (digs.count(",") + 1) * bits_per_dig // 8 + const_obj_content += 4 * 4 elif type(obj) is float: print( "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B" ) print( - "STATIC const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};" + "static const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};" % (obj_name, obj) ) print("#endif") + const_obj_content += 3 * 4 elif type(obj) is complex: print( - "STATIC const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};" + "static const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};" % (obj_name, obj.real, obj.imag) ) else: raise FreezeError(self, "freezing of object %r is not implemented" % (obj,)) - # generate constant table, if it has any entries - const_table_len = len(self.qstrs) + len(self.objs) + len(self.raw_codes) - if const_table_len: - print( - "STATIC const mp_rom_obj_t const_table_data_%s[%u] = {" - % (self.escaped_name, const_table_len) - ) - for qst in self.qstrs: - print(" MP_ROM_QSTR(%s)," % global_qstrs[qst].qstr_id) - for i in range(len(self.objs)): - if self.objs[i] is MPFunTable: - print(" &mp_fun_table,") - elif type(self.objs[i]) is float: - print( - "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B" - ) - print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i)) - print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C") - n = struct.unpack("<I", struct.pack("<f", self.objs[i]))[0] - n = ((n & ~0x3) | 2) + 0x80800000 - print(" (mp_rom_obj_t)(0x%08x)," % (n,)) - print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D") - n = struct.unpack("<Q", struct.pack("<d", self.objs[i]))[0] - n += 0x8004000000000000 - print(" (mp_rom_obj_t)(0x%016x)," % (n,)) - print("#endif") - else: - print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i)) - for rc in self.raw_codes: - print(" MP_ROM_PTR(&raw_code_%s)," % rc.escaped_name) + # generate constant table + print() + print("// constant table") + print( + "static const mp_rom_obj_t const_obj_table_data_%s[%u] = {" + % (self.escaped_name, len(self.obj_table)) + ) + for i in range(len(self.obj_table)): + if isinstance(self.obj_table[i], MPFunTable): + print(" &mp_fun_table,") + elif type(self.obj_table[i]) is float: + print( + "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B" + ) + print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i)) + print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C") + n = struct.unpack("<I", struct.pack("<f", self.obj_table[i]))[0] + n = ((n & ~0x3) | 2) + 0x80800000 + print(" (mp_rom_obj_t)(0x%08x)," % (n,)) + print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D") + n = struct.unpack("<Q", struct.pack("<d", self.obj_table[i]))[0] + n += 0x8004000000000000 + print(" (mp_rom_obj_t)(0x%016x)," % (n,)) + print("#endif") + else: + print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i)) + print("};") + + global const_table_ptr_content + const_table_ptr_content += len(self.obj_table) + + +class RawCode(object): + # a set of all escaped names, to make sure they are unique + escaped_names = set() + + # convert code kind number to string + code_kind_str = { + MP_CODE_BYTECODE: "MP_CODE_BYTECODE", + MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY", + MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER", + MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM", + } + + def __init__(self, cm_escaped_name, qstr_table, fun_data, prelude_offset, code_kind): + self.qstr_table = qstr_table + self.fun_data = fun_data + self.prelude_offset = prelude_offset + self.code_kind = code_kind + + if code_kind in (MP_CODE_BYTECODE, MP_CODE_NATIVE_PY): + ( + self.offset_names, + self.offset_opcodes, + self.offset_line_info, + self.prelude, + self.names, + ) = extract_prelude(self.fun_data, prelude_offset) + self.scope_flags = self.prelude[2] + self.n_pos_args = self.prelude[3] + self.simple_name = self.qstr_table[self.names[0]] + else: + self.simple_name = self.qstr_table[0] + + escaped_name = cm_escaped_name + "_" + self.simple_name.qstr_esc + + # make sure the escaped name is unique + i = 2 + unique_escaped_name = escaped_name + while unique_escaped_name in self.escaped_names: + unique_escaped_name = escaped_name + str(i) + i += 1 + self.escaped_names.add(unique_escaped_name) + self.escaped_name = unique_escaped_name + + def disassemble_children(self): + print(" children:", [rc.simple_name.str for rc in self.children]) + for rc in self.children: + rc.disassemble() + + def freeze_children(self): + # Freeze children and generate table of children. + if len(self.children): + for rc in self.children: + print("// child of %s" % self.escaped_name) + rc.freeze() + print() + print("static const mp_raw_code_t *const children_%s[] = {" % self.escaped_name) + for rc in self.children: + print(" &raw_code_%s," % rc.escaped_name) print("};") + print() - def freeze_module(self, qstr_links=(), type_sig=0): - # generate module - if self.simple_name.str != "<module>": - print("STATIC ", end="") - print("const mp_raw_code_t raw_code_%s = {" % self.escaped_name) + def freeze_raw_code(self, qstr_links=(), type_sig=0): + # Generate mp_raw_code_t. + print("static const mp_raw_code_t raw_code_%s = {" % self.escaped_name) print(" .kind = %s," % RawCode.code_kind_str[self.code_kind]) - print(" .scope_flags = 0x%02x," % self.prelude[2]) - print(" .n_pos_args = %u," % self.prelude[3]) + print(" .scope_flags = 0x%02x," % self.scope_flags) + print(" .n_pos_args = %u," % self.n_pos_args) print(" .fun_data = fun_data_%s," % self.escaped_name) - if len(self.qstrs) + len(self.objs) + len(self.raw_codes): - print(" .const_table = (mp_uint_t*)const_table_data_%s," % self.escaped_name) + print(" #if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_DEBUG_PRINTERS") + print(" .fun_data_len = %u," % len(self.fun_data)) + print(" #endif") + if len(self.children): + print(" .children = (void *)&children_%s," % self.escaped_name) else: - print(" .const_table = NULL,") + print(" .children = NULL,") print(" #if MICROPY_PERSISTENT_CODE_SAVE") - print(" .fun_data_len = %u," % len(self.bytecode)) - print(" .n_obj = %u," % len(self.objs)) - print(" .n_raw_code = %u," % len(self.raw_codes)) + print(" .n_children = %u," % len(self.children)) if self.code_kind == MP_CODE_BYTECODE: print(" #if MICROPY_PY_SYS_SETTRACE") print(" .prelude = {") @@ -395,13 +774,14 @@ class RawCode(object): print(" .n_pos_args = %u," % self.prelude[3]) print(" .n_kwonly_args = %u," % self.prelude[4]) print(" .n_def_pos_args = %u," % self.prelude[5]) - print(" .qstr_block_name = %s," % self.simple_name.qstr_id) - print(" .qstr_source_file = %s," % self.source_file.qstr_id) + print(" .qstr_block_name_idx = %u," % self.names[0]) print( " .line_info = fun_data_%s + %u," - % (self.escaped_name, self.line_info_offset) + % (self.escaped_name, self.offset_line_info) + ) + print( + " .opcodes = fun_data_%s + %u," % (self.escaped_name, self.offset_opcodes) ) - print(" .opcodes = fun_data_%s + %u," % (self.escaped_name, self.ip)) print(" },") print(" .line_of_definition = %u," % 0) # TODO print(" #endif") @@ -416,66 +796,109 @@ class RawCode(object): print(" #endif") print("};") + global raw_code_count, raw_code_content + raw_code_count += 1 + raw_code_content += 4 * 4 + class RawCodeBytecode(RawCode): - def __init__(self, bytecode, qstrs, objs, raw_codes): + def __init__(self, cm_escaped_name, qstr_table, obj_table, fun_data): + self.obj_table = obj_table super(RawCodeBytecode, self).__init__( - MP_CODE_BYTECODE, bytecode, 0, qstrs, objs, raw_codes + cm_escaped_name, qstr_table, fun_data, 0, MP_CODE_BYTECODE ) - def freeze(self, parent_name): - self.freeze_children(parent_name) + def disassemble(self): + bc = self.fun_data + print("simple_name:", self.simple_name.str) + print(" raw bytecode:", len(bc), hexlify_to_str(bc)) + print(" prelude:", self.prelude) + print(" args:", [self.qstr_table[i].str for i in self.names[1:]]) + print(" line info:", hexlify_to_str(bc[self.offset_line_info : self.offset_opcodes])) + ip = self.offset_opcodes + while ip < len(bc): + fmt, sz, arg = mp_opcode_decode(bc, ip) + if bc[ip] == Opcodes.MP_BC_LOAD_CONST_OBJ: + arg = "%r" % self.obj_table[arg] + if fmt == MP_BC_FORMAT_QSTR: + arg = self.qstr_table[arg].str + elif fmt in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET): + pass + else: + arg = "" + print( + " %-11s %s %s" % (hexlify_to_str(bc[ip : ip + sz]), Opcodes.mapping[bc[ip]], arg) + ) + ip += sz + self.disassemble_children() + def freeze(self): # generate bytecode data - print() + bc = self.fun_data print( - "// frozen bytecode for file %s, scope %s%s" - % (self.source_file.str, parent_name, self.simple_name.str) + "// frozen bytecode for file %s, scope %s" + % (self.qstr_table[0].str, self.escaped_name) ) - print("STATIC const byte fun_data_%s[%u] = {" % (self.escaped_name, len(self.bytecode))) - print(" ", end="") - for i in range(self.ip2): - print(" 0x%02x," % self.bytecode[i], end="") - print() - print(" ", self.simple_name.qstr_id, "& 0xff,", self.simple_name.qstr_id, ">> 8,") - print(" ", self.source_file.qstr_id, "& 0xff,", self.source_file.qstr_id, ">> 8,") - print(" ", end="") - for i in range(self.ip2 + 4, self.ip): - print(" 0x%02x," % self.bytecode[i], end="") - print() - ip = self.ip - while ip < len(self.bytecode): - f, sz = mp_opcode_format(self.bytecode, ip, True) - if f == 1: - qst = self._unpack_qstr(ip + 1).qstr_id - extra = "" if sz == 3 else " 0x%02x," % self.bytecode[ip + 3] - print(" ", "0x%02x," % self.bytecode[ip], qst, "& 0xff,", qst, ">> 8,", extra) - else: - print(" ", "".join("0x%02x, " % self.bytecode[ip + i] for i in range(sz))) + print("static const byte fun_data_%s[%u] = {" % (self.escaped_name, len(bc))) + + print(" ", end="") + for b in bc[: self.offset_names]: + print("0x%02x," % b, end="") + print(" // prelude") + + print(" ", end="") + for b in bc[self.offset_names : self.offset_line_info]: + print("0x%02x," % b, end="") + print(" // names: %s" % ", ".join(self.qstr_table[i].str for i in self.names)) + + print(" ", end="") + for b in bc[self.offset_line_info : self.offset_opcodes]: + print("0x%02x," % b, end="") + print(" // code info") + + ip = self.offset_opcodes + while ip < len(bc): + fmt, sz, arg = mp_opcode_decode(bc, ip) + opcode_name = Opcodes.mapping[bc[ip]] + if fmt == MP_BC_FORMAT_QSTR: + opcode_name += " " + self.qstr_table[arg].str + elif fmt in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET): + opcode_name += " %u" % arg + print( + " %s, // %s" % (",".join("0x%02x" % b for b in bc[ip : ip + sz]), opcode_name) + ) ip += sz + print("};") - self.freeze_constants() - self.freeze_module() + self.freeze_children() + self.freeze_raw_code() + + global bc_content + bc_content += len(bc) class RawCodeNative(RawCode): def __init__( self, - code_kind, + cm_escaped_name, + qstr_table, + kind, fun_data, prelude_offset, - prelude, qstr_links, - qstrs, - objs, - raw_codes, + scope_flags, + n_pos_args, type_sig, ): super(RawCodeNative, self).__init__( - code_kind, fun_data, prelude_offset, qstrs, objs, raw_codes + cm_escaped_name, qstr_table, fun_data, prelude_offset, kind ) - self.prelude = prelude + + if kind in (MP_CODE_NATIVE_VIPER, MP_CODE_NATIVE_ASM): + self.scope_flags = scope_flags + self.n_pos_args = n_pos_args + self.qstr_links = qstr_links self.type_sig = type_sig if config.native_arch in ( @@ -502,11 +925,32 @@ class RawCodeNative(RawCode): # ARMVxxM -- two byte align. self.fun_data_attributes += " __attribute__ ((aligned (2)))" + def disassemble(self): + fun_data = self.fun_data + print("simple_name:", self.simple_name.str) + print( + " raw data:", + len(fun_data), + hexlify_to_str(fun_data[:32]), + "..." if len(fun_data) > 32 else "", + ) + if self.code_kind != MP_CODE_NATIVE_PY: + return + print(" prelude:", self.prelude) + print(" args:", [self.qstr_table[i].str for i in self.names[1:]]) + print(" line info:", fun_data[self.offset_line_info : self.offset_opcodes]) + ip = 0 + while ip < self.prelude_offset: + sz = 16 + print(" ", hexlify_to_str(fun_data[ip : min(ip + sz, self.prelude_offset)])) + ip += sz + self.disassemble_children() + def _asm_thumb_rewrite_mov(self, pc, val): - print(" (%u & 0xf0) | (%s >> 12)," % (self.bytecode[pc], val), end="") - print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.bytecode[pc + 1], val), end="") + print(" (%u & 0xf0) | (%s >> 12)," % (self.fun_data[pc], val), end="") + print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.fun_data[pc + 1], val), end="") print(" (%s & 0xff)," % (val,), end="") - print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.bytecode[pc + 3], val)) + print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.fun_data[pc + 3], val)) def _link_qstr(self, pc, kind, qst): if kind == 0: @@ -543,32 +987,22 @@ class RawCodeNative(RawCode): else: assert 0 - def freeze(self, parent_name): - if self.prelude[2] & ~0x0F: + def freeze(self): + if self.scope_flags & ~0x0F: raise FreezeError("unable to freeze code with relocations") - self.freeze_children(parent_name) - # generate native code data print() - if self.code_kind == MP_CODE_NATIVE_PY: - print( - "// frozen native code for file %s, scope %s%s" - % (self.source_file.str, parent_name, self.simple_name.str) - ) - elif self.code_kind == MP_CODE_NATIVE_VIPER: - print("// frozen viper code for scope %s" % (parent_name,)) - else: - print("// frozen assembler code for scope %s" % (parent_name,)) print( - "STATIC const byte fun_data_%s[%u] %s = {" - % (self.escaped_name, len(self.bytecode), self.fun_data_attributes) + "// frozen native code for file %s, scope %s" + % (self.qstr_table[0].str, self.escaped_name) + ) + print( + "static const byte fun_data_%s[%u] %s = {" + % (self.escaped_name, len(self.fun_data), self.fun_data_attributes) ) - if self.code_kind == MP_CODE_NATIVE_PY: - i_top = self.prelude_offset - else: - i_top = len(self.bytecode) + i_top = len(self.fun_data) i = 0 qi = 0 while i < i_top: @@ -585,229 +1019,253 @@ class RawCodeNative(RawCode): i16 = min(i16, self.qstr_links[qi][0]) print(" ", end="") for ii in range(i, i16): - print(" 0x%02x," % self.bytecode[ii], end="") + print(" 0x%02x," % self.fun_data[ii], end="") print() i = i16 - if self.code_kind == MP_CODE_NATIVE_PY: - print(" ", end="") - for i in range(self.prelude_offset, self.ip2): - print(" 0x%02x," % self.bytecode[i], end="") - print() - - print(" ", self.simple_name.qstr_id, "& 0xff,", self.simple_name.qstr_id, ">> 8,") - print(" ", self.source_file.qstr_id, "& 0xff,", self.source_file.qstr_id, ">> 8,") - - print(" ", end="") - for i in range(self.ip2 + 4, self.ip): - print(" 0x%02x," % self.bytecode[i], end="") - print() - print("};") - self.freeze_constants() - self.freeze_module(self.qstr_links, self.type_sig) + self.freeze_children() + self.freeze_raw_code(self.qstr_links, self.type_sig) -class BytecodeBuffer: - def __init__(self, size): - self.buf = bytearray(size) - self.idx = 0 +class MPYSegment: + META = 0 + QSTR = 1 + OBJ = 2 + CODE = 3 - def is_full(self): - return self.idx == len(self.buf) + def __init__(self, kind, name, start, end): + self.kind = kind + self.name = name + self.start = start + self.end = end - def append(self, b): - self.buf[self.idx] = b - self.idx += 1 +class MPYReader: + def __init__(self, filename, fileobj): + self.filename = filename + self.fileobj = fileobj -def read_byte(f, out=None): - b = bytes_cons(f.read(1))[0] - if out is not None: - out.append(b) - return b + def tell(self): + return self.fileobj.tell() + def read_byte(self): + return bytes_cons(self.fileobj.read(1))[0] -def read_uint(f, out=None): - i = 0 - while True: - b = read_byte(f, out) - i = (i << 7) | (b & 0x7F) - if b & 0x80 == 0: - break - return i + def read_bytes(self, n): + return bytes_cons(self.fileobj.read(n)) + def read_uint(self): + i = 0 + while True: + b = self.read_byte() + i = (i << 7) | (b & 0x7F) + if b & 0x80 == 0: + break + return i -def read_qstr(f, qstr_win): - ln = read_uint(f) - if ln == 0: - # static qstr - return bytes_cons(f.read(1))[0] + +def read_qstr(reader, segments): + start_pos = reader.tell() + ln = reader.read_uint() if ln & 1: - # qstr in table - return qstr_win.access(ln >> 1) + # static qstr + segments.append( + MPYSegment(MPYSegment.META, global_qstrs[ln >> 1].str, start_pos, start_pos) + ) + return ln >> 1 ln >>= 1 - data = str_cons(f.read(ln), "utf8") + start_pos = reader.tell() + data = str_cons(reader.read_bytes(ln), "utf8") + reader.read_byte() # read and discard null terminator + segments.append(MPYSegment(MPYSegment.QSTR, data, start_pos, reader.tell())) global_qstrs.append(QStrType(data)) - qstr_win.push(len(global_qstrs) - 1) return len(global_qstrs) - 1 -def read_obj(f): - obj_type = f.read(1) - if obj_type == b"e": +def read_obj(reader, segments): + obj_type = reader.read_bytes(1) + if obj_type == b"t": + return MPFunTable() + elif obj_type == b"e": return Ellipsis else: - buf = f.read(read_uint(f)) + ln = reader.read_uint() + start_pos = reader.tell() + buf = reader.read_bytes(ln) + if obj_type in (b"s", b"b"): + reader.read_byte() # read and discard null terminator if obj_type == b"s": - return str_cons(buf, "utf8") + obj = str_cons(buf, "utf8") elif obj_type == b"b": - return bytes_cons(buf) + obj = buf elif obj_type == b"i": - return int(str_cons(buf, "ascii"), 10) + obj = int(str_cons(buf, "ascii"), 10) elif obj_type == b"f": - return float(str_cons(buf, "ascii")) + obj = float(str_cons(buf, "ascii")) elif obj_type == b"c": - return complex(str_cons(buf, "ascii")) + obj = complex(str_cons(buf, "ascii")) else: - assert 0 + raise MPYReadError(reader.filename, "corrupt .mpy file") + segments.append(MPYSegment(MPYSegment.OBJ, obj, start_pos, reader.tell())) + return obj -def read_prelude(f, bytecode, qstr_win): - ( - n_state, - n_exc_stack, - scope_flags, - n_pos_args, - n_kwonly_args, - n_def_pos_args, - ) = read_prelude_sig(lambda: read_byte(f, bytecode)) - n_info, n_cell = read_prelude_size(lambda: read_byte(f, bytecode)) - read_qstr_and_pack(f, bytecode, qstr_win) # simple_name - read_qstr_and_pack(f, bytecode, qstr_win) # source_file - for _ in range(n_info - 4 + n_cell): - read_byte(f, bytecode) - return n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args - - -def read_qstr_and_pack(f, bytecode, qstr_win): - qst = read_qstr(f, qstr_win) - bytecode.append(qst & 0xFF) - bytecode.append(qst >> 8) - - -def read_bytecode(file, bytecode, qstr_win): - while not bytecode.is_full(): - op = read_byte(file, bytecode) - f, sz = mp_opcode_format(bytecode.buf, bytecode.idx - 1, False) - sz -= 1 - if f == MP_BC_FORMAT_QSTR: - read_qstr_and_pack(file, bytecode, qstr_win) - sz -= 2 - elif f == MP_BC_FORMAT_VAR_UINT: - while read_byte(file, bytecode) & 0x80: - pass - for _ in range(sz): - read_byte(file, bytecode) - - -def read_raw_code(f, qstr_win): - kind_len = read_uint(f) +def read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments): + # Read raw code header. + kind_len = reader.read_uint() kind = (kind_len & 3) + MP_CODE_BYTECODE - fun_data_len = kind_len >> 2 - fun_data = BytecodeBuffer(fun_data_len) + has_children = (kind_len >> 2) & 1 + fun_data_len = kind_len >> 3 + + # Read the body of the raw code. + file_offset = reader.tell() + fun_data = reader.read_bytes(fun_data_len) + segments_len = len(segments) if kind == MP_CODE_BYTECODE: - prelude = read_prelude(f, fun_data, qstr_win) - read_bytecode(f, fun_data, qstr_win) + # Create bytecode raw code. + rc = RawCodeBytecode(cm_escaped_name, qstr_table, obj_table, fun_data) else: - fun_data.buf[:] = f.read(fun_data_len) - + # Create native raw code. qstr_links = [] if kind in (MP_CODE_NATIVE_PY, MP_CODE_NATIVE_VIPER): - # load qstr link table - n_qstr_link = read_uint(f) + # Read qstr link table. + n_qstr_link = reader.read_uint() for _ in range(n_qstr_link): - off = read_uint(f) - qst = read_qstr(f, qstr_win) + off = reader.read_uint() + qst = read_qstr(reader, segments) qstr_links.append((off >> 2, off & 3, qst)) - type_sig = 0 + native_scope_flags = 0 + native_n_pos_args = 0 + native_type_sig = 0 if kind == MP_CODE_NATIVE_PY: - prelude_offset = read_uint(f) - _, name_idx, prelude = extract_prelude(fun_data.buf, prelude_offset) - fun_data.idx = name_idx # rewind to where qstrs are in prelude - read_qstr_and_pack(f, fun_data, qstr_win) # simple_name - read_qstr_and_pack(f, fun_data, qstr_win) # source_file + prelude_offset = reader.read_uint() else: - prelude_offset = None - scope_flags = read_uint(f) - n_pos_args = 0 - if kind == MP_CODE_NATIVE_ASM: - n_pos_args = read_uint(f) - type_sig = read_uint(f) - prelude = (None, None, scope_flags, n_pos_args, 0) - - qstrs = [] - objs = [] - raw_codes = [] - if kind != MP_CODE_NATIVE_ASM: - # load constant table - n_obj = read_uint(f) - n_raw_code = read_uint(f) - qstrs = [read_qstr(f, qstr_win) for _ in range(prelude[3] + prelude[4])] - if kind != MP_CODE_BYTECODE: - objs.append(MPFunTable) - objs.extend([read_obj(f) for _ in range(n_obj)]) - raw_codes = [read_raw_code(f, qstr_win) for _ in range(n_raw_code)] + prelude_offset = 0 + native_scope_flags = reader.read_uint() + if kind == MP_CODE_NATIVE_VIPER: + # Read any additional sections for native viper. + if native_scope_flags & MP_SCOPE_FLAG_VIPERRODATA: + rodata_size = reader.read_uint() + if native_scope_flags & MP_SCOPE_FLAG_VIPERBSS: + bss_size = reader.read_uint() + if native_scope_flags & MP_SCOPE_FLAG_VIPERRODATA: + reader.read_bytes(rodata_size) + if native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC: + while True: + op = reader.read_byte() + if op == 0xFF: + break + if op & 1: + addr = reader.read_uint() + op >>= 1 + if op <= 5 and op & 1: + n = reader.read_uint() + else: + assert kind == MP_CODE_NATIVE_ASM + native_n_pos_args = reader.read_uint() + native_type_sig = reader.read_uint() - if kind == MP_CODE_BYTECODE: - return RawCodeBytecode(fun_data.buf, qstrs, objs, raw_codes) - else: - return RawCodeNative( + rc = RawCodeNative( + cm_escaped_name, + qstr_table, kind, - fun_data.buf, + fun_data, prelude_offset, - prelude, qstr_links, - qstrs, - objs, - raw_codes, - type_sig, + native_scope_flags, + native_n_pos_args, + native_type_sig, ) + # Add a segment for the raw code data. + segments.insert( + segments_len, + MPYSegment(MPYSegment.CODE, rc.simple_name.str, file_offset, file_offset + fun_data_len), + ) + + # Read children, if there are any. + rc.children = [] + if has_children: + n_children = reader.read_uint() + for _ in range(n_children): + rc.children.append( + read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments) + ) + + return rc + def read_mpy(filename): - with open(filename, "rb") as f: - header = bytes_cons(f.read(4)) + with open(filename, "rb") as fileobj: + reader = MPYReader(filename, fileobj) + segments = [] + + # Read and verify the header. + header = reader.read_bytes(4) if header[0] != ord("M"): - raise Exception("not a valid .mpy file") + raise MPYReadError(filename, "not a valid .mpy file") if header[1] != config.MPY_VERSION: - raise Exception("incompatible .mpy version") + raise MPYReadError(filename, "incompatible .mpy version") feature_byte = header[2] - qw_size = read_uint(f) config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_byte & 2) != 0 mpy_native_arch = feature_byte >> 2 if mpy_native_arch != MP_NATIVE_ARCH_NONE: if config.native_arch == MP_NATIVE_ARCH_NONE: config.native_arch = mpy_native_arch elif config.native_arch != mpy_native_arch: - raise Exception("native architecture mismatch") + raise MPYReadError(filename, "native architecture mismatch") config.mp_small_int_bits = header[3] - qstr_win = QStrWindow(qw_size) - rc = read_raw_code(f, qstr_win) - rc.mpy_source_file = filename - rc.qstr_win_size = qw_size - return rc + + # Read number of qstrs, and number of objects. + n_qstr = reader.read_uint() + n_obj = reader.read_uint() + + # Read qstrs and construct qstr table. + qstr_table = [] + for i in range(n_qstr): + q = read_qstr(reader, segments) + qstr_table.append(global_qstrs[q]) + + # Read objects and construct object table. + obj_table = [] + for i in range(n_obj): + obj_table.append(read_obj(reader, segments)) + + # Compute the compiled-module escaped name. + cm_escaped_name = qstr_table[0].str.replace("/", "_")[:-3] + + # Read the outer raw code, which will in turn read all its children. + raw_code_file_offset = reader.tell() + raw_code = read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments) + + # Create the outer-level compiled module representing the whole .mpy file. + return CompiledModule( + filename, + segments, + header, + qstr_table, + obj_table, + raw_code, + raw_code_file_offset, + cm_escaped_name, + ) -def dump_mpy(raw_codes): - for rc in raw_codes: - rc.dump() +def hexdump_mpy(compiled_modules): + for cm in compiled_modules: + cm.hexdump() -def freeze_mpy(base_qstrs, raw_codes): +def disassemble_mpy(compiled_modules): + for cm in compiled_modules: + cm.disassemble() + + +def freeze_mpy(base_qstrs, compiled_modules): # add to qstrs new = {} for q in global_qstrs: @@ -864,6 +1322,17 @@ def freeze_mpy(base_qstrs, raw_codes): # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len qstr_pool_alloc = min(len(new), 10) + global bc_content, const_str_content, const_int_content, const_obj_content, const_table_qstr_content, const_table_ptr_content, raw_code_count, raw_code_content + qstr_content = 0 + bc_content = 0 + const_str_content = 0 + const_int_content = 0 + const_obj_content = 0 + const_table_qstr_content = 0 + const_table_ptr_content = 0 + raw_code_count = 0 + raw_code_content = 0 + print() print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {") qstr_size = {"metadata": 0, "data": 0} @@ -892,32 +1361,49 @@ def freeze_mpy(base_qstrs, raw_codes): print(" {") for _, _, qstr, qbytes in new: print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes)) + qstr_content += ( + config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1 + ) print(" },") print("};") - for rc in raw_codes: - rc.freeze(rc.source_file.str.replace("/", "_")[:-3] + "_") + # Freeze all modules. + for idx, cm in enumerate(compiled_modules): + cm.freeze(idx) + + # Print separator, separating individual modules from global data structures. + print() + print("/" * 80) + print("// collection of all frozen modules") + # Define the string of frozen module names. print() print("const char mp_frozen_names[] = {") - print("#ifdef MP_FROZEN_STR_NAMES") + print(" #ifdef MP_FROZEN_STR_NAMES") # makemanifest.py might also include some frozen string content. - print("MP_FROZEN_STR_NAMES") - print("#endif") - for rc in raw_codes: - module_name = rc.source_file.str - print('"%s\\0"' % module_name) - print('"\\0"};') - - print("const mp_raw_code_t *const mp_frozen_mpy_content[] = {") - for rc in raw_codes: - print(" &raw_code_%s," % rc.escaped_name) + print(" MP_FROZEN_STR_NAMES") + print(" #endif") + mp_frozen_mpy_names_content = 1 + for cm in compiled_modules: + module_name = cm.source_file.str + print(' "%s\\0"' % module_name) + mp_frozen_mpy_names_content += len(cm.source_file.str) + 1 + print(' "\\0"') + print("};") + + # Define the array of pointers to frozen module content. + print() + print("const mp_frozen_module_t *const mp_frozen_mpy_content[] = {") + for cm in compiled_modules: + print(" &frozen_module_%s," % cm.escaped_name) print("};") + mp_frozen_mpy_content_size = len(compiled_modules * 4) # If a port defines MICROPY_FROZEN_LIST_ITEM then list all modules wrapped in that macro. + print() print("#ifdef MICROPY_FROZEN_LIST_ITEM") - for rc in raw_codes: - module_name = rc.source_file.str + for cm in compiled_modules: + module_name = cm.source_file.str if module_name.endswith("/__init__.py"): short_name = module_name[: -len("/__init__.py")] else: @@ -925,45 +1411,94 @@ def freeze_mpy(base_qstrs, raw_codes): print('MICROPY_FROZEN_LIST_ITEM("%s", "%s")' % (short_name, module_name)) print("#endif") + print() + print("/*") + print("byte sizes:") + print("qstr content: %d unique, %d bytes" % (len(new), qstr_content)) + print("bc content: %d" % bc_content) + print("const str content: %d" % const_str_content) + print("const int content: %d" % const_int_content) + print("const obj content: %d" % const_obj_content) + print( + "const table qstr content: %d entries, %d bytes" + % (const_table_qstr_content, const_table_qstr_content * 4) + ) + print( + "const table ptr content: %d entries, %d bytes" + % (const_table_ptr_content, const_table_ptr_content * 4) + ) + print("raw code content: %d * 4 = %d" % (raw_code_count, raw_code_content)) + print("mp_frozen_mpy_names_content: %d" % mp_frozen_mpy_names_content) + print("mp_frozen_mpy_content_size: %d" % mp_frozen_mpy_content_size) + print( + "total: %d" + % ( + qstr_content + + bc_content + + const_str_content + + const_int_content + + const_obj_content + + const_table_qstr_content * 4 + + const_table_ptr_content * 4 + + raw_code_content + + mp_frozen_mpy_names_content + + mp_frozen_mpy_content_size + ) + ) + print("*/") + def merge_mpy(raw_codes, output_file): - assert len(raw_codes) <= 31 # so var-uints all fit in 1 byte + assert len(raw_codes) <= 2 # so var-uints all fit in 1 byte merged_mpy = bytearray() if len(raw_codes) == 1: with open(raw_codes[0].mpy_source_file, "rb") as f: merged_mpy.extend(f.read()) else: - header = bytearray(5) + main_rc = None + for rc in raw_codes: + if len(rc.qstr_table) > 1 or len(rc.obj_table) > 0: + # Must use qstr_table and obj_table from this raw_code + if main_rc is not None: + raise Exception( + "can't merge files when more than one has a populated qstr or obj table" + ) + main_rc = rc + if main_rc is None: + main_rc = raw_codes[0] + + header = bytearray(4) header[0] = ord("M") header[1] = config.MPY_VERSION header[2] = config.native_arch << 2 | config.MICROPY_PY_BUILTINS_STR_UNICODE << 1 header[3] = config.mp_small_int_bits - header[4] = 32 # qstr_win_size merged_mpy.extend(header) + # Copy n_qstr, n_obj, qstr_table, obj_table from main_rc. + with open(main_rc.mpy_source_file, "rb") as f: + data = f.read(main_rc.raw_code_file_offset) + merged_mpy.extend(data[4:]) + bytecode = bytearray() - bytecode_len = 6 + len(raw_codes) * 5 + 2 - bytecode.append(bytecode_len << 2) # kind and length + bytecode_len = 3 + len(raw_codes) * 5 + 2 + bytecode.append(bytecode_len << 3 | 1 << 2) # kind, has_children and length bytecode.append(0b00000000) # signature prelude - bytecode.append(0b00001000) # size prelude - bytecode.extend(b"\x00\x01") # MP_QSTR_ - bytecode.extend(b"\x00\x01") # MP_QSTR_ + bytecode.append(0b00000010) # size prelude; n_info=1 + bytecode.extend(b"\x00") # simple_name: qstr index 0 (will use source filename) for idx in range(len(raw_codes)): bytecode.append(0x32) # MP_BC_MAKE_FUNCTION bytecode.append(idx) # index raw code bytecode.extend(b"\x34\x00\x59") # MP_BC_CALL_FUNCTION, 0 args, MP_BC_POP_TOP bytecode.extend(b"\x51\x63") # MP_BC_LOAD_NONE, MP_BC_RETURN_VALUE - bytecode.append(0) # n_obj - bytecode.append(len(raw_codes)) # n_raw_code - merged_mpy.extend(bytecode) + merged_mpy.append(len(raw_codes)) # n_children + for rc in raw_codes: with open(rc.mpy_source_file, "rb") as f: - f.read(4) # skip header - read_uint(f) # skip qstr_win_size + f.seek(rc.raw_code_file_offset) data = f.read() # read rest of mpy file merged_mpy.extend(data) @@ -978,7 +1513,12 @@ def main(): import argparse cmd_parser = argparse.ArgumentParser(description="A tool to work with MicroPython .mpy files.") - cmd_parser.add_argument("-d", "--dump", action="store_true", help="dump contents of files") + cmd_parser.add_argument( + "-x", "--hexdump", action="store_true", help="output an annotated hex dump of files" + ) + cmd_parser.add_argument( + "-d", "--disassemble", action="store_true", help="output disassembled contents of files" + ) cmd_parser.add_argument("-f", "--freeze", action="store_true", help="freeze files") cmd_parser.add_argument( "--merge", action="store_true", help="merge multiple .mpy files into one" @@ -1018,20 +1558,32 @@ def main(): else: config.MICROPY_QSTR_BYTES_IN_LEN = 1 config.MICROPY_QSTR_BYTES_IN_HASH = 1 - base_qstrs = {} + base_qstrs = list(qstrutil.static_qstr_list) + + # Load all .mpy files. + try: + compiled_modules = [read_mpy(file) for file in args.files] + except MPYReadError as er: + print(er, file=sys.stderr) + sys.exit(1) - raw_codes = [read_mpy(file) for file in args.files] + if args.hexdump: + hexdump_mpy(compiled_modules) - if args.dump: - dump_mpy(raw_codes) - elif args.freeze: + if args.disassemble: + if args.hexdump: + print() + disassemble_mpy(compiled_modules) + + if args.freeze: try: - freeze_mpy(base_qstrs, raw_codes) + freeze_mpy(base_qstrs, compiled_modules) except FreezeError as er: print(er, file=sys.stderr) sys.exit(1) - elif args.merge: - merged_mpy = merge_mpy(raw_codes, args.output) + + if args.merge: + merge_mpy(compiled_modules, args.output) if __name__ == "__main__": |