summaryrefslogtreecommitdiffstatshomepage
path: root/tools/mpy-tool.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/mpy-tool.py')
-rwxr-xr-xtools/mpy-tool.py1308
1 files changed, 930 insertions, 378 deletions
diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py
index 58a4a75c91..da6352dab7 100755
--- a/tools/mpy-tool.py
+++ b/tools/mpy-tool.py
@@ -29,27 +29,49 @@ from __future__ import print_function
import platform
if platform.python_version_tuple()[0] == "2":
- str_cons = lambda val, enc=None: val
+ from binascii import hexlify as hexlify_py2
+
+ str_cons = lambda val, enc=None: str(val)
bytes_cons = lambda val, enc=None: bytearray(val)
is_str_type = lambda o: type(o) is str
is_bytes_type = lambda o: type(o) is bytearray
is_int_type = lambda o: type(o) is int or type(o) is long
+
+ def hexlify_to_str(b):
+ x = hexlify_py2(b)
+ return ":".join(x[i : i + 2] for i in range(0, len(x), 2))
+
else:
+ from binascii import hexlify
+
str_cons = str
bytes_cons = bytes
is_str_type = lambda o: type(o) is str
is_bytes_type = lambda o: type(o) is bytes
is_int_type = lambda o: type(o) is int
+
+ def hexlify_to_str(b):
+ return str(hexlify(b, ":"), "ascii")
+
+
# end compatibility code
import sys
import struct
-from collections import namedtuple
sys.path.append(sys.path[0] + "/../py")
import makeqstrdata as qstrutil
+class MPYReadError(Exception):
+ def __init__(self, filename, msg):
+ self.filename = filename
+ self.msg = msg
+
+ def __str__(self):
+ return "%s: %s" % (self.filename, self.msg)
+
+
class FreezeError(Exception):
def __init__(self, rawcode, msg):
self.rawcode = rawcode
@@ -60,7 +82,7 @@ class FreezeError(Exception):
class Config:
- MPY_VERSION = 5
+ MPY_VERSION = 6
MICROPY_LONGINT_IMPL_NONE = 0
MICROPY_LONGINT_IMPL_LONGLONG = 1
MICROPY_LONGINT_IMPL_MPZ = 2
@@ -82,20 +104,6 @@ for n in qstrutil.static_qstr_list:
global_qstrs.append(QStrType(n))
-class QStrWindow:
- def __init__(self, size):
- self.window = []
- self.size = size
-
- def push(self, val):
- self.window = [val] + self.window[: self.size - 1]
-
- def access(self, idx):
- val = self.window[idx]
- self.window = [val] + self.window[:idx] + self.window[idx + 1 :]
- return val
-
-
MP_CODE_BYTECODE = 2
MP_CODE_NATIVE_PY = 3
MP_CODE_NATIVE_VIPER = 4
@@ -113,6 +121,10 @@ MP_NATIVE_ARCH_ARMV7EMDP = 8
MP_NATIVE_ARCH_XTENSA = 9
MP_NATIVE_ARCH_XTENSAWIN = 10
+MP_SCOPE_FLAG_VIPERRELOC = 0x10
+MP_SCOPE_FLAG_VIPERRODATA = 0x20
+MP_SCOPE_FLAG_VIPERBSS = 0x40
+
MP_BC_MASK_EXTRA_BYTE = 0x9E
MP_BC_FORMAT_BYTE = 0
@@ -120,11 +132,180 @@ MP_BC_FORMAT_QSTR = 1
MP_BC_FORMAT_VAR_UINT = 2
MP_BC_FORMAT_OFFSET = 3
-# extra byte if caching enabled:
-MP_BC_LOAD_NAME = 0x11
-MP_BC_LOAD_GLOBAL = 0x12
-MP_BC_LOAD_ATTR = 0x13
-MP_BC_STORE_ATTR = 0x18
+mp_unary_op_method_name = (
+ "__pos__",
+ "__neg__",
+ "__invert__",
+ "<not>",
+)
+
+mp_binary_op_method_name = (
+ "__lt__",
+ "__gt__",
+ "__eq__",
+ "__le__",
+ "__ge__",
+ "__ne__",
+ "<in>",
+ "<is>",
+ "<exception match>",
+ "__ior__",
+ "__ixor__",
+ "__iand__",
+ "__ilshift__",
+ "__irshift__",
+ "__iadd__",
+ "__isub__",
+ "__imul__",
+ "__imatmul__",
+ "__ifloordiv__",
+ "__itruediv__",
+ "__imod__",
+ "__ipow__",
+ "__or__",
+ "__xor__",
+ "__and__",
+ "__lshift__",
+ "__rshift__",
+ "__add__",
+ "__sub__",
+ "__mul__",
+ "__matmul__",
+ "__floordiv__",
+ "__truediv__",
+ "__mod__",
+ "__pow__",
+)
+
+
+class Opcodes:
+ # fmt: off
+ # Load, Store, Delete, Import, Make, Build, Unpack, Call, Jump, Exception, For, sTack, Return, Yield, Op
+ MP_BC_BASE_RESERVED = (0x00) # ----------------
+ MP_BC_BASE_QSTR_O = (0x10) # LLLLLLSSSDDII---
+ MP_BC_BASE_VINT_E = (0x20) # MMLLLLSSDDBBBBBB
+ MP_BC_BASE_VINT_O = (0x30) # UUMMCCCC--------
+ MP_BC_BASE_JUMP_E = (0x40) # J-JJJJJEEEEF----
+ MP_BC_BASE_BYTE_O = (0x50) # LLLLSSDTTTTTEEFF
+ MP_BC_BASE_BYTE_E = (0x60) # --BREEEYYI------
+ MP_BC_LOAD_CONST_SMALL_INT_MULTI = (0x70) # LLLLLLLLLLLLLLLL
+ # = (0x80) # LLLLLLLLLLLLLLLL
+ # = (0x90) # LLLLLLLLLLLLLLLL
+ # = (0xa0) # LLLLLLLLLLLLLLLL
+ MP_BC_LOAD_FAST_MULTI = (0xb0) # LLLLLLLLLLLLLLLL
+ MP_BC_STORE_FAST_MULTI = (0xc0) # SSSSSSSSSSSSSSSS
+ MP_BC_UNARY_OP_MULTI = (0xd0) # OOOOOOO
+ MP_BC_BINARY_OP_MULTI = (0xd7) # OOOOOOOOO
+ # = (0xe0) # OOOOOOOOOOOOOOOO
+ # = (0xf0) # OOOOOOOOOO------
+
+ MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM = 64
+ MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS = 16
+ MP_BC_LOAD_FAST_MULTI_NUM = 16
+ MP_BC_STORE_FAST_MULTI_NUM = 16
+ MP_BC_UNARY_OP_MULTI_NUM = 4 # MP_UNARY_OP_NUM_BYTECODE
+ MP_BC_BINARY_OP_MULTI_NUM = 35 # MP_BINARY_OP_NUM_BYTECODE
+
+ MP_BC_LOAD_CONST_FALSE = (MP_BC_BASE_BYTE_O + 0x00)
+ MP_BC_LOAD_CONST_NONE = (MP_BC_BASE_BYTE_O + 0x01)
+ MP_BC_LOAD_CONST_TRUE = (MP_BC_BASE_BYTE_O + 0x02)
+ MP_BC_LOAD_CONST_SMALL_INT = (MP_BC_BASE_VINT_E + 0x02) # signed var-int
+ MP_BC_LOAD_CONST_STRING = (MP_BC_BASE_QSTR_O + 0x00) # qstr
+ MP_BC_LOAD_CONST_OBJ = (MP_BC_BASE_VINT_E + 0x03) # ptr
+ MP_BC_LOAD_NULL = (MP_BC_BASE_BYTE_O + 0x03)
+
+ MP_BC_LOAD_FAST_N = (MP_BC_BASE_VINT_E + 0x04) # uint
+ MP_BC_LOAD_DEREF = (MP_BC_BASE_VINT_E + 0x05) # uint
+ MP_BC_LOAD_NAME = (MP_BC_BASE_QSTR_O + 0x01) # qstr
+ MP_BC_LOAD_GLOBAL = (MP_BC_BASE_QSTR_O + 0x02) # qstr
+ MP_BC_LOAD_ATTR = (MP_BC_BASE_QSTR_O + 0x03) # qstr
+ MP_BC_LOAD_METHOD = (MP_BC_BASE_QSTR_O + 0x04) # qstr
+ MP_BC_LOAD_SUPER_METHOD = (MP_BC_BASE_QSTR_O + 0x05) # qstr
+ MP_BC_LOAD_BUILD_CLASS = (MP_BC_BASE_BYTE_O + 0x04)
+ MP_BC_LOAD_SUBSCR = (MP_BC_BASE_BYTE_O + 0x05)
+
+ MP_BC_STORE_FAST_N = (MP_BC_BASE_VINT_E + 0x06) # uint
+ MP_BC_STORE_DEREF = (MP_BC_BASE_VINT_E + 0x07) # uint
+ MP_BC_STORE_NAME = (MP_BC_BASE_QSTR_O + 0x06) # qstr
+ MP_BC_STORE_GLOBAL = (MP_BC_BASE_QSTR_O + 0x07) # qstr
+ MP_BC_STORE_ATTR = (MP_BC_BASE_QSTR_O + 0x08) # qstr
+ MP_BC_STORE_SUBSCR = (MP_BC_BASE_BYTE_O + 0x06)
+
+ MP_BC_DELETE_FAST = (MP_BC_BASE_VINT_E + 0x08) # uint
+ MP_BC_DELETE_DEREF = (MP_BC_BASE_VINT_E + 0x09) # uint
+ MP_BC_DELETE_NAME = (MP_BC_BASE_QSTR_O + 0x09) # qstr
+ MP_BC_DELETE_GLOBAL = (MP_BC_BASE_QSTR_O + 0x0a) # qstr
+
+ MP_BC_DUP_TOP = (MP_BC_BASE_BYTE_O + 0x07)
+ MP_BC_DUP_TOP_TWO = (MP_BC_BASE_BYTE_O + 0x08)
+ MP_BC_POP_TOP = (MP_BC_BASE_BYTE_O + 0x09)
+ MP_BC_ROT_TWO = (MP_BC_BASE_BYTE_O + 0x0a)
+ MP_BC_ROT_THREE = (MP_BC_BASE_BYTE_O + 0x0b)
+
+ MP_BC_JUMP = (MP_BC_BASE_JUMP_E + 0x02) # rel byte code offset, 16-bit signed, in excess
+ MP_BC_POP_JUMP_IF_TRUE = (MP_BC_BASE_JUMP_E + 0x03) # rel byte code offset, 16-bit signed, in excess
+ MP_BC_POP_JUMP_IF_FALSE = (MP_BC_BASE_JUMP_E + 0x04) # rel byte code offset, 16-bit signed, in excess
+ MP_BC_JUMP_IF_TRUE_OR_POP = (MP_BC_BASE_JUMP_E + 0x05) # rel byte code offset, 16-bit signed, in excess
+ MP_BC_JUMP_IF_FALSE_OR_POP = (MP_BC_BASE_JUMP_E + 0x06) # rel byte code offset, 16-bit signed, in excess
+ MP_BC_UNWIND_JUMP = (MP_BC_BASE_JUMP_E + 0x00) # rel byte code offset, 16-bit signed, in excess; then a byte
+ MP_BC_SETUP_WITH = (MP_BC_BASE_JUMP_E + 0x07) # rel byte code offset, 16-bit unsigned
+ MP_BC_SETUP_EXCEPT = (MP_BC_BASE_JUMP_E + 0x08) # rel byte code offset, 16-bit unsigned
+ MP_BC_SETUP_FINALLY = (MP_BC_BASE_JUMP_E + 0x09) # rel byte code offset, 16-bit unsigned
+ MP_BC_POP_EXCEPT_JUMP = (MP_BC_BASE_JUMP_E + 0x0a) # rel byte code offset, 16-bit unsigned
+ MP_BC_FOR_ITER = (MP_BC_BASE_JUMP_E + 0x0b) # rel byte code offset, 16-bit unsigned
+ MP_BC_WITH_CLEANUP = (MP_BC_BASE_BYTE_O + 0x0c)
+ MP_BC_END_FINALLY = (MP_BC_BASE_BYTE_O + 0x0d)
+ MP_BC_GET_ITER = (MP_BC_BASE_BYTE_O + 0x0e)
+ MP_BC_GET_ITER_STACK = (MP_BC_BASE_BYTE_O + 0x0f)
+
+ MP_BC_BUILD_TUPLE = (MP_BC_BASE_VINT_E + 0x0a) # uint
+ MP_BC_BUILD_LIST = (MP_BC_BASE_VINT_E + 0x0b) # uint
+ MP_BC_BUILD_MAP = (MP_BC_BASE_VINT_E + 0x0c) # uint
+ MP_BC_STORE_MAP = (MP_BC_BASE_BYTE_E + 0x02)
+ MP_BC_BUILD_SET = (MP_BC_BASE_VINT_E + 0x0d) # uint
+ MP_BC_BUILD_SLICE = (MP_BC_BASE_VINT_E + 0x0e) # uint
+ MP_BC_STORE_COMP = (MP_BC_BASE_VINT_E + 0x0f) # uint
+ MP_BC_UNPACK_SEQUENCE = (MP_BC_BASE_VINT_O + 0x00) # uint
+ MP_BC_UNPACK_EX = (MP_BC_BASE_VINT_O + 0x01) # uint
+
+ MP_BC_RETURN_VALUE = (MP_BC_BASE_BYTE_E + 0x03)
+ MP_BC_RAISE_LAST = (MP_BC_BASE_BYTE_E + 0x04)
+ MP_BC_RAISE_OBJ = (MP_BC_BASE_BYTE_E + 0x05)
+ MP_BC_RAISE_FROM = (MP_BC_BASE_BYTE_E + 0x06)
+ MP_BC_YIELD_VALUE = (MP_BC_BASE_BYTE_E + 0x07)
+ MP_BC_YIELD_FROM = (MP_BC_BASE_BYTE_E + 0x08)
+
+ MP_BC_MAKE_FUNCTION = (MP_BC_BASE_VINT_O + 0x02) # uint
+ MP_BC_MAKE_FUNCTION_DEFARGS = (MP_BC_BASE_VINT_O + 0x03) # uint
+ MP_BC_MAKE_CLOSURE = (MP_BC_BASE_VINT_E + 0x00) # uint; extra byte
+ MP_BC_MAKE_CLOSURE_DEFARGS = (MP_BC_BASE_VINT_E + 0x01) # uint; extra byte
+ MP_BC_CALL_FUNCTION = (MP_BC_BASE_VINT_O + 0x04) # uint
+ MP_BC_CALL_FUNCTION_VAR_KW = (MP_BC_BASE_VINT_O + 0x05) # uint
+ MP_BC_CALL_METHOD = (MP_BC_BASE_VINT_O + 0x06) # uint
+ MP_BC_CALL_METHOD_VAR_KW = (MP_BC_BASE_VINT_O + 0x07) # uint
+
+ MP_BC_IMPORT_NAME = (MP_BC_BASE_QSTR_O + 0x0b) # qstr
+ MP_BC_IMPORT_FROM = (MP_BC_BASE_QSTR_O + 0x0c) # qstr
+ MP_BC_IMPORT_STAR = (MP_BC_BASE_BYTE_E + 0x09)
+ # fmt: on
+
+ # Create a dict mapping opcode value to opcode name.
+ mapping = ["unknown" for _ in range(256)]
+ for op_name in list(locals()):
+ if op_name.startswith("MP_BC_"):
+ mapping[locals()[op_name]] = op_name[len("MP_BC_") :]
+ for i in range(MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM):
+ name = "LOAD_CONST_SMALL_INT %d" % (i - MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS)
+ mapping[MP_BC_LOAD_CONST_SMALL_INT_MULTI + i] = name
+ for i in range(MP_BC_LOAD_FAST_MULTI_NUM):
+ mapping[MP_BC_LOAD_FAST_MULTI + i] = "LOAD_FAST %d" % i
+ for i in range(MP_BC_STORE_FAST_MULTI_NUM):
+ mapping[MP_BC_STORE_FAST_MULTI + i] = "STORE_FAST %d" % i
+ for i in range(MP_BC_UNARY_OP_MULTI_NUM):
+ mapping[MP_BC_UNARY_OP_MULTI + i] = "UNARY_OP %d %s" % (i, mp_unary_op_method_name[i])
+ for i in range(MP_BC_BINARY_OP_MULTI_NUM):
+ mapping[MP_BC_BINARY_OP_MULTI + i] = "BINARY_OP %d %s" % (i, mp_binary_op_method_name[i])
+
# this function mirrors that in py/bc.c
def mp_opcode_format(bytecode, ip, count_var_uint):
@@ -147,6 +328,26 @@ def mp_opcode_format(bytecode, ip, count_var_uint):
return f, ip - ip_start
+def mp_opcode_decode(bytecode, ip):
+ opcode = bytecode[ip]
+ ip_start = ip
+ f = (0x000003A4 >> (2 * ((opcode) >> 4))) & 3
+ extra_byte = (opcode & MP_BC_MASK_EXTRA_BYTE) == 0
+ ip += 1
+ arg = 0
+ if f in (MP_BC_FORMAT_QSTR, MP_BC_FORMAT_VAR_UINT):
+ arg = bytecode[ip] & 0x7F
+ while bytecode[ip] & 0x80 != 0:
+ ip += 1
+ arg = arg << 7 | bytecode[ip] & 0x7F
+ ip += 1
+ elif f == MP_BC_FORMAT_OFFSET:
+ arg = bytecode[ip] | bytecode[ip + 1] << 8
+ ip += 2
+ ip += extra_byte
+ return f, ip - ip_start, arg
+
+
def read_prelude_sig(read_byte):
z = read_byte()
# xSSSSEAA
@@ -201,6 +402,7 @@ def extract_prelude(bytecode, ip):
n_kwonly_args,
n_def_pos_args,
) = read_prelude_sig(local_read_byte)
+
n_info, n_cell = read_prelude_size(local_read_byte)
ip = ip_ref[0]
@@ -208,74 +410,186 @@ def extract_prelude(bytecode, ip):
ip = ip2 + n_info + n_cell
# ip now points to first opcode
# ip2 points to simple_name qstr
- return ip, ip2, (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args)
-
-class MPFunTable:
- pass
+ # Extract simple_name and argument qstrs (var uints).
+ args = []
+ for arg_num in range(1 + n_pos_args + n_kwonly_args):
+ value = 0
+ while True:
+ b = local_read_byte()
+ value = (value << 7) | (b & 0x7F)
+ if b & 0x80 == 0:
+ break
+ args.append(value)
+
+ return (
+ ip2,
+ ip,
+ ip_ref[0],
+ (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args),
+ args,
+ )
-class RawCode(object):
- # a set of all escaped names, to make sure they are unique
- escaped_names = set()
+class MPFunTable:
+ def __repr__(self):
+ return "mp_fun_table"
- # convert code kind number to string
- code_kind_str = {
- MP_CODE_BYTECODE: "MP_CODE_BYTECODE",
- MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY",
- MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER",
- MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM",
- }
- def __init__(self, code_kind, bytecode, prelude_offset, qstrs, objs, raw_codes):
- # set core variables
- self.code_kind = code_kind
- self.bytecode = bytecode
- self.prelude_offset = prelude_offset
- self.qstrs = qstrs
- self.objs = objs
- self.raw_codes = raw_codes
-
- if self.prelude_offset is None:
- # no prelude, assign a dummy simple_name
- self.prelude_offset = 0
- self.simple_name = global_qstrs[1]
- else:
- # extract prelude
- self.ip, self.ip2, self.prelude = extract_prelude(self.bytecode, self.prelude_offset)
- self.simple_name = self._unpack_qstr(self.ip2)
- self.source_file = self._unpack_qstr(self.ip2 + 2)
- self.line_info_offset = self.ip2 + 4
+class CompiledModule:
+ def __init__(
+ self,
+ mpy_source_file,
+ mpy_segments,
+ header,
+ qstr_table,
+ obj_table,
+ raw_code,
+ raw_code_file_offset,
+ escaped_name,
+ ):
+ self.mpy_source_file = mpy_source_file
+ self.mpy_segments = mpy_segments
+ self.source_file = qstr_table[0]
+ self.header = header
+ self.qstr_table = qstr_table
+ self.obj_table = obj_table
+ self.raw_code_file_offset = raw_code_file_offset
+ self.raw_code = raw_code
+ self.escaped_name = escaped_name
def _unpack_qstr(self, ip):
qst = self.bytecode[ip] | self.bytecode[ip + 1] << 8
return global_qstrs[qst]
- def dump(self):
- # dump children first
- for rc in self.raw_codes:
- rc.freeze("")
- # TODO
+ def hexdump(self):
+ with open(self.mpy_source_file, "rb") as f:
+ WIDTH = 16
+ COL_OFF = "\033[0m"
+ COL_TABLE = (
+ ("", ""), # META
+ ("\033[0;31m", "\033[0;91m"), # QSTR
+ ("\033[0;32m", "\033[0;92m"), # OBJ
+ ("\033[0;34m", "\033[0;94m"), # CODE
+ )
+ cur_col = ""
+ cur_col_index = 0
+ offset = 0
+ segment_index = 0
+ while True:
+ data = bytes_cons(f.read(WIDTH))
+ if not data:
+ break
+
+ # Print out the hex dump of this line of data.
+ line_hex = cur_col
+ line_chr = cur_col
+ line_comment = ""
+ for i in range(len(data)):
+ # Determine the colour of the data, if any, and the line comment.
+ while segment_index < len(self.mpy_segments):
+ if offset + i == self.mpy_segments[segment_index].start:
+ cur_col = COL_TABLE[self.mpy_segments[segment_index].kind][
+ cur_col_index
+ ]
+ cur_col_index = 1 - cur_col_index
+ line_hex += cur_col
+ line_chr += cur_col
+ line_comment += " %s%s%s" % (
+ cur_col,
+ self.mpy_segments[segment_index].name,
+ COL_OFF,
+ )
+ if offset + i == self.mpy_segments[segment_index].end:
+ cur_col = ""
+ line_hex += COL_OFF
+ line_chr += COL_OFF
+ segment_index += 1
+ else:
+ break
+
+ # Add to the hex part of the line.
+ if i % 2 == 0:
+ line_hex += " "
+ line_hex += "%02x" % data[i]
+
+ # Add to the characters part of the line.
+ if 0x20 <= data[i] <= 0x7E:
+ line_chr += "%s" % chr(data[i])
+ else:
+ line_chr += "."
+
+ # Print out this line.
+ if cur_col:
+ line_hex += COL_OFF
+ line_chr += COL_OFF
+ pad = " " * ((WIDTH - len(data)) * 5 // 2)
+ print("%08x:%s%s %s %s" % (offset, line_hex, pad, line_chr, line_comment))
+ offset += WIDTH
+
+ def disassemble(self):
+ print("mpy_source_file:", self.mpy_source_file)
+ print("source_file:", self.source_file.str)
+ print("header:", hexlify_to_str(self.header))
+ print("qstr_table[%u]:" % len(self.qstr_table))
+ for q in self.qstr_table:
+ print(" %s" % q.str)
+ print("obj_table:", self.obj_table)
+ self.raw_code.disassemble()
+
+ def freeze(self, compiled_module_index):
+ print()
+ print("/" * 80)
+ print("// frozen module %s" % self.escaped_name)
+ print("// - original source file: %s" % self.mpy_source_file)
+ print("// - frozen file name: %s" % self.source_file.str)
+ print("// - .mpy header: %s" % ":".join("%02x" % b for b in self.header))
+ print()
- def freeze_children(self, parent_name):
- self.escaped_name = parent_name + self.simple_name.qstr_esc
+ self.raw_code.freeze()
+ print()
- # make sure the escaped name is unique
- i = 2
- while self.escaped_name in RawCode.escaped_names:
- self.escaped_name = parent_name + self.simple_name.qstr_esc + str(i)
- i += 1
- RawCode.escaped_names.add(self.escaped_name)
+ self.freeze_constants()
- # emit children first
- for rc in self.raw_codes:
- rc.freeze(self.escaped_name + "_")
+ print()
+ print("static const mp_frozen_module_t frozen_module_%s = {" % self.escaped_name)
+ print(" .constants = {")
+ if len(self.qstr_table):
+ print(
+ " .qstr_table = (qstr_short_t *)&const_qstr_table_data_%s,"
+ % self.escaped_name
+ )
+ else:
+ print(" .qstr_table = NULL,")
+ if len(self.obj_table):
+ print(" .obj_table = (mp_obj_t *)&const_obj_table_data_%s," % self.escaped_name)
+ else:
+ print(" .obj_table = NULL,")
+ print(" },")
+ print(" .rc = &raw_code_%s," % self.raw_code.escaped_name)
+ print("};")
def freeze_constants(self):
+ global const_str_content, const_int_content, const_obj_content
+
+ if len(self.qstr_table):
+ print(
+ "static const qstr_short_t const_qstr_table_data_%s[%u] = {"
+ % (self.escaped_name, len(self.qstr_table))
+ )
+ for q in self.qstr_table:
+ print(" %s," % q.qstr_id)
+ print("};")
+
+ if not len(self.obj_table):
+ return
+
# generate constant objects
- for i, obj in enumerate(self.objs):
+ print()
+ print("// constants")
+ for i, obj in enumerate(self.obj_table):
obj_name = "const_obj_%s_%u" % (self.escaped_name, i)
- if obj is MPFunTable:
+ if isinstance(obj, MPFunTable):
pass
elif obj is Ellipsis:
print("#define %s mp_const_ellipsis_obj" % obj_name)
@@ -286,7 +600,7 @@ class RawCode(object):
else:
obj_type = "mp_type_bytes"
print(
- 'STATIC const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};'
+ 'static const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};'
% (
obj_name,
obj_type,
@@ -295,6 +609,8 @@ class RawCode(object):
"".join(("\\x%02x" % b) for b in obj),
)
)
+ const_str_content += len(obj)
+ const_obj_content += 4 * 4
elif is_int_type(obj):
if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE:
# TODO check if we can actually fit this long-int into a small-int
@@ -316,76 +632,139 @@ class RawCode(object):
ndigs = len(digs)
digs = ",".join(("%#x" % d) for d in digs)
print(
- "STATIC const mp_obj_int_t %s = {{&mp_type_int}, "
+ "static const mp_obj_int_t %s = {{&mp_type_int}, "
"{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t*)(const uint%u_t[]){%s}}};"
% (obj_name, neg, ndigs, ndigs, bits_per_dig, bits_per_dig, digs)
)
+ const_int_content += (digs.count(",") + 1) * bits_per_dig // 8
+ const_obj_content += 4 * 4
elif type(obj) is float:
print(
"#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
)
print(
- "STATIC const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};"
+ "static const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};"
% (obj_name, obj)
)
print("#endif")
+ const_obj_content += 3 * 4
elif type(obj) is complex:
print(
- "STATIC const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};"
+ "static const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};"
% (obj_name, obj.real, obj.imag)
)
else:
raise FreezeError(self, "freezing of object %r is not implemented" % (obj,))
- # generate constant table, if it has any entries
- const_table_len = len(self.qstrs) + len(self.objs) + len(self.raw_codes)
- if const_table_len:
- print(
- "STATIC const mp_rom_obj_t const_table_data_%s[%u] = {"
- % (self.escaped_name, const_table_len)
- )
- for qst in self.qstrs:
- print(" MP_ROM_QSTR(%s)," % global_qstrs[qst].qstr_id)
- for i in range(len(self.objs)):
- if self.objs[i] is MPFunTable:
- print(" &mp_fun_table,")
- elif type(self.objs[i]) is float:
- print(
- "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
- )
- print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i))
- print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C")
- n = struct.unpack("<I", struct.pack("<f", self.objs[i]))[0]
- n = ((n & ~0x3) | 2) + 0x80800000
- print(" (mp_rom_obj_t)(0x%08x)," % (n,))
- print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D")
- n = struct.unpack("<Q", struct.pack("<d", self.objs[i]))[0]
- n += 0x8004000000000000
- print(" (mp_rom_obj_t)(0x%016x)," % (n,))
- print("#endif")
- else:
- print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i))
- for rc in self.raw_codes:
- print(" MP_ROM_PTR(&raw_code_%s)," % rc.escaped_name)
+ # generate constant table
+ print()
+ print("// constant table")
+ print(
+ "static const mp_rom_obj_t const_obj_table_data_%s[%u] = {"
+ % (self.escaped_name, len(self.obj_table))
+ )
+ for i in range(len(self.obj_table)):
+ if isinstance(self.obj_table[i], MPFunTable):
+ print(" &mp_fun_table,")
+ elif type(self.obj_table[i]) is float:
+ print(
+ "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
+ )
+ print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i))
+ print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C")
+ n = struct.unpack("<I", struct.pack("<f", self.obj_table[i]))[0]
+ n = ((n & ~0x3) | 2) + 0x80800000
+ print(" (mp_rom_obj_t)(0x%08x)," % (n,))
+ print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D")
+ n = struct.unpack("<Q", struct.pack("<d", self.obj_table[i]))[0]
+ n += 0x8004000000000000
+ print(" (mp_rom_obj_t)(0x%016x)," % (n,))
+ print("#endif")
+ else:
+ print(" MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i))
+ print("};")
+
+ global const_table_ptr_content
+ const_table_ptr_content += len(self.obj_table)
+
+
+class RawCode(object):
+ # a set of all escaped names, to make sure they are unique
+ escaped_names = set()
+
+ # convert code kind number to string
+ code_kind_str = {
+ MP_CODE_BYTECODE: "MP_CODE_BYTECODE",
+ MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY",
+ MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER",
+ MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM",
+ }
+
+ def __init__(self, cm_escaped_name, qstr_table, fun_data, prelude_offset, code_kind):
+ self.qstr_table = qstr_table
+ self.fun_data = fun_data
+ self.prelude_offset = prelude_offset
+ self.code_kind = code_kind
+
+ if code_kind in (MP_CODE_BYTECODE, MP_CODE_NATIVE_PY):
+ (
+ self.offset_names,
+ self.offset_opcodes,
+ self.offset_line_info,
+ self.prelude,
+ self.names,
+ ) = extract_prelude(self.fun_data, prelude_offset)
+ self.scope_flags = self.prelude[2]
+ self.n_pos_args = self.prelude[3]
+ self.simple_name = self.qstr_table[self.names[0]]
+ else:
+ self.simple_name = self.qstr_table[0]
+
+ escaped_name = cm_escaped_name + "_" + self.simple_name.qstr_esc
+
+ # make sure the escaped name is unique
+ i = 2
+ unique_escaped_name = escaped_name
+ while unique_escaped_name in self.escaped_names:
+ unique_escaped_name = escaped_name + str(i)
+ i += 1
+ self.escaped_names.add(unique_escaped_name)
+ self.escaped_name = unique_escaped_name
+
+ def disassemble_children(self):
+ print(" children:", [rc.simple_name.str for rc in self.children])
+ for rc in self.children:
+ rc.disassemble()
+
+ def freeze_children(self):
+ # Freeze children and generate table of children.
+ if len(self.children):
+ for rc in self.children:
+ print("// child of %s" % self.escaped_name)
+ rc.freeze()
+ print()
+ print("static const mp_raw_code_t *const children_%s[] = {" % self.escaped_name)
+ for rc in self.children:
+ print(" &raw_code_%s," % rc.escaped_name)
print("};")
+ print()
- def freeze_module(self, qstr_links=(), type_sig=0):
- # generate module
- if self.simple_name.str != "<module>":
- print("STATIC ", end="")
- print("const mp_raw_code_t raw_code_%s = {" % self.escaped_name)
+ def freeze_raw_code(self, qstr_links=(), type_sig=0):
+ # Generate mp_raw_code_t.
+ print("static const mp_raw_code_t raw_code_%s = {" % self.escaped_name)
print(" .kind = %s," % RawCode.code_kind_str[self.code_kind])
- print(" .scope_flags = 0x%02x," % self.prelude[2])
- print(" .n_pos_args = %u," % self.prelude[3])
+ print(" .scope_flags = 0x%02x," % self.scope_flags)
+ print(" .n_pos_args = %u," % self.n_pos_args)
print(" .fun_data = fun_data_%s," % self.escaped_name)
- if len(self.qstrs) + len(self.objs) + len(self.raw_codes):
- print(" .const_table = (mp_uint_t*)const_table_data_%s," % self.escaped_name)
+ print(" #if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_DEBUG_PRINTERS")
+ print(" .fun_data_len = %u," % len(self.fun_data))
+ print(" #endif")
+ if len(self.children):
+ print(" .children = (void *)&children_%s," % self.escaped_name)
else:
- print(" .const_table = NULL,")
+ print(" .children = NULL,")
print(" #if MICROPY_PERSISTENT_CODE_SAVE")
- print(" .fun_data_len = %u," % len(self.bytecode))
- print(" .n_obj = %u," % len(self.objs))
- print(" .n_raw_code = %u," % len(self.raw_codes))
+ print(" .n_children = %u," % len(self.children))
if self.code_kind == MP_CODE_BYTECODE:
print(" #if MICROPY_PY_SYS_SETTRACE")
print(" .prelude = {")
@@ -395,13 +774,14 @@ class RawCode(object):
print(" .n_pos_args = %u," % self.prelude[3])
print(" .n_kwonly_args = %u," % self.prelude[4])
print(" .n_def_pos_args = %u," % self.prelude[5])
- print(" .qstr_block_name = %s," % self.simple_name.qstr_id)
- print(" .qstr_source_file = %s," % self.source_file.qstr_id)
+ print(" .qstr_block_name_idx = %u," % self.names[0])
print(
" .line_info = fun_data_%s + %u,"
- % (self.escaped_name, self.line_info_offset)
+ % (self.escaped_name, self.offset_line_info)
+ )
+ print(
+ " .opcodes = fun_data_%s + %u," % (self.escaped_name, self.offset_opcodes)
)
- print(" .opcodes = fun_data_%s + %u," % (self.escaped_name, self.ip))
print(" },")
print(" .line_of_definition = %u," % 0) # TODO
print(" #endif")
@@ -416,66 +796,109 @@ class RawCode(object):
print(" #endif")
print("};")
+ global raw_code_count, raw_code_content
+ raw_code_count += 1
+ raw_code_content += 4 * 4
+
class RawCodeBytecode(RawCode):
- def __init__(self, bytecode, qstrs, objs, raw_codes):
+ def __init__(self, cm_escaped_name, qstr_table, obj_table, fun_data):
+ self.obj_table = obj_table
super(RawCodeBytecode, self).__init__(
- MP_CODE_BYTECODE, bytecode, 0, qstrs, objs, raw_codes
+ cm_escaped_name, qstr_table, fun_data, 0, MP_CODE_BYTECODE
)
- def freeze(self, parent_name):
- self.freeze_children(parent_name)
+ def disassemble(self):
+ bc = self.fun_data
+ print("simple_name:", self.simple_name.str)
+ print(" raw bytecode:", len(bc), hexlify_to_str(bc))
+ print(" prelude:", self.prelude)
+ print(" args:", [self.qstr_table[i].str for i in self.names[1:]])
+ print(" line info:", hexlify_to_str(bc[self.offset_line_info : self.offset_opcodes]))
+ ip = self.offset_opcodes
+ while ip < len(bc):
+ fmt, sz, arg = mp_opcode_decode(bc, ip)
+ if bc[ip] == Opcodes.MP_BC_LOAD_CONST_OBJ:
+ arg = "%r" % self.obj_table[arg]
+ if fmt == MP_BC_FORMAT_QSTR:
+ arg = self.qstr_table[arg].str
+ elif fmt in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET):
+ pass
+ else:
+ arg = ""
+ print(
+ " %-11s %s %s" % (hexlify_to_str(bc[ip : ip + sz]), Opcodes.mapping[bc[ip]], arg)
+ )
+ ip += sz
+ self.disassemble_children()
+ def freeze(self):
# generate bytecode data
- print()
+ bc = self.fun_data
print(
- "// frozen bytecode for file %s, scope %s%s"
- % (self.source_file.str, parent_name, self.simple_name.str)
+ "// frozen bytecode for file %s, scope %s"
+ % (self.qstr_table[0].str, self.escaped_name)
)
- print("STATIC const byte fun_data_%s[%u] = {" % (self.escaped_name, len(self.bytecode)))
- print(" ", end="")
- for i in range(self.ip2):
- print(" 0x%02x," % self.bytecode[i], end="")
- print()
- print(" ", self.simple_name.qstr_id, "& 0xff,", self.simple_name.qstr_id, ">> 8,")
- print(" ", self.source_file.qstr_id, "& 0xff,", self.source_file.qstr_id, ">> 8,")
- print(" ", end="")
- for i in range(self.ip2 + 4, self.ip):
- print(" 0x%02x," % self.bytecode[i], end="")
- print()
- ip = self.ip
- while ip < len(self.bytecode):
- f, sz = mp_opcode_format(self.bytecode, ip, True)
- if f == 1:
- qst = self._unpack_qstr(ip + 1).qstr_id
- extra = "" if sz == 3 else " 0x%02x," % self.bytecode[ip + 3]
- print(" ", "0x%02x," % self.bytecode[ip], qst, "& 0xff,", qst, ">> 8,", extra)
- else:
- print(" ", "".join("0x%02x, " % self.bytecode[ip + i] for i in range(sz)))
+ print("static const byte fun_data_%s[%u] = {" % (self.escaped_name, len(bc)))
+
+ print(" ", end="")
+ for b in bc[: self.offset_names]:
+ print("0x%02x," % b, end="")
+ print(" // prelude")
+
+ print(" ", end="")
+ for b in bc[self.offset_names : self.offset_line_info]:
+ print("0x%02x," % b, end="")
+ print(" // names: %s" % ", ".join(self.qstr_table[i].str for i in self.names))
+
+ print(" ", end="")
+ for b in bc[self.offset_line_info : self.offset_opcodes]:
+ print("0x%02x," % b, end="")
+ print(" // code info")
+
+ ip = self.offset_opcodes
+ while ip < len(bc):
+ fmt, sz, arg = mp_opcode_decode(bc, ip)
+ opcode_name = Opcodes.mapping[bc[ip]]
+ if fmt == MP_BC_FORMAT_QSTR:
+ opcode_name += " " + self.qstr_table[arg].str
+ elif fmt in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET):
+ opcode_name += " %u" % arg
+ print(
+ " %s, // %s" % (",".join("0x%02x" % b for b in bc[ip : ip + sz]), opcode_name)
+ )
ip += sz
+
print("};")
- self.freeze_constants()
- self.freeze_module()
+ self.freeze_children()
+ self.freeze_raw_code()
+
+ global bc_content
+ bc_content += len(bc)
class RawCodeNative(RawCode):
def __init__(
self,
- code_kind,
+ cm_escaped_name,
+ qstr_table,
+ kind,
fun_data,
prelude_offset,
- prelude,
qstr_links,
- qstrs,
- objs,
- raw_codes,
+ scope_flags,
+ n_pos_args,
type_sig,
):
super(RawCodeNative, self).__init__(
- code_kind, fun_data, prelude_offset, qstrs, objs, raw_codes
+ cm_escaped_name, qstr_table, fun_data, prelude_offset, kind
)
- self.prelude = prelude
+
+ if kind in (MP_CODE_NATIVE_VIPER, MP_CODE_NATIVE_ASM):
+ self.scope_flags = scope_flags
+ self.n_pos_args = n_pos_args
+
self.qstr_links = qstr_links
self.type_sig = type_sig
if config.native_arch in (
@@ -502,11 +925,32 @@ class RawCodeNative(RawCode):
# ARMVxxM -- two byte align.
self.fun_data_attributes += " __attribute__ ((aligned (2)))"
+ def disassemble(self):
+ fun_data = self.fun_data
+ print("simple_name:", self.simple_name.str)
+ print(
+ " raw data:",
+ len(fun_data),
+ hexlify_to_str(fun_data[:32]),
+ "..." if len(fun_data) > 32 else "",
+ )
+ if self.code_kind != MP_CODE_NATIVE_PY:
+ return
+ print(" prelude:", self.prelude)
+ print(" args:", [self.qstr_table[i].str for i in self.names[1:]])
+ print(" line info:", fun_data[self.offset_line_info : self.offset_opcodes])
+ ip = 0
+ while ip < self.prelude_offset:
+ sz = 16
+ print(" ", hexlify_to_str(fun_data[ip : min(ip + sz, self.prelude_offset)]))
+ ip += sz
+ self.disassemble_children()
+
def _asm_thumb_rewrite_mov(self, pc, val):
- print(" (%u & 0xf0) | (%s >> 12)," % (self.bytecode[pc], val), end="")
- print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.bytecode[pc + 1], val), end="")
+ print(" (%u & 0xf0) | (%s >> 12)," % (self.fun_data[pc], val), end="")
+ print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.fun_data[pc + 1], val), end="")
print(" (%s & 0xff)," % (val,), end="")
- print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.bytecode[pc + 3], val))
+ print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.fun_data[pc + 3], val))
def _link_qstr(self, pc, kind, qst):
if kind == 0:
@@ -543,32 +987,22 @@ class RawCodeNative(RawCode):
else:
assert 0
- def freeze(self, parent_name):
- if self.prelude[2] & ~0x0F:
+ def freeze(self):
+ if self.scope_flags & ~0x0F:
raise FreezeError("unable to freeze code with relocations")
- self.freeze_children(parent_name)
-
# generate native code data
print()
- if self.code_kind == MP_CODE_NATIVE_PY:
- print(
- "// frozen native code for file %s, scope %s%s"
- % (self.source_file.str, parent_name, self.simple_name.str)
- )
- elif self.code_kind == MP_CODE_NATIVE_VIPER:
- print("// frozen viper code for scope %s" % (parent_name,))
- else:
- print("// frozen assembler code for scope %s" % (parent_name,))
print(
- "STATIC const byte fun_data_%s[%u] %s = {"
- % (self.escaped_name, len(self.bytecode), self.fun_data_attributes)
+ "// frozen native code for file %s, scope %s"
+ % (self.qstr_table[0].str, self.escaped_name)
+ )
+ print(
+ "static const byte fun_data_%s[%u] %s = {"
+ % (self.escaped_name, len(self.fun_data), self.fun_data_attributes)
)
- if self.code_kind == MP_CODE_NATIVE_PY:
- i_top = self.prelude_offset
- else:
- i_top = len(self.bytecode)
+ i_top = len(self.fun_data)
i = 0
qi = 0
while i < i_top:
@@ -585,229 +1019,253 @@ class RawCodeNative(RawCode):
i16 = min(i16, self.qstr_links[qi][0])
print(" ", end="")
for ii in range(i, i16):
- print(" 0x%02x," % self.bytecode[ii], end="")
+ print(" 0x%02x," % self.fun_data[ii], end="")
print()
i = i16
- if self.code_kind == MP_CODE_NATIVE_PY:
- print(" ", end="")
- for i in range(self.prelude_offset, self.ip2):
- print(" 0x%02x," % self.bytecode[i], end="")
- print()
-
- print(" ", self.simple_name.qstr_id, "& 0xff,", self.simple_name.qstr_id, ">> 8,")
- print(" ", self.source_file.qstr_id, "& 0xff,", self.source_file.qstr_id, ">> 8,")
-
- print(" ", end="")
- for i in range(self.ip2 + 4, self.ip):
- print(" 0x%02x," % self.bytecode[i], end="")
- print()
-
print("};")
- self.freeze_constants()
- self.freeze_module(self.qstr_links, self.type_sig)
+ self.freeze_children()
+ self.freeze_raw_code(self.qstr_links, self.type_sig)
-class BytecodeBuffer:
- def __init__(self, size):
- self.buf = bytearray(size)
- self.idx = 0
+class MPYSegment:
+ META = 0
+ QSTR = 1
+ OBJ = 2
+ CODE = 3
- def is_full(self):
- return self.idx == len(self.buf)
+ def __init__(self, kind, name, start, end):
+ self.kind = kind
+ self.name = name
+ self.start = start
+ self.end = end
- def append(self, b):
- self.buf[self.idx] = b
- self.idx += 1
+class MPYReader:
+ def __init__(self, filename, fileobj):
+ self.filename = filename
+ self.fileobj = fileobj
-def read_byte(f, out=None):
- b = bytes_cons(f.read(1))[0]
- if out is not None:
- out.append(b)
- return b
+ def tell(self):
+ return self.fileobj.tell()
+ def read_byte(self):
+ return bytes_cons(self.fileobj.read(1))[0]
-def read_uint(f, out=None):
- i = 0
- while True:
- b = read_byte(f, out)
- i = (i << 7) | (b & 0x7F)
- if b & 0x80 == 0:
- break
- return i
+ def read_bytes(self, n):
+ return bytes_cons(self.fileobj.read(n))
+ def read_uint(self):
+ i = 0
+ while True:
+ b = self.read_byte()
+ i = (i << 7) | (b & 0x7F)
+ if b & 0x80 == 0:
+ break
+ return i
-def read_qstr(f, qstr_win):
- ln = read_uint(f)
- if ln == 0:
- # static qstr
- return bytes_cons(f.read(1))[0]
+
+def read_qstr(reader, segments):
+ start_pos = reader.tell()
+ ln = reader.read_uint()
if ln & 1:
- # qstr in table
- return qstr_win.access(ln >> 1)
+ # static qstr
+ segments.append(
+ MPYSegment(MPYSegment.META, global_qstrs[ln >> 1].str, start_pos, start_pos)
+ )
+ return ln >> 1
ln >>= 1
- data = str_cons(f.read(ln), "utf8")
+ start_pos = reader.tell()
+ data = str_cons(reader.read_bytes(ln), "utf8")
+ reader.read_byte() # read and discard null terminator
+ segments.append(MPYSegment(MPYSegment.QSTR, data, start_pos, reader.tell()))
global_qstrs.append(QStrType(data))
- qstr_win.push(len(global_qstrs) - 1)
return len(global_qstrs) - 1
-def read_obj(f):
- obj_type = f.read(1)
- if obj_type == b"e":
+def read_obj(reader, segments):
+ obj_type = reader.read_bytes(1)
+ if obj_type == b"t":
+ return MPFunTable()
+ elif obj_type == b"e":
return Ellipsis
else:
- buf = f.read(read_uint(f))
+ ln = reader.read_uint()
+ start_pos = reader.tell()
+ buf = reader.read_bytes(ln)
+ if obj_type in (b"s", b"b"):
+ reader.read_byte() # read and discard null terminator
if obj_type == b"s":
- return str_cons(buf, "utf8")
+ obj = str_cons(buf, "utf8")
elif obj_type == b"b":
- return bytes_cons(buf)
+ obj = buf
elif obj_type == b"i":
- return int(str_cons(buf, "ascii"), 10)
+ obj = int(str_cons(buf, "ascii"), 10)
elif obj_type == b"f":
- return float(str_cons(buf, "ascii"))
+ obj = float(str_cons(buf, "ascii"))
elif obj_type == b"c":
- return complex(str_cons(buf, "ascii"))
+ obj = complex(str_cons(buf, "ascii"))
else:
- assert 0
+ raise MPYReadError(reader.filename, "corrupt .mpy file")
+ segments.append(MPYSegment(MPYSegment.OBJ, obj, start_pos, reader.tell()))
+ return obj
-def read_prelude(f, bytecode, qstr_win):
- (
- n_state,
- n_exc_stack,
- scope_flags,
- n_pos_args,
- n_kwonly_args,
- n_def_pos_args,
- ) = read_prelude_sig(lambda: read_byte(f, bytecode))
- n_info, n_cell = read_prelude_size(lambda: read_byte(f, bytecode))
- read_qstr_and_pack(f, bytecode, qstr_win) # simple_name
- read_qstr_and_pack(f, bytecode, qstr_win) # source_file
- for _ in range(n_info - 4 + n_cell):
- read_byte(f, bytecode)
- return n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args
-
-
-def read_qstr_and_pack(f, bytecode, qstr_win):
- qst = read_qstr(f, qstr_win)
- bytecode.append(qst & 0xFF)
- bytecode.append(qst >> 8)
-
-
-def read_bytecode(file, bytecode, qstr_win):
- while not bytecode.is_full():
- op = read_byte(file, bytecode)
- f, sz = mp_opcode_format(bytecode.buf, bytecode.idx - 1, False)
- sz -= 1
- if f == MP_BC_FORMAT_QSTR:
- read_qstr_and_pack(file, bytecode, qstr_win)
- sz -= 2
- elif f == MP_BC_FORMAT_VAR_UINT:
- while read_byte(file, bytecode) & 0x80:
- pass
- for _ in range(sz):
- read_byte(file, bytecode)
-
-
-def read_raw_code(f, qstr_win):
- kind_len = read_uint(f)
+def read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments):
+ # Read raw code header.
+ kind_len = reader.read_uint()
kind = (kind_len & 3) + MP_CODE_BYTECODE
- fun_data_len = kind_len >> 2
- fun_data = BytecodeBuffer(fun_data_len)
+ has_children = (kind_len >> 2) & 1
+ fun_data_len = kind_len >> 3
+
+ # Read the body of the raw code.
+ file_offset = reader.tell()
+ fun_data = reader.read_bytes(fun_data_len)
+ segments_len = len(segments)
if kind == MP_CODE_BYTECODE:
- prelude = read_prelude(f, fun_data, qstr_win)
- read_bytecode(f, fun_data, qstr_win)
+ # Create bytecode raw code.
+ rc = RawCodeBytecode(cm_escaped_name, qstr_table, obj_table, fun_data)
else:
- fun_data.buf[:] = f.read(fun_data_len)
-
+ # Create native raw code.
qstr_links = []
if kind in (MP_CODE_NATIVE_PY, MP_CODE_NATIVE_VIPER):
- # load qstr link table
- n_qstr_link = read_uint(f)
+ # Read qstr link table.
+ n_qstr_link = reader.read_uint()
for _ in range(n_qstr_link):
- off = read_uint(f)
- qst = read_qstr(f, qstr_win)
+ off = reader.read_uint()
+ qst = read_qstr(reader, segments)
qstr_links.append((off >> 2, off & 3, qst))
- type_sig = 0
+ native_scope_flags = 0
+ native_n_pos_args = 0
+ native_type_sig = 0
if kind == MP_CODE_NATIVE_PY:
- prelude_offset = read_uint(f)
- _, name_idx, prelude = extract_prelude(fun_data.buf, prelude_offset)
- fun_data.idx = name_idx # rewind to where qstrs are in prelude
- read_qstr_and_pack(f, fun_data, qstr_win) # simple_name
- read_qstr_and_pack(f, fun_data, qstr_win) # source_file
+ prelude_offset = reader.read_uint()
else:
- prelude_offset = None
- scope_flags = read_uint(f)
- n_pos_args = 0
- if kind == MP_CODE_NATIVE_ASM:
- n_pos_args = read_uint(f)
- type_sig = read_uint(f)
- prelude = (None, None, scope_flags, n_pos_args, 0)
-
- qstrs = []
- objs = []
- raw_codes = []
- if kind != MP_CODE_NATIVE_ASM:
- # load constant table
- n_obj = read_uint(f)
- n_raw_code = read_uint(f)
- qstrs = [read_qstr(f, qstr_win) for _ in range(prelude[3] + prelude[4])]
- if kind != MP_CODE_BYTECODE:
- objs.append(MPFunTable)
- objs.extend([read_obj(f) for _ in range(n_obj)])
- raw_codes = [read_raw_code(f, qstr_win) for _ in range(n_raw_code)]
+ prelude_offset = 0
+ native_scope_flags = reader.read_uint()
+ if kind == MP_CODE_NATIVE_VIPER:
+ # Read any additional sections for native viper.
+ if native_scope_flags & MP_SCOPE_FLAG_VIPERRODATA:
+ rodata_size = reader.read_uint()
+ if native_scope_flags & MP_SCOPE_FLAG_VIPERBSS:
+ bss_size = reader.read_uint()
+ if native_scope_flags & MP_SCOPE_FLAG_VIPERRODATA:
+ reader.read_bytes(rodata_size)
+ if native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC:
+ while True:
+ op = reader.read_byte()
+ if op == 0xFF:
+ break
+ if op & 1:
+ addr = reader.read_uint()
+ op >>= 1
+ if op <= 5 and op & 1:
+ n = reader.read_uint()
+ else:
+ assert kind == MP_CODE_NATIVE_ASM
+ native_n_pos_args = reader.read_uint()
+ native_type_sig = reader.read_uint()
- if kind == MP_CODE_BYTECODE:
- return RawCodeBytecode(fun_data.buf, qstrs, objs, raw_codes)
- else:
- return RawCodeNative(
+ rc = RawCodeNative(
+ cm_escaped_name,
+ qstr_table,
kind,
- fun_data.buf,
+ fun_data,
prelude_offset,
- prelude,
qstr_links,
- qstrs,
- objs,
- raw_codes,
- type_sig,
+ native_scope_flags,
+ native_n_pos_args,
+ native_type_sig,
)
+ # Add a segment for the raw code data.
+ segments.insert(
+ segments_len,
+ MPYSegment(MPYSegment.CODE, rc.simple_name.str, file_offset, file_offset + fun_data_len),
+ )
+
+ # Read children, if there are any.
+ rc.children = []
+ if has_children:
+ n_children = reader.read_uint()
+ for _ in range(n_children):
+ rc.children.append(
+ read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments)
+ )
+
+ return rc
+
def read_mpy(filename):
- with open(filename, "rb") as f:
- header = bytes_cons(f.read(4))
+ with open(filename, "rb") as fileobj:
+ reader = MPYReader(filename, fileobj)
+ segments = []
+
+ # Read and verify the header.
+ header = reader.read_bytes(4)
if header[0] != ord("M"):
- raise Exception("not a valid .mpy file")
+ raise MPYReadError(filename, "not a valid .mpy file")
if header[1] != config.MPY_VERSION:
- raise Exception("incompatible .mpy version")
+ raise MPYReadError(filename, "incompatible .mpy version")
feature_byte = header[2]
- qw_size = read_uint(f)
config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_byte & 2) != 0
mpy_native_arch = feature_byte >> 2
if mpy_native_arch != MP_NATIVE_ARCH_NONE:
if config.native_arch == MP_NATIVE_ARCH_NONE:
config.native_arch = mpy_native_arch
elif config.native_arch != mpy_native_arch:
- raise Exception("native architecture mismatch")
+ raise MPYReadError(filename, "native architecture mismatch")
config.mp_small_int_bits = header[3]
- qstr_win = QStrWindow(qw_size)
- rc = read_raw_code(f, qstr_win)
- rc.mpy_source_file = filename
- rc.qstr_win_size = qw_size
- return rc
+
+ # Read number of qstrs, and number of objects.
+ n_qstr = reader.read_uint()
+ n_obj = reader.read_uint()
+
+ # Read qstrs and construct qstr table.
+ qstr_table = []
+ for i in range(n_qstr):
+ q = read_qstr(reader, segments)
+ qstr_table.append(global_qstrs[q])
+
+ # Read objects and construct object table.
+ obj_table = []
+ for i in range(n_obj):
+ obj_table.append(read_obj(reader, segments))
+
+ # Compute the compiled-module escaped name.
+ cm_escaped_name = qstr_table[0].str.replace("/", "_")[:-3]
+
+ # Read the outer raw code, which will in turn read all its children.
+ raw_code_file_offset = reader.tell()
+ raw_code = read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments)
+
+ # Create the outer-level compiled module representing the whole .mpy file.
+ return CompiledModule(
+ filename,
+ segments,
+ header,
+ qstr_table,
+ obj_table,
+ raw_code,
+ raw_code_file_offset,
+ cm_escaped_name,
+ )
-def dump_mpy(raw_codes):
- for rc in raw_codes:
- rc.dump()
+def hexdump_mpy(compiled_modules):
+ for cm in compiled_modules:
+ cm.hexdump()
-def freeze_mpy(base_qstrs, raw_codes):
+def disassemble_mpy(compiled_modules):
+ for cm in compiled_modules:
+ cm.disassemble()
+
+
+def freeze_mpy(base_qstrs, compiled_modules):
# add to qstrs
new = {}
for q in global_qstrs:
@@ -864,6 +1322,17 @@ def freeze_mpy(base_qstrs, raw_codes):
# As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len
qstr_pool_alloc = min(len(new), 10)
+ global bc_content, const_str_content, const_int_content, const_obj_content, const_table_qstr_content, const_table_ptr_content, raw_code_count, raw_code_content
+ qstr_content = 0
+ bc_content = 0
+ const_str_content = 0
+ const_int_content = 0
+ const_obj_content = 0
+ const_table_qstr_content = 0
+ const_table_ptr_content = 0
+ raw_code_count = 0
+ raw_code_content = 0
+
print()
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
qstr_size = {"metadata": 0, "data": 0}
@@ -892,32 +1361,49 @@ def freeze_mpy(base_qstrs, raw_codes):
print(" {")
for _, _, qstr, qbytes in new:
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
+ qstr_content += (
+ config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
+ )
print(" },")
print("};")
- for rc in raw_codes:
- rc.freeze(rc.source_file.str.replace("/", "_")[:-3] + "_")
+ # Freeze all modules.
+ for idx, cm in enumerate(compiled_modules):
+ cm.freeze(idx)
+
+ # Print separator, separating individual modules from global data structures.
+ print()
+ print("/" * 80)
+ print("// collection of all frozen modules")
+ # Define the string of frozen module names.
print()
print("const char mp_frozen_names[] = {")
- print("#ifdef MP_FROZEN_STR_NAMES")
+ print(" #ifdef MP_FROZEN_STR_NAMES")
# makemanifest.py might also include some frozen string content.
- print("MP_FROZEN_STR_NAMES")
- print("#endif")
- for rc in raw_codes:
- module_name = rc.source_file.str
- print('"%s\\0"' % module_name)
- print('"\\0"};')
-
- print("const mp_raw_code_t *const mp_frozen_mpy_content[] = {")
- for rc in raw_codes:
- print(" &raw_code_%s," % rc.escaped_name)
+ print(" MP_FROZEN_STR_NAMES")
+ print(" #endif")
+ mp_frozen_mpy_names_content = 1
+ for cm in compiled_modules:
+ module_name = cm.source_file.str
+ print(' "%s\\0"' % module_name)
+ mp_frozen_mpy_names_content += len(cm.source_file.str) + 1
+ print(' "\\0"')
+ print("};")
+
+ # Define the array of pointers to frozen module content.
+ print()
+ print("const mp_frozen_module_t *const mp_frozen_mpy_content[] = {")
+ for cm in compiled_modules:
+ print(" &frozen_module_%s," % cm.escaped_name)
print("};")
+ mp_frozen_mpy_content_size = len(compiled_modules * 4)
# If a port defines MICROPY_FROZEN_LIST_ITEM then list all modules wrapped in that macro.
+ print()
print("#ifdef MICROPY_FROZEN_LIST_ITEM")
- for rc in raw_codes:
- module_name = rc.source_file.str
+ for cm in compiled_modules:
+ module_name = cm.source_file.str
if module_name.endswith("/__init__.py"):
short_name = module_name[: -len("/__init__.py")]
else:
@@ -925,45 +1411,94 @@ def freeze_mpy(base_qstrs, raw_codes):
print('MICROPY_FROZEN_LIST_ITEM("%s", "%s")' % (short_name, module_name))
print("#endif")
+ print()
+ print("/*")
+ print("byte sizes:")
+ print("qstr content: %d unique, %d bytes" % (len(new), qstr_content))
+ print("bc content: %d" % bc_content)
+ print("const str content: %d" % const_str_content)
+ print("const int content: %d" % const_int_content)
+ print("const obj content: %d" % const_obj_content)
+ print(
+ "const table qstr content: %d entries, %d bytes"
+ % (const_table_qstr_content, const_table_qstr_content * 4)
+ )
+ print(
+ "const table ptr content: %d entries, %d bytes"
+ % (const_table_ptr_content, const_table_ptr_content * 4)
+ )
+ print("raw code content: %d * 4 = %d" % (raw_code_count, raw_code_content))
+ print("mp_frozen_mpy_names_content: %d" % mp_frozen_mpy_names_content)
+ print("mp_frozen_mpy_content_size: %d" % mp_frozen_mpy_content_size)
+ print(
+ "total: %d"
+ % (
+ qstr_content
+ + bc_content
+ + const_str_content
+ + const_int_content
+ + const_obj_content
+ + const_table_qstr_content * 4
+ + const_table_ptr_content * 4
+ + raw_code_content
+ + mp_frozen_mpy_names_content
+ + mp_frozen_mpy_content_size
+ )
+ )
+ print("*/")
+
def merge_mpy(raw_codes, output_file):
- assert len(raw_codes) <= 31 # so var-uints all fit in 1 byte
+ assert len(raw_codes) <= 2 # so var-uints all fit in 1 byte
merged_mpy = bytearray()
if len(raw_codes) == 1:
with open(raw_codes[0].mpy_source_file, "rb") as f:
merged_mpy.extend(f.read())
else:
- header = bytearray(5)
+ main_rc = None
+ for rc in raw_codes:
+ if len(rc.qstr_table) > 1 or len(rc.obj_table) > 0:
+ # Must use qstr_table and obj_table from this raw_code
+ if main_rc is not None:
+ raise Exception(
+ "can't merge files when more than one has a populated qstr or obj table"
+ )
+ main_rc = rc
+ if main_rc is None:
+ main_rc = raw_codes[0]
+
+ header = bytearray(4)
header[0] = ord("M")
header[1] = config.MPY_VERSION
header[2] = config.native_arch << 2 | config.MICROPY_PY_BUILTINS_STR_UNICODE << 1
header[3] = config.mp_small_int_bits
- header[4] = 32 # qstr_win_size
merged_mpy.extend(header)
+ # Copy n_qstr, n_obj, qstr_table, obj_table from main_rc.
+ with open(main_rc.mpy_source_file, "rb") as f:
+ data = f.read(main_rc.raw_code_file_offset)
+ merged_mpy.extend(data[4:])
+
bytecode = bytearray()
- bytecode_len = 6 + len(raw_codes) * 5 + 2
- bytecode.append(bytecode_len << 2) # kind and length
+ bytecode_len = 3 + len(raw_codes) * 5 + 2
+ bytecode.append(bytecode_len << 3 | 1 << 2) # kind, has_children and length
bytecode.append(0b00000000) # signature prelude
- bytecode.append(0b00001000) # size prelude
- bytecode.extend(b"\x00\x01") # MP_QSTR_
- bytecode.extend(b"\x00\x01") # MP_QSTR_
+ bytecode.append(0b00000010) # size prelude; n_info=1
+ bytecode.extend(b"\x00") # simple_name: qstr index 0 (will use source filename)
for idx in range(len(raw_codes)):
bytecode.append(0x32) # MP_BC_MAKE_FUNCTION
bytecode.append(idx) # index raw code
bytecode.extend(b"\x34\x00\x59") # MP_BC_CALL_FUNCTION, 0 args, MP_BC_POP_TOP
bytecode.extend(b"\x51\x63") # MP_BC_LOAD_NONE, MP_BC_RETURN_VALUE
- bytecode.append(0) # n_obj
- bytecode.append(len(raw_codes)) # n_raw_code
-
merged_mpy.extend(bytecode)
+ merged_mpy.append(len(raw_codes)) # n_children
+
for rc in raw_codes:
with open(rc.mpy_source_file, "rb") as f:
- f.read(4) # skip header
- read_uint(f) # skip qstr_win_size
+ f.seek(rc.raw_code_file_offset)
data = f.read() # read rest of mpy file
merged_mpy.extend(data)
@@ -978,7 +1513,12 @@ def main():
import argparse
cmd_parser = argparse.ArgumentParser(description="A tool to work with MicroPython .mpy files.")
- cmd_parser.add_argument("-d", "--dump", action="store_true", help="dump contents of files")
+ cmd_parser.add_argument(
+ "-x", "--hexdump", action="store_true", help="output an annotated hex dump of files"
+ )
+ cmd_parser.add_argument(
+ "-d", "--disassemble", action="store_true", help="output disassembled contents of files"
+ )
cmd_parser.add_argument("-f", "--freeze", action="store_true", help="freeze files")
cmd_parser.add_argument(
"--merge", action="store_true", help="merge multiple .mpy files into one"
@@ -1018,20 +1558,32 @@ def main():
else:
config.MICROPY_QSTR_BYTES_IN_LEN = 1
config.MICROPY_QSTR_BYTES_IN_HASH = 1
- base_qstrs = {}
+ base_qstrs = list(qstrutil.static_qstr_list)
+
+ # Load all .mpy files.
+ try:
+ compiled_modules = [read_mpy(file) for file in args.files]
+ except MPYReadError as er:
+ print(er, file=sys.stderr)
+ sys.exit(1)
- raw_codes = [read_mpy(file) for file in args.files]
+ if args.hexdump:
+ hexdump_mpy(compiled_modules)
- if args.dump:
- dump_mpy(raw_codes)
- elif args.freeze:
+ if args.disassemble:
+ if args.hexdump:
+ print()
+ disassemble_mpy(compiled_modules)
+
+ if args.freeze:
try:
- freeze_mpy(base_qstrs, raw_codes)
+ freeze_mpy(base_qstrs, compiled_modules)
except FreezeError as er:
print(er, file=sys.stderr)
sys.exit(1)
- elif args.merge:
- merged_mpy = merge_mpy(raw_codes, args.output)
+
+ if args.merge:
+ merge_mpy(compiled_modules, args.output)
if __name__ == "__main__":