summaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
author: Jim Mussared <jim.mussared@gmail.com> 2021-09-06 12:28:06 +1000
committer: Damien George <damien@micropython.org> 2021-09-16 16:04:03 +1000
commit: b326edf68c5edb648fac4dc2a3403ee33510e179 (patch)
tree: df3a4e0666eb4a7ff85329befe306ec452b8cd64 /tools
parent: 60c6d5594f165cf3af6e66076f8dceb24e0d859f (diff)
download: micropython-b326edf68c5edb648fac4dc2a3403ee33510e179.tar.gz
          micropython-b326edf68c5edb648fac4dc2a3403ee33510e179.zip
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.

This commit removes all parts of code associated with the existing MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the -mcache-lookup-bc option to mpy-cross.

This feature originally provided a significant performance boost for Unix, but wasn't able to be enabled for MCU targets (due to frozen bytecode), and added significant extra complexity to generating and distributing .mpy files.

The equivalent performance gain is now provided by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has been enabled on the unix port in the previous commit).

It's hard to provide precise performance numbers, but tests have been run on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V, xtensa) and they all generally agree on the qualitative improvements seen by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE.

For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined with MAP_LOOKUP_CACHE is:

    diff of scores (higher is better)
    N=2000 M=2000        bccache -> attrmapcache       diff      diff% (error%)
    bm_chaos.py         13742.56 ->   13905.67 :    +163.11 =  +1.187% (+/-3.75%)
    bm_fannkuch.py         60.13 ->      61.34 :      +1.21 =  +2.012% (+/-2.11%)
    bm_fft.py          113083.20 ->  114793.68 :   +1710.48 =  +1.513% (+/-1.57%)
    bm_float.py        256552.80 ->  243908.29 :  -12644.51 =  -4.929% (+/-1.90%)
    bm_hexiom.py          521.93 ->     625.41 :    +103.48 = +19.826% (+/-0.40%)
    bm_nqueens.py      197544.25 ->  217713.12 :  +20168.87 = +10.210% (+/-3.01%)
    bm_pidigits.py       8072.98 ->    8198.75 :    +125.77 =  +1.558% (+/-3.22%)
    misc_aes.py         17283.45 ->   16480.52 :    -802.93 =  -4.646% (+/-0.82%)
    misc_mandel.py      99083.99 ->  128939.84 :  +29855.85 = +30.132% (+/-5.88%)
    misc_pystone.py     83860.10 ->   82592.56 :   -1267.54 =  -1.511% (+/-2.27%)
    misc_raytrace.py    21490.40 ->   22227.23 :    +736.83 =  +3.429% (+/-1.88%)

This shows that the new optimisations are at least as good as the existing inline-bytecode-caching, and are sometimes much better (because the new ones apply caching to a wider variety of map lookups).

The new optimisations can also benefit code generated by the native emitter, because they apply to the runtime rather than the generated code. The improvement for the native emitter when LOAD_ATTR_FAST_PATH and MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):

    diff of scores (higher is better)
    N=2000 M=2000         native -> nat-attrmapcache   diff      diff% (error%)
    bm_chaos.py         14130.62 ->   15464.68 :   +1334.06 =  +9.441% (+/-7.11%)
    bm_fannkuch.py         74.96 ->      76.16 :      +1.20 =  +1.601% (+/-1.80%)
    bm_fft.py          166682.99 ->  168221.86 :   +1538.87 =  +0.923% (+/-4.20%)
    bm_float.py        233415.23 ->  265524.90 :  +32109.67 = +13.756% (+/-2.57%)
    bm_hexiom.py          628.59 ->     734.17 :    +105.58 = +16.796% (+/-1.39%)
    bm_nqueens.py      225418.44 ->  232926.45 :   +7508.01 =  +3.331% (+/-3.10%)
    bm_pidigits.py       6322.00 ->    6379.52 :     +57.52 =  +0.910% (+/-5.62%)
    misc_aes.py         20670.10 ->   27223.18 :   +6553.08 = +31.703% (+/-1.56%)
    misc_mandel.py     138221.11 ->  152014.01 :  +13792.90 =  +9.979% (+/-2.46%)
    misc_pystone.py     85032.14 ->  105681.44 :  +20649.30 = +24.284% (+/-2.25%)
    misc_raytrace.py    19800.01 ->   23350.73 :   +3550.72 = +17.933% (+/-2.79%)

In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.

See #7680 for further discussion. And see also #7653 for a discussion about simplifying mpy-cross options.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>

Diffstat (limited to 'tools')
-rwxr-xr-x  tools/mpy-tool.py       28
-rwxr-xr-x  tools/mpy_cross_all.py   5
-rwxr-xr-x  tools/mpy_ld.py          9
3 files changed, 5 insertions, 37 deletions
diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py
index bfc3cf27e3..6868ed5d4e 100755
--- a/tools/mpy-tool.py
+++ b/tools/mpy-tool.py
@@ -132,14 +132,6 @@ def mp_opcode_format(bytecode, ip, count_var_uint):
ip_start = ip
f = (0x000003A4 >> (2 * ((opcode) >> 4))) & 3
if f == MP_BC_FORMAT_QSTR:
- if config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE:
- if (
- opcode == MP_BC_LOAD_NAME
- or opcode == MP_BC_LOAD_GLOBAL
- or opcode == MP_BC_LOAD_ATTR
- or opcode == MP_BC_STORE_ATTR
- ):
- ip += 1
ip += 3
else:
extra_byte = (opcode & MP_BC_MASK_EXTRA_BYTE) == 0
@@ -440,10 +432,7 @@ class RawCodeBytecode(RawCode):
"// frozen bytecode for file %s, scope %s%s"
% (self.source_file.str, parent_name, self.simple_name.str)
)
- print("STATIC ", end="")
- if not config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE:
- print("const ", end="")
- print("byte fun_data_%s[%u] = {" % (self.escaped_name, len(self.bytecode)))
+ print("STATIC const byte fun_data_%s[%u] = {" % (self.escaped_name, len(self.bytecode)))
print(" ", end="")
for i in range(self.ip2):
print(" 0x%02x," % self.bytecode[i], end="")
@@ -798,7 +787,6 @@ def read_mpy(filename):
raise Exception("incompatible .mpy version")
feature_byte = header[2]
qw_size = read_uint(f)
- config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = (feature_byte & 1) != 0
config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_byte & 2) != 0
mpy_native_arch = feature_byte >> 2
if mpy_native_arch != MP_NATIVE_ARCH_NONE:
@@ -836,14 +824,6 @@ def freeze_mpy(base_qstrs, raw_codes):
print('#include "py/nativeglue.h"')
print()
- print(
- "#if MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE != %u"
- % config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE
- )
- print('#error "incompatible MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE"')
- print("#endif")
- print()
-
print("#if MICROPY_LONGINT_IMPL != %u" % config.MICROPY_LONGINT_IMPL)
print('#error "incompatible MICROPY_LONGINT_IMPL"')
print("#endif")
@@ -940,11 +920,7 @@ def merge_mpy(raw_codes, output_file):
header = bytearray(5)
header[0] = ord("M")
header[1] = config.MPY_VERSION
- header[2] = (
- config.native_arch << 2
- | config.MICROPY_PY_BUILTINS_STR_UNICODE << 1
- | config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE
- )
+ header[2] = config.native_arch << 2 | config.MICROPY_PY_BUILTINS_STR_UNICODE << 1
header[3] = config.mp_small_int_bits
header[4] = 32 # qstr_win_size
merged_mpy.extend(header)
diff --git a/tools/mpy_cross_all.py b/tools/mpy_cross_all.py
index d542bde42e..4b1edf9d6c 100755
--- a/tools/mpy_cross_all.py
+++ b/tools/mpy_cross_all.py
@@ -6,14 +6,11 @@ import os.path
argparser = argparse.ArgumentParser(description="Compile all .py files to .mpy recursively")
argparser.add_argument("-o", "--out", help="output directory (default: input dir)")
argparser.add_argument("--target", help="select MicroPython target config")
-argparser.add_argument(
- "-mcache-lookup-bc", action="store_true", help="cache map lookups in the bytecode"
-)
argparser.add_argument("dir", help="input directory")
args = argparser.parse_args()
TARGET_OPTS = {
- "unix": "-mcache-lookup-bc",
+ "unix": "",
"baremetal": "",
}
diff --git a/tools/mpy_ld.py b/tools/mpy_ld.py
index 8522499438..6bc1dbac05 100755
--- a/tools/mpy_ld.py
+++ b/tools/mpy_ld.py
@@ -48,7 +48,6 @@ MP_CODE_NATIVE_VIPER = 4
MP_SCOPE_FLAG_VIPERRELOC = 0x10
MP_SCOPE_FLAG_VIPERRODATA = 0x20
MP_SCOPE_FLAG_VIPERBSS = 0x40
-MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = 1
MICROPY_PY_BUILTINS_STR_UNICODE = 2
MP_SMALL_INT_BITS = 31
QSTR_WINDOW_SIZE = 32
@@ -118,9 +117,7 @@ class ArchData:
ARCH_DATA = {
"x86": ArchData(
"EM_386",
- MP_NATIVE_ARCH_X86 << 2
- | MICROPY_PY_BUILTINS_STR_UNICODE
- | MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
+ MP_NATIVE_ARCH_X86 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
2,
4,
(R_386_PC32, R_386_GOT32, R_386_GOT32X),
@@ -128,9 +125,7 @@ ARCH_DATA = {
),
"x64": ArchData(
"EM_X86_64",
- MP_NATIVE_ARCH_X64 << 2
- | MICROPY_PY_BUILTINS_STR_UNICODE
- | MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
+ MP_NATIVE_ARCH_X64 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE,
2,
8,
(R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX),