summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorDamien George <damien@micropython.org>2024-09-25 14:06:00 +1000
committerDamien George <damien@micropython.org>2024-09-26 22:15:28 +1000
commit5b22bde044b27aaf82cde8d7609ba37015b37b4c (patch)
tree6b88129a7aa71af7074d74a433f4f728e055ed11
parentf4ab9d924790581989f2398fe30bbac5d680577f (diff)
downloadmicropython-5b22bde044b27aaf82cde8d7609ba37015b37b4c.tar.gz
micropython-5b22bde044b27aaf82cde8d7609ba37015b37b4c.zip
py/persistentcode: Explicitly track native BSS/rodata when needed.
Signed-off-by: Damien George <damien@micropython.org>
-rw-r--r--py/mpconfig.h58
-rw-r--r--py/persistentcode.c41
-rw-r--r--py/runtime.c4
-rw-r--r--tests/micropython/import_mpy_native_gc.py39
-rw-r--r--tests/micropython/import_mpy_native_gc.py.exp5
-rw-r--r--tests/micropython/import_mpy_native_gc_module/Makefile11
-rw-r--r--tests/micropython/import_mpy_native_gc_module/test.c38
7 files changed, 144 insertions, 52 deletions
diff --git a/py/mpconfig.h b/py/mpconfig.h
index b5414312c7..34eafa9e5d 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -425,18 +425,6 @@
// Convenience definition for whether any native or inline assembler emitter is enabled
#define MICROPY_EMIT_MACHINE_CODE (MICROPY_EMIT_NATIVE || MICROPY_EMIT_INLINE_ASM)
-// Whether native relocatable code loaded from .mpy files is explicitly tracked
-// so that the GC cannot reclaim it. Needed on architectures that allocate
-// executable memory on the MicroPython heap and don't explicitly track this
-// data some other way.
-#ifndef MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
-#if !MICROPY_EMIT_MACHINE_CODE || defined(MP_PLAT_ALLOC_EXEC) || defined(MP_PLAT_COMMIT_EXEC)
-#define MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE (0)
-#else
-#define MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE (1)
-#endif
-#endif
-
/*****************************************************************************/
/* Compiler configuration */
@@ -1992,14 +1980,48 @@ typedef double mp_float_t;
#define MICROPY_MAKE_POINTER_CALLABLE(p) (p)
#endif
-// If these MP_PLAT_*_EXEC macros are overridden then the memory allocated by them
-// must be somehow reachable for marking by the GC, since the native code
-// generators store pointers to GC managed memory in the code.
+// Whether native text/BSS/rodata memory loaded from .mpy files is explicitly tracked
+// so that the GC cannot reclaim it.
+//
+// In general a port should let these options have their defaults, but the defaults here
+// can be overridden if needed by defining both MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
+// and MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA.
+#ifndef MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
+#if MICROPY_EMIT_MACHINE_CODE && MICROPY_PERSISTENT_CODE_LOAD
+// Pointer tracking is required when loading native code is enabled.
+#if defined(MP_PLAT_ALLOC_EXEC) || defined(MP_PLAT_COMMIT_EXEC)
+// If a port defined a custom allocator or commit function for native text, then the
+// text does not need to be tracked (its allocation is managed by the port). But the
+// BSS/rodata must be tracked (if there is any) because if there are any pointers to it
+// in the function data, they aren't traced by the GC.
+#define MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA (0)
+#define MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA (1)
+#else
+// If a port uses the default allocator (the GC heap) then all native text is allocated
+// on the GC heap. But it's not guaranteed that a pointer to the head of the block of
+// native text (which may contain multiple native functions) will be retained for the GC
+// to trace. This is because native functions can start inside the big block of text
+// and so it's possible that the only GC-reachable pointers are pointers inside.
+// Therefore the big block is explicitly tracked. If there is any BSS/rodata memory,
+// then it does not need to be explicitly tracked because a pointer to it is stored into
+// the function text via `mp_native_relocate()`.
+#define MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA (1)
+#define MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA (0)
+#endif
+#else // MICROPY_EMIT_MACHINE_CODE && MICROPY_PERSISTENT_CODE_LOAD
+// Pointer tracking not needed when loading native code is disabled.
+#define MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA (0)
+#define MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA (0)
+#endif
+#endif
+
+// If these macros are defined then the memory allocated by them does not need to be
+// traced by the GC. But if they are left undefined then the GC heap will be used as
+// the allocator and the memory must be traced by the GC. See also above logic for
+// enabling MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA and
+// MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA.
#ifndef MP_PLAT_ALLOC_EXEC
#define MP_PLAT_ALLOC_EXEC(min_size, ptr, size) do { *ptr = m_new(byte, min_size); *size = min_size; } while (0)
-#endif
-
-#ifndef MP_PLAT_FREE_EXEC
#define MP_PLAT_FREE_EXEC(ptr, size) m_del(byte, ptr, size)
#endif
diff --git a/py/persistentcode.c b/py/persistentcode.c
index 5088c05f03..be93eaa5b4 100644
--- a/py/persistentcode.c
+++ b/py/persistentcode.c
@@ -72,6 +72,20 @@ typedef struct _bytecode_prelude_t {
static int read_byte(mp_reader_t *reader);
static size_t read_uint(mp_reader_t *reader);
+#if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
+
+// An mp_obj_list_t that tracks native text/BSS/rodata to prevent the GC from reclaiming them.
+MP_REGISTER_ROOT_POINTER(mp_obj_t persistent_code_root_pointers);
+
+static void track_root_pointer(void *ptr) {
+ if (MP_STATE_PORT(persistent_code_root_pointers) == MP_OBJ_NULL) {
+ MP_STATE_PORT(persistent_code_root_pointers) = mp_obj_new_list(0, NULL);
+ }
+ mp_obj_list_append(MP_STATE_PORT(persistent_code_root_pointers), MP_OBJ_FROM_PTR(ptr));
+}
+
+#endif
+
#if MICROPY_EMIT_MACHINE_CODE
typedef struct _reloc_info_t {
@@ -299,11 +313,10 @@ static mp_raw_code_t *load_raw_code(mp_reader_t *reader, mp_module_context_t *co
read_bytes(reader, rodata, rodata_size);
}
- // Viper code with BSS/rodata should not have any children.
- // Reuse the children pointer to reference the BSS/rodata
- // memory so that it is not reclaimed by the GC.
- assert(!has_children);
- children = (void *)data;
+ #if MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
+ // Track the BSS/rodata memory so it's not reclaimed by the GC.
+ track_root_pointer(data);
+ #endif
}
}
#endif
@@ -351,16 +364,9 @@ static mp_raw_code_t *load_raw_code(mp_reader_t *reader, mp_module_context_t *co
fun_data = MP_PLAT_COMMIT_EXEC(fun_data, fun_data_len, opt_ri);
#else
if (native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC) {
- #if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
- // If native code needs relocations then it's not guaranteed that a pointer to
- // the head of `buf` (containing the machine code) will be retained for the GC
- // to trace. This is because native functions can start inside `buf` and so
- // it's possible that the only GC-reachable pointers are pointers inside `buf`.
- // So put this `buf` on a list of reachable root pointers.
- if (MP_STATE_PORT(track_reloc_code_list) == MP_OBJ_NULL) {
- MP_STATE_PORT(track_reloc_code_list) = mp_obj_new_list(0, NULL);
- }
- mp_obj_list_append(MP_STATE_PORT(track_reloc_code_list), MP_OBJ_FROM_PTR(fun_data));
+ #if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
+ // Track the function data memory so it's not reclaimed by the GC.
+ track_root_pointer(fun_data);
#endif
// Do the relocations.
mp_native_relocate(&ri, fun_data, (uintptr_t)fun_data);
@@ -662,8 +668,3 @@ void mp_raw_code_save_file(mp_compiled_module_t *cm, qstr filename) {
#endif // MICROPY_PERSISTENT_CODE_SAVE_FILE
#endif // MICROPY_PERSISTENT_CODE_SAVE
-
-#if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
-// An mp_obj_list_t that tracks relocated native code to prevent the GC from reclaiming them.
-MP_REGISTER_ROOT_POINTER(mp_obj_t track_reloc_code_list);
-#endif
diff --git a/py/runtime.c b/py/runtime.c
index acb45c94b0..deb55bf283 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -119,8 +119,8 @@ void mp_init(void) {
MP_STATE_VM(mp_module_builtins_override_dict) = NULL;
#endif
- #if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
- MP_STATE_VM(track_reloc_code_list) = MP_OBJ_NULL;
+ #if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
+ MP_STATE_VM(persistent_code_root_pointers) = MP_OBJ_NULL;
#endif
#if MICROPY_PY_OS_DUPTERM
diff --git a/tests/micropython/import_mpy_native_gc.py b/tests/micropython/import_mpy_native_gc.py
index 4b7acd9c65..bdeb612b49 100644
--- a/tests/micropython/import_mpy_native_gc.py
+++ b/tests/micropython/import_mpy_native_gc.py
@@ -1,4 +1,4 @@
-# Test that native code loaded from a .mpy file is retained after a GC.
+# Test that native text/BSS/rodata loaded from a .mpy file is retained after a GC.
try:
import gc, sys, io, vfs
@@ -44,17 +44,20 @@ class UserFS:
return UserFile(self.files[path])
-# Pre-compiled examples/natmod/features0 example for various architectures, keyed
+# Pre-compiled import_mpy_native_gc_module example for various architectures, keyed
# by the required value of sys.implementation._mpy (without sub-version).
-# cd examples/natmod/features0
-# make clean
-# make ARCH=x64 # or ARCH=armv6m
-# cat features0.mpy | python -c 'import sys; print(sys.stdin.buffer.read())'
+# To rebuild:
+# $ cd import_mpy_native_gc_module
+# $ make clean
+# $ make ARCH=x64 # or ARCH=armv6m or ARCH=xtensawin
+# Then copy the bytes object printed on the last line.
features0_file_contents = {
# -march=x64
- 0x806: b'M\x06\x0b\x1f\x02\x004build/features0.native.mpy\x00\x12factorial\x00\x8a\x02\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb7x\x02\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x11$\r&\xaf \x01"\xff',
+ 0x806: b"M\x06\x0b\x1f\x03\x002build/test_x64.native.mpy\x00\x08add1\x00\x0cunused\x00\x91B\xe9I\x00\x00\x00H\x8b\x05\xf4\x00\x00\x00H\x8b\x00\xc3H\x8b\x05\xf9\x00\x00\x00\xbe\x02\x00\x00\x00\x8b8H\x8b\x05\xdb\x00\x00\x00H\x8b@ \xff\xe0H\x8b\x05\xce\x00\x00\x00S\xbe\x02\x00\x00\x00H\x8bX \xffP\x18\xbe\x02\x00\x00\x00H\x8dx\x01H\x89\xd8[\xff\xe0AVAUATUSH\x8b\x1d\xa3\x00\x00\x00H\x8bG\x08L\x8bk(H\x8bx\x08A\xff\xd5L\x8b5\x95\x00\x00\x00L\x8bchH\x8d5r\x00\x00\x00H\x89\xc5H\x8b\x05\x88\x00\x00\x00A\x0f\xb7~\x04\xc7\x00@\xe2\x01\x00A\xff\xd4H\x8d5C\x00\x00\x00\xbfV\x00\x00\x00A\xff\xd4A\x0f\xb7~\x02H\x8d5\x1f\x00\x00\x00A\xff\xd4H\x89\xefA\xff\xd5H\x8b\x03[]A\\A]A^\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x04\x11@\rB\tD\xaf4\x016\xad8\x01:\xaf<\x01>\xff",
# -march=armv6m
- 0x1006: b"M\x06\x13\x1f\x02\x004build/features0.native.mpy\x00\x12factorial\x00\x88\x02\x18\xe0\x00\x00\x10\xb5\tK\tJ{D\x9cX\x02!\xe3h\x98G\x03\x00\x01 \x00+\x02\xd0XC\x01;\xfa\xe7\x02!#i\x98G\x10\xbd\xc0Fj\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05\x00\x07K\x08I\xf3XyDX\x88ck\x98G(\x00\xb8G h\xf8\xbd\xc0F:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x11<\r>\xaf8\x01:\xff",
+ 0x1006: b"M\x06\x13\x1f\x03\x008build/test_armv6m.native.mpy\x00\x08add1\x00\x0cunused\x00\x8eb0\xe0\x00\x00\x00\x00\x00\x00\x02K\x03J{D\x9bX\x18hpG\xd0\x00\x00\x00\x00\x00\x00\x00\x10\xb5\x05K\x05I\x06J{D\x9aX[X\x10h\x02!\x1bi\x98G\x10\xbd\xb8\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x10\xb5\x06K\x06J{D\x9bX\x02!\x1ci\xdbh\x98G\x02!\x010\xa0G\x10\xbd\xc0F\x96\x00\x00\x00\x00\x00\x00\x00\xf7\xb5\x12O\x12K\x7fD\xfdX\x12Lki|D\x00\x93ChXh\x00\x9b\x98G\x0fK\x01\x90\x0fJ\xfbXnk\x1a`\x0eK!\x00\xffX\xb8\x88\xb0G!\x00V \x081\xb0G!\x00x\x88\x101\xb0G\x01\x98\x00\x9b\x98G(h\xfe\xbd\xc0Fr\x00\x00\x00\x00\x00\x00\x00R\x00\x00\x00\x08\x00\x00\x00@\xe2\x01\x00\x04\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x1d\x00\x00\x00\x00\x00\x00\x00A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x04\x11p\rr\tt\xafd\x01f\xadh\x01j\xafl\x01n\xff",
+ # -march=xtensawin
+ 0x2806: b"M\x06+\x1f\x03\x00>build/test_xtensawin.native.mpy\x00\x08add1\x00\x0cunused\x00\x8a\x12\x06\x16\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x006A\x00\x81\xf9\xff(\x08\x1d\xf0\x00\x006A\x00\x91\xfb\xff\x81\xf5\xff\xa8\t\x88H\x0c+\xe0\x08\x00-\n\x1d\xf0\x00\x006A\x00\x81\xf0\xff\xad\x02xH\x888\x0c+\xe0\x08\x00\x0c+\x1b\xaa\xe0\x07\x00-\n\x1d\xf06A\x00a\xe9\xff\x88\x122&\x05\xa2(\x01\xe0\x03\x00q\xe6\xff\x81\xea\xff\x92\xa7\x89\xa0\x99\x11H\xd6]\n\xb1\xe3\xff\xa2\x17\x02\x99\x08\xe0\x04\x00\xb1\xe2\xff\\j\xe0\x04\x00\xb1\xe1\xff\xa2\x17\x01\xe0\x04\x00\xad\x05\xe0\x03\x00(\x06\x1d\xf0p\x18\x04\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00(\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x11\x02\r\x04\x07\x06\x03\t\x0c\xaf\x01\x01\x03\xad\x05\x01\x07\xaf\t\x01\x0b\xff",
}
# Populate armv7m-derived archs based on armv6m.
@@ -76,11 +79,24 @@ sys.path.append("/userfs")
# Import the native function.
gc.collect()
-from features0 import factorial
+from features0 import get, add1
+
+# Test that the native functions work to begin with.
+print(get())
+print(add1(12))
# Free the module that contained the function.
del sys.modules["features0"]
+
+# Sweep the stack to remove any stray pointers that we are aiming to reclaim.
+def recurse(n):
+ if n:
+ recurse(n - 1)
+
+
+recurse(10)
+
# Run a GC cycle which should reclaim the module but not the function.
gc.collect()
@@ -88,8 +104,9 @@ gc.collect()
for i in range(1000):
[]
-# Run the native function, it should not have been freed or overwritten.
-print(factorial(10))
+# Run the native function, its text/BSS/rodata should not have been freed or overwritten.
+print(get())
+print(add1(12))
# Unmount and undo path addition.
vfs.umount("/userfs")
diff --git a/tests/micropython/import_mpy_native_gc.py.exp b/tests/micropython/import_mpy_native_gc.py.exp
index 3fbd4a8698..4250a13c72 100644
--- a/tests/micropython/import_mpy_native_gc.py.exp
+++ b/tests/micropython/import_mpy_native_gc.py.exp
@@ -1 +1,4 @@
-3628800
+123456
+13
+123456
+13
diff --git a/tests/micropython/import_mpy_native_gc_module/Makefile b/tests/micropython/import_mpy_native_gc_module/Makefile
new file mode 100644
index 0000000000..935ba6b59f
--- /dev/null
+++ b/tests/micropython/import_mpy_native_gc_module/Makefile
@@ -0,0 +1,11 @@
+MPY_DIR = ../../..
+
+MOD = test_$(ARCH)
+SRC = test.c
+ARCH = x64
+
+.PHONY: main
+main: all
+ $(Q)cat $(MOD).mpy | python -c 'import sys; print(sys.stdin.buffer.read())'
+
+include $(MPY_DIR)/py/dynruntime.mk
diff --git a/tests/micropython/import_mpy_native_gc_module/test.c b/tests/micropython/import_mpy_native_gc_module/test.c
new file mode 100644
index 0000000000..7ec7db42da
--- /dev/null
+++ b/tests/micropython/import_mpy_native_gc_module/test.c
@@ -0,0 +1,38 @@
+// This test native module is used by import_mpy_native_gc.py.
+// It has:
+// - A variable in the BSS, to check that the BSS is not reclaimed by the GC.
+// - An unused native function at the start so that subsequent native functions
+// don't start at the beginning of the native function data. This tests that the
+// GC doesn't reclaim the native function data even when the only pointer to that
+// data is pointing inside the allocated memory.
+
+#include "py/dynruntime.h"
+
+uint32_t bss_variable;
+
+static mp_obj_t unused(mp_obj_t x_obj) {
+ return mp_const_none;
+}
+static MP_DEFINE_CONST_FUN_OBJ_1(unused_obj, unused);
+
+static mp_obj_t get(void) {
+ return mp_obj_new_int(bss_variable);
+}
+static MP_DEFINE_CONST_FUN_OBJ_0(get_obj, get);
+
+static mp_obj_t add1(mp_obj_t x_obj) {
+ return mp_obj_new_int(mp_obj_get_int(x_obj) + 1);
+}
+static MP_DEFINE_CONST_FUN_OBJ_1(add1_obj, add1);
+
+mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *args) {
+ MP_DYNRUNTIME_INIT_ENTRY
+
+ bss_variable = 123456;
+
+ mp_store_global(MP_QSTR_unused, MP_OBJ_FROM_PTR(&unused_obj));
+ mp_store_global(MP_QSTR_get, MP_OBJ_FROM_PTR(&get_obj));
+ mp_store_global(MP_QSTR_add1, MP_OBJ_FROM_PTR(&add1_obj));
+
+ MP_DYNRUNTIME_INIT_EXIT
+}