aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Python/import.c
diff options
context:
space:
mode:
authorEric Snow <ericsnowcurrently@gmail.com>2024-05-06 22:21:51 -0600
committerGitHub <noreply@github.com>2024-05-07 04:21:51 +0000
commitb2cd54a4fb2ecdb7b1d30bda8af3314d3a32031e (patch)
tree829360e381eef6107cb2aa836bd7c4d79e77f514 /Python/import.c
parent1a23716d4ba6c6dadd76a3662db580f451f660e3 (diff)
downloadcpython-b2cd54a4fb2ecdb7b1d30bda8af3314d3a32031e.tar.gz
cpython-b2cd54a4fb2ecdb7b1d30bda8af3314d3a32031e.zip
gh-117953: Always Run Extension Init Func in Main Interpreter First (gh-118157)
This change makes sure all extension/builtin modules have their init function run first by the main interpreter before proceeding with import in the original interpreter (main or otherwise). This means when the import of a single-phase init module fails in an isolated subinterpreter, it won't tie any global state/callbacks to the subinterpreter.
Diffstat (limited to 'Python/import.c')
-rw-r--r--Python/import.c252
1 files changed, 197 insertions, 55 deletions
diff --git a/Python/import.c b/Python/import.c
index 0b58567dde1..ba44477318d 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict)
return -1;
}
// XXX We may want to make the copy immortal.
- // XXX This incref shouldn't be necessary. We are decref'ing
- // one to many times somewhere.
- Py_INCREF(copied);
value->_m_dict = (struct cached_m_dict){
.copied=copied,
@@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name)
}
#endif
+static PyThreadState *
+switch_to_main_interpreter(PyThreadState *tstate)
+{
+ if (_Py_IsMainInterpreter(tstate->interp)) {
+ return tstate;
+ }
+ PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main());
+ if (main_tstate == NULL) {
+ return NULL;
+ }
+ main_tstate->_whence = _PyThreadState_WHENCE_EXEC;
+#ifndef NDEBUG
+ PyThreadState *old_tstate = PyThreadState_Swap(main_tstate);
+ assert(old_tstate == tstate);
+#else
+ (void)PyThreadState_Swap(main_tstate);
+#endif
+ return main_tstate;
+}
+
static PyObject *
get_core_module_dict(PyInterpreterState *interp,
PyObject *name, PyObject *path)
@@ -1936,24 +1953,171 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
struct extensions_cache_value *cached = NULL;
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
+ /* We cannot know if the module is single-phase init or
+ * multi-phase init until after we call its init function. Even
+ * in isolated interpreters (that do not support single-phase init),
+ * the init function will run without restriction. For multi-phase
+ * init modules that isn't a problem because the init function only
+ * runs PyModuleDef_Init() on the module's def and then returns it.
+ *
+ * However, for single-phase init the module's init function will
+ * create the module, create other objects (and allocate other
+ * memory), populate it and its module state, and initialze static
+ * types. Some modules store other objects and data in global C
+ * variables and register callbacks with the runtime/stdlib or
+ * even external libraries (which is part of why we can't just
+ * dlclose() the module in the error case). That's a problem
+ * for isolated interpreters since all of the above happens
+ * and only then * will the import fail. Memory will leak,
+ * callbacks will still get used, and sometimes there
+ * will be crashes (memory access violations
+ * and use-after-free).
+ *
+ * To put it another way, if the module is single-phase init
+ * then the import will probably break interpreter isolation
+ * and should fail ASAP. However, the module's init function
+ * will still get run. That means it may still store state
+ * in the shared-object/DLL address space (which never gets
+ * closed/cleared), including objects (e.g. static types).
+ * This is a problem for isolated subinterpreters since each
+ * has its own object allocator. If the loaded shared-object
+ * still holds a reference to an object after the corresponding
+ * interpreter has finalized then either we must let it leak
+ * or else any later use of that object by another interpreter
+ * (or across multiple init-fini cycles) will crash the process.
+ *
+ * To avoid all of that, we make sure the module's init function
+ * is always run first with the main interpreter active. If it was
+ * already the main interpreter then we can continue loading the
+ * module like normal. Otherwise, right after the init function,
+ * we take care of some import state bookkeeping, switch back
+ * to the subinterpreter, check for single-phase init,
+ * and then continue loading like normal. */
+
+ bool switched = false;
+ /* We *could* leave in place a legacy interpreter here
+ * (one that shares obmalloc/GIL with main interp),
+ * but there isn't a big advantage, we anticipate
+ * such interpreters will be increasingly uncommon,
+ * and the code is a bit simpler if we always switch
+ * to the main interpreter. */
+ PyThreadState *main_tstate = switch_to_main_interpreter(tstate);
+ if (main_tstate == NULL) {
+ return NULL;
+ }
+ else if (main_tstate != tstate) {
+ switched = true;
+ /* In the switched case, we could play it safe
+ * by getting the main interpreter's import lock here.
+ * It's unlikely to matter though. */
+ }
+
struct _Py_ext_module_loader_result res;
- if (_PyImport_RunModInitFunc(p0, info, &res) < 0) {
+ int rc = _PyImport_RunModInitFunc(p0, info, &res);
+ if (rc < 0) {
/* We discard res.def. */
assert(res.module == NULL);
- _Py_ext_module_loader_result_apply_error(&res, name_buf);
- return NULL;
}
- assert(!PyErr_Occurred());
- assert(res.err == NULL);
+ else {
+ assert(!PyErr_Occurred());
+ assert(res.err == NULL);
+
+ mod = res.module;
+ res.module = NULL;
+ def = res.def;
+ assert(def != NULL);
+
+ /* Do anything else that should be done
+ * while still using the main interpreter. */
+ if (res.kind == _Py_ext_module_kind_SINGLEPHASE) {
+ /* Remember the filename as the __file__ attribute */
+ if (info->filename != NULL) {
+ // XXX There's a refleak somewhere with the filename.
+ // Until we can track it down, we intern it.
+ PyObject *filename = Py_NewRef(info->filename);
+ PyUnicode_InternInPlace(&filename);
+ if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) {
+ PyErr_Clear(); /* Not important enough to report */
+ }
+ }
+
+ /* Update global import state. */
+ assert(def->m_base.m_index != 0);
+ struct singlephase_global_update singlephase = {
+ // XXX Modules that share a def should each get their own index,
+ // whereas currently they share (which means the per-interpreter
+ // cache is less reliable than it should be).
+ .m_index=def->m_base.m_index,
+ .origin=info->origin,
+#ifdef Py_GIL_DISABLED
+ .md_gil=((PyModuleObject *)mod)->md_gil,
+#endif
+ };
+ // gh-88216: Extensions and def->m_base.m_copy can be updated
+ // when the extension module doesn't support sub-interpreters.
+ if (def->m_size == -1) {
+ /* We will reload from m_copy. */
+ assert(def->m_base.m_init == NULL);
+ singlephase.m_dict = PyModule_GetDict(mod);
+ assert(singlephase.m_dict != NULL);
+ }
+ else {
+ /* We will reload via the init function. */
+ assert(def->m_size >= 0);
+ assert(def->m_base.m_copy == NULL);
+ singlephase.m_init = p0;
+ }
+ cached = update_global_state_for_extension(
+ tstate, info->path, info->name, def, &singlephase);
+ if (cached == NULL) {
+ assert(PyErr_Occurred());
+ goto main_finally;
+ }
+ }
+ }
- mod = res.module;
- res.module = NULL;
- def = res.def;
- assert(def != NULL);
+main_finally:
+ /* Switch back to the subinterpreter. */
+ if (switched) {
+ assert(main_tstate != tstate);
+
+ /* Handle any exceptions, which we cannot propagate directly
+ * to the subinterpreter. */
+ if (PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
+ /* We trust it will be caught again soon. */
+ PyErr_Clear();
+ }
+ else {
+ /* Printing the exception should be sufficient. */
+ PyErr_PrintEx(0);
+ }
+ }
+
+ /* Any module we got from the init function will have to be
+ * reloaded in the subinterpreter. */
+ Py_CLEAR(mod);
+
+ PyThreadState_Clear(main_tstate);
+ (void)PyThreadState_Swap(tstate);
+ PyThreadState_Delete(main_tstate);
+ }
+
+ /*****************************************************************/
+ /* At this point we are back to the interpreter we started with. */
+ /*****************************************************************/
+
+ /* Finally we handle the error return from _PyImport_RunModInitFunc(). */
+ if (rc < 0) {
+ _Py_ext_module_loader_result_apply_error(&res, name_buf);
+ goto error;
+ }
if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
assert_multiphase_def(def);
assert(mod == NULL);
+ /* Note that we cheat a little by not repeating the calls
+ * to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */
mod = PyModule_FromDefAndSpec(def, spec);
if (mod == NULL) {
goto error;
@@ -1962,57 +2126,35 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
else {
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
assert_singlephase_def(def);
- assert(PyModule_Check(mod));
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
goto error;
}
+ assert(!PyErr_Occurred());
- /* Remember the filename as the __file__ attribute */
- if (info->filename != NULL) {
- if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
- PyErr_Clear(); /* Not important enough to report */
+ if (switched) {
+ /* We switched to the main interpreter to run the init
+ * function, so now we will "reload" the module from the
+ * cached data using the original subinterpreter. */
+ assert(mod == NULL);
+ mod = reload_singlephase_extension(tstate, cached, info);
+ if (mod == NULL) {
+ goto error;
}
- }
-
- /* Update global import state. */
- assert(def->m_base.m_index != 0);
- struct singlephase_global_update singlephase = {
- // XXX Modules that share a def should each get their own index,
- // whereas currently they share (which means the per-interpreter
- // cache is less reliable than it should be).
- .m_index=def->m_base.m_index,
- .origin=info->origin,
-#ifdef Py_GIL_DISABLED
- .md_gil=((PyModuleObject *)mod)->md_gil,
-#endif
- };
- // gh-88216: Extensions and def->m_base.m_copy can be updated
- // when the extension module doesn't support sub-interpreters.
- if (def->m_size == -1) {
- /* We will reload from m_copy. */
- assert(def->m_base.m_init == NULL);
- singlephase.m_dict = PyModule_GetDict(mod);
- assert(singlephase.m_dict != NULL);
+ assert(!PyErr_Occurred());
+ assert(PyModule_Check(mod));
}
else {
- /* We will reload via the init function. */
- assert(def->m_size >= 0);
- assert(def->m_base.m_copy == NULL);
- singlephase.m_init = p0;
- }
- cached = update_global_state_for_extension(
- tstate, info->path, info->name, def, &singlephase);
- if (cached == NULL) {
- goto error;
- }
-
- /* Update per-interpreter import state. */
- PyObject *modules = get_modules_dict(tstate, true);
- if (finish_singlephase_extension(
- tstate, mod, cached, info->name, modules) < 0)
- {
- goto error;
+ assert(mod != NULL);
+ assert(PyModule_Check(mod));
+
+ /* Update per-interpreter import state. */
+ PyObject *modules = get_modules_dict(tstate, true);
+ if (finish_singlephase_extension(
+ tstate, mod, cached, info->name, modules) < 0)
+ {
+ goto error;
+ }
}
}