diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2024-05-06 22:21:51 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-07 04:21:51 +0000 |
commit | b2cd54a4fb2ecdb7b1d30bda8af3314d3a32031e (patch) | |
tree | 829360e381eef6107cb2aa836bd7c4d79e77f514 /Python/import.c | |
parent | 1a23716d4ba6c6dadd76a3662db580f451f660e3 (diff) | |
download | cpython-b2cd54a4fb2ecdb7b1d30bda8af3314d3a32031e.tar.gz cpython-b2cd54a4fb2ecdb7b1d30bda8af3314d3a32031e.zip |
gh-117953: Always Run Extension Init Func in Main Interpreter First (gh-118157)
This change makes sure all extension/builtin modules have their init function run first by the main interpreter before proceeding with import in the original interpreter (main or otherwise). This means when the import of a single-phase init module fails in an isolated subinterpreter, it won't tie any global state/callbacks to the subinterpreter.
Diffstat (limited to 'Python/import.c')
-rw-r--r-- | Python/import.c | 252 |
1 files changed, 197 insertions, 55 deletions
diff --git a/Python/import.c b/Python/import.c index 0b58567dde1..ba44477318d 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) return -1; } // XXX We may want to make the copy immortal. - // XXX This incref shouldn't be necessary. We are decref'ing - // one to many times somewhere. - Py_INCREF(copied); value->_m_dict = (struct cached_m_dict){ .copied=copied, @@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name) } #endif +static PyThreadState * +switch_to_main_interpreter(PyThreadState *tstate) +{ + if (_Py_IsMainInterpreter(tstate->interp)) { + return tstate; + } + PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main()); + if (main_tstate == NULL) { + return NULL; + } + main_tstate->_whence = _PyThreadState_WHENCE_EXEC; +#ifndef NDEBUG + PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); + assert(old_tstate == tstate); +#else + (void)PyThreadState_Swap(main_tstate); +#endif + return main_tstate; +} + static PyObject * get_core_module_dict(PyInterpreterState *interp, PyObject *name, PyObject *path) @@ -1936,24 +1953,171 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, struct extensions_cache_value *cached = NULL; const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + /* We cannot know if the module is single-phase init or + * multi-phase init until after we call its init function. Even + * in isolated interpreters (that do not support single-phase init), + * the init function will run without restriction. For multi-phase + * init modules that isn't a problem because the init function only + * runs PyModuleDef_Init() on the module's def and then returns it. + * + * However, for single-phase init the module's init function will + * create the module, create other objects (and allocate other + * memory), populate it and its module state, and initialze static + * types. Some modules store other objects and data in global C + * variables and register callbacks with the runtime/stdlib or + * even external libraries (which is part of why we can't just + * dlclose() the module in the error case). That's a problem + * for isolated interpreters since all of the above happens + * and only then * will the import fail. Memory will leak, + * callbacks will still get used, and sometimes there + * will be crashes (memory access violations + * and use-after-free). + * + * To put it another way, if the module is single-phase init + * then the import will probably break interpreter isolation + * and should fail ASAP. However, the module's init function + * will still get run. That means it may still store state + * in the shared-object/DLL address space (which never gets + * closed/cleared), including objects (e.g. static types). + * This is a problem for isolated subinterpreters since each + * has its own object allocator. If the loaded shared-object + * still holds a reference to an object after the corresponding + * interpreter has finalized then either we must let it leak + * or else any later use of that object by another interpreter + * (or across multiple init-fini cycles) will crash the process. + * + * To avoid all of that, we make sure the module's init function + * is always run first with the main interpreter active. If it was + * already the main interpreter then we can continue loading the + * module like normal. Otherwise, right after the init function, + * we take care of some import state bookkeeping, switch back + * to the subinterpreter, check for single-phase init, + * and then continue loading like normal. */ + + bool switched = false; + /* We *could* leave in place a legacy interpreter here + * (one that shares obmalloc/GIL with main interp), + * but there isn't a big advantage, we anticipate + * such interpreters will be increasingly uncommon, + * and the code is a bit simpler if we always switch + * to the main interpreter. */ + PyThreadState *main_tstate = switch_to_main_interpreter(tstate); + if (main_tstate == NULL) { + return NULL; + } + else if (main_tstate != tstate) { + switched = true; + /* In the switched case, we could play it safe + * by getting the main interpreter's import lock here. + * It's unlikely to matter though. */ + } + struct _Py_ext_module_loader_result res; - if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { + int rc = _PyImport_RunModInitFunc(p0, info, &res); + if (rc < 0) { /* We discard res.def. */ assert(res.module == NULL); - _Py_ext_module_loader_result_apply_error(&res, name_buf); - return NULL; } - assert(!PyErr_Occurred()); - assert(res.err == NULL); + else { + assert(!PyErr_Occurred()); + assert(res.err == NULL); + + mod = res.module; + res.module = NULL; + def = res.def; + assert(def != NULL); + + /* Do anything else that should be done + * while still using the main interpreter. */ + if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + // XXX There's a refleak somewhere with the filename. + // Until we can track it down, we intern it. + PyObject *filename = Py_NewRef(info->filename); + PyUnicode_InternInPlace(&filename); + if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } + } + + /* Update global import state. */ + assert(def->m_base.m_index != 0); + struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, + .origin=info->origin, +#ifdef Py_GIL_DISABLED + .md_gil=((PyModuleObject *)mod)->md_gil, +#endif + }; + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + if (def->m_size == -1) { + /* We will reload from m_copy. */ + assert(def->m_base.m_init == NULL); + singlephase.m_dict = PyModule_GetDict(mod); + assert(singlephase.m_dict != NULL); + } + else { + /* We will reload via the init function. */ + assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); + singlephase.m_init = p0; + } + cached = update_global_state_for_extension( + tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { + assert(PyErr_Occurred()); + goto main_finally; + } + } + } - mod = res.module; - res.module = NULL; - def = res.def; - assert(def != NULL); +main_finally: + /* Switch back to the subinterpreter. */ + if (switched) { + assert(main_tstate != tstate); + + /* Handle any exceptions, which we cannot propagate directly + * to the subinterpreter. */ + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_MemoryError)) { + /* We trust it will be caught again soon. */ + PyErr_Clear(); + } + else { + /* Printing the exception should be sufficient. */ + PyErr_PrintEx(0); + } + } + + /* Any module we got from the init function will have to be + * reloaded in the subinterpreter. */ + Py_CLEAR(mod); + + PyThreadState_Clear(main_tstate); + (void)PyThreadState_Swap(tstate); + PyThreadState_Delete(main_tstate); + } + + /*****************************************************************/ + /* At this point we are back to the interpreter we started with. */ + /*****************************************************************/ + + /* Finally we handle the error return from _PyImport_RunModInitFunc(). */ + if (rc < 0) { + _Py_ext_module_loader_result_apply_error(&res, name_buf); + goto error; + } if (res.kind == _Py_ext_module_kind_MULTIPHASE) { assert_multiphase_def(def); assert(mod == NULL); + /* Note that we cheat a little by not repeating the calls + * to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */ mod = PyModule_FromDefAndSpec(def, spec); if (mod == NULL) { goto error; @@ -1962,57 +2126,35 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, else { assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); assert_singlephase_def(def); - assert(PyModule_Check(mod)); if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { goto error; } + assert(!PyErr_Occurred()); - /* Remember the filename as the __file__ attribute */ - if (info->filename != NULL) { - if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { - PyErr_Clear(); /* Not important enough to report */ + if (switched) { + /* We switched to the main interpreter to run the init + * function, so now we will "reload" the module from the + * cached data using the original subinterpreter. */ + assert(mod == NULL); + mod = reload_singlephase_extension(tstate, cached, info); + if (mod == NULL) { + goto error; } - } - - /* Update global import state. */ - assert(def->m_base.m_index != 0); - struct singlephase_global_update singlephase = { - // XXX Modules that share a def should each get their own index, - // whereas currently they share (which means the per-interpreter - // cache is less reliable than it should be). - .m_index=def->m_base.m_index, - .origin=info->origin, -#ifdef Py_GIL_DISABLED - .md_gil=((PyModuleObject *)mod)->md_gil, -#endif - }; - // gh-88216: Extensions and def->m_base.m_copy can be updated - // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1) { - /* We will reload from m_copy. */ - assert(def->m_base.m_init == NULL); - singlephase.m_dict = PyModule_GetDict(mod); - assert(singlephase.m_dict != NULL); + assert(!PyErr_Occurred()); + assert(PyModule_Check(mod)); } else { - /* We will reload via the init function. */ - assert(def->m_size >= 0); - assert(def->m_base.m_copy == NULL); - singlephase.m_init = p0; - } - cached = update_global_state_for_extension( - tstate, info->path, info->name, def, &singlephase); - if (cached == NULL) { - goto error; - } - - /* Update per-interpreter import state. */ - PyObject *modules = get_modules_dict(tstate, true); - if (finish_singlephase_extension( - tstate, mod, cached, info->name, modules) < 0) - { - goto error; + assert(mod != NULL); + assert(PyModule_Check(mod)); + + /* Update per-interpreter import state. */ + PyObject *modules = get_modules_dict(tstate, true); + if (finish_singlephase_extension( + tstate, mod, cached, info->name, modules) < 0) + { + goto error; + } } } |