diff options
Diffstat (limited to 'Objects/codeobject.c')
-rw-r--r-- | Objects/codeobject.c | 475 |
1 files changed, 452 insertions, 23 deletions
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index bf24a4af445..ba178abc0c0 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -17,6 +17,7 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() #include "pycore_uniqueid.h" // _PyObject_AssignUniqueId() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/codeobject.c.h" #include <stdbool.h> @@ -1690,12 +1691,414 @@ PyCode_GetFreevars(PyCodeObject *code) } +#define GET_OPARG(co, i, initial) (initial) +// We may want to move these macros to pycore_opcode_utils.h +// and use them in Python/bytecodes.c. +#define LOAD_GLOBAL_NAME_INDEX(oparg) ((oparg)>>1) +#define LOAD_ATTR_NAME_INDEX(oparg) ((oparg)>>1) + +#ifndef Py_DEBUG +#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) +#else +static inline PyObject * +GETITEM(PyObject *v, Py_ssize_t i) +{ + assert(PyTuple_Check(v)); + assert(i >= 0); + assert(i < PyTuple_GET_SIZE(v)); + assert(PyTuple_GET_ITEM(v, i) != NULL); + return PyTuple_GET_ITEM(v, i); +} +#endif + +static int +identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, + PyObject *globalnames, PyObject *attrnames, + PyObject *globalsns, PyObject *builtinsns, + struct co_unbound_counts *counts, int *p_numdupes) +{ + // This function is inspired by inspect.getclosurevars(). + // It would be nicer if we had something similar to co_localspluskinds, + // but for co_names. + assert(globalnames != NULL); + assert(PySet_Check(globalnames)); + assert(PySet_GET_SIZE(globalnames) == 0 || counts != NULL); + assert(attrnames != NULL); + assert(PySet_Check(attrnames)); + assert(PySet_GET_SIZE(attrnames) == 0 || counts != NULL); + assert(globalsns == NULL || PyDict_Check(globalsns)); + assert(builtinsns == NULL || PyDict_Check(builtinsns)); + assert(counts == NULL || counts->total == 0); + struct co_unbound_counts unbound = {0}; + int numdupes = 0; + Py_ssize_t len = Py_SIZE(co); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (inst.op.code == LOAD_ATTR) { + int oparg = GET_OPARG(co, i, inst.op.arg); + int index = LOAD_ATTR_NAME_INDEX(oparg); + PyObject *name = GETITEM(co->co_names, index); + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + continue; + } + unbound.total += 1; + unbound.numattrs += 1; + if (PySet_Add(attrnames, name) < 0) { + return -1; + } + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } + } + else if (inst.op.code == LOAD_GLOBAL) { + int oparg = GET_OPARG(co, i, inst.op.arg); + int index = LOAD_ATTR_NAME_INDEX(oparg); + PyObject *name = GETITEM(co->co_names, index); + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + continue; + } + unbound.total += 1; + unbound.globals.total += 1; + if (globalsns != NULL && PyDict_Contains(globalsns, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + unbound.globals.numglobal += 1; + } + else if (builtinsns != NULL && PyDict_Contains(builtinsns, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + unbound.globals.numbuiltin += 1; + } + else { + unbound.globals.numunknown += 1; + } + if (PySet_Add(globalnames, name) < 0) { + return -1; + } + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } + } + } + if (counts != NULL) { + *counts = unbound; + } + if (p_numdupes != NULL) { + *p_numdupes = numdupes; + } + return 0; +} + + +void +_PyCode_GetVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts) +{ + assert(counts != NULL); + + // Count the locals, cells, and free vars. + struct co_locals_counts locals = {0}; + int numfree = 0; + PyObject *kinds = co->co_localspluskinds; + Py_ssize_t numlocalplusfree = PyBytes_GET_SIZE(kinds); + for (int i = 0; i < numlocalplusfree; i++) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); + if (kind & CO_FAST_FREE) { + assert(!(kind & CO_FAST_LOCAL)); + assert(!(kind & CO_FAST_HIDDEN)); + assert(!(kind & CO_FAST_ARG)); + numfree += 1; + } + else { + // Apparently not all non-free vars a CO_FAST_LOCAL. + assert(kind); + locals.total += 1; + if (kind & CO_FAST_ARG) { + locals.args.total += 1; + if (kind & CO_FAST_ARG_VAR) { + if (kind & CO_FAST_ARG_POS) { + assert(!(kind & CO_FAST_ARG_KW)); + assert(!locals.args.varargs); + locals.args.varargs = 1; + } + else { + assert(kind & CO_FAST_ARG_KW); + assert(!locals.args.varkwargs); + locals.args.varkwargs = 1; + } + } + else if (kind & CO_FAST_ARG_POS) { + if (kind & CO_FAST_ARG_KW) { + locals.args.numposorkw += 1; + } + else { + locals.args.numposonly += 1; + } + } + else { + assert(kind & CO_FAST_ARG_KW); + locals.args.numkwonly += 1; + } + if (kind & CO_FAST_CELL) { + locals.cells.total += 1; + locals.cells.numargs += 1; + } + // Args are never hidden currently. + assert(!(kind & CO_FAST_HIDDEN)); + } + else { + if (kind & CO_FAST_CELL) { + locals.cells.total += 1; + locals.cells.numothers += 1; + if (kind & CO_FAST_HIDDEN) { + locals.hidden.total += 1; + locals.hidden.numcells += 1; + } + } + else { + locals.numpure += 1; + if (kind & CO_FAST_HIDDEN) { + locals.hidden.total += 1; + locals.hidden.numpure += 1; + } + } + } + } + } + assert(locals.args.total == ( + co->co_argcount + co->co_kwonlyargcount + + !!(co->co_flags & CO_VARARGS) + + !!(co->co_flags & CO_VARKEYWORDS))); + assert(locals.args.numposonly == co->co_posonlyargcount); + assert(locals.args.numposonly + locals.args.numposorkw == co->co_argcount); + assert(locals.args.numkwonly == co->co_kwonlyargcount); + assert(locals.cells.total == co->co_ncellvars); + assert(locals.args.total + locals.numpure == co->co_nlocals); + assert(locals.total + locals.cells.numargs == co->co_nlocals + co->co_ncellvars); + assert(locals.total + numfree == co->co_nlocalsplus); + assert(numfree == co->co_nfreevars); + + // Get the unbound counts. + assert(PyTuple_GET_SIZE(co->co_names) >= 0); + assert(PyTuple_GET_SIZE(co->co_names) < INT_MAX); + int numunbound = (int)PyTuple_GET_SIZE(co->co_names); + struct co_unbound_counts unbound = { + .total = numunbound, + // numglobal and numattrs can be set later + // with _PyCode_SetUnboundVarCounts(). + .numunknown = numunbound, + }; + + // "Return" the result. + *counts = (_PyCode_var_counts_t){ + .total = locals.total + numfree + unbound.total, + .locals = locals, + .numfree = numfree, + .unbound = unbound, + }; +} + +int +_PyCode_SetUnboundVarCounts(PyThreadState *tstate, + PyCodeObject *co, _PyCode_var_counts_t *counts, + PyObject *globalnames, PyObject *attrnames, + PyObject *globalsns, PyObject *builtinsns) +{ + int res = -1; + PyObject *globalnames_owned = NULL; + PyObject *attrnames_owned = NULL; + + // Prep the name sets. + if (globalnames == NULL) { + globalnames_owned = PySet_New(NULL); + if (globalnames_owned == NULL) { + goto finally; + } + globalnames = globalnames_owned; + } + else if (!PySet_Check(globalnames)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a set for \"globalnames\", got %R", globalnames); + goto finally; + } + if (attrnames == NULL) { + attrnames_owned = PySet_New(NULL); + if (attrnames_owned == NULL) { + goto finally; + } + attrnames = attrnames_owned; + } + else if (!PySet_Check(attrnames)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a set for \"attrnames\", got %R", attrnames); + goto finally; + } + + // Fill in unbound.globals and unbound.numattrs. + struct co_unbound_counts unbound = {0}; + int numdupes = 0; + Py_BEGIN_CRITICAL_SECTION(co); + res = identify_unbound_names( + tstate, co, globalnames, attrnames, globalsns, builtinsns, + &unbound, &numdupes); + Py_END_CRITICAL_SECTION(); + if (res < 0) { + goto finally; + } + assert(unbound.numunknown == 0); + assert(unbound.total - numdupes <= counts->unbound.total); + assert(counts->unbound.numunknown == counts->unbound.total); + // There may be a name that is both a global and an attr. + int totalunbound = counts->unbound.total + numdupes; + unbound.numunknown = totalunbound - unbound.total; + unbound.total = totalunbound; + counts->unbound = unbound; + counts->total += numdupes; + res = 0; + +finally: + Py_XDECREF(globalnames_owned); + Py_XDECREF(attrnames_owned); + return res; +} + + +int +_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + // We don't worry about co_executors, co_instrumentation, + // or co_monitoring. They are essentially ephemeral. + if (co->co_extra != NULL) { + errmsg = "only basic code objects are supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts, + const char **p_errmsg) +{ + const char *errmsg = NULL; + if (counts->numfree > 0) { // It's a closure. + errmsg = "closures not supported"; + } + else if (counts->unbound.globals.numglobal > 0) { + errmsg = "globals not supported"; + } + else if (counts->unbound.globals.numbuiltin > 0 + && counts->unbound.globals.numunknown > 0) + { + errmsg = "globals not supported"; + } + // Otherwise we don't check counts.unbound.globals.numunknown since we can't + // distinguish beween globals and builtins here. + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_VerifyStateless(PyThreadState *tstate, + PyCodeObject *co, PyObject *globalnames, + PyObject *globalsns, PyObject *builtinsns) +{ + const char *errmsg; + _PyCode_var_counts_t counts = {0}; + _PyCode_GetVarCounts(co, &counts); + if (_PyCode_SetUnboundVarCounts( + tstate, co, &counts, globalnames, NULL, + globalsns, builtinsns) < 0) + { + return -1; + } + // We may consider relaxing the internal state constraints + // if it becomes a problem. + if (!_PyCode_CheckNoInternalState(co, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + if (builtinsns != NULL) { + // Make sure the next check will fail for globals, + // even if there aren't any builtins. + counts.unbound.globals.numbuiltin += 1; + } + if (!_PyCode_CheckNoExternalState(co, &counts, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + // Note that we don't check co->co_flags & CO_NESTED for anything here. + return 0; +} + + +int +_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + if (co->co_flags & CO_GENERATOR) { + errmsg = "generators not supported"; + } + else if (co->co_flags & CO_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ITERABLE_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ASYNC_GENERATOR) { + errmsg = "generators not supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + /* Here "value" means a non-None value, since a bare return is identical * to returning None explicitly. Likewise a missing return statement * at the end of the function is turned into "return None". */ -int -_PyCode_ReturnsOnlyNone(PyCodeObject *co) +static int +code_returns_only_none(PyCodeObject *co) { + if (!_PyCode_CheckPureFunction(co, NULL)) { + return 0; + } + int len = (int)Py_SIZE(co); + assert(len > 0); + + // The last instruction either returns or raises. We can take advantage + // of that for a quick exit. + _Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1); + // Look up None in co_consts. Py_ssize_t nconsts = PyTuple_Size(co->co_consts); int none_index = 0; @@ -1706,31 +2109,57 @@ _PyCode_ReturnsOnlyNone(PyCodeObject *co) } if (none_index == nconsts) { // None wasn't there, which means there was no implicit return, - // "return", or "return None". That means there must be - // an explicit return (non-None). - return 0; - } + // "return", or "return None". - // Walk the bytecode, looking for RETURN_VALUE. - Py_ssize_t len = Py_SIZE(co); - for (int i = 0; i < len; i++) { - _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); - if (IS_RETURN_OPCODE(inst.op.code)) { - assert(i != 0); - // Ignore it if it returns None. - _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); - if (prev.op.code == LOAD_CONST) { - // We don't worry about EXTENDED_ARG for now. - if (prev.op.arg == none_index) { - continue; + // That means there must be + // an explicit return (non-None), or it only raises. + if (IS_RETURN_OPCODE(final.op.code)) { + // It was an explicit return (non-None). + return 0; + } + // It must end with a raise then. We still have to walk the + // bytecode to see if there's any explicit return (non-None). + assert(IS_RAISE_OPCODE(final.op.code)); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + // We alraedy know it isn't returning None. + return 0; + } + } + // It must only raise. + } + else { + // Walk the bytecode, looking for RETURN_VALUE. + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + assert(i != 0); + // Ignore it if it returns None. + _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); + if (prev.op.code == LOAD_CONST) { + // We don't worry about EXTENDED_ARG for now. + if (prev.op.arg == none_index) { + continue; + } } + return 0; } - return 0; } } return 1; } +int +_PyCode_ReturnsOnlyNone(PyCodeObject *co) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(co); + res = code_returns_only_none(co); + Py_END_CRITICAL_SECTION(); + return res; +} + #ifdef _Py_TIER2 @@ -1955,6 +2384,8 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyObject *self) { + PyThreadState *tstate = PyThreadState_GET(); + _Py_atomic_add_uint64(&tstate->interp->_code_object_generation, 1); PyCodeObject *co = _PyCodeObject_CAST(self); _PyObject_ResurrectStart(self); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); @@ -2006,9 +2437,7 @@ code_dealloc(PyObject *self) Py_XDECREF(co->_co_cached->_co_varnames); PyMem_Free(co->_co_cached); } - if (co->co_weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, co->co_weakreflist); free_monitoring_data(co->_co_monitoring); #ifdef Py_GIL_DISABLED // The first element always points to the mutable bytecode at the end of @@ -2939,7 +3368,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); |