Diffstat (limited to 'Objects')
32 files changed, 2792 insertions, 549 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index fc407ec6bf9..87ea1162e03 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1075,10 +1075,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } /* Unescape a backslash-escaped string. */ -PyObject *_PyBytes_DecodeEscape(const char *s, +PyObject *_PyBytes_DecodeEscape2(const char *s, Py_ssize_t len, const char *errors, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { int c; char *p; @@ -1092,7 +1093,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, return NULL; writer.overallocate = 1; - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; end = s + len; while (s < end) { @@ -1130,9 +1132,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, c = (c<<3) + *s++ - '0'; } if (c > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; } } *p++ = c; @@ -1173,9 +1176,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, break; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = (unsigned char)s[-1]; + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; } *p++ = '\\'; s--; @@ -1195,18 +1199,19 @@ PyObject *PyBytes_DecodeEscape(const char *s, Py_ssize_t Py_UNUSED(unicode), const char *Py_UNUSED(recode_encoding)) { - const char* first_invalid_escape; - PyObject *result = _PyBytes_DecodeEscape(s, len, errors, - &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyBytes_DecodeEscape2(s, len, errors, + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "b\"\\%.3s\" is an invalid octal escape sequence. " + "b\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -1216,7 +1221,7 @@ PyObject *PyBytes_DecodeEscape(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "b\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. 
", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; diff --git a/Objects/call.c b/Objects/call.c index b1610dababd..c9a18bcc3da 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -834,12 +834,15 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, assert(PyVectorcall_NARGS(nargsf) >= 1); PyThreadState *tstate = _PyThreadState_GET(); - PyObject *callable = NULL; + _PyCStackRef method; + _PyThreadState_PushCStackRef(tstate, &method); /* Use args[0] as "self" argument */ - int unbound = _PyObject_GetMethod(args[0], name, &callable); - if (callable == NULL) { + int unbound = _PyObject_GetMethodStackRef(tstate, args[0], name, &method.ref); + if (PyStackRef_IsNull(method.ref)) { + _PyThreadState_PopCStackRef(tstate, &method); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); if (unbound) { /* We must remove PY_VECTORCALL_ARGUMENTS_OFFSET since @@ -855,7 +858,7 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable); PyObject *result = _PyObject_VectorcallTstate(tstate, callable, args, nargsf, kwnames); - Py_DECREF(callable); + _PyThreadState_PopCStackRef(tstate, &method); return result; } @@ -868,11 +871,14 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...) return null_error(tstate); } - PyObject *callable = NULL; - int is_method = _PyObject_GetMethod(obj, name, &callable); - if (callable == NULL) { + _PyCStackRef method; + _PyThreadState_PushCStackRef(tstate, &method); + int is_method = _PyObject_GetMethodStackRef(tstate, obj, name, &method.ref); + if (PyStackRef_IsNull(method.ref)) { + _PyThreadState_PopCStackRef(tstate, &method); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); obj = is_method ? obj : NULL; va_list vargs; @@ -880,7 +886,7 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...) PyObject *result = object_vacall(tstate, obj, callable, vargs); va_end(vargs); - Py_DECREF(callable); + _PyThreadState_PopCStackRef(tstate, &method); return result; } @@ -897,12 +903,15 @@ _PyObject_CallMethodIdObjArgs(PyObject *obj, _Py_Identifier *name, ...) if (!oname) { return NULL; } - - PyObject *callable = NULL; - int is_method = _PyObject_GetMethod(obj, oname, &callable); - if (callable == NULL) { + _PyCStackRef method; + _PyThreadState_PushCStackRef(tstate, &method); + int is_method = _PyObject_GetMethodStackRef(tstate, obj, oname, &method.ref); + if (PyStackRef_IsNull(method.ref)) { + _PyThreadState_PopCStackRef(tstate, &method); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); + obj = is_method ? obj : NULL; va_list vargs; @@ -910,7 +919,7 @@ _PyObject_CallMethodIdObjArgs(PyObject *obj, _Py_Identifier *name, ...) 
PyObject *result = object_vacall(tstate, obj, callable, vargs); va_end(vargs); - Py_DECREF(callable); + _PyThreadState_PopCStackRef(tstate, &method); return result; } diff --git a/Objects/classobject.c b/Objects/classobject.c index 58e1d179773..e71f301f2ef 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -7,6 +7,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/classobject.c.h" @@ -245,8 +246,7 @@ method_dealloc(PyObject *self) { PyMethodObject *im = _PyMethodObject_CAST(self); _PyObject_GC_UNTRACK(im); - if (im->im_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *)im); + FT_CLEAR_WEAKREFS(self, im->im_weakreflist); Py_DECREF(im->im_func); Py_XDECREF(im->im_self); assert(Py_IS_TYPE(self, &PyMethod_Type)); diff --git a/Objects/clinic/interpolationobject.c.h b/Objects/clinic/interpolationobject.c.h new file mode 100644 index 00000000000..7a94dabafc9 --- /dev/null +++ b/Objects/clinic/interpolationobject.c.h @@ -0,0 +1,89 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +static PyObject * +interpolation_new_impl(PyTypeObject *type, PyObject *value, + PyObject *expression, PyObject *conversion, + PyObject *format_spec); + +static PyObject * +interpolation_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(value), &_Py_ID(expression), &_Py_ID(conversion), &_Py_ID(format_spec), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"value", "expression", "conversion", "format_spec", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "Interpolation", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 2; + PyObject *value; + PyObject *expression; + PyObject *conversion = Py_None; + PyObject *format_spec = &_Py_STR(empty); + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 2, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + value = fastargs[0]; + if (!PyUnicode_Check(fastargs[1])) { + _PyArg_BadArgument("Interpolation", "argument 'expression'", "str", fastargs[1]); + goto exit; + } + expression = fastargs[1]; + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[2]) { + if (!_conversion_converter(fastargs[2], &conversion)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (!PyUnicode_Check(fastargs[3])) { + _PyArg_BadArgument("Interpolation", "argument 'format_spec'", "str", fastargs[3]); + goto exit; + } + format_spec = fastargs[3]; +skip_optional_pos: + return_value = interpolation_new_impl(type, value, expression, conversion, format_spec); + +exit: + return return_value; +} +/*[clinic end generated code: output=599742a5ccd6f060 input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index bf24a4af445..ba178abc0c0 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -17,6 +17,7 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() #include "pycore_uniqueid.h" // _PyObject_AssignUniqueId() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/codeobject.c.h" #include <stdbool.h> @@ -1690,12 +1691,414 @@ PyCode_GetFreevars(PyCodeObject *code) } +#define GET_OPARG(co, i, initial) (initial) +// We may want to move these macros to pycore_opcode_utils.h +// and use them in Python/bytecodes.c. +#define LOAD_GLOBAL_NAME_INDEX(oparg) ((oparg)>>1) +#define LOAD_ATTR_NAME_INDEX(oparg) ((oparg)>>1) + +#ifndef Py_DEBUG +#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) +#else +static inline PyObject * +GETITEM(PyObject *v, Py_ssize_t i) +{ + assert(PyTuple_Check(v)); + assert(i >= 0); + assert(i < PyTuple_GET_SIZE(v)); + assert(PyTuple_GET_ITEM(v, i) != NULL); + return PyTuple_GET_ITEM(v, i); +} +#endif + +static int +identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, + PyObject *globalnames, PyObject *attrnames, + PyObject *globalsns, PyObject *builtinsns, + struct co_unbound_counts *counts, int *p_numdupes) +{ + // This function is inspired by inspect.getclosurevars(). + // It would be nicer if we had something similar to co_localspluskinds, + // but for co_names. 
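(Context, not part of the diff: identify_unbound_names() is, per its comment, modeled on inspect.getclosurevars(). Below is a minimal embedding sketch of the Python-level counterpart of the classification it performs; the function f and its names are made up for illustration, and only a standard CPython build with Python.h is assumed. The function body resumes after the sketch.)

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyRun_SimpleString(
        "import inspect\n"
        "def f():\n"
        "    return len(data.items)\n"   /* len: builtin, data: unbound global, items: attribute */
        "print(inspect.getclosurevars(f))\n");
    return Py_FinalizeEx();
}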
+ assert(globalnames != NULL); + assert(PySet_Check(globalnames)); + assert(PySet_GET_SIZE(globalnames) == 0 || counts != NULL); + assert(attrnames != NULL); + assert(PySet_Check(attrnames)); + assert(PySet_GET_SIZE(attrnames) == 0 || counts != NULL); + assert(globalsns == NULL || PyDict_Check(globalsns)); + assert(builtinsns == NULL || PyDict_Check(builtinsns)); + assert(counts == NULL || counts->total == 0); + struct co_unbound_counts unbound = {0}; + int numdupes = 0; + Py_ssize_t len = Py_SIZE(co); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (inst.op.code == LOAD_ATTR) { + int oparg = GET_OPARG(co, i, inst.op.arg); + int index = LOAD_ATTR_NAME_INDEX(oparg); + PyObject *name = GETITEM(co->co_names, index); + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + continue; + } + unbound.total += 1; + unbound.numattrs += 1; + if (PySet_Add(attrnames, name) < 0) { + return -1; + } + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } + } + else if (inst.op.code == LOAD_GLOBAL) { + int oparg = GET_OPARG(co, i, inst.op.arg); + int index = LOAD_ATTR_NAME_INDEX(oparg); + PyObject *name = GETITEM(co->co_names, index); + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + continue; + } + unbound.total += 1; + unbound.globals.total += 1; + if (globalsns != NULL && PyDict_Contains(globalsns, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + unbound.globals.numglobal += 1; + } + else if (builtinsns != NULL && PyDict_Contains(builtinsns, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + unbound.globals.numbuiltin += 1; + } + else { + unbound.globals.numunknown += 1; + } + if (PySet_Add(globalnames, name) < 0) { + return -1; + } + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } + } + } + if (counts != NULL) { + *counts = unbound; + } + if (p_numdupes != NULL) { + *p_numdupes = numdupes; + } + return 0; +} + + +void +_PyCode_GetVarCounts(PyCodeObject *co, _PyCode_var_counts_t *counts) +{ + assert(counts != NULL); + + // Count the locals, cells, and free vars. + struct co_locals_counts locals = {0}; + int numfree = 0; + PyObject *kinds = co->co_localspluskinds; + Py_ssize_t numlocalplusfree = PyBytes_GET_SIZE(kinds); + for (int i = 0; i < numlocalplusfree; i++) { + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); + if (kind & CO_FAST_FREE) { + assert(!(kind & CO_FAST_LOCAL)); + assert(!(kind & CO_FAST_HIDDEN)); + assert(!(kind & CO_FAST_ARG)); + numfree += 1; + } + else { + // Apparently not all non-free vars are CO_FAST_LOCAL. + assert(kind); + locals.total += 1; + if (kind & CO_FAST_ARG) { + locals.args.total += 1; + if (kind & CO_FAST_ARG_VAR) { + if (kind & CO_FAST_ARG_POS) { + assert(!(kind & CO_FAST_ARG_KW)); + assert(!locals.args.varargs); + locals.args.varargs = 1; + } + else { + assert(kind & CO_FAST_ARG_KW); + assert(!locals.args.varkwargs); + locals.args.varkwargs = 1; + } + } + else if (kind & CO_FAST_ARG_POS) { + if (kind & CO_FAST_ARG_KW) { + locals.args.numposorkw += 1; + } + else { + locals.args.numposonly += 1; + } + } + else { + assert(kind & CO_FAST_ARG_KW); + locals.args.numkwonly += 1; + } + if (kind & CO_FAST_CELL) { + locals.cells.total += 1; + locals.cells.numargs += 1; + } + // Args are never hidden currently. 
+ assert(!(kind & CO_FAST_HIDDEN)); + } + else { + if (kind & CO_FAST_CELL) { + locals.cells.total += 1; + locals.cells.numothers += 1; + if (kind & CO_FAST_HIDDEN) { + locals.hidden.total += 1; + locals.hidden.numcells += 1; + } + } + else { + locals.numpure += 1; + if (kind & CO_FAST_HIDDEN) { + locals.hidden.total += 1; + locals.hidden.numpure += 1; + } + } + } + } + } + assert(locals.args.total == ( + co->co_argcount + co->co_kwonlyargcount + + !!(co->co_flags & CO_VARARGS) + + !!(co->co_flags & CO_VARKEYWORDS))); + assert(locals.args.numposonly == co->co_posonlyargcount); + assert(locals.args.numposonly + locals.args.numposorkw == co->co_argcount); + assert(locals.args.numkwonly == co->co_kwonlyargcount); + assert(locals.cells.total == co->co_ncellvars); + assert(locals.args.total + locals.numpure == co->co_nlocals); + assert(locals.total + locals.cells.numargs == co->co_nlocals + co->co_ncellvars); + assert(locals.total + numfree == co->co_nlocalsplus); + assert(numfree == co->co_nfreevars); + + // Get the unbound counts. + assert(PyTuple_GET_SIZE(co->co_names) >= 0); + assert(PyTuple_GET_SIZE(co->co_names) < INT_MAX); + int numunbound = (int)PyTuple_GET_SIZE(co->co_names); + struct co_unbound_counts unbound = { + .total = numunbound, + // numglobal and numattrs can be set later + // with _PyCode_SetUnboundVarCounts(). + .numunknown = numunbound, + }; + + // "Return" the result. + *counts = (_PyCode_var_counts_t){ + .total = locals.total + numfree + unbound.total, + .locals = locals, + .numfree = numfree, + .unbound = unbound, + }; +} + +int +_PyCode_SetUnboundVarCounts(PyThreadState *tstate, + PyCodeObject *co, _PyCode_var_counts_t *counts, + PyObject *globalnames, PyObject *attrnames, + PyObject *globalsns, PyObject *builtinsns) +{ + int res = -1; + PyObject *globalnames_owned = NULL; + PyObject *attrnames_owned = NULL; + + // Prep the name sets. + if (globalnames == NULL) { + globalnames_owned = PySet_New(NULL); + if (globalnames_owned == NULL) { + goto finally; + } + globalnames = globalnames_owned; + } + else if (!PySet_Check(globalnames)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a set for \"globalnames\", got %R", globalnames); + goto finally; + } + if (attrnames == NULL) { + attrnames_owned = PySet_New(NULL); + if (attrnames_owned == NULL) { + goto finally; + } + attrnames = attrnames_owned; + } + else if (!PySet_Check(attrnames)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a set for \"attrnames\", got %R", attrnames); + goto finally; + } + + // Fill in unbound.globals and unbound.numattrs. + struct co_unbound_counts unbound = {0}; + int numdupes = 0; + Py_BEGIN_CRITICAL_SECTION(co); + res = identify_unbound_names( + tstate, co, globalnames, attrnames, globalsns, builtinsns, + &unbound, &numdupes); + Py_END_CRITICAL_SECTION(); + if (res < 0) { + goto finally; + } + assert(unbound.numunknown == 0); + assert(unbound.total - numdupes <= counts->unbound.total); + assert(counts->unbound.numunknown == counts->unbound.total); + // There may be a name that is both a global and an attr. 
+ int totalunbound = counts->unbound.total + numdupes; + unbound.numunknown = totalunbound - unbound.total; + unbound.total = totalunbound; + counts->unbound = unbound; + counts->total += numdupes; + res = 0; + +finally: + Py_XDECREF(globalnames_owned); + Py_XDECREF(attrnames_owned); + return res; +} + + +int +_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + // We don't worry about co_executors, co_instrumentation, + // or co_monitoring. They are essentially ephemeral. + if (co->co_extra != NULL) { + errmsg = "only basic code objects are supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts, + const char **p_errmsg) +{ + const char *errmsg = NULL; + if (counts->numfree > 0) { // It's a closure. + errmsg = "closures not supported"; + } + else if (counts->unbound.globals.numglobal > 0) { + errmsg = "globals not supported"; + } + else if (counts->unbound.globals.numbuiltin > 0 + && counts->unbound.globals.numunknown > 0) + { + errmsg = "globals not supported"; + } + // Otherwise we don't check counts.unbound.globals.numunknown since we can't + // distinguish between globals and builtins here. + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_VerifyStateless(PyThreadState *tstate, + PyCodeObject *co, PyObject *globalnames, + PyObject *globalsns, PyObject *builtinsns) +{ + const char *errmsg; + _PyCode_var_counts_t counts = {0}; + _PyCode_GetVarCounts(co, &counts); + if (_PyCode_SetUnboundVarCounts( + tstate, co, &counts, globalnames, NULL, + globalsns, builtinsns) < 0) + { + return -1; + } + // We may consider relaxing the internal state constraints + // if it becomes a problem. + if (!_PyCode_CheckNoInternalState(co, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + if (builtinsns != NULL) { + // Make sure the next check will fail for globals, + // even if there aren't any builtins. + counts.unbound.globals.numbuiltin += 1; + } + if (!_PyCode_CheckNoExternalState(co, &counts, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + // Note that we don't check co->co_flags & CO_NESTED for anything here. + return 0; +} + + +int +_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + if (co->co_flags & CO_GENERATOR) { + errmsg = "generators not supported"; + } + else if (co->co_flags & CO_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ITERABLE_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ASYNC_GENERATOR) { + errmsg = "generators not supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + /* Here "value" means a non-None value, since a bare return is identical * to returning None explicitly. Likewise a missing return statement * at the end of the function is turned into "return None". */ -int -_PyCode_ReturnsOnlyNone(PyCodeObject *co) +static int +code_returns_only_none(PyCodeObject *co) { + if (!_PyCode_CheckPureFunction(co, NULL)) { + return 0; + } + int len = (int)Py_SIZE(co); + assert(len > 0); + + // The last instruction either returns or raises. We can take advantage + // of that for a quick exit. 
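(Context, not part of the diff: the quick exit below relies on the compiler invariant that the final base instruction of a code object either returns or raises. An illustrative way to observe this, with a made-up function only_raises; exact opnames vary across bytecode versions. The function body resumes after the sketch.)

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyRun_SimpleString(
        "import dis\n"
        "def only_raises():\n"
        "    raise ValueError('no return here')\n"
        "print(list(dis.Bytecode(only_raises))[-1].opname)\n");   /* e.g. RAISE_VARARGS */
    return Py_FinalizeEx();
}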
+ _Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1); + // Look up None in co_consts. Py_ssize_t nconsts = PyTuple_Size(co->co_consts); int none_index = 0; @@ -1706,31 +2109,57 @@ _PyCode_ReturnsOnlyNone(PyCodeObject *co) } if (none_index == nconsts) { // None wasn't there, which means there was no implicit return, - // "return", or "return None". That means there must be - // an explicit return (non-None). - return 0; - } + // "return", or "return None". - // Walk the bytecode, looking for RETURN_VALUE. - Py_ssize_t len = Py_SIZE(co); - for (int i = 0; i < len; i++) { - _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); - if (IS_RETURN_OPCODE(inst.op.code)) { - assert(i != 0); - // Ignore it if it returns None. - _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); - if (prev.op.code == LOAD_CONST) { - // We don't worry about EXTENDED_ARG for now. - if (prev.op.arg == none_index) { - continue; + // That means there must be + // an explicit return (non-None), or it only raises. + if (IS_RETURN_OPCODE(final.op.code)) { + // It was an explicit return (non-None). + return 0; + } + // It must end with a raise then. We still have to walk the + // bytecode to see if there's any explicit return (non-None). + assert(IS_RAISE_OPCODE(final.op.code)); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + // We already know it isn't returning None. + return 0; + } + } + // It must only raise. + } + else { + // Walk the bytecode, looking for RETURN_VALUE. + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + assert(i != 0); + // Ignore it if it returns None. + _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); + if (prev.op.code == LOAD_CONST) { + // We don't worry about EXTENDED_ARG for now. 
+ if (prev.op.arg == none_index) { + continue; + } } + return 0; } - return 0; } } return 1; } +int +_PyCode_ReturnsOnlyNone(PyCodeObject *co) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(co); + res = code_returns_only_none(co); + Py_END_CRITICAL_SECTION(); + return res; +} + #ifdef _Py_TIER2 @@ -1955,6 +2384,8 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyObject *self) { + PyThreadState *tstate = PyThreadState_GET(); + _Py_atomic_add_uint64(&tstate->interp->_code_object_generation, 1); PyCodeObject *co = _PyCodeObject_CAST(self); _PyObject_ResurrectStart(self); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); @@ -2006,9 +2437,7 @@ code_dealloc(PyObject *self) Py_XDECREF(co->_co_cached->_co_varnames); PyMem_Free(co->_co_cached); } - if (co->co_weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, co->co_weakreflist); free_monitoring_data(co->_co_monitoring); #ifdef Py_GIL_DISABLED // The first element always points to the mutable bytecode at the end of @@ -2939,7 +3368,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/complexobject.c b/Objects/complexobject.c index c2dd320ae73..b66ebe131ae 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -1,4 +1,3 @@ - /* Complex object implementation */ /* Borrows heavily from floatobject.c */ @@ -9,6 +8,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter() #include "pycore_floatobject.h" // _Py_convert_int_to_double() +#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_object.h" // _PyObject_Init() #include "pycore_pymath.h" // _Py_ADJUST_ERANGE2() @@ -410,16 +410,32 @@ complex_subtype_from_c_complex(PyTypeObject *type, Py_complex cval) PyObject * PyComplex_FromCComplex(Py_complex cval) { - /* Inline PyObject_New */ - PyComplexObject *op = PyObject_Malloc(sizeof(PyComplexObject)); + PyComplexObject *op = _Py_FREELIST_POP(PyComplexObject, complexes); + if (op == NULL) { - return PyErr_NoMemory(); + /* Inline PyObject_New */ + op = PyObject_Malloc(sizeof(PyComplexObject)); + if (op == NULL) { + return PyErr_NoMemory(); + } + _PyObject_Init((PyObject*)op, &PyComplex_Type); } - _PyObject_Init((PyObject*)op, &PyComplex_Type); op->cval = cval; return (PyObject *) op; } +static void +complex_dealloc(PyObject *op) +{ + assert(PyComplex_Check(op)); + if (PyComplex_CheckExact(op)) { + _Py_FREELIST_FREE(complexes, op, PyObject_Free); + } + else { + Py_TYPE(op)->tp_free(op); + } +} + static PyObject * complex_subtype_from_doubles(PyTypeObject *type, double real, double imag) { @@ -1383,7 +1399,7 @@ PyTypeObject PyComplex_Type = { "complex", sizeof(PyComplexObject), 0, - 0, /* tp_dealloc */ + complex_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 5ff36edd3dd..10c465b95ac 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1311,11 +1311,9 @@ wrapper_dealloc(PyObject *self) { wrapperobject *wp = (wrapperobject *)self; PyObject_GC_UnTrack(wp); - Py_TRASHCAN_BEGIN(wp, wrapper_dealloc) 
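(Context, not part of the diff: the complexobject.c hunk above routes allocation and deallocation of exact complex objects through a freelist, leaving the public C API unchanged. A minimal sketch exercising the affected path, with error handling elided; the wrapper_dealloc hunk resumes below.)

#include <Python.h>

int main(void)
{
    Py_Initialize();
    Py_complex c = {1.5, -2.0};
    PyObject *z = PyComplex_FromCComplex(c);   /* may now be served from the freelist */
    Py_complex back = PyComplex_AsCComplex(z);
    printf("%g%+gj\n", back.real, back.imag);
    Py_DECREF(z);                              /* exact instances go back to the freelist */
    return Py_FinalizeEx();
}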
Py_XDECREF(wp->descr); Py_XDECREF(wp->self); PyObject_GC_Del(wp); - Py_TRASHCAN_END } static PyObject * diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 4fdfd63cd4f..6b7b150f0e2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -547,13 +547,13 @@ static inline uint8_t calculate_log2_keysize(Py_ssize_t minsize) { #if SIZEOF_LONG == SIZEOF_SIZE_T - minsize = (minsize | PyDict_MINSIZE) - 1; - return _Py_bit_length(minsize | (PyDict_MINSIZE-1)); + minsize = Py_MAX(minsize, PyDict_MINSIZE); + return _Py_bit_length(minsize - 1); #elif defined(_MSC_VER) - // On 64bit Windows, sizeof(long) == 4. - minsize = (minsize | PyDict_MINSIZE) - 1; + // On 64bit Windows, sizeof(long) == 4. We cannot use _Py_bit_length. + minsize = Py_MAX(minsize, PyDict_MINSIZE); unsigned long msb; - _BitScanReverse64(&msb, (uint64_t)minsize); + _BitScanReverse64(&msb, (uint64_t)minsize - 1); return (uint8_t)(msb + 1); #else uint8_t log2_size; @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif @@ -2916,6 +2916,11 @@ clear_lock_held(PyObject *op) } void +_PyDict_Clear_LockHeld(PyObject *op) { + clear_lock_held(op); +} + +void PyDict_Clear(PyObject *op) { Py_BEGIN_CRITICAL_SECTION(op); @@ -3178,9 +3183,10 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp, Py_ssize_t pos = 0; PyObject *key; Py_hash_t hash; - - if (dictresize(interp, mp, - estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) { + uint8_t new_size = Py_MAX( + estimate_log2_keysize(PySet_GET_SIZE(iterable)), + DK_LOG_SIZE(mp->ma_keys)); + if (dictresize(interp, mp, new_size, 0)) { Py_DECREF(mp); return NULL; } @@ -3285,7 +3291,6 @@ dict_dealloc(PyObject *self) /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(mp); - Py_TRASHCAN_BEGIN(mp, dict_dealloc) if (values != NULL) { if (values->embedded == 0) { for (i = 0, n = values->capacity; i < n; i++) { @@ -3305,7 +3310,6 @@ dict_dealloc(PyObject *self) else { Py_TYPE(mp)->tp_free((PyObject *)mp); } - Py_TRASHCAN_END } @@ -3730,13 +3734,14 @@ merge_from_seq2_lock_held(PyObject *d, PyObject *seq2, int override) } /* Convert item to sequence, and verify length 2. 
*/ - fast = PySequence_Fast(item, ""); + fast = PySequence_Fast(item, "object is not iterable"); if (fast == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, - "cannot convert dictionary update " + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + _PyErr_FormatNote( + "Cannot convert dictionary update " "sequence element #%zd to a sequence", i); + } goto Fail; } n = PySequence_Fast_GET_SIZE(fast); @@ -3853,7 +3858,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe } } - Py_ssize_t orig_size = other->ma_keys->dk_nentries; + Py_ssize_t orig_size = other->ma_used; Py_ssize_t pos = 0; Py_hash_t hash; PyObject *key, *value; @@ -3887,7 +3892,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe if (err != 0) return -1; - if (orig_size != other->ma_keys->dk_nentries) { + if (orig_size != other->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dict mutated during update"); return -1; @@ -4852,7 +4857,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) d->ma_used = 0; d->_ma_watcher_tag = 0; - dictkeys_incref(Py_EMPTY_KEYS); + // We don't inc ref empty keys because they're immortal + assert((Py_EMPTY_KEYS)->dk_refcnt == _Py_DICT_IMMORTAL_INITIAL_REFCNT); d->ma_keys = Py_EMPTY_KEYS; d->ma_values = NULL; ASSERT_CONSISTENT(d); diff --git a/Objects/exceptions.c b/Objects/exceptions.c index d642130eaae..b17cac83551 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -150,10 +150,8 @@ BaseException_dealloc(PyObject *op) // bpo-44348: The trashcan mechanism prevents stack overflow when deleting // long chains of exceptions. For example, exceptions can be chained // through the __context__ attributes or the __traceback__ attribute. - Py_TRASHCAN_BEGIN(self, BaseException_dealloc) (void)BaseException_clear(op); Py_TYPE(self)->tp_free(self); - Py_TRASHCAN_END } static int diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 76ed24d29fd..93e1973d6b3 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2196,12 +2196,33 @@ PyFloat_Pack4(double x, char *data, int le) uint64_t v; memcpy(&v, &x, 8); +#ifndef __riscv if ((v & (1ULL << 51)) == 0) { uint32_t u32; memcpy(&u32, &y, 4); u32 &= ~(1 << 22); /* make sNaN */ memcpy(&y, &u32, 4); } +#else + uint32_t u32; + + memcpy(&u32, &y, 4); + if ((v & (1ULL << 51)) == 0) { + u32 &= ~(1 << 22); + } + /* Workaround RISC-V: "If a NaN value is converted to a + * different floating-point type, the result is the + * canonical NaN of the new type". The canonical NaN here + * is a positive qNaN with zero payload. 
*/ + if (v & (1ULL << 63)) { + u32 |= (1 << 31); /* set sign */ + } + /* add payload */ + u32 -= (u32 & 0x3fffff); + u32 += (uint32_t)((v & 0x7ffffffffffffULL) >> 29); + + memcpy(&y, &u32, 4); +#endif } unsigned char s[sizeof(float)]; @@ -2493,16 +2514,34 @@ PyFloat_Unpack4(const char *data, int le) uint32_t v; memcpy(&v, &x, 4); +#ifndef __riscv if ((v & (1 << 22)) == 0) { double y = x; /* will make qNaN double */ - union double_val { - double d; - uint64_t u64; - } *py = (union double_val *)&y; - - py->u64 &= ~(1ULL << 51); /* make sNaN */ + uint64_t u64; + memcpy(&u64, &y, 8); + u64 &= ~(1ULL << 51); /* make sNaN */ + memcpy(&y, &u64, 8); return y; } +#else + double y = x; + uint64_t u64; + + memcpy(&u64, &y, 8); + if ((v & (1 << 22)) == 0) { + u64 &= ~(1ULL << 51); + } + /* Workaround RISC-V, see PyFloat_Pack4() */ + if (v & (1 << 31)) { + u64 |= (1ULL << 63); /* set sign */ + } + /* add payload */ + u64 -= (u64 & 0x7ffffffffffffULL); + u64 += ((v & 0x3fffffULL) << 29); + + memcpy(&y, &u64, 8); + return y; +#endif } return x; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 7a62219c139..601fc69c4b1 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1386,6 +1386,10 @@ mark_stacks(PyCodeObject *code_obj, int len) stacks[j] = next_stack; break; case GET_ITER: + next_stack = push_value(pop_value(next_stack), Iterator); + next_stack = push_value(next_stack, Iterator); + stacks[next_i] = next_stack; + break; case GET_AITER: next_stack = push_value(pop_value(next_stack), Iterator); stacks[next_i] = next_stack; @@ -1917,7 +1921,6 @@ frame_dealloc(PyObject *op) _PyObject_GC_UNTRACK(f); } - Py_TRASHCAN_BEGIN(f, frame_dealloc); /* GH-106092: If f->f_frame was on the stack and we reached the maximum * nesting depth for deallocations, the trashcan may have delayed this * deallocation until after f->f_frame is freed. Avoid dereferencing @@ -1942,7 +1945,6 @@ frame_dealloc(PyObject *op) Py_CLEAR(f->f_locals_cache); Py_CLEAR(f->f_overwritten_fast_locals); PyObject_GC_Del(f); - Py_TRASHCAN_END; } static int diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 6d71dbb5a6a..9532c21fc70 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1,13 +1,16 @@ /* Function object implementation */ #include "Python.h" +#include "pycore_code.h" // _PyCode_VerifyStateless() #include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_function.h" // _PyFunction_Vectorcall #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_Occurred() +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_stats.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() static const char * @@ -1146,9 +1149,7 @@ func_dealloc(PyObject *self) return; } _PyObject_GC_UNTRACK(op); - if (op->func_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *) op); - } + FT_CLEAR_WEAKREFS(self, op->func_weakreflist); (void)func_clear((PyObject*)op); // These aren't cleared by func_clear(). _Py_DECREF_CODE((PyCodeObject *)op->func_code); @@ -1240,6 +1241,64 @@ PyTypeObject PyFunction_Type = { }; +int +_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) +{ + assert(!PyErr_Occurred()); + assert(PyFunction_Check(func)); + + // Check the globals. 
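(Context, not part of the diff: each condition _PyFunction_VerifyStateless() rejects in the code that follows (defaults, keyword defaults, closures) corresponds to a function attribute visible from Python. An illustrative sketch of the same checks from the outside; outer and inner are made up. The function body resumes after the sketch.)

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyRun_SimpleString(
        "def outer(x):\n"
        "    def inner(y=1):\n"
        "        return x + y\n"
        "    return inner\n"
        "f = outer(10)\n"
        "print(f.__defaults__)  # (1,)      -> would be rejected: defaults\n"
        "print(f.__closure__)   # non-empty -> would be rejected: closure\n");
    return Py_FinalizeEx();
}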
+ PyObject *globalsns = PyFunction_GET_GLOBALS(func); + if (globalsns != NULL && !PyDict_Check(globalsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported globals %R", globalsns); + return -1; + } + // Check the builtins. + PyObject *builtinsns = _PyFunction_GET_BUILTINS(func); + if (builtinsns != NULL && !PyDict_Check(builtinsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported builtins %R", builtinsns); + return -1; + } + // Disallow __defaults__. + PyObject *defaults = PyFunction_GET_DEFAULTS(func); + if (defaults != NULL) { + assert(PyTuple_Check(defaults)); // per PyFunction_New() + if (PyTuple_GET_SIZE(defaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "defaults not supported"); + return -1; + } + } + // Disallow __kwdefaults__. + PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func); + if (kwdefaults != NULL) { + assert(PyDict_Check(kwdefaults)); // per PyFunction_New() + if (PyDict_Size(kwdefaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "keyword defaults not supported"); + return -1; + } + } + // Disallow __closure__. + PyObject *closure = PyFunction_GET_CLOSURE(func); + if (closure != NULL) { + assert(PyTuple_Check(closure)); // per PyFunction_New() + if (PyTuple_GET_SIZE(closure) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported"); + return -1; + } + } + // Check the code. + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) { + return -1; + } + return 0; +} + + static int functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name) { @@ -1484,6 +1543,11 @@ static PyGetSetDef cm_getsetlist[] = { {NULL} /* Sentinel */ }; +static PyMethodDef cm_methodlist[] = { + {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, NULL}, + {NULL} /* Sentinel */ +}; + static PyObject* cm_repr(PyObject *self) { @@ -1542,7 +1606,7 @@ PyTypeObject PyClassMethod_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + cm_methodlist, /* tp_methods */ cm_memberlist, /* tp_members */ cm_getsetlist, /* tp_getset */ 0, /* tp_base */ @@ -1716,6 +1780,11 @@ static PyGetSetDef sm_getsetlist[] = { {NULL} /* Sentinel */ }; +static PyMethodDef sm_methodlist[] = { + {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, NULL}, + {NULL} /* Sentinel */ +}; + static PyObject* sm_repr(PyObject *self) { @@ -1772,7 +1841,7 @@ PyTypeObject PyStaticMethod_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + sm_methodlist, /* tp_methods */ sm_memberlist, /* tp_members */ sm_getsetlist, /* tp_getset */ 0, /* tp_base */ diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index ec3d01f00a3..3bb961aa2b6 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -7,6 +7,7 @@ #include "pycore_typevarobject.h" // _Py_typing_type_repr #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() #include "pycore_unionobject.h" // _Py_union_type_or, _PyGenericAlias_Check +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stdbool.h> @@ -33,9 +34,7 @@ ga_dealloc(PyObject *self) gaobject *alias = (gaobject *)self; _PyObject_GC_UNTRACK(self); - if (alias->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *)alias); - } + FT_CLEAR_WEAKREFS(self, alias->weakreflist); Py_XDECREF(alias->origin); Py_XDECREF(alias->args); Py_XDECREF(alias->parameters); @@ -65,7 +64,7 @@ ga_repr_items_list(PyUnicodeWriter 
*writer, PyObject *p) for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { return -1; } } @@ -109,7 +108,7 @@ ga_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -126,7 +125,7 @@ ga_repr(PyObject *self) } if (len == 0) { // for something like tuple[()] we should print a "()" - if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) { goto error; } } diff --git a/Objects/genobject.c b/Objects/genobject.c index 98b2c5004df..3e7d6257006 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -17,6 +17,7 @@ #include "pycore_pyerrors.h" // _PyErr_ClearExcState() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_warnings.h" // _PyErr_WarnUnawaitedCoroutine() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "opcode_ids.h" // RESUME, etc @@ -161,8 +162,7 @@ gen_dealloc(PyObject *self) _PyObject_GC_UNTRACK(gen); - if (gen->gi_weakreflist != NULL) - PyObject_ClearWeakRefs(self); + FT_CLEAR_WEAKREFS(self, gen->gi_weakreflist); _PyObject_GC_TRACK(self); @@ -704,7 +704,8 @@ static PyObject * gen_get_name(PyObject *self, void *Py_UNUSED(ignored)) { PyGenObject *op = _PyGen_CAST(self); - return Py_NewRef(op->gi_name); + PyObject *name = FT_ATOMIC_LOAD_PTR_ACQUIRE(op->gi_name); + return Py_NewRef(name); } static int @@ -718,7 +719,11 @@ gen_set_name(PyObject *self, PyObject *value, void *Py_UNUSED(ignored)) "__name__ must be set to a string object"); return -1; } - Py_XSETREF(op->gi_name, Py_NewRef(value)); + Py_BEGIN_CRITICAL_SECTION(self); + // gh-133931: To prevent use-after-free from other threads that reference + // the gi_name. + _PyObject_XSetRefDelayed(&op->gi_name, Py_NewRef(value)); + Py_END_CRITICAL_SECTION(); return 0; } @@ -726,7 +731,8 @@ static PyObject * gen_get_qualname(PyObject *self, void *Py_UNUSED(ignored)) { PyGenObject *op = _PyGen_CAST(self); - return Py_NewRef(op->gi_qualname); + PyObject *qualname = FT_ATOMIC_LOAD_PTR_ACQUIRE(op->gi_qualname); + return Py_NewRef(qualname); } static int @@ -740,7 +746,11 @@ gen_set_qualname(PyObject *self, PyObject *value, void *Py_UNUSED(ignored)) "__qualname__ must be set to a string object"); return -1; } - Py_XSETREF(op->gi_qualname, Py_NewRef(value)); + Py_BEGIN_CRITICAL_SECTION(self); + // gh-133931: To prevent use-after-free from other threads that reference + // the gi_qualname. 
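(Context, not part of the diff: per gh-133931, a reader in another thread may still hold the pointer returned by gen_get_name() or gen_get_qualname() while a writer replaces the field, so the old object is freed lazily via _PyObject_XSetRefDelayed() under a critical section. The writer path is reachable through the ordinary attribute API; an illustrative sketch with error handling elided. The setter body resumes below.)

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyObject *ns = PyDict_New();
    PyDict_SetItemString(ns, "__builtins__", PyEval_GetBuiltins());
    Py_XDECREF(PyRun_String("def g():\n    yield 1\ngen = g()\n",
                            Py_file_input, ns, ns));
    PyObject *gen = PyDict_GetItemString(ns, "gen");         /* borrowed */
    PyObject *name = PyUnicode_FromString("renamed");
    PyObject_SetAttrString(gen, "__name__", name);           /* ends up in gen_set_name() */
    Py_DECREF(name);
    PyObject *nm = PyObject_GetAttrString(gen, "__name__");  /* ends up in gen_get_name() */
    PyObject_Print(nm, stdout, Py_PRINT_RAW);
    Py_DECREF(nm);
    Py_DECREF(ns);
    return Py_FinalizeEx();
}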
+ _PyObject_XSetRefDelayed(&op->gi_qualname, Py_NewRef(value)); + Py_END_CRITICAL_SECTION(); return 0; } @@ -1451,7 +1461,9 @@ typedef struct PyAsyncGenAThrow { /* Can be NULL, when in the "aclose()" mode (equivalent of "athrow(GeneratorExit)") */ - PyObject *agt_args; + PyObject *agt_typ; + PyObject *agt_tb; + PyObject *agt_val; AwaitableState agt_state; } PyAsyncGenAThrow; @@ -2078,7 +2090,9 @@ async_gen_athrow_dealloc(PyObject *self) _PyObject_GC_UNTRACK(self); Py_CLEAR(agt->agt_gen); - Py_CLEAR(agt->agt_args); + Py_XDECREF(agt->agt_typ); + Py_XDECREF(agt->agt_tb); + Py_XDECREF(agt->agt_val); PyObject_GC_Del(self); } @@ -2088,7 +2102,9 @@ async_gen_athrow_traverse(PyObject *self, visitproc visit, void *arg) { PyAsyncGenAThrow *agt = _PyAsyncGenAThrow_CAST(self); Py_VISIT(agt->agt_gen); - Py_VISIT(agt->agt_args); + Py_VISIT(agt->agt_typ); + Py_VISIT(agt->agt_tb); + Py_VISIT(agt->agt_val); return 0; } @@ -2116,7 +2132,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) if (o->agt_state == AWAITABLE_STATE_INIT) { if (o->agt_gen->ag_running_async) { o->agt_state = AWAITABLE_STATE_CLOSED; - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { PyErr_SetString( PyExc_RuntimeError, "aclose(): asynchronous generator is already running"); @@ -2143,7 +2159,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) o->agt_state = AWAITABLE_STATE_ITER; o->agt_gen->ag_running_async = 1; - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { /* aclose() mode */ o->agt_gen->ag_closed = 1; @@ -2157,19 +2173,10 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) goto yield_close; } } else { - PyObject *typ; - PyObject *tb = NULL; - PyObject *val = NULL; - - if (!PyArg_UnpackTuple(o->agt_args, "athrow", 1, 3, - &typ, &val, &tb)) { - return NULL; - } - retval = _gen_throw((PyGenObject *)gen, 0, /* Do not close generator when PyExc_GeneratorExit is passed */ - typ, val, tb); + o->agt_typ, o->agt_val, o->agt_tb); retval = async_gen_unwrap_value(o->agt_gen, retval); } if (retval == NULL) { @@ -2181,7 +2188,7 @@ async_gen_athrow_send(PyObject *self, PyObject *arg) assert(o->agt_state == AWAITABLE_STATE_ITER); retval = gen_send((PyObject *)gen, arg); - if (o->agt_args) { + if (o->agt_typ) { return async_gen_unwrap_value(o->agt_gen, retval); } else { /* aclose() mode */ @@ -2212,7 +2219,7 @@ check_error: if (PyErr_ExceptionMatches(PyExc_StopAsyncIteration) || PyErr_ExceptionMatches(PyExc_GeneratorExit)) { - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { /* when aclose() is called we don't want to propagate StopAsyncIteration or GeneratorExit; just raise StopIteration, signalling that this 'aclose()' await @@ -2241,7 +2248,7 @@ async_gen_athrow_throw(PyObject *self, PyObject *const *args, Py_ssize_t nargs) if (o->agt_state == AWAITABLE_STATE_INIT) { if (o->agt_gen->ag_running_async) { o->agt_state = AWAITABLE_STATE_CLOSED; - if (o->agt_args == NULL) { + if (o->agt_typ == NULL) { PyErr_SetString( PyExc_RuntimeError, "aclose(): asynchronous generator is already running"); @@ -2259,7 +2266,7 @@ async_gen_athrow_throw(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } PyObject *retval = gen_throw((PyObject*)o->agt_gen, args, nargs); - if (o->agt_args) { + if (o->agt_typ) { retval = async_gen_unwrap_value(o->agt_gen, retval); if (retval == NULL) { o->agt_gen->ag_running_async = 0; @@ -2334,7 +2341,7 @@ async_gen_athrow_finalize(PyObject *op) { PyAsyncGenAThrow *o = (PyAsyncGenAThrow*)op; if (o->agt_state == AWAITABLE_STATE_INIT) { - PyObject *method = o->agt_args ? 
&_Py_ID(athrow) : &_Py_ID(aclose); + PyObject *method = o->agt_typ ? &_Py_ID(athrow) : &_Py_ID(aclose); _PyErr_WarnUnawaitedAgenMethod(o->agt_gen, method); } } @@ -2403,13 +2410,23 @@ PyTypeObject _PyAsyncGenAThrow_Type = { static PyObject * async_gen_athrow_new(PyAsyncGenObject *gen, PyObject *args) { + PyObject *typ = NULL; + PyObject *tb = NULL; + PyObject *val = NULL; + if (args && !PyArg_UnpackTuple(args, "athrow", 1, 3, &typ, &val, &tb)) { + return NULL; + } + PyAsyncGenAThrow *o; o = PyObject_GC_New(PyAsyncGenAThrow, &_PyAsyncGenAThrow_Type); if (o == NULL) { return NULL; } o->agt_gen = (PyAsyncGenObject*)Py_NewRef(gen); - o->agt_args = Py_XNewRef(args); + o->agt_typ = Py_XNewRef(typ); + o->agt_tb = Py_XNewRef(tb); + o->agt_val = Py_XNewRef(val); + o->agt_state = AWAITABLE_STATE_INIT; _PyObject_GC_TRACK((PyObject*)o); return (PyObject*)o; diff --git a/Objects/interpolationobject.c b/Objects/interpolationobject.c new file mode 100644 index 00000000000..a5d407a7b0e --- /dev/null +++ b/Objects/interpolationobject.c @@ -0,0 +1,231 @@ +/* t-string Interpolation object implementation */ + +#include "Python.h" +#include "pycore_initconfig.h" // _PyStatus_OK +#include "pycore_interpolation.h" +#include "pycore_typeobject.h" // _PyType_GetDict + +static int +_conversion_converter(PyObject *arg, PyObject **conversion) +{ + if (arg == Py_None) { + return 1; + } + + if (!PyUnicode_Check(arg)) { + PyErr_Format(PyExc_TypeError, + "Interpolation() argument 'conversion' must be str, not %T", + arg); + return 0; + } + + Py_ssize_t len; + const char *conv_str = PyUnicode_AsUTF8AndSize(arg, &len); + if (len != 1 || !(conv_str[0] == 'a' || conv_str[0] == 'r' || conv_str[0] == 's')) { + PyErr_SetString(PyExc_ValueError, + "Interpolation() argument 'conversion' must be one of 's', 'a' or 'r'"); + return 0; + } + + *conversion = arg; + return 1; +} + +#include "clinic/interpolationobject.c.h" + +/*[clinic input] +class Interpolation "interpolationobject *" "&_PyInterpolation_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=161c64a16f9c4544]*/ + +typedef struct { + PyObject_HEAD + PyObject *value; + PyObject *expression; + PyObject *conversion; + PyObject *format_spec; +} interpolationobject; + +#define interpolationobject_CAST(op) \ + (assert(_PyInterpolation_CheckExact(op)), _Py_CAST(interpolationobject*, (op))) + +/*[clinic input] +@classmethod +Interpolation.__new__ as interpolation_new + + value: object + expression: object(subclass_of='&PyUnicode_Type') + conversion: object(converter='_conversion_converter') = None + format_spec: object(subclass_of='&PyUnicode_Type', c_default='&_Py_STR(empty)') = "" +[clinic start generated code]*/ + +static PyObject * +interpolation_new_impl(PyTypeObject *type, PyObject *value, + PyObject *expression, PyObject *conversion, + PyObject *format_spec) +/*[clinic end generated code: output=6488e288765bc1a9 input=d91711024068528c]*/ +{ + interpolationobject *self = PyObject_GC_New(interpolationobject, type); + if (!self) { + return NULL; + } + + self->value = Py_NewRef(value); + self->expression = Py_NewRef(expression); + self->conversion = Py_NewRef(conversion); + self->format_spec = Py_NewRef(format_spec); + PyObject_GC_Track(self); + return (PyObject *) self; +} + +static void +interpolation_dealloc(PyObject *op) +{ + PyObject_GC_UnTrack(op); + Py_TYPE(op)->tp_clear(op); + Py_TYPE(op)->tp_free(op); +} + +static int +interpolation_clear(PyObject *op) +{ + interpolationobject *self = interpolationobject_CAST(op); + 
Py_CLEAR(self->value); + Py_CLEAR(self->expression); + Py_CLEAR(self->conversion); + Py_CLEAR(self->format_spec); + return 0; +} + +static int +interpolation_traverse(PyObject *op, visitproc visit, void *arg) +{ + interpolationobject *self = interpolationobject_CAST(op); + Py_VISIT(self->value); + Py_VISIT(self->expression); + Py_VISIT(self->conversion); + Py_VISIT(self->format_spec); + return 0; +} + +static PyObject * +interpolation_repr(PyObject *op) +{ + interpolationobject *self = interpolationobject_CAST(op); + return PyUnicode_FromFormat("%s(%R, %R, %R, %R)", + _PyType_Name(Py_TYPE(self)), self->value, self->expression, + self->conversion, self->format_spec); +} + +static PyMemberDef interpolation_members[] = { + {"value", Py_T_OBJECT_EX, offsetof(interpolationobject, value), Py_READONLY, "Value"}, + {"expression", Py_T_OBJECT_EX, offsetof(interpolationobject, expression), Py_READONLY, "Expression"}, + {"conversion", Py_T_OBJECT_EX, offsetof(interpolationobject, conversion), Py_READONLY, "Conversion"}, + {"format_spec", Py_T_OBJECT_EX, offsetof(interpolationobject, format_spec), Py_READONLY, "Format specifier"}, + {NULL} +}; + +static PyObject* +interpolation_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) +{ + interpolationobject *self = interpolationobject_CAST(op); + return Py_BuildValue("(O(OOOO))", (PyObject *)Py_TYPE(op), + self->value, self->expression, + self->conversion, self->format_spec); +} + +static PyMethodDef interpolation_methods[] = { + {"__reduce__", interpolation_reduce, METH_NOARGS, + PyDoc_STR("__reduce__() -> (cls, state)")}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, + {NULL, NULL}, +}; + +PyTypeObject _PyInterpolation_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "string.templatelib.Interpolation", + .tp_doc = PyDoc_STR("Interpolation object"), + .tp_basicsize = sizeof(interpolationobject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_new = interpolation_new, + .tp_alloc = PyType_GenericAlloc, + .tp_dealloc = interpolation_dealloc, + .tp_clear = interpolation_clear, + .tp_free = PyObject_GC_Del, + .tp_repr = interpolation_repr, + .tp_members = interpolation_members, + .tp_methods = interpolation_methods, + .tp_traverse = interpolation_traverse, +}; + +PyStatus +_PyInterpolation_InitTypes(PyInterpreterState *interp) +{ + PyObject *tuple = Py_BuildValue("(ssss)", "value", "expression", "conversion", "format_spec"); + if (!tuple) { + goto error; + } + + PyObject *dict = _PyType_GetDict(&_PyInterpolation_Type); + if (!dict) { + Py_DECREF(tuple); + goto error; + } + + int status = PyDict_SetItemString(dict, "__match_args__", tuple); + Py_DECREF(tuple); + if (status < 0) { + goto error; + } + return _PyStatus_OK(); + +error: + return _PyStatus_ERR("Can't initialize interpolation types"); +} + +PyObject * +_PyInterpolation_Build(PyObject *value, PyObject *str, int conversion, PyObject *format_spec) +{ + interpolationobject *interpolation = PyObject_GC_New(interpolationobject, &_PyInterpolation_Type); + if (!interpolation) { + return NULL; + } + + interpolation->value = Py_NewRef(value); + interpolation->expression = Py_NewRef(str); + interpolation->format_spec = Py_NewRef(format_spec); + interpolation->conversion = NULL; + + if (conversion == 0) { + interpolation->conversion = Py_None; + } + else { + switch (conversion) { + case FVC_ASCII: + interpolation->conversion = _Py_LATIN1_CHR('a'); + break; + case FVC_REPR: + interpolation->conversion = _Py_LATIN1_CHR('r'); + break; + 
case FVC_STR: + interpolation->conversion = _Py_LATIN1_CHR('s'); + break; + default: + PyErr_SetString(PyExc_SystemError, + "Interpolation() argument 'conversion' must be one of 's', 'a' or 'r'"); + Py_DECREF(interpolation); + return NULL; + } + } + + PyObject_GC_Track(interpolation); + return (PyObject *) interpolation; +} + +PyObject * +_PyInterpolation_GetValueRef(PyObject *interpolation) +{ + return Py_NewRef(interpolationobject_CAST(interpolation)->value); +} diff --git a/Objects/listobject.c b/Objects/listobject.c index 7648c1dfe9f..1b36f4c25ab 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); @@ -550,7 +551,6 @@ list_dealloc(PyObject *self) PyListObject *op = (PyListObject *)self; Py_ssize_t i; PyObject_GC_UnTrack(op); - Py_TRASHCAN_BEGIN(op, list_dealloc) if (op->ob_item != NULL) { /* Do it backwards, for Christian Tismer. There's a simple test case where somehow this reduces @@ -569,7 +569,6 @@ list_dealloc(PyObject *self) else { PyObject_GC_Del(op); } - Py_TRASHCAN_END } static PyObject * @@ -1686,10 +1685,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) /* Avoid malloc for small temp arrays. */ #define MERGESTATE_TEMP_SIZE 256 -/* The largest value of minrun. This must be a power of 2, and >= 1, so that - * the compute_minrun() algorithm guarantees to return a result no larger than - * this, - */ +/* The largest value of minrun. This must be a power of 2, and >= 1 */ #define MAX_MINRUN 64 #if ((MAX_MINRUN) < 1) || ((MAX_MINRUN) & ((MAX_MINRUN) - 1)) #error "MAX_MINRUN must be a power of 2, and >= 1" @@ -1750,6 +1746,11 @@ struct s_MergeState { * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); + + /* Variables used for minrun computation. The "ideal" minrun length is + * the infinite precision listlen / 2**e. See listsort.txt. + */ + Py_ssize_t mr_current, mr_e, mr_mask; }; /* binarysort is the best method for sorting small arrays: it does few @@ -2211,6 +2212,14 @@ merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc, ms->min_gallop = MIN_GALLOP; ms->listlen = list_size; ms->basekeys = lo->keys; + + /* State for generating minrun values. See listsort.txt. */ + ms->mr_e = 0; + while (list_size >> ms->mr_e >= MAX_MINRUN) { + ++ms->mr_e; + } + ms->mr_mask = (1 << ms->mr_e) - 1; + ms->mr_current = 0; } /* Free all the temp memory owned by the MergeState. This must be called @@ -2688,27 +2697,15 @@ merge_force_collapse(MergeState *ms) return 0; } -/* Compute a good value for the minimum run length; natural runs shorter - * than this are boosted artificially via binary insertion. - * - * If n < MAX_MINRUN return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MAX_MINRUN / 2. - * Else return an int k, MAX_MINRUN / 2 <= k <= MAX_MINRUN, such that n/k is - * close to, but strictly less than, an exact power of 2. - * - * See listsort.txt for more info. - */ -static Py_ssize_t -merge_compute_minrun(Py_ssize_t n) +/* Return the next minrun value to use. See listsort.txt. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) +minrun_next(MergeState *ms) { - Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ - - assert(n >= 0); - while (n >= MAX_MINRUN) { - r |= n & 1; - n >>= 1; - } - return n + r; + ms->mr_current += ms->listlen; + assert(ms->mr_current >= 0); /* no overflow */ + Py_ssize_t result = ms->mr_current >> ms->mr_e; + ms->mr_current &= ms->mr_mask; + return result; } /* Here we define custom comparison functions to optimize for the cases one commonly @@ -3076,7 +3073,6 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. */ - minrun = merge_compute_minrun(nremaining); do { Py_ssize_t n; @@ -3085,6 +3081,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < 0) goto fail; /* If short, extend to min(minrun, nremaining). */ + minrun = minrun_next(&ms); if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; @@ -3619,6 +3616,24 @@ list_slice_wrap(PyListObject *aa, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t return res; } +static inline PyObject* +list_slice_subscript(PyObject* self, PyObject* item) +{ + assert(PyList_Check(self)); + assert(PySlice_Check(item)); + Py_ssize_t start, stop, step; + if (PySlice_Unpack(item, &start, &stop, &step) < 0) { + return NULL; + } + return list_slice_wrap((PyListObject *)self, start, stop, step); +} + +PyObject * +_PyList_SliceSubscript(PyObject* _self, PyObject* item) +{ + return list_slice_subscript(_self, item); +} + static PyObject * list_subscript(PyObject* _self, PyObject* item) { @@ -3633,11 +3648,7 @@ list_subscript(PyObject* _self, PyObject* item) return list_item((PyObject *)self, i); } else if (PySlice_Check(item)) { - Py_ssize_t start, stop, step; - if (PySlice_Unpack(item, &start, &stop, &step) < 0) { - return NULL; - } - return list_slice_wrap(self, start, stop, step); + return list_slice_subscript(_self, item); } else { PyErr_Format(PyExc_TypeError, diff --git a/Objects/listsort.txt b/Objects/listsort.txt index f387d9c116e..5b2fc7d50a2 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -270,8 +270,8 @@ result. This has two primary good effects: Computing minrun ---------------- -If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the -whole array then; it's hard to beat that given the overheads of trying +If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the +whole array then; it's hard to beat that given the overheads of trying something fancier (see note BINSORT). When N is a power of 2, testing on random data showed that minrun values of @@ -288,7 +288,6 @@ that 32 isn't a good choice for the general case! Consider N=2112: >>> divmod(2112, 32) (66, 0) ->>> If the data is randomly ordered, we're very likely to end up with 66 runs each of length 32. The first 64 of these trigger a sequence of perfectly @@ -301,22 +300,94 @@ to get 64 elements into place). If we take minrun=33 in this case, then we're very likely to end up with 64 runs each of length 33, and then all merges are perfectly balanced. Better! -What we want to avoid is picking minrun such that in +The original code used a cheap heuristic to pick a minrun that avoided the +very worst cases of imbalance for the final merge, but "pretty bad" cases +still existed. 
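(Aside, not part of the diff: the cheap heuristic being retired is small enough to restate for comparison. Below is an illustrative standalone sketch of the old merge_compute_minrun() logic, renamed old_minrun here, showing the lucky case of 2112 (64 runs, a power of 2) next to a bad one, 32769 (993 runs, not a power of 2). The listsort.txt hunk continues after the sketch.)

#include <stdio.h>

#define MAX_MINRUN 64

/* Old heuristic: keep the top bits of n, adding 1 if any shifted-off
 * bit was set. */
static long old_minrun(long n)
{
    long r = 0;
    while (n >= MAX_MINRUN) {
        r |= n & 1;
        n >>= 1;
    }
    return n + r;
}

int main(void)
{
    long ns[] = {2112, 32769};
    for (int i = 0; i < 2; i++) {
        long n = ns[i], mr = old_minrun(n);
        printf("n=%ld minrun=%ld runs=%ld\n", n, mr, (n + mr - 1) / mr);
    }
    return 0;
}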
-    q, r = divmod(N, minrun)
+In 2025, Stefan Pochmann found a much better approach, based on letting minrun
+vary a bit from one run to the next. Under his scheme, at _all_ levels of the
+merge tree:

-q is a power of 2 and r>0 (then the last merge only gets r elements into
-place, and r < minrun is small compared to N), or q a little larger than a
-power of 2 regardless of r (then we've got a case similar to "2112", again
-leaving too little work for the last merge to do).
+- The number of runs is a power of 2.
+- At most two different run lengths appear.
+- When two do appear, the smaller is one less than the larger.
+- The lengths of run pairs merged never differ by more than one.

-Instead we pick a minrun in range(MAX_MINRUN / 2, MAX_MINRUN + 1) such that
-N/minrun is exactly a power of 2, or if that isn't possible, is close to, but
-strictly less than, a power of 2. This is easier to do than it may sound:
-take the first log2(MAX_MINRUN) bits of N, and add 1 if any of the remaining
-bits are set. In fact, that rule covers every case in this section, including
-small N and exact powers of 2; merge_compute_minrun() is a deceptively simple
-function.
+So, in all respects, as perfectly balanced as possible.
+
+For the 2112 case, that also keeps minrun at 33, but we were lucky there
+that 2112 is 33 times a power of 2. The new approach doesn't rely on luck.
+
+For example, with 315 random elements, the old scheme uses fixed minrun=40 and
+produces runs of length 40, except for the last. The new scheme produces a
+mix of lengths 39 and 40:
+
+old: 40 40 40 40 40 40 40 35
+new: 39 39 40 39 39 40 39 40
+
+Both schemes produce eight runs, a power of 2. That's good for a balanced
+merge tree. But the new scheme allows merges where left and right length
+never differ by more than 1:
+
+39 39 40 39 39 40 39 40
+   78    79    79    79
+      157         158
+            315
+
+(This shows merges downward, e.g., two runs of length 39 are merged and
+become a run of length 78.)
+
+With larger lists, the old scheme can get even more unbalanced. For example,
+with 32769 elements (that's 2**15 + 1), it uses minrun=33 and produces 993
+runs (of length 33). That's not even a power of 2. The new scheme instead
+produces 1024 runs, all with length 32 except for the last one with length 33.
+
+How does it work? Ideally, all runs would be exactly equally long. For the
+above example, each run would have 315/8 = 39.375 elements. Which of course
+doesn't work. But we can get close:
+
+For the first run, we'd like 39.375 elements. Since that's impossible, we
+instead use 39 (the floor) and remember the current leftover fraction 0.375.
+For the second run, we add 0.375 + 39.375 = 39.75. Again impossible, so we
+instead use 39 and remember 0.75. For the third run, we add 0.75 + 39.375 =
+40.125. This time we get 40 and remember 0.125. And so on. Here's a Python
+generator doing that:
+
+def gen_minruns_with_floats(n):
+    mr = n
+    while mr >= MAX_MINRUN:
+        mr /= 2
+
+    mr_current = 0
+    while True:
+        mr_current += mr
+        yield int(mr_current)
+        mr_current %= 1
+
+But while all arithmetic here can be done exactly using binary floating point,
+floats have less precision than a Py_ssize_t, and mixing floats with ints is
+needlessly expensive anyway.
+
+So here's an integer version, where the internal numbers are scaled up by
+2**e, or rather not divided by 2**e. Instead, only each yielded minrun gets
+divided (by right-shifting). For example, instead of adding 39.375 and
+reducing modulo 1, it just adds 315 and reduces modulo 8.
And always divides
+by 8 to get each actual minrun value:
+
+def gen_minruns_simpler(n):
+    e = 0
+    while (n >> e) >= MAX_MINRUN:
+        e += 1
+    mask = (1 << e) - 1
+
+    mr_current = 0
+    while True:
+        mr_current += n
+        yield mr_current >> e
+        mr_current &= mask
+
+See note MINRUN CODE for a full implementation and a driver that exhaustively
+verifies the claims above for all list lengths through 2 million.

 The Merge Pattern
@@ -820,3 +891,75 @@ partially mitigated by pre-scanning the data to determine whether the data is
 homogeneous with respect to type.  If so, it is sometimes possible to
 substitute faster type-specific comparisons for the slower, generic
 PyObject_RichCompareBool.
+
+MINRUN CODE
+from itertools import accumulate
+try:
+    from itertools import batched
+except ImportError:
+    from itertools import islice
+    def batched(xs, k):
+        it = iter(xs)
+        while chunk := tuple(islice(it, k)):
+            yield chunk
+
+MAX_MINRUN = 64
+
+def gen_minruns(n):
+    # In listobject.c, initialization is done in merge_init(), and
+    # the body of the loop in minrun_next().
+    mr_e = 0
+    while (n >> mr_e) >= MAX_MINRUN:
+        mr_e += 1
+    mr_mask = (1 << mr_e) - 1
+
+    mr_current = 0
+    while True:
+        mr_current += n
+        yield mr_current >> mr_e
+        mr_current &= mr_mask
+
+def chew(n, show=False):
+    if n < 1:
+        return
+
+    sizes = []
+    tot = 0
+    for size in gen_minruns(n):
+        sizes.append(size)
+        tot += size
+        if tot >= n:
+            break
+    assert tot == n
+    if show:
+        print(n, len(sizes))
+
+    small, large = MAX_MINRUN // 2, MAX_MINRUN
+    while len(sizes) > 1:
+        assert not len(sizes) & 1
+        assert len(sizes).bit_count() == 1 # i.e., power of 2
+        assert sum(sizes) == n
+        assert min(sizes) >= min(n, small)
+        assert max(sizes) <= large
+
+        d = set(sizes)
+        assert len(d) <= 2
+        if len(d) == 2:
+            lo, hi = sorted(d)
+            assert lo + 1 == hi
+
+        mr = n / len(sizes)
+        for i, s in enumerate(accumulate(sizes, initial=0)):
+            assert int(mr * i) == s
+
+        newsizes = []
+        for a, b in batched(sizes, 2):
+            assert abs(a - b) <= 1
+            newsizes.append(a + b)
+        sizes = newsizes
+        small = large
+        large *= 2
+
+    assert sizes[0] == n
+
+for n in range(2_000_001):
+    chew(n)
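To make the integer scheme concrete in C terms, here is a minimal standalone
sketch (not part of the patch; MAX_MINRUN assumed to be 64 as above) that
mirrors the merge_init()/minrun_next() pair from listobject.c and prints the
minrun sequence for the 315-element example:

#include <stdio.h>

#define MAX_MINRUN 64

int main(void)
{
    long n = 315;                    /* list length from the example above */
    int e = 0;
    while ((n >> e) >= MAX_MINRUN) {
        e++;                         /* n / 2**e is the "ideal" minrun */
    }
    long mask = (1L << e) - 1;
    long current = 0, total = 0;
    while (total < n) {
        current += n;                /* add the scaled-up ideal minrun */
        long mr = current >> e;      /* next minrun to hand out */
        current &= mask;             /* keep only the scaled-up remainder */
        total += mr;
        printf("%ld ", mr);          /* prints: 39 39 40 39 39 40 39 40 */
    }
    printf("\n");
    return 0;
}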
\ No newline at end of file diff --git a/Objects/longobject.c b/Objects/longobject.c index 40d90ecf4fa..557bb6e1dd9 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -10,6 +10,7 @@ #include "pycore_long.h" // _Py_SmallInts #include "pycore_object.h" // _PyObject_Init() #include "pycore_runtime.h" // _PY_NSMALLPOSINTS +#include "pycore_stackref.h" #include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin() #include "pycore_unicodeobject.h" // _PyUnicode_Equal() @@ -316,6 +317,33 @@ _PyLong_FromSTwoDigits(stwodigits x) return (PyLongObject*)_PyLong_FromLarge(x); } +/* Create a new medium int object from a medium int. + * Do not raise. Return NULL if not medium or can't allocate. */ +static inline _PyStackRef +medium_from_stwodigits(stwodigits x) +{ + if (IS_SMALL_INT(x)) { + return PyStackRef_FromPyObjectBorrow(get_small_int((sdigit)x)); + } + assert(x != 0); + if(!is_medium_int(x)) { + return PyStackRef_NULL; + } + PyLongObject *v = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints); + if (v == NULL) { + v = PyObject_Malloc(sizeof(PyLongObject)); + if (v == NULL) { + return PyStackRef_NULL; + } + _PyObject_Init((PyObject*)v, &PyLong_Type); + } + digit abs_x = x < 0 ? (digit)(-x) : (digit)x; + _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1); + v->long_value.ob_digit[0] = abs_x; + return PyStackRef_FromPyObjectStealMortal((PyObject *)v); +} + + /* If a freshly-allocated int is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ Py_LOCAL_INLINE(void) @@ -971,16 +999,9 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, ++numsignificantbytes; } - /* How many Python int digits do we need? We have - 8*numsignificantbytes bits, and each Python int digit has - PyLong_SHIFT bits, so it's the ceiling of the quotient. 
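Worth noting about the longobject.c hunk above: the new _PyCompactLong_Add,
_PyCompactLong_Subtract, and _PyCompactLong_Multiply helpers never raise, so a
PyStackRef_NULL result only means "result is not a medium int" or "allocation
failed", and the caller must keep a generic fallback. A hypothetical caller
(add_maybe_compact is an illustrative name, not something in this patch,
assuming the internal pycore_stackref.h/pycore_long.h helpers shown elsewhere
in this diff) might look like:

static _PyStackRef
add_maybe_compact(PyLongObject *a, PyLongObject *b)
{
    if (_PyLong_BothAreCompact(a, b)) {
        _PyStackRef res = _PyCompactLong_Add(a, b);
        if (!PyStackRef_IsNull(res)) {
            return res;              /* fast path: no exception possible */
        }
        /* result did not fit in a medium int (or allocation failed):
         * fall through to the general path, which can raise */
    }
    PyObject *res = PyNumber_Add((PyObject *)a, (PyObject *)b);
    if (res == NULL) {
        return PyStackRef_NULL;      /* real error, exception is set */
    }
    return PyStackRef_FromPyObjectSteal(res);
}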
*/ - /* catch overflow before it happens */ - if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { - PyErr_SetString(PyExc_OverflowError, - "byte array too long to convert to int"); - return NULL; - } - ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; + /* avoid integer overflow */ + ndigits = numsignificantbytes / PyLong_SHIFT * 8 + + (numsignificantbytes % PyLong_SHIFT * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; v = long_alloc(ndigits); if (v == NULL) return NULL; @@ -1760,6 +1781,10 @@ UNSIGNED_INT_CONVERTER(UnsignedInt, unsigned int) UNSIGNED_INT_CONVERTER(UnsignedLong, unsigned long) UNSIGNED_INT_CONVERTER(UnsignedLongLong, unsigned long long) UNSIGNED_INT_CONVERTER(Size_t, size_t) +UNSIGNED_INT_CONVERTER(UInt8, uint8_t) +UNSIGNED_INT_CONVERTER(UInt16, uint16_t) +UNSIGNED_INT_CONVERTER(UInt32, uint32_t) +UNSIGNED_INT_CONVERTER(UInt64, uint64_t) #define CHECK_BINOP(v,w) \ @@ -3774,10 +3799,12 @@ long_add(PyLongObject *a, PyLongObject *b) return z; } -PyObject * -_PyLong_Add(PyLongObject *a, PyLongObject *b) +_PyStackRef +_PyCompactLong_Add(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_add(a, b); + assert(_PyLong_BothAreCompact(a, b)); + stwodigits v = medium_value(a) + medium_value(b); + return medium_from_stwodigits(v); } static PyObject * @@ -3817,10 +3844,12 @@ long_sub(PyLongObject *a, PyLongObject *b) return z; } -PyObject * -_PyLong_Subtract(PyLongObject *a, PyLongObject *b) +_PyStackRef +_PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_sub(a, b); + assert(_PyLong_BothAreCompact(a, b)); + stwodigits v = medium_value(a) - medium_value(b); + return medium_from_stwodigits(v); } static PyObject * @@ -4264,10 +4293,14 @@ long_mul(PyLongObject *a, PyLongObject *b) return z; } -PyObject * -_PyLong_Multiply(PyLongObject *a, PyLongObject *b) +/* This function returns NULL if the result is not compact, + * or if it fails to allocate, but never raises */ +_PyStackRef +_PyCompactLong_Multiply(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_mul(a, b); + assert(_PyLong_BothAreCompact(a, b)); + stwodigits v = medium_value(a) * medium_value(b); + return medium_from_stwodigits(v); } static PyObject * diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 8b28662631b..e6e469ca270 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -8,6 +8,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() /* undefine macro trampoline to PyCFunction_NewEx */ @@ -166,13 +167,8 @@ static void meth_dealloc(PyObject *self) { PyCFunctionObject *m = _PyCFunctionObject_CAST(self); - // The Py_TRASHCAN mechanism requires that we be able to - // call PyObject_GC_UnTrack twice on an object. PyObject_GC_UnTrack(m); - Py_TRASHCAN_BEGIN(m, meth_dealloc); - if (m->m_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject*) m); - } + FT_CLEAR_WEAKREFS(self, m->m_weakreflist); // We need to access ml_flags here rather than later. // `m->m_ml` might have the same lifetime // as `m_self` when it's dynamically allocated. 
@@ -190,7 +186,6 @@ meth_dealloc(PyObject *self) assert(Py_IS_TYPE(self, &PyCFunction_Type)); _Py_FREELIST_FREE(pycfunctionobject, m, PyObject_GC_Del); } - Py_TRASHCAN_END; } static PyObject * diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f363ef173cb..862395e7881 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -12,8 +12,8 @@ #include "pycore_object.h" // _PyType_AllocNoTrack #include "pycore_pyerrors.h" // _PyErr_FormatFromCause() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "osdefs.h" // MAXPATHLEN @@ -827,8 +827,7 @@ module_dealloc(PyObject *self) if (verbose && m->md_name) { PySys_FormatStderr("# destroy %U\n", m->md_name); } - if (m->md_weaklist != NULL) - PyObject_ClearWeakRefs((PyObject *) m); + FT_CLEAR_WEAKREFS(self, m->md_weaklist); /* bpo-39824: Don't call m_free() if m_size > 0 and md_state=NULL */ if (m->md_def && m->md_def->m_free @@ -1058,7 +1057,7 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) int is_possibly_shadowing_stdlib = 0; if (is_possibly_shadowing) { PyObject *stdlib_modules; - if (_PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { + if (PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { goto done; } if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index caebe6bf543..0fc2bcea4cb 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -124,9 +124,10 @@ namespace_repr(PyObject *ns) if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) { PyObject *value, *item; - value = PyDict_GetItemWithError(d, key); - if (value != NULL) { + int has_key = PyDict_GetItemRef(d, key, &value); + if (has_key == 1) { item = PyUnicode_FromFormat("%U=%R", key, value); + Py_DECREF(value); if (item == NULL) { loop_error = 1; } @@ -135,7 +136,7 @@ namespace_repr(PyObject *ns) Py_DECREF(item); } } - else if (PyErr_Occurred()) { + else if (has_key < 0) { loop_error = 1; } } diff --git a/Objects/object.c b/Objects/object.c index 99bb1d9c0bf..1223983753a 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -15,6 +15,8 @@ #include "pycore_hamt.h" // _PyHamtItems_Type #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_instruction_sequence.h" // _PyInstructionSequence_Type +#include "pycore_interpframe.h" // _PyFrame_Stackbase() +#include "pycore_interpolation.h" // _PyInterpolation_Type #include "pycore_list.h" // _PyList_DebugMallocStats() #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_memoryobject.h" // _PyManagedBuffer_Type @@ -25,6 +27,7 @@ #include "pycore_pymem.h" // _PyMem_IsPtrFreed() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_symtable.h" // PySTEntry_Type +#include "pycore_template.h" // _PyTemplate_Type _PyTemplateIter_Type #include "pycore_tuple.h" // _PyTuple_DebugMallocStats() #include "pycore_typeobject.h" // _PyBufferWrapper_Type #include "pycore_typevarobject.h" // _PyTypeAlias_Type @@ -922,6 +925,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) // In the free-threaded build, freelists are per-PyThreadState and cleared in PyThreadState_Clear() // In the default build, freelists are per-interpreter and cleared in finalize_interp_types() clear_freelist(&freelists->floats, 
is_finalization, free_object); + clear_freelist(&freelists->complexes, is_finalization, free_object); for (Py_ssize_t i = 0; i < PyTuple_MAXSAVESIZE; i++) { clear_freelist(&freelists->tuples[i], is_finalization, free_object); } @@ -1127,11 +1131,14 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) res = PyObject_RichCompare(v, w, op); if (res == NULL) return -1; - if (PyBool_Check(res)) + if (PyBool_Check(res)) { ok = (res == Py_True); - else + assert(_Py_IsImmortal(res)); + } + else { ok = PyObject_IsTrue(res); - Py_DECREF(res); + Py_DECREF(res); + } return ok; } @@ -1661,6 +1668,116 @@ _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) return 0; } +int +_PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, + PyObject *name, _PyStackRef *method) +{ + int meth_found = 0; + + assert(PyStackRef_IsNull(*method)); + + PyTypeObject *tp = Py_TYPE(obj); + if (!_PyType_IsReady(tp)) { + if (PyType_Ready(tp) < 0) { + return 0; + } + } + + if (tp->tp_getattro != PyObject_GenericGetAttr || !PyUnicode_CheckExact(name)) { + PyObject *res = PyObject_GetAttr(obj, name); + if (res != NULL) { + *method = PyStackRef_FromPyObjectSteal(res); + } + return 0; + } + + _PyType_LookupStackRefAndVersion(tp, name, method); + PyObject *descr = PyStackRef_AsPyObjectBorrow(*method); + descrgetfunc f = NULL; + if (descr != NULL) { + if (_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) { + meth_found = 1; + } + else { + f = Py_TYPE(descr)->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + PyObject *value = f(descr, obj, (PyObject *)Py_TYPE(obj)); + PyStackRef_CLEAR(*method); + if (value != NULL) { + *method = PyStackRef_FromPyObjectSteal(value); + } + return 0; + } + } + } + PyObject *dict, *attr; + if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && + _PyObject_TryGetInstanceAttribute(obj, name, &attr)) { + if (attr != NULL) { + PyStackRef_CLEAR(*method); + *method = PyStackRef_FromPyObjectSteal(attr); + return 0; + } + dict = NULL; + } + else if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { + dict = (PyObject *)_PyObject_GetManagedDict(obj); + } + else { + PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + if (dictptr != NULL) { + dict = FT_ATOMIC_LOAD_PTR_ACQUIRE(*dictptr); + } + else { + dict = NULL; + } + } + if (dict != NULL) { + // TODO: use _Py_dict_lookup_threadsafe_stackref + Py_INCREF(dict); + PyObject *value; + if (PyDict_GetItemRef(dict, name, &value) != 0) { + // found or error + Py_DECREF(dict); + PyStackRef_CLEAR(*method); + if (value != NULL) { + *method = PyStackRef_FromPyObjectSteal(value); + } + return 0; + } + // not found + Py_DECREF(dict); + } + + if (meth_found) { + assert(!PyStackRef_IsNull(*method)); + return 1; + } + + if (f != NULL) { + PyObject *value = f(descr, obj, (PyObject *)Py_TYPE(obj)); + PyStackRef_CLEAR(*method); + if (value) { + *method = PyStackRef_FromPyObjectSteal(value); + } + return 0; + } + + if (descr != NULL) { + assert(!PyStackRef_IsNull(*method)); + return 0; + } + + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute '%U'", + tp->tp_name, name); + + _PyObject_SetAttributeErrorContext(obj, name); + assert(PyStackRef_IsNull(*method)); + return 0; +} + + /* Generic GetAttr functions - put these in your tp_[gs]etattro slot. 
*/ PyObject * @@ -1903,34 +2020,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) int PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr = _PyObject_GetDictPtr(obj); - if (dictptr == NULL) { - if (_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_INLINE_VALUES) && - _PyObject_GetManagedDict(obj) == NULL - ) { - /* Was unable to convert to dict */ - PyErr_NoMemory(); - } - else { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - } - return -1; - } if (value == NULL) { PyErr_SetString(PyExc_TypeError, "cannot delete __dict__"); return -1; } - if (!PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - Py_BEGIN_CRITICAL_SECTION(obj); - Py_XSETREF(*dictptr, Py_NewRef(value)); - Py_END_CRITICAL_SECTION(); - return 0; + return _PyObject_SetDict(obj, value); } @@ -1993,9 +2087,25 @@ _dir_locals(void) PyObject *names; PyObject *locals; - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (!_PyErr_Occurred(tstate)) { + locals = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(locals); + } + } + if (locals == NULL) { return NULL; + } names = PyMapping_Keys(locals); Py_DECREF(locals); @@ -2409,6 +2519,7 @@ static PyTypeObject* static_types[] = { &_PyHamt_CollisionNode_Type, &_PyHamt_Type, &_PyInstructionSequence_Type, + &_PyInterpolation_Type, &_PyLegacyEventHandler_Type, &_PyLineIterator, &_PyManagedBuffer_Type, @@ -2418,6 +2529,8 @@ static PyTypeObject* static_types[] = { &_PyNone_Type, &_PyNotImplemented_Type, &_PyPositionsIterator, + &_PyTemplate_Type, + &_PyTemplateIter_Type, &_PyUnicodeASCIIIter_Type, &_PyUnion_Type, #ifdef _Py_TIER2 @@ -2617,6 +2730,29 @@ PyUnstable_Object_EnableDeferredRefcount(PyObject *op) } int +PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *op) +{ + if (!_PyObject_IsUniquelyReferenced(op)) { + return 0; + } + + _PyInterpreterFrame *frame = _PyEval_GetFrame(); + if (frame == NULL) { + return 0; + } + + _PyStackRef *base = _PyFrame_Stackbase(frame); + _PyStackRef *stackpointer = frame->stackpointer; + while (stackpointer > base) { + stackpointer--; + if (op == PyStackRef_AsPyObjectBorrow(*stackpointer)) { + return PyStackRef_IsHeapSafe(*stackpointer); + } + } + return 0; +} + +int PyUnstable_TryIncRef(PyObject *op) { return _Py_TryIncref(op); @@ -2902,19 +3038,27 @@ finally: /* Trashcan support. */ /* Add op to the gcstate->trash_delete_later list. Called when the current - * call-stack depth gets large. op must be a currently untracked gc'ed - * object, with refcount 0. Py_DECREF must already have been called on it. + * call-stack depth gets large. op must be a gc'ed object, with refcount 0. + * Py_DECREF must already have been called on it. 
*/ void _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { - _PyObject_ASSERT(op, _PyObject_IS_GC(op)); - _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); _PyObject_ASSERT(op, Py_REFCNT(op) == 0); + PyTypeObject *tp = Py_TYPE(op); + assert(tp->tp_flags & Py_TPFLAGS_HAVE_GC); + int tracked = 0; + if (tp->tp_is_gc == NULL || tp->tp_is_gc(op)) { + tracked = _PyObject_GC_IS_TRACKED(op); + if (tracked) { + _PyObject_GC_UNTRACK(op); + } + } + uintptr_t tagged_ptr = ((uintptr_t)tstate->delete_later) | tracked; #ifdef Py_GIL_DISABLED - op->ob_tid = (uintptr_t)tstate->delete_later; + op->ob_tid = tagged_ptr; #else - _PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)tstate->delete_later); + _Py_AS_GC(op)->_gc_next = tagged_ptr; #endif tstate->delete_later = op; } @@ -2929,13 +3073,17 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_GIL_DISABLED - tstate->delete_later = (PyObject*) op->ob_tid; + uintptr_t tagged_ptr = op->ob_tid; op->ob_tid = 0; _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED); #else - tstate->delete_later = (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op)); + uintptr_t tagged_ptr = _Py_AS_GC(op)->_gc_next; + _Py_AS_GC(op)->_gc_next = 0; #endif - + tstate->delete_later = (PyObject *)(tagged_ptr & ~1); + if (tagged_ptr & 1) { + _PyObject_GC_TRACK(op); + } /* Call the deallocator directly. This used to try to * fool Py_DECREF into calling it indirectly, but * Py_DECREF was already called on this object, and in @@ -2998,13 +3146,26 @@ _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg, } +/* +When deallocating a container object, it's possible to trigger an unbounded +chain of deallocations, as each Py_DECREF in turn drops the refcount on "the +next" object in the chain to 0. This can easily lead to stack overflows. +To avoid that, if the C stack is nearing its limit, instead of calling +dealloc on the object, it is added to a queue to be freed later when the +stack is shallower */ void _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); + unsigned long gc_flag = type->tp_flags & Py_TPFLAGS_HAVE_GC; destructor dealloc = type->tp_dealloc; -#ifdef Py_DEBUG PyThreadState *tstate = _PyThreadState_GET(); + intptr_t margin = _Py_RecursionLimit_GetMargin(tstate); + if (margin < 2 && gc_flag) { + _PyTrash_thread_deposit_object(tstate, (PyObject *)op); + return; + } +#ifdef Py_DEBUG #if !defined(Py_GIL_DISABLED) && !defined(Py_STACKREF_DEBUG) /* This assertion doesn't hold for the free-threading build, as * PyStackRef_CLOSE_SPECIALIZED is not implemented */ @@ -3046,6 +3207,9 @@ _Py_Dealloc(PyObject *op) Py_XDECREF(old_exc); Py_DECREF(type); #endif + if (tstate->delete_later && margin >= 4 && gc_flag) { + _PyTrash_thread_destroy_chain(tstate); + } } @@ -3199,3 +3363,11 @@ PyUnstable_IsImmortal(PyObject *op) assert(op != NULL); return _Py_IsImmortal(op); } + +int +PyUnstable_Object_IsUniquelyReferenced(PyObject *op) +{ + _Py_AssertHoldsTstate(); + assert(op != NULL); + return _PyObject_IsUniquelyReferenced(op); +} diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index b209808da90..deb7fd957e5 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page) } +#ifdef Py_GIL_DISABLED + +// If we are deferring collection of more than this amount of memory for +// mimalloc pages, advance the write sequence. Advancing allows these +// pages to be re-used in a different thread or for a different size class. 
+#define QSBR_PAGE_MEM_LIMIT 4096*20 + +// Return true if the global write sequence should be advanced for a mimalloc +// page that is deferred from collection. +static bool +should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) +{ + size_t bsize = mi_page_block_size(page); + size_t page_size = page->capacity*bsize; + if (page_size > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + qsbr->deferred_page_memory += page_size; + if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + return false; +} +#endif + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) _PyMem_mi_page_clear_qsbr(page); page->retire_expire = 0; - page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr); + + if (should_advance_qsbr_for_page(tstate->qsbr, page)) { + page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); + } + llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); return false; } @@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } + +#ifdef Py_GIL_DISABLED + +// For deferred advance on free: the number of deferred items before advancing +// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally +// want to process a chunk before it overflows. +#define QSBR_DEFERRED_LIMIT 127 + +// If the deferred memory exceeds 1 MiB, advance the write sequence. This +// helps limit memory usage due to QSBR delaying frees too long. +#define QSBR_FREE_MEM_LIMIT 1024*1024 + +// Return true if the global write sequence should be advanced for a deferred +// memory free. +static bool +should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size) +{ + if (size > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + qsbr->deferred_count++; + qsbr->deferred_memory += size; + if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT || + qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + return false; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + if (should_advance_qsbr_for_free(tstate->qsbr, size)) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_shared_next(tstate->qsbr->shared); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + // Normally the processing of delayed items is done from the eval + // breaker. Processing here is a safety measure to ensure too much + // work does not accumulate. 
_PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1305,25 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + // We use 0 as the size since we don't have an easy way to know the + // actual size. If we are freeing many objects, the write sequence + // will be advanced due to QSBR_DEFERRED_LIMIT. + free_delayed(((uintptr_t)ptr)|0x01, 0); + } +} +#endif + +#ifdef Py_GIL_DISABLED +void +_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value) +{ + PyObject *old = *ptr; + FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value); + if (old == NULL) { + return; + } + if (!_Py_IsImmortal(old)) { + _PyObject_XDecRefDelayed(old); } } #endif @@ -1238,7 +1335,7 @@ work_queue_first(struct llist_node *head) } static void -process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, +process_queue(struct llist_node *head, _PyThreadStateImpl *tstate, bool keep_empty, delayed_dealloc_cb cb, void *state) { while (!llist_empty(head)) { @@ -1246,7 +1343,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, if (buf->rd_idx < buf->wr_idx) { struct _mem_work_item *item = &buf->array[buf->rd_idx]; - if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) { + if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) { return; } @@ -1270,11 +1367,11 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, static void process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { assert(PyMutex_IsLocked(&queue->mutex)); - process_queue(&queue->head, qsbr, false, cb, state); + process_queue(&queue->head, tstate, false, cb, state); int more_work = !llist_empty(&queue->head); _Py_atomic_store_int_relaxed(&queue->has_work, more_work); @@ -1282,7 +1379,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue, static void maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { if (!_Py_atomic_load_int_relaxed(&queue->has_work)) { @@ -1291,7 +1388,7 @@ maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, // Try to acquire the lock, but don't block if it's already held. 
if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) { - process_interp_queue(queue, qsbr, cb, state); + process_interp_queue(queue, tstate, cb, state); PyMutex_Unlock(&queue->mutex); } } @@ -1302,11 +1399,13 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate_impl->qsbr->should_process = false; + // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL); } void @@ -1316,10 +1415,10 @@ _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, voi _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state); } void @@ -1348,7 +1447,7 @@ _PyMem_AbandonDelayed(PyThreadState *tstate) // Process the merged queue now (see gh-130794). _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr, NULL, NULL); + process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL); PyMutex_Unlock(&interp->mem_free_queue.mutex); diff --git a/Objects/odictobject.c b/Objects/odictobject.c index 1412acb50ac..02fcbbaa0d4 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -473,6 +473,7 @@ later: #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_tuple.h" // _PyTuple_Recycle() #include <stddef.h> // offsetof() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/odictobject.c.h" @@ -1389,16 +1390,12 @@ odict_dealloc(PyObject *op) { PyODictObject *self = _PyODictObject_CAST(op); PyObject_GC_UnTrack(self); - Py_TRASHCAN_BEGIN(self, odict_dealloc) Py_XDECREF(self->od_inst_dict); - if (self->od_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *)self); + FT_CLEAR_WEAKREFS(op, self->od_weakreflist); _odict_clear_nodes(self); PyDict_Type.tp_dealloc((PyObject *)self); - - Py_TRASHCAN_END } /* tp_repr */ diff --git a/Objects/picklebufobject.c b/Objects/picklebufobject.c index 3ce800de04c..50f17687bc4 100644 --- a/Objects/picklebufobject.c +++ b/Objects/picklebufobject.c @@ -1,6 +1,7 @@ /* PickleBuffer object implementation */ #include "Python.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> typedef struct { @@ -111,8 +112,7 @@ picklebuf_dealloc(PyObject *op) { PyPickleBufferObject *self = (PyPickleBufferObject*)op; PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) self); + FT_CLEAR_WEAKREFS(op, self->weakreflist); PyBuffer_Release(&self->view); Py_TYPE(self)->tp_free((PyObject *) self); } diff --git a/Objects/setobject.c b/Objects/setobject.c index 90e7e6ae14a..6e4fc5957ca 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -40,6 +40,7 @@ #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_RELAXED() #include 
"pycore_pyerrors.h" // _PyErr_SetKeyError() #include "pycore_setobject.h" // _PySet_NextEntry() definition +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "stringlib/eq.h" // unicode_eq() #include <stddef.h> // offsetof() @@ -536,9 +537,7 @@ set_dealloc(PyObject *self) /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(so); - Py_TRASHCAN_BEGIN(so, set_dealloc) - if (so->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) so); + FT_CLEAR_WEAKREFS(self, so->weakreflist); for (entry = so->table; used > 0; entry++) { if (entry->key && entry->key != dummy) { @@ -549,7 +548,6 @@ set_dealloc(PyObject *self) if (so->table != so->smalltable) PyMem_Free(so->table); Py_TYPE(so)->tp_free(so); - Py_TRASHCAN_END } static PyObject * diff --git a/Objects/templateobject.c b/Objects/templateobject.c new file mode 100644 index 00000000000..4293a311c44 --- /dev/null +++ b/Objects/templateobject.c @@ -0,0 +1,488 @@ +/* t-string Template object implementation */ + +#include "Python.h" +#include "pycore_interpolation.h" // _PyInterpolation_CheckExact() +#include "pycore_runtime.h" // _Py_STR() +#include "pycore_template.h" + +typedef struct { + PyObject_HEAD + PyObject *stringsiter; + PyObject *interpolationsiter; + int from_strings; +} templateiterobject; + +#define templateiterobject_CAST(op) \ + (assert(_PyTemplateIter_CheckExact(op)), _Py_CAST(templateiterobject*, (op))) + +static PyObject * +templateiter_next(PyObject *op) +{ + templateiterobject *self = templateiterobject_CAST(op); + PyObject *item; + if (self->from_strings) { + item = PyIter_Next(self->stringsiter); + self->from_strings = 0; + if (item == NULL) { + return NULL; + } + if (PyUnicode_GET_LENGTH(item) == 0) { + Py_SETREF(item, PyIter_Next(self->interpolationsiter)); + self->from_strings = 1; + } + } else { + item = PyIter_Next(self->interpolationsiter); + self->from_strings = 1; + } + return item; +} + +static void +templateiter_dealloc(PyObject *op) +{ + PyObject_GC_UnTrack(op); + Py_TYPE(op)->tp_clear(op); + Py_TYPE(op)->tp_free(op); +} + +static int +templateiter_clear(PyObject *op) +{ + templateiterobject *self = templateiterobject_CAST(op); + Py_CLEAR(self->stringsiter); + Py_CLEAR(self->interpolationsiter); + return 0; +} + +static int +templateiter_traverse(PyObject *op, visitproc visit, void *arg) +{ + templateiterobject *self = templateiterobject_CAST(op); + Py_VISIT(self->stringsiter); + Py_VISIT(self->interpolationsiter); + return 0; +} + +PyTypeObject _PyTemplateIter_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "string.templatelib.TemplateIter", + .tp_doc = PyDoc_STR("Template iterator object"), + .tp_basicsize = sizeof(templateiterobject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_alloc = PyType_GenericAlloc, + .tp_dealloc = templateiter_dealloc, + .tp_clear = templateiter_clear, + .tp_free = PyObject_GC_Del, + .tp_traverse = templateiter_traverse, + .tp_iter = PyObject_SelfIter, + .tp_iternext = templateiter_next, +}; + +typedef struct { + PyObject_HEAD + PyObject *strings; + PyObject *interpolations; +} templateobject; + +#define templateobject_CAST(op) \ + (assert(_PyTemplate_CheckExact(op)), _Py_CAST(templateobject*, (op))) + +static PyObject * +template_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + if (kwds != NULL) { + PyErr_SetString(PyExc_TypeError, "Template.__new__ only accepts *args arguments"); + return NULL; + } + + Py_ssize_t argslen = PyTuple_GET_SIZE(args); + Py_ssize_t stringslen = 0; + Py_ssize_t 
interpolationslen = 0; + int last_was_str = 0; + + for (Py_ssize_t i = 0; i < argslen; i++) { + PyObject *item = PyTuple_GET_ITEM(args, i); + if (PyUnicode_Check(item)) { + if (!last_was_str) { + stringslen++; + } + last_was_str = 1; + } + else if (_PyInterpolation_CheckExact(item)) { + if (!last_was_str) { + stringslen++; + } + interpolationslen++; + last_was_str = 0; + } + else { + PyErr_Format( + PyExc_TypeError, + "Template.__new__ *args need to be of type 'str' or 'Interpolation', got %T", + item); + return NULL; + } + } + if (!last_was_str) { + stringslen++; + } + + PyObject *strings = PyTuple_New(stringslen); + if (!strings) { + return NULL; + } + + PyObject *interpolations = PyTuple_New(interpolationslen); + if (!interpolations) { + Py_DECREF(strings); + return NULL; + } + + last_was_str = 0; + Py_ssize_t stringsidx = 0, interpolationsidx = 0; + for (Py_ssize_t i = 0; i < argslen; i++) { + PyObject *item = PyTuple_GET_ITEM(args, i); + if (PyUnicode_Check(item)) { + if (last_was_str) { + PyObject *laststring = PyTuple_GET_ITEM(strings, stringsidx - 1); + PyObject *concat = PyUnicode_Concat(laststring, item); + Py_DECREF(laststring); + if (!concat) { + Py_DECREF(strings); + Py_DECREF(interpolations); + return NULL; + } + PyTuple_SET_ITEM(strings, stringsidx - 1, concat); + } + else { + PyTuple_SET_ITEM(strings, stringsidx++, Py_NewRef(item)); + } + last_was_str = 1; + } + else if (_PyInterpolation_CheckExact(item)) { + if (!last_was_str) { + _Py_DECLARE_STR(empty, ""); + PyTuple_SET_ITEM(strings, stringsidx++, &_Py_STR(empty)); + } + PyTuple_SET_ITEM(interpolations, interpolationsidx++, Py_NewRef(item)); + last_was_str = 0; + } + } + if (!last_was_str) { + _Py_DECLARE_STR(empty, ""); + PyTuple_SET_ITEM(strings, stringsidx++, &_Py_STR(empty)); + } + + PyObject *template = _PyTemplate_Build(strings, interpolations); + Py_DECREF(strings); + Py_DECREF(interpolations); + return template; +} + +static void +template_dealloc(PyObject *op) +{ + PyObject_GC_UnTrack(op); + Py_TYPE(op)->tp_clear(op); + Py_TYPE(op)->tp_free(op); +} + +static int +template_clear(PyObject *op) +{ + templateobject *self = templateobject_CAST(op); + Py_CLEAR(self->strings); + Py_CLEAR(self->interpolations); + return 0; +} + +static int +template_traverse(PyObject *op, visitproc visit, void *arg) +{ + templateobject *self = templateobject_CAST(op); + Py_VISIT(self->strings); + Py_VISIT(self->interpolations); + return 0; +} + +static PyObject * +template_repr(PyObject *op) +{ + templateobject *self = templateobject_CAST(op); + return PyUnicode_FromFormat("%s(strings=%R, interpolations=%R)", + _PyType_Name(Py_TYPE(self)), + self->strings, + self->interpolations); +} + +static PyObject * +template_iter(PyObject *op) +{ + templateobject *self = templateobject_CAST(op); + templateiterobject *iter = PyObject_GC_New(templateiterobject, &_PyTemplateIter_Type); + if (iter == NULL) { + return NULL; + } + + PyObject *stringsiter = PyObject_GetIter(self->strings); + if (stringsiter == NULL) { + Py_DECREF(iter); + return NULL; + } + + PyObject *interpolationsiter = PyObject_GetIter(self->interpolations); + if (interpolationsiter == NULL) { + Py_DECREF(iter); + Py_DECREF(stringsiter); + return NULL; + } + + iter->stringsiter = stringsiter; + iter->interpolationsiter = interpolationsiter; + iter->from_strings = 1; + PyObject_GC_Track(iter); + return (PyObject *)iter; +} + +static PyObject * +template_strings_append_str(PyObject *strings, PyObject *str) +{ + Py_ssize_t stringslen = PyTuple_GET_SIZE(strings); + PyObject *string = 
PyTuple_GET_ITEM(strings, stringslen - 1); + PyObject *concat = PyUnicode_Concat(string, str); + if (concat == NULL) { + return NULL; + } + + PyObject *newstrings = PyTuple_New(stringslen); + if (newstrings == NULL) { + Py_DECREF(concat); + return NULL; + } + + for (Py_ssize_t i = 0; i < stringslen - 1; i++) { + PyTuple_SET_ITEM(newstrings, i, Py_NewRef(PyTuple_GET_ITEM(strings, i))); + } + PyTuple_SET_ITEM(newstrings, stringslen - 1, concat); + + return newstrings; +} + +static PyObject * +template_strings_prepend_str(PyObject *strings, PyObject *str) +{ + Py_ssize_t stringslen = PyTuple_GET_SIZE(strings); + PyObject *string = PyTuple_GET_ITEM(strings, 0); + PyObject *concat = PyUnicode_Concat(str, string); + if (concat == NULL) { + return NULL; + } + + PyObject *newstrings = PyTuple_New(stringslen); + if (newstrings == NULL) { + Py_DECREF(concat); + return NULL; + } + + PyTuple_SET_ITEM(newstrings, 0, concat); + for (Py_ssize_t i = 1; i < stringslen; i++) { + PyTuple_SET_ITEM(newstrings, i, Py_NewRef(PyTuple_GET_ITEM(strings, i))); + } + + return newstrings; +} + +static PyObject * +template_strings_concat(PyObject *left, PyObject *right) +{ + Py_ssize_t left_stringslen = PyTuple_GET_SIZE(left); + PyObject *left_laststring = PyTuple_GET_ITEM(left, left_stringslen - 1); + Py_ssize_t right_stringslen = PyTuple_GET_SIZE(right); + PyObject *right_firststring = PyTuple_GET_ITEM(right, 0); + + PyObject *concat = PyUnicode_Concat(left_laststring, right_firststring); + if (concat == NULL) { + return NULL; + } + + PyObject *newstrings = PyTuple_New(left_stringslen + right_stringslen - 1); + if (newstrings == NULL) { + Py_DECREF(concat); + return NULL; + } + + Py_ssize_t index = 0; + for (Py_ssize_t i = 0; i < left_stringslen - 1; i++) { + PyTuple_SET_ITEM(newstrings, index++, Py_NewRef(PyTuple_GET_ITEM(left, i))); + } + PyTuple_SET_ITEM(newstrings, index++, concat); + for (Py_ssize_t i = 1; i < right_stringslen; i++) { + PyTuple_SET_ITEM(newstrings, index++, Py_NewRef(PyTuple_GET_ITEM(right, i))); + } + + return newstrings; +} + +static PyObject * +template_concat_templates(templateobject *self, templateobject *other) +{ + PyObject *newstrings = template_strings_concat(self->strings, other->strings); + if (newstrings == NULL) { + return NULL; + } + + PyObject *newinterpolations = PySequence_Concat(self->interpolations, other->interpolations); + if (newinterpolations == NULL) { + Py_DECREF(newstrings); + return NULL; + } + + PyObject *newtemplate = _PyTemplate_Build(newstrings, newinterpolations); + Py_DECREF(newstrings); + Py_DECREF(newinterpolations); + return newtemplate; +} + +static PyObject * +template_concat_template_str(templateobject *self, PyObject *other) +{ + PyObject *newstrings = template_strings_append_str(self->strings, other); + if (newstrings == NULL) { + return NULL; + } + + PyObject *newtemplate = _PyTemplate_Build(newstrings, self->interpolations); + Py_DECREF(newstrings); + return newtemplate; +} + +static PyObject * +template_concat_str_template(templateobject *self, PyObject *other) +{ + PyObject *newstrings = template_strings_prepend_str(self->strings, other); + if (newstrings == NULL) { + return NULL; + } + + PyObject *newtemplate = _PyTemplate_Build(newstrings, self->interpolations); + Py_DECREF(newstrings); + return newtemplate; +} + +PyObject * +_PyTemplate_Concat(PyObject *self, PyObject *other) +{ + if (_PyTemplate_CheckExact(self) && _PyTemplate_CheckExact(other)) { + return template_concat_templates((templateobject *) self, (templateobject *) other); + } + else if 
((_PyTemplate_CheckExact(self)) && PyUnicode_Check(other)) { + return template_concat_template_str((templateobject *) self, other); + } + else if (PyUnicode_Check(self) && (_PyTemplate_CheckExact(other))) { + return template_concat_str_template((templateobject *) other, self); + } + else { + Py_RETURN_NOTIMPLEMENTED; + } +} + +static PyObject * +template_values_get(PyObject *op, void *Py_UNUSED(data)) +{ + templateobject *self = templateobject_CAST(op); + + Py_ssize_t len = PyTuple_GET_SIZE(self->interpolations); + PyObject *values = PyTuple_New(PyTuple_GET_SIZE(self->interpolations)); + if (values == NULL) { + return NULL; + } + + for (Py_ssize_t i = 0; i < len; i++) { + PyObject *item = PyTuple_GET_ITEM(self->interpolations, i); + PyTuple_SET_ITEM(values, i, _PyInterpolation_GetValueRef(item)); + } + + return values; +} + +static PyMemberDef template_members[] = { + {"strings", Py_T_OBJECT_EX, offsetof(templateobject, strings), Py_READONLY, "Strings"}, + {"interpolations", Py_T_OBJECT_EX, offsetof(templateobject, interpolations), Py_READONLY, "Interpolations"}, + {NULL}, +}; + +static PyGetSetDef template_getset[] = { + {"values", template_values_get, NULL, + PyDoc_STR("Values of interpolations"), NULL}, + {NULL}, +}; + +static PySequenceMethods template_as_sequence = { + .sq_concat = _PyTemplate_Concat, +}; + +static PyObject* +template_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) +{ + PyObject *mod = PyImport_ImportModule("string.templatelib"); + if (mod == NULL) { + return NULL; + } + PyObject *func = PyObject_GetAttrString(mod, "_template_unpickle"); + Py_DECREF(mod); + if (func == NULL) { + return NULL; + } + + templateobject *self = templateobject_CAST(op); + PyObject *result = Py_BuildValue("O(OO)", + func, + self->strings, + self->interpolations); + + Py_DECREF(func); + return result; +} + +static PyMethodDef template_methods[] = { + {"__reduce__", template_reduce, METH_NOARGS, NULL}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, + {NULL, NULL}, +}; + +PyTypeObject _PyTemplate_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "string.templatelib.Template", + .tp_doc = PyDoc_STR("Template object"), + .tp_basicsize = sizeof(templateobject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_as_sequence = &template_as_sequence, + .tp_new = template_new, + .tp_alloc = PyType_GenericAlloc, + .tp_dealloc = template_dealloc, + .tp_clear = template_clear, + .tp_free = PyObject_GC_Del, + .tp_repr = template_repr, + .tp_members = template_members, + .tp_methods = template_methods, + .tp_getset = template_getset, + .tp_iter = template_iter, + .tp_traverse = template_traverse, +}; + +PyObject * +_PyTemplate_Build(PyObject *strings, PyObject *interpolations) +{ + templateobject *template = PyObject_GC_New(templateobject, &_PyTemplate_Type); + if (template == NULL) { + return NULL; + } + + template->strings = Py_NewRef(strings); + template->interpolations = Py_NewRef(interpolations); + PyObject_GC_Track(template); + return (PyObject *) template; +} diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 737c4e6d977..9b31758485c 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -207,7 +207,6 @@ tuple_dealloc(PyObject *self) } PyObject_GC_UnTrack(op); - Py_TRASHCAN_BEGIN(op, tuple_dealloc) Py_ssize_t i = Py_SIZE(op); while (--i >= 0) { @@ -217,8 +216,6 @@ tuple_dealloc(PyObject *self) if (!maybe_freelist_push(op)) { Py_TYPE(op)->tp_free((PyObject *)op); } - - Py_TRASHCAN_END } static 
PyObject * diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 4e614daaa69..6e7471cb594 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -48,7 +48,7 @@ class object "PyObject *" "&PyBaseObject_Type" & ((1 << MCACHE_SIZE_EXP) - 1)) #define MCACHE_HASH_METHOD(type, name) \ - MCACHE_HASH(FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag), \ + MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \ ((Py_ssize_t)(name)) >> 3) #define MCACHE_CACHEABLE_NAME(name) \ PyUnicode_CheckExact(name) && \ @@ -60,11 +60,19 @@ class object "PyObject *" "&PyBaseObject_Type" #ifdef Py_GIL_DISABLED -// There's a global lock for mutation of types. This avoids having to take -// additional locks while doing various subclass processing which may result -// in odd behaviors w.r.t. running with the GIL as the outer type lock could -// be released and reacquired during a subclass update if there's contention -// on the subclass lock. +// There's a global lock for types that ensures that tp_version_tag and +// _spec_cache are correctly updated if the type is modified. It also protects +// tp_mro, tp_bases, and tp_base. This avoids having to take additional locks +// while doing various subclass processing which may result in odd behaviors +// w.r.t. running with the GIL as the outer type lock could be released and +// reacquired during a subclass update if there's contention on the subclass +// lock. +// +// Note that this lock does not protect updates of other type slots or the +// tp_flags member. Instead, we either ensure those updates are done before +// the type has been revealed to other threads or we only do those updates +// while the stop-the-world mechanism is active. The slots and flags are read +// in many places without holding a lock and without atomics. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex #define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK) #define END_TYPE_LOCK() Py_END_CRITICAL_SECTION() @@ -74,8 +82,100 @@ class object "PyObject *" "&PyBaseObject_Type" #define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() +#ifdef Py_DEBUG +// Return true if the world is currently stopped. +static bool +types_world_is_stopped(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return interp->stoptheworld.world_stopped; +} +#endif + +// Checks that the type has not yet been revealed (exposed) to other +// threads. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and +// PyType_FromMetaclass() to indicate that a newly initialized type might be +// revealed. We only have ob_flags on 64-bit platforms. +#if SIZEOF_VOID_P > 4 +#define TYPE_IS_REVEALED(tp) ((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) != 0) +#else +#define TYPE_IS_REVEALED(tp) 0 +#endif + +#ifdef Py_DEBUG #define ASSERT_TYPE_LOCK_HELD() \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); } + +// Checks if we can safely update type slots or tp_flags. 
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) \ + assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) + +#define ASSERT_NEW_TYPE_OR_LOCKED(tp) \ + if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); } +#else +#define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) +#define ASSERT_NEW_TYPE_OR_LOCKED(tp) +#endif + +static void +types_stop_world(void) +{ + assert(!types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + assert(types_world_is_stopped()); +} + +static void +types_start_world(void) +{ + assert(types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StartTheWorld(interp); + assert(!types_world_is_stopped()); +} + +// This is used to temporarily prevent the TYPE_LOCK from being suspended +// when held by the topmost critical section. +static void +type_lock_prevent_release(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t *tagptr = &tstate->critical_section; + PyCriticalSection *c = (PyCriticalSection *)(*tagptr & ~_Py_CRITICAL_SECTION_MASK); + if (!(*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { + assert(c->_cs_mutex == TYPE_LOCK); + c->_cs_mutex = NULL; + } + else { + PyCriticalSection2 *c2 = (PyCriticalSection2 *)c; + if (c->_cs_mutex == TYPE_LOCK) { + c->_cs_mutex = c2->_cs_mutex2; + c2->_cs_mutex2 = NULL; + } else { + assert(c2->_cs_mutex2 == TYPE_LOCK); + c2->_cs_mutex2 = NULL; + } + } +} + +static void +type_lock_allow_release(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t *tagptr = &tstate->critical_section; + PyCriticalSection *c = (PyCriticalSection *)(*tagptr & ~_Py_CRITICAL_SECTION_MASK); + if (!(*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { + assert(c->_cs_mutex == NULL); + c->_cs_mutex = TYPE_LOCK; + } + else { + PyCriticalSection2 *c2 = (PyCriticalSection2 *)c; + assert(c2->_cs_mutex2 == NULL); + c2->_cs_mutex2 = TYPE_LOCK; + } +} #else @@ -84,6 +184,12 @@ class object "PyObject *" "&PyBaseObject_Type" #define BEGIN_TYPE_DICT_LOCK(d) #define END_TYPE_DICT_LOCK() #define ASSERT_TYPE_LOCK_HELD() +#define TYPE_IS_REVEALED(tp) 0 +#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) +#define ASSERT_NEW_TYPE_OR_LOCKED(tp) +#define types_world_is_stopped() 1 +#define types_stop_world() +#define types_start_world() #endif @@ -106,6 +212,9 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static int slot_tp_setattro(PyObject *self, PyObject *name, PyObject *value); +static PyObject * +slot_tp_call(PyObject *self, PyObject *args, PyObject *kwds); + static inline PyTypeObject * type_from_ref(PyObject *ref) { @@ -346,21 +455,14 @@ _PyStaticType_GetBuiltins(void) static void type_set_flags(PyTypeObject *tp, unsigned long flags) { - if (tp->tp_flags & Py_TPFLAGS_READY) { - // It's possible the type object has been exposed to other threads - // if it's been marked ready. In that case, the type lock should be - // held when flags are modified. - ASSERT_TYPE_LOCK_HELD(); - } - // Since PyType_HasFeature() reads the flags without holding the type - // lock, we need an atomic store here. 
- FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); + ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp); + tp->tp_flags = flags; } static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -498,6 +600,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_Check(bases)); + ASSERT_NEW_TYPE_OR_LOCKED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -542,7 +645,7 @@ clear_tp_bases(PyTypeObject *self, int final) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(self); return self->tp_mro; } @@ -1027,7 +1130,6 @@ PyType_Unwatch(int watcher_id, PyObject* obj) static void set_version_unlocked(PyTypeObject *tp, unsigned int version) { - ASSERT_TYPE_LOCK_HELD(); assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED)); #ifndef Py_GIL_DISABLED PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -1075,7 +1177,12 @@ type_modified_unlocked(PyTypeObject *type) We don't assign new version tags eagerly, but only as needed. */ - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(type); +#ifdef Py_GIL_DISABLED + // This function is re-entrant and it's not safe to call it + // with the world stopped. + assert(!types_world_is_stopped()); +#endif if (type->tp_version_tag == 0) { return; } @@ -1106,6 +1213,8 @@ type_modified_unlocked(PyTypeObject *type) while (bits) { assert(i < TYPE_MAX_WATCHERS); if (bits & 1) { + // Note that PyErr_FormatUnraisable is potentially re-entrant + // and the watcher callback might be too. 
PyType_WatchCallback cb = interp->type_watchers[i]; if (cb && (cb(type) < 0)) { PyErr_FormatUnraisable( @@ -1245,14 +1354,6 @@ _PyType_LookupByVersion(unsigned int version) #endif } -unsigned int -_PyType_GetVersionForCurrentState(PyTypeObject *tp) -{ - return tp->tp_version_tag; -} - - - #define MAX_VERSIONS_PER_CLASS 1000 #if _Py_ATTR_CACHE_UNUSED < MAX_VERSIONS_PER_CLASS #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" @@ -1586,10 +1687,13 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) BEGIN_TYPE_LOCK(); type_modified_unlocked(type); + types_stop_world(); if (abstract) type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT); else type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); END_TYPE_LOCK(); return 0; @@ -1624,15 +1728,15 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) return mro; } -static PyTypeObject *best_base(PyObject *); -static int mro_internal(PyTypeObject *, PyObject **); +static PyTypeObject *find_best_base(PyObject *); +static int mro_internal(PyTypeObject *, int, PyObject **); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); static int add_all_subclasses(PyTypeObject *type, PyObject *bases); static void remove_subclass(PyTypeObject *, PyTypeObject *); static void remove_all_subclasses(PyTypeObject *type, PyObject *bases); -static void update_all_slots(PyTypeObject *); +static int update_all_slots(PyTypeObject *); typedef int (*update_callback)(PyTypeObject *, void *); static int update_subclasses(PyTypeObject *type, PyObject *attr_name, @@ -1640,13 +1744,15 @@ static int update_subclasses(PyTypeObject *type, PyObject *attr_name, static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); +// Compute tp_mro for this type and all of its subclasses. This +// is called after __bases__ is assigned to an existing type. 
static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); PyObject *old_mro; - int res = mro_internal(type, &old_mro); + int res = mro_internal(type, 0, &old_mro); if (res <= 0) { /* error / reentrance */ return res; @@ -1708,9 +1814,9 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) +type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **best_base) { - // Check arguments + // Check arguments, this is re-entrant due to the PySys_Audit() call if (!check_set_special_type_attr(type, new_bases, "__bases__")) { return -1; } @@ -1759,20 +1865,29 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) } // Compute the new MRO and the new base class - PyTypeObject *new_base = best_base(new_bases); - if (new_base == NULL) + *best_base = find_best_base(new_bases); + if (*best_base == NULL) return -1; - if (!compatible_for_assignment(type->tp_base, new_base, "__bases__")) { + if (!compatible_for_assignment(type->tp_base, *best_base, "__bases__")) { return -1; } + return 0; +} + +static int +type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) +{ + ASSERT_TYPE_LOCK_HELD(); + + Py_ssize_t n; PyObject *old_bases = lookup_tp_bases(type); assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; set_tp_bases(type, Py_NewRef(new_bases), 0); - type->tp_base = (PyTypeObject *)Py_NewRef(new_base); + type->tp_base = (PyTypeObject *)Py_NewRef(best_base); PyObject *temp = PyList_New(0); if (temp == NULL) { @@ -1796,7 +1911,11 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) add to all new_bases */ remove_all_subclasses(type, old_bases); res = add_all_subclasses(type, new_bases); - update_all_slots(type); + if (update_all_slots(type) < 0) { + goto bail; + } + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. 
*/ + type_modified_unlocked(type); } else { res = 0; @@ -1827,13 +1946,13 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) bail: if (lookup_tp_bases(type) == new_bases) { - assert(type->tp_base == new_base); + assert(type->tp_base == best_base); set_tp_bases(type, old_bases, 0); type->tp_base = old_base; Py_DECREF(new_bases); - Py_DECREF(new_base); + Py_DECREF(best_base); } else { Py_DECREF(old_bases); @@ -1848,9 +1967,13 @@ static int type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); + PyTypeObject *best_base; int res; BEGIN_TYPE_LOCK(); - res = type_set_bases_unlocked(type, new_bases); + res = type_check_new_bases(type, new_bases, &best_base); + if (res == 0) { + res = type_set_bases_unlocked(type, new_bases, best_base); + } END_TYPE_LOCK(); return res; } @@ -2065,19 +2188,46 @@ type_set_annotations(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)) return -1; } - int result; PyObject *dict = PyType_GetDict(type); - if (value != NULL) { - /* set */ - result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); - } else { - /* delete */ - result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); - if (result == 0) { - PyErr_SetString(PyExc_AttributeError, "__annotations__"); + int result = PyDict_ContainsString(dict, "__annotations__"); + if (result < 0) { + Py_DECREF(dict); + return -1; + } + if (result) { + // If __annotations__ is currently in the dict, we update it, + if (value != NULL) { + result = PyDict_SetItem(dict, &_Py_ID(__annotations__), value); + } else { + result = PyDict_Pop(dict, &_Py_ID(__annotations__), NULL); + if (result == 0) { + // Somebody else just deleted it? + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } + if (result < 0) { Py_DECREF(dict); return -1; } + // Also clear __annotations_cache__ just in case. + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + } + else { + // Else we update only __annotations_cache__. + if (value != NULL) { + /* set */ + result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); + } else { + /* delete */ + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + if (result == 0) { + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } } if (result < 0) { Py_DECREF(dict); @@ -2575,7 +2725,6 @@ subtype_dealloc(PyObject *self) /* UnTrack and re-Track around the trashcan macro, alas */ /* See explanation at end of function for full disclosure */ PyObject_GC_UnTrack(self); - Py_TRASHCAN_BEGIN(self, subtype_dealloc); /* Find the nearest base with a different tp_dealloc */ base = type; @@ -2590,7 +2739,7 @@ subtype_dealloc(PyObject *self) _PyObject_GC_TRACK(self); if (PyObject_CallFinalizerFromDealloc(self) < 0) { /* Resurrected */ - goto endlabel; + return; } _PyObject_GC_UNTRACK(self); } @@ -2612,7 +2761,7 @@ subtype_dealloc(PyObject *self) type->tp_del(self); if (Py_REFCNT(self) > 0) { /* Resurrected */ - goto endlabel; + return; } _PyObject_GC_UNTRACK(self); } @@ -2675,46 +2824,6 @@ subtype_dealloc(PyObject *self) if (type_needs_decref) { _Py_DECREF_TYPE(type); } - - endlabel: - Py_TRASHCAN_END - - /* Explanation of the weirdness around the trashcan macros: - - Q. What do the trashcan macros do? - - A. Read the comment titled "Trashcan mechanism" in object.h. - For one, this explains why there must be a call to GC-untrack - before the trashcan begin macro. 
Without understanding the - trashcan code, the answers to the following questions don't make - sense. - - Q. Why do we GC-untrack before the trashcan and then immediately - GC-track again afterward? - - A. In the case that the base class is GC-aware, the base class - probably GC-untracks the object. If it does that using the - UNTRACK macro, this will crash when the object is already - untracked. Because we don't know what the base class does, the - only safe thing is to make sure the object is tracked when we - call the base class dealloc. But... The trashcan begin macro - requires that the object is *untracked* before it is called. So - the dance becomes: - - GC untrack - trashcan begin - GC track - - Q. Why did the last question say "immediately GC-track again"? - It's nowhere near immediately. - - A. Because the code *used* to re-track immediately. Bad Idea. - self has a refcount of 0, and if gc ever gets its hands on it - (which can happen if any weakref callback gets invoked), it - looks like trash to gc too, and gc also tries to delete self - then. But we're already deleting self. Double deallocation is - a subtle disaster. - */ } static PyTypeObject *solid_base(PyTypeObject *type); @@ -3092,6 +3201,7 @@ static PyObject * class_name(PyObject *cls) { PyObject *name; + // Note that this is potentially re-entrant. if (PyObject_GetOptionalAttr(cls, &_Py_ID(__name__), &name) == 0) { name = PyObject_Repr(cls); } @@ -3428,9 +3538,13 @@ mro_invoke(PyTypeObject *type) const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { + // Custom mro() method on metaclass. This is potentially re-entrant. + // We are called either from type_ready() or from type_set_bases(). mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro)); } else { + // In this case, the mro() method on the type object is being used and + // we know that these calls are not re-entrant. mro_result = mro_implementation_unlocked(type); } if (mro_result == NULL) @@ -3478,7 +3592,7 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); @@ -3526,21 +3640,11 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) return 1; } -static int -mro_internal(PyTypeObject *type, PyObject **p_old_mro) -{ - int res; - BEGIN_TYPE_LOCK(); - res = mro_internal_unlocked(type, 0, p_old_mro); - END_TYPE_LOCK(); - return res; -} - /* Calculate the best base amongst multiple base classes. This is the first one that's on the path to the "solid base". */ static PyTypeObject * -best_base(PyObject *bases) +find_best_base(PyObject *bases) { Py_ssize_t i, n; PyTypeObject *base, *winner, *candidate; @@ -3622,13 +3726,167 @@ solid_base(PyTypeObject *type) } } +#ifdef Py_GIL_DISABLED + +// The structures and functions below are used in the free-threaded build +// to safely make updates to type slots, on type_setattro() for a slot +// or when __bases__ is re-assigned. Since the slots are read without atomic +// operations and without locking, we can only safely update them while the +// world is stopped. However, with the world stopped, we are very limited on +// which APIs can be safely used. For example, calling _PyObject_HashFast() +// or _PyDict_GetItemRef_KnownHash() are not safe and can potentially cause +// deadlocks. 
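To make that constraint concrete: the ordinary thread-safe way to read from a possibly-shared dict is the reference-returning lookup below, and it belongs strictly to the lookup phase, never to the world-stopped phase. A sketch; dict and key are assumed to be valid objects owned by the caller:

    /* PyDict_GetItemRef() may lock the dict or mark it shared internally.
       That is fine during normal execution but forbidden once the world is
       stopped, hence the two-phase design described here. */
    PyObject *value;
    int rc = PyDict_GetItemRef(dict, key, &value);
    if (rc < 0) {
        return -1;      /* lookup raised an exception */
    }
    if (rc == 1) {
        /* ... use value ... */
        Py_DECREF(value);
    }
    /* rc == 0 means the key was absent */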
Hashing can be re-entrant and _PyDict_GetItemRef_KnownHash can +// acquire a lock if the dictionary is not owned by the current thread, to +// mark it shared on reading. +// +// We do the slot updates in two steps. First, with TYPE_LOCK held, we lookup +// the descriptor for each slot, for each subclass. We build a queue of +// updates to perform but don't actually update the type structures. After we +// are finished the lookups, we stop-the-world and apply all of the updates. +// The apply_slot_updates() code is simple and easy to confirm that it is +// safe. + +typedef struct { + PyTypeObject *type; + void **slot_ptr; + void *slot_value; +} slot_update_item_t; + +// The number of slot updates performed is based on the number of changed +// slots and the number of subclasses. It's possible there are many updates +// required if there are many subclasses (potentially an unbounded amount). +// Usually the number of slot updates is small, most often zero or one. When +// running the unit tests, we don't exceed 20. The chunk size is set to +// handle the common case with a single chunk and to not require too many +// chunk allocations if there are many subclasses. +#define SLOT_UPDATE_CHUNK_SIZE 30 + +typedef struct _slot_update { + struct _slot_update *prev; + Py_ssize_t n; + slot_update_item_t updates[SLOT_UPDATE_CHUNK_SIZE]; +} slot_update_chunk_t; + +// a queue of updates to be performed +typedef struct { + slot_update_chunk_t *head; +} slot_update_t; + +static slot_update_chunk_t * +slot_update_new_chunk(void) +{ + slot_update_chunk_t *chunk = PyMem_Malloc(sizeof(slot_update_chunk_t)); + if (chunk == NULL) { + PyErr_NoMemory(); + return NULL; + } + chunk->prev = NULL; + chunk->n = 0; + return chunk; +} + +static void +slot_update_free_chunks(slot_update_t *updates) +{ + slot_update_chunk_t *chunk = updates->head; + while (chunk != NULL) { + slot_update_chunk_t *prev = chunk->prev; + PyMem_Free(chunk); + chunk = prev; + } +} + +static int +queue_slot_update(slot_update_t *updates, PyTypeObject *type, + void **slot_ptr, void *slot_value) +{ + if (*slot_ptr == slot_value) { + return 0; // slot pointer not actually changed, don't queue update + } + if (updates->head == NULL || updates->head->n == SLOT_UPDATE_CHUNK_SIZE) { + slot_update_chunk_t *chunk = slot_update_new_chunk(); + if (chunk == NULL) { + return -1; // out-of-memory + } + chunk->prev = updates->head; + updates->head = chunk; + } + slot_update_item_t *item = &updates->head->updates[updates->head->n]; + item->type = type; + item->slot_ptr = slot_ptr; + item->slot_value = slot_value; + updates->head->n++; + assert(updates->head->n <= SLOT_UPDATE_CHUNK_SIZE); + return 0; +} + +static void +apply_slot_updates(slot_update_t *updates) +{ + assert(types_world_is_stopped()); + slot_update_chunk_t *chunk = updates->head; + while (chunk != NULL) { + for (Py_ssize_t i = 0; i < chunk->n; i++) { + slot_update_item_t *item = &chunk->updates[i]; + *(item->slot_ptr) = item->slot_value; + if (item->slot_value == slot_tp_call) { + /* A generic __call__ is incompatible with vectorcall */ + type_clear_flags(item->type, Py_TPFLAGS_HAVE_VECTORCALL); + } + } + chunk = chunk->prev; + } +} + +static void +apply_type_slot_updates(slot_update_t *updates) +{ + // This must be done carefully to avoid data races and deadlocks. We + // have just updated the type __dict__, while holding TYPE_LOCK. We have + // collected all of the required type slot updates into the 'updates' + // queue. 
Note that those updates can apply to multiple types since + // subclasses might also be affected by the dict change. + // + // We need to prevent other threads from writing to the dict before we can + // finish updating the slots. The actual stores to the slots are done + // with the world stopped. If we block on the stop-the-world mutex then + // we could release TYPE_LOCK mutex and potentially allow other threads + // to update the dict. That's because TYPE_LOCK was acquired using a + // critical section. + // + // The type_lock_prevent_release() call prevents the TYPE_LOCK mutex from + // being released even if we block on the STM mutex. We need to take care + // that we do not deadlock because of that. It is safe because we always + // acquire locks in the same order: first the TYPE_LOCK mutex and then the + // STM mutex. + type_lock_prevent_release(); + types_stop_world(); + apply_slot_updates(updates); + types_start_world(); + type_lock_allow_release(); +} + +#else + +// dummy definition, this parameter is only NULL in the default build +typedef void slot_update_t; + +#endif + +/// data passed to update_slots_callback() +typedef struct { + slot_update_t *queued_updates; + pytype_slotdef **defs; +} update_callback_data_t; + static void object_dealloc(PyObject *); static PyObject *object_new(PyTypeObject *, PyObject *, PyObject *); static int object_init(PyObject *, PyObject *, PyObject *); -static int update_slot(PyTypeObject *, PyObject *); +static int update_slot(PyTypeObject *, PyObject *, slot_update_t *update); static void fixup_slot_dispatchers(PyTypeObject *); static int type_new_set_names(PyTypeObject *); static int type_new_init_subclass(PyTypeObject *, PyObject *); +static bool has_slotdef(PyObject *); /* * Helpers for __dict__ descriptor. We don't want to expose the dicts @@ -3690,10 +3948,35 @@ subtype_dict(PyObject *obj, void *context) return PyObject_GenericGetDict(obj, context); } +int +_PyObject_SetDict(PyObject *obj, PyObject *value) +{ + if (value != NULL && !PyDict_Check(value)) { + PyErr_Format(PyExc_TypeError, + "__dict__ must be set to a dictionary, " + "not a '%.200s'", Py_TYPE(value)->tp_name); + return -1; + } + if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + return _PyObject_SetManagedDict(obj, value); + } + PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + if (dictptr == NULL) { + PyErr_SetString(PyExc_AttributeError, + "This object has no __dict__"); + return -1; + } + Py_BEGIN_CRITICAL_SECTION(obj); + // gh-133980: To prevent use-after-free from other threads that reference + // the __dict__ + _PyObject_XSetRefDelayed(dictptr, Py_NewRef(value)); + Py_END_CRITICAL_SECTION(); + return 0; +} + static int subtype_setdict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr; PyTypeObject *base; base = get_builtin_base_with_dict(Py_TYPE(obj)); @@ -3711,28 +3994,7 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context) } return func(descr, obj, value); } - /* Almost like PyObject_GenericSetDict, but allow __dict__ to be deleted. 
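The consolidated _PyObject_SetDict() helper above is where instance __dict__ assignment now lands. Through the public API, the same operation is an ordinary attribute store; a sketch, with obj standing for any instance whose type provides a __dict__:

    /* Assigning obj.__dict__ type-checks the value and, on free-threaded
       builds, defers freeing the old dict (gh-133980) so that concurrent
       readers of the __dict__ slot stay safe. */
    PyObject *d = PyDict_New();
    if (d == NULL) {
        return -1;
    }
    int rc = PyObject_SetAttrString(obj, "__dict__", d);
    Py_DECREF(d);
    if (rc < 0) {
        return -1;
    }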
*/
-    if (value != NULL && !PyDict_Check(value)) {
-        PyErr_Format(PyExc_TypeError,
-                     "__dict__ must be set to a dictionary, "
-                     "not a '%.200s'", Py_TYPE(value)->tp_name);
-        return -1;
-    }
-
-    if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
-        return _PyObject_SetManagedDict(obj, value);
-    }
-    else {
-        dictptr = _PyObject_ComputedDictPointer(obj);
-        if (dictptr == NULL) {
-            PyErr_SetString(PyExc_AttributeError,
-                            "This object has no __dict__");
-            return -1;
-        }
-        Py_CLEAR(*dictptr);
-        *dictptr = Py_XNewRef(value);
-    }
-    return 0;
+    return _PyObject_SetDict(obj, value);
 }
 
 static PyObject *
@@ -3826,7 +4088,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds)
 
 unsigned long
 PyType_GetFlags(PyTypeObject *type)
 {
-    return FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags);
+    return type->tp_flags;
 }
 
@@ -4604,6 +4866,10 @@ type_new_impl(type_new_ctx *ctx)
     }
 
     assert(_PyType_CheckConsistency(type));
+#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4
+    // After this point, other threads can potentially use this type.
+    ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG;
+#endif
 
     return (PyObject *)type;
 
@@ -4666,7 +4932,7 @@ type_new_get_bases(type_new_ctx *ctx, PyObject **type)
     }
 
     /* Calculate best base, and check that all bases are type objects */
-    PyTypeObject *base = best_base(ctx->bases);
+    PyTypeObject *base = find_best_base(ctx->bases);
    if (base == NULL) {
         return -1;
     }
@@ -5081,12 +5347,12 @@ PyType_FromMetaclass(
     }
 
     /* Calculate best base, and check that all bases are type objects */
-    PyTypeObject *base = best_base(bases); // borrowed ref
+    PyTypeObject *base = find_best_base(bases); // borrowed ref
     if (base == NULL) {
         goto finally;
     }
-    // best_base should check Py_TPFLAGS_BASETYPE & raise a proper exception,
-    // here we just check its work
+    // find_best_base() should check Py_TPFLAGS_BASETYPE & raise a proper
+    // exception, here we just check its work
     assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE));
 
     /* Calculate sizes */
@@ -5317,6 +5583,10 @@ PyType_FromMetaclass(
     }
 
     assert(_PyType_CheckConsistency(type));
+#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4
+    // After this point, other threads can potentially use this type.
+    ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG;
+#endif
 
 finally:
     if (PyErr_Occurred()) {
@@ -5440,7 +5710,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
         if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
             // type_ready_mro() ensures that no heap type is
             // contained in a static type MRO.
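Both reveal-flag hunks above mark the point where a freshly created heap type becomes visible to other threads; on debug free-threaded builds the flag lets later code detect unsafe mutation of an already-revealed type. From an extension's perspective, that point is simply the return of the type-creation call. A minimal sketch; the spec contents are illustrative only:

    /* Once PyType_FromSpec() returns, other threads may already see and use
       the type, which is what _Py_TYPE_REVEALED_FLAG records on debug
       free-threaded builds. */
    static PyType_Slot my_slots[] = {
        {0, NULL},
    };
    static PyType_Spec my_spec = {
        .name = "mymod.MyType",         /* hypothetical module.type name */
        .basicsize = sizeof(PyObject),
        .flags = Py_TPFLAGS_DEFAULT,
        .slots = my_slots,
    };

    PyObject *tp = PyType_FromSpec(&my_spec);
    if (tp == NULL) {
        return NULL;
    }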
- return NULL; + goto error; } else { PyHeapTypeObject *ht = (PyHeapTypeObject*)type; @@ -5480,13 +5750,15 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) } END_TYPE_LOCK(); - if (res == NULL) { - PyErr_Format( - PyExc_TypeError, - "PyType_GetModuleByDef: No superclass of '%s' has the given module", - type->tp_name); + if (res != NULL) { + return res; } - return res; +error: + PyErr_Format( + PyExc_TypeError, + "PyType_GetModuleByDef: No superclass of '%s' has the given module", + type->tp_name); + return NULL; } @@ -5610,8 +5882,6 @@ PyObject_GetItemData(PyObject *obj) static PyObject * find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { - ASSERT_TYPE_LOCK_HELD(); - Py_hash_t hash = _PyObject_HashFast(name); if (hash == -1) { *error = -1; @@ -5920,9 +6190,13 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); - type_set_flags_with_mask(self, mask, flags); - END_TYPE_LOCK(); + unsigned long new_flags = (self->tp_flags & ~mask) | flags; + if (new_flags != self->tp_flags) { + types_stop_world(); + // can't use new_flags here since they could be out-of-date + self->tp_flags = (self->tp_flags & ~mask) | flags; + types_start_world(); + } } int @@ -5969,9 +6243,9 @@ set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags) void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); + types_stop_world(); set_flags_recursive(self, mask, flags); - END_TYPE_LOCK(); + types_start_world(); } /* This is similar to PyObject_GenericGetAttr(), @@ -6085,6 +6359,8 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } +// Called by type_setattro(). Updates both the type dict and +// the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) @@ -6114,10 +6390,30 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } - if (is_dunder_name(name)) { - return update_slot(type, name); - } + return 0; +} + +static int +update_slot_after_setattr(PyTypeObject *type, PyObject *name) +{ +#ifdef Py_GIL_DISABLED + // stack allocate one chunk since that's all we need + assert(SLOT_UPDATE_CHUNK_SIZE >= MAX_EQUIV); + slot_update_chunk_t chunk = {0}; + slot_update_t queued_updates = {&chunk}; + if (update_slot(type, name, &queued_updates) < 0) { + return -1; + } + if (queued_updates.head->n > 0) { + apply_type_slot_updates(&queued_updates); + ASSERT_TYPE_LOCK_HELD(); + // should never allocate another chunk + assert(chunk.prev == NULL); + } +#else + update_slot(type, name, NULL); +#endif return 0; } @@ -6175,7 +6471,9 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) PyObject *dict = type->tp_dict; if (dict == NULL) { - // We don't just do PyType_Ready because we could already be readying + // This is an unlikely case. PyType_Ready has not yet been done and + // we need to initialize tp_dict. We don't just do PyType_Ready + // because we could already be readying. 
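For contrast with the error path reworked above, this is the usual successful pattern for PyType_GetModuleByDef(); mymodule_def names a hypothetical PyModuleDef:

    /* Typical use inside a method of a module-defined heap type: recover
       the owning module and, through it, the per-module state. */
    static PyObject *
    my_method(PyObject *self, PyObject *Py_UNUSED(args))
    {
        PyObject *mod = PyType_GetModuleByDef(Py_TYPE(self), &mymodule_def);
        if (mod == NULL) {
            return NULL;  /* the TypeError formatted above */
        }
        void *state = PyModule_GetState(mod);
        if (state == NULL && PyErr_Occurred()) {
            return NULL;
        }
        /* ... use state ... */
        return Py_NewRef(Py_None);
    }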
BEGIN_TYPE_LOCK(); dict = type->tp_dict; if (dict == NULL) { @@ -6191,6 +6489,12 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) BEGIN_TYPE_DICT_LOCK(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); assert(_PyType_CheckConsistency(type)); + if (res == 0) { + if (is_dunder_name(name) && has_slotdef(name)) { + // The name corresponds to a type slot. + res = update_slot_after_setattr(type, name); + } + } END_TYPE_DICT_LOCK(); done: @@ -7120,15 +7424,10 @@ object_set_class(PyObject *self, PyObject *value, void *closure) return -1; } -#ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); -#endif + types_stop_world(); PyTypeObject *oldto = Py_TYPE(self); int res = object_set_class_world_stopped(self, newto); -#ifdef Py_GIL_DISABLED - _PyEval_StartTheWorld(interp); -#endif + types_start_world(); if (res == 0) { if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) { Py_DECREF(oldto); @@ -8536,7 +8835,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal_unlocked(type, initial, NULL) < 0) { + if (mro_internal(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -9721,6 +10020,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds) /* If staticbase is NULL now, it is a really weird type. In the spirit of backwards compatibility (?), just shut up. */ if (staticbase && staticbase->tp_new != type->tp_new) { + if (staticbase->tp_new == NULL) { + PyErr_Format(PyExc_TypeError, + "cannot create '%s' instances", subtype->tp_name); + return NULL; + } PyErr_Format(PyExc_TypeError, "%s.__new__(%s) is not safe, use %s.__new__()", type->tp_name, @@ -11059,12 +11363,21 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) { /* XXX Maybe this could be optimized more -- but is it worth it? */ +#ifdef Py_GIL_DISABLED + pytype_slotdef *ptrs[MAX_EQUIV]; + pytype_slotdef **pp = ptrs; + /* Collect all slotdefs that match name into ptrs. */ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) + *pp++ = p; + } + *pp = NULL; +#else /* pname and ptrs act as a little cache */ PyInterpreterState *interp = _PyInterpreterState_GET(); #define pname _Py_INTERP_CACHED_OBJECT(interp, type_slots_pname) #define ptrs _Py_INTERP_CACHED_OBJECT(interp, type_slots_ptrs) pytype_slotdef *p, **pp; - void **res, **ptr; if (pname != name) { /* Collect all slotdefs that match name into ptrs. */ @@ -11076,10 +11389,12 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) } *pp = NULL; } +#endif /* Look in all slots of the type matching the name. If exactly one of these has a filled-in slot, return a pointer to that slot. Otherwise, return NULL. */ + void **res, **ptr; res = NULL; for (pp = ptrs; *pp; pp++) { ptr = slotptr(type, (*pp)->offset); @@ -11089,11 +11404,25 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) return NULL; res = ptr; } - return res; +#ifndef Py_GIL_DISABLED #undef pname #undef ptrs +#endif + return res; } +// Return true if "name" corresponds to at least one slot definition. This is +// a more accurate but more expensive test compared to is_dunder_name(). +static bool +has_slotdef(PyObject *name) +{ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) { + return true; + } + } + return false; +} /* Common code for update_slots_callback() and fixup_slot_dispatchers(). 
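update_slot_after_setattr() is reached from plain attribute assignment on a class. A sketch of the triggering side, where MyType is a hypothetical type object and my_repr is a callable created elsewhere:

    /* Storing a dunder on a type goes through type_setattro(); because
       has_slotdef() is true for "__repr__", the matching tp_repr slot is
       rewritten, on free-threaded builds via the queued, world-stopped
       update path described earlier. */
    if (PyObject_SetAttrString((PyObject *)&MyType, "__repr__",
                               my_repr) < 0) {
        return -1;
    }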
* @@ -11146,13 +11475,22 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) * There are some further special cases for specific slots, like supporting * __hash__ = None for tp_hash and special code for tp_new. * - * When done, return a pointer to the next slotdef with a different offset, - * because that's convenient for fixup_slot_dispatchers(). This function never - * sets an exception: if an internal error happens (unlikely), it's ignored. */ -static pytype_slotdef * -update_one_slot(PyTypeObject *type, pytype_slotdef *p) + * When done, next_p is set to the next slotdef with a different offset, + * because that's convenient for fixup_slot_dispatchers(). + * + * If the queued_updates pointer is provided, the actual updates to the slot + * pointers are queued, rather than being immediately performed. That argument + * is only used for the free-threaded build since those updates need to be + * done while the world is stopped. + * + * This function will only return an error if the queued_updates argument is + * provided and allocating memory for the queue fails. Other exceptions that + * occur internally are ignored, such as when looking up descriptors. */ +static int +update_one_slot(PyTypeObject *type, pytype_slotdef *p, pytype_slotdef **next_p, + slot_update_t *queued_updates) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11175,7 +11513,10 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) do { ++p; } while (p->offset == offset); - return p; + if (next_p != NULL) { + *next_p = p; + } + return 0; } /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); @@ -11258,16 +11599,41 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) } if (p->function == slot_tp_call) { /* A generic __call__ is incompatible with vectorcall */ - type_clear_flags(type, Py_TPFLAGS_HAVE_VECTORCALL); + if (queued_updates == NULL) { + type_clear_flags(type, Py_TPFLAGS_HAVE_VECTORCALL); + } } } Py_DECREF(descr); } while ((++p)->offset == offset); - if (specific && !use_generic) - *ptr = specific; - else - *ptr = generic; - return p; + + void *slot_value; + if (specific && !use_generic) { + slot_value = specific; + } else { + slot_value = generic; + } + +#ifdef Py_GIL_DISABLED + if (queued_updates != NULL) { + // queue the update to perform later, while world is stopped + if (queue_slot_update(queued_updates, type, ptr, slot_value) < 0) { + return -1; + } + } else { + // do the update to the type structure now + *ptr = slot_value; + } +#else + // always do the update immediately + assert(queued_updates == NULL); + *ptr = slot_value; +#endif + + if (next_p != NULL) { + *next_p = p; + } + return 0; } /* In the type, update the slots whose slotdefs are gathered in the pp array. @@ -11275,18 +11641,21 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) static int update_slots_callback(PyTypeObject *type, void *data) { - ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_TYPE_OR_LOCKED(type); - pytype_slotdef **pp = (pytype_slotdef **)data; + update_callback_data_t *update_data = (update_callback_data_t *)data; + pytype_slotdef **pp = update_data->defs; for (; *pp; pp++) { - update_one_slot(type, *pp); + if (update_one_slot(type, *pp, NULL, update_data->queued_updates) < 0) { + return -1; + } } return 0; } /* Update the slots after assignment to a class (type) attribute. 
*/ static int -update_slot(PyTypeObject *type, PyObject *name) +update_slot(PyTypeObject *type, PyObject *name, slot_update_t *queued_updates) { pytype_slotdef *ptrs[MAX_EQUIV]; pytype_slotdef *p; @@ -11317,8 +11686,12 @@ update_slot(PyTypeObject *type, PyObject *name) } if (ptrs[0] == NULL) return 0; /* Not an attribute that affects any slots */ + + update_callback_data_t callback_data; + callback_data.defs = ptrs; + callback_data.queued_updates = queued_updates; return update_subclasses(type, name, - update_slots_callback, (void *)ptrs); + update_slots_callback, (void *)&callback_data); } /* Store the proper functions in the slot dispatches at class (type) @@ -11327,35 +11700,56 @@ update_slot(PyTypeObject *type, PyObject *name) static void fixup_slot_dispatchers(PyTypeObject *type) { - // This lock isn't strictly necessary because the type has not been - // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro - // where we'd like to assert that the type is locked. - BEGIN_TYPE_LOCK(); - assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { - p = update_one_slot(type, p); + update_one_slot(type, p, &p, NULL); } - - END_TYPE_LOCK(); } -static void +#ifdef Py_GIL_DISABLED + +// Called when __bases__ is re-assigned. +static int update_all_slots(PyTypeObject* type) { - pytype_slotdef *p; - - ASSERT_TYPE_LOCK_HELD(); + // Note that update_slot() can fail due to out-of-memory when allocating + // the queue chunks to hold the updates. That's unlikely since the number + // of updates is normally small but we handle that case. update_slot() + // can fail internally for other reasons (a lookup fails) but those + // errors are suppressed. + slot_update_t queued_updates = {0}; + for (pytype_slotdef *p = slotdefs; p->name; p++) { + if (update_slot(type, p->name_strobj, &queued_updates) < 0) { + if (queued_updates.head) { + slot_update_free_chunks(&queued_updates); + } + return -1; + } + } + if (queued_updates.head != NULL) { + apply_type_slot_updates(&queued_updates); + ASSERT_TYPE_LOCK_HELD(); + slot_update_free_chunks(&queued_updates); + } + return 0; +} - /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ - type_modified_unlocked(type); +#else +// Called when __bases__ is re-assigned. 
+static int +update_all_slots(PyTypeObject* type) +{ + pytype_slotdef *p; for (p = slotdefs; p->name; p++) { - /* update_slot returns int but can't actually fail */ - update_slot(type, p->name_strobj); + /* update_slot returns int but can't actually fail in this case*/ + update_slot(type, p->name_strobj, NULL); } + return 0; } +#endif + PyObject * _PyType_GetSlotWrapperNames(void) @@ -11625,7 +12019,10 @@ PyType_Freeze(PyTypeObject *type) } BEGIN_TYPE_LOCK(); + types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); type_modified_unlocked(type); END_TYPE_LOCK(); diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 6c199a52aa0..cead6e69af5 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs) for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) { PyObject *item = PyTuple_GET_ITEM(value, i); if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { PyUnicodeWriter_Discard(writer); return NULL; } @@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p) } if (p == (PyObject *)&_PyNone_Type) { - return PyUnicodeWriter_WriteUTF8(writer, "None", 4); + return PyUnicodeWriter_WriteASCII(writer, "None", 4); } if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 && diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b3287546dd9..5c2308a0121 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -56,6 +56,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_pyhash.h" // _Py_HashSecret_t #include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_template.h" // _PyTemplate_Concat() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI #include "pycore_unicodeobject.h" // struct _Py_unicode_state @@ -166,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -static inline Py_hash_t PyUnicode_HASH(PyObject *op) -{ - assert(_PyUnicode_CHECK(op)); - return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); -} +#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) { @@ -3729,7 +3726,7 @@ PyUnicode_Decode(const char *s, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsDecodedObject(PyObject *unicode, const char *encoding, const char *errors) @@ -3739,12 +3736,6 @@ PyUnicode_AsDecodedObject(PyObject *unicode, return NULL; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedObject() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Decode() to decode from str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -3752,7 +3743,7 @@ PyUnicode_AsDecodedObject(PyObject *unicode, return PyCodec_Decode(unicode, encoding, errors); } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsDecodedUnicode(PyObject *unicode, const char *encoding, const char *errors) @@ -3764,12 +3755,6 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedUnicode() is deprecated " - "and will be removed in 3.15; " - "use 
PyCodec_Decode() to decode from str to str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -3792,7 +3777,7 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsEncodedObject(PyObject *unicode, const char *encoding, const char *errors) @@ -3804,13 +3789,6 @@ PyUnicode_AsEncodedObject(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedObject() is deprecated " - "and will be removed in 3.15; " - "use PyUnicode_AsEncodedString() to encode from str to bytes " - "or PyCodec_Encode() for generic encoding", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -4016,7 +3994,7 @@ PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsEncodedUnicode(PyObject *unicode, const char *encoding, const char *errors) @@ -4028,12 +4006,6 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedUnicode() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Encode() to encode from str to str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -6620,13 +6592,15 @@ _PyUnicode_GetNameCAPI(void) /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * -_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, +_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { const char *starts = s; + const char *initial_starts = starts; _PyUnicodeWriter writer; const char *end; PyObject *errorHandler = NULL; @@ -6634,7 +6608,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, _PyUnicode_Name_CAPI *ucnhash_capi; // so we can remember if we've seen an invalid escape char or not - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; if (size == 0) { if (consumed) { @@ -6722,9 +6697,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, } } if (ch > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = ch; + if (starts == initial_starts) { + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; + } } } WRITE_CHAR(ch); @@ -6819,9 +6797,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, goto error; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + if (starts == initial_starts) { + /* Back up one char, since we've already incremented s. 
*/ + *first_invalid_escape_ptr = s - 1; + } } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); @@ -6866,19 +6847,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, const char *errors, Py_ssize_t *consumed) { - const char *first_invalid_escape; - PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, consumed, - &first_invalid_escape); + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "\"\\%.3s\" is an invalid octal escape sequence. " + "\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -6888,7 +6870,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -11628,10 +11610,16 @@ PyUnicode_Concat(PyObject *left, PyObject *right) return NULL; if (!PyUnicode_Check(right)) { - PyErr_Format(PyExc_TypeError, - "can only concatenate str (not \"%.200s\") to str", - Py_TYPE(right)->tp_name); - return NULL; + if (_PyTemplate_CheckExact(right)) { + // str + tstring is implemented in the tstring type + return _PyTemplate_Concat(left, right); + } + else { + PyErr_Format(PyExc_TypeError, + "can only concatenate str (not \"%.200s\") to str", + Py_TYPE(right)->tp_name); + return NULL; + } } /* Shortcuts */ @@ -13937,7 +13925,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { - if (Py_TYPE(obj) == &PyLong_Type) { + PyTypeObject *type = Py_TYPE(obj); + if (type == &PyUnicode_Type) { + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); + } + + if (type == &PyLong_Type) { return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); } @@ -14086,6 +14079,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, @@ -15911,7 +15918,7 @@ immortalize_interned(PyObject *s) _Py_DecRefTotal(_PyThreadState_GET()); } #endif - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); _Py_SetImmortal(s); } @@ -16029,7 +16036,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(s); Py_DECREF(s); } - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ @@ -16165,7 +16172,7 @@ 
_PyUnicode_ClearInterned(PyInterpreterState *interp)
             Py_UNREACHABLE();
         }
         if (!shared) {
-            FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
+            FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
         }
     }
 #ifdef INTERNED_STATS
diff --git a/Objects/unionobject.c b/Objects/unionobject.c
index 66435924b6c..2206ed80ef0 100644
--- a/Objects/unionobject.c
+++ b/Objects/unionobject.c
@@ -4,6 +4,7 @@
 #include "pycore_typevarobject.h"   // _PyTypeAlias_Type, _Py_typing_type_repr
 #include "pycore_unicodeobject.h"   // _PyUnicode_EqualToASCIIString
 #include "pycore_unionobject.h"
+#include "pycore_weakref.h"         // FT_CLEAR_WEAKREFS()
 
 
 typedef struct {
@@ -21,9 +22,7 @@ unionobject_dealloc(PyObject *self)
     unionobject *alias = (unionobject *)self;
 
     _PyObject_GC_UNTRACK(self);
-    if (alias->weakreflist != NULL) {
-        PyObject_ClearWeakRefs((PyObject *)alias);
-    }
+    FT_CLEAR_WEAKREFS(self, alias->weakreflist);
 
     Py_XDECREF(alias->args);
     Py_XDECREF(alias->hashable_args);
@@ -290,7 +289,7 @@ union_repr(PyObject *self)
     }
 
     for (Py_ssize_t i = 0; i < len; i++) {
-        if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) {
+        if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) {
             goto error;
         }
         PyObject *p = PyTuple_GET_ITEM(alias->args, i);
@@ -300,12 +299,12 @@
     }
 
 #if 0
-    PyUnicodeWriter_WriteUTF8(writer, "|args=", 6);
+    PyUnicodeWriter_WriteASCII(writer, "|args=", 6);
     PyUnicodeWriter_WriteRepr(writer, alias->args);
-    PyUnicodeWriter_WriteUTF8(writer, "|h=", 3);
+    PyUnicodeWriter_WriteASCII(writer, "|h=", 3);
     PyUnicodeWriter_WriteRepr(writer, alias->hashable_args);
     if (alias->unhashable_args) {
-        PyUnicodeWriter_WriteUTF8(writer, "|u=", 3);
+        PyUnicodeWriter_WriteASCII(writer, "|u=", 3);
         PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args);
     }
 #endif
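The PyUnicodeWriter_WriteASCII() conversions above use the new writer entry point for strings that are known to be pure ASCII, skipping UTF-8 decoding. A minimal end-to-end sketch of the writer API:

    /* Build "a | b" with the public writer API. WriteASCII is a fast path
       and must only be given ASCII data. */
    PyUnicodeWriter *w = PyUnicodeWriter_Create(0);
    if (w == NULL) {
        return NULL;
    }
    if (PyUnicodeWriter_WriteASCII(w, "a", 1) < 0 ||
        PyUnicodeWriter_WriteASCII(w, " | ", 3) < 0 ||
        PyUnicodeWriter_WriteASCII(w, "b", 1) < 0)
    {
        PyUnicodeWriter_Discard(w);
        return NULL;
    }
    return PyUnicodeWriter_Finish(w);  /* returns the built str object */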