Diffstat (limited to 'Python')
53 files changed, 7121 insertions(+), 4082 deletions(-)
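Note: a recurring change in this diff is the move from the private _PySys_GetRequiredAttr()/_PySys_GetOptionalAttr() helpers to the public PySys_GetAttr()/PySys_GetOptionalAttr() family. A minimal sketch of the calling convention the converted call sites below rely on (inferred from those hunks; the <= 0 check and the Py_DECREF of the returned strong reference are the essential parts, everything else is elided):

    /* PySys_GetOptionalAttr() stores a strong reference to the sys
     * attribute in *result and returns >0 on success, 0 if the attribute
     * is missing (*result set to NULL), and <0 on error. PySys_GetAttr()
     * and PySys_GetAttrString() instead return a new strong reference,
     * or NULL with an exception set. */
    PyObject *f_stderr;
    if (PySys_GetOptionalAttr(&_Py_ID(stderr), &f_stderr) <= 0) {
        fprintf(stderr, "lost sys.stderr\n");   /* missing, or an error */
    }
    else {
        /* ... write to f_stderr ... */
        Py_DECREF(f_stderr);   /* drop the strong reference */
    }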
diff --git a/Python/Python-ast.c b/Python/Python-ast.c index df035568f84..660bc598a48 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5528,6 +5528,32 @@ ast_type_replace_check(PyObject *self, Py_DECREF(unused); } } + + // Discard fields from 'expecting' that default to None + PyObject *field_types = NULL; + if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), + &_Py_ID(_field_types), + &field_types) < 0) + { + Py_DECREF(expecting); + return -1; + } + if (field_types != NULL) { + Py_ssize_t pos = 0; + PyObject *field_name, *field_type; + while (PyDict_Next(field_types, &pos, &field_name, &field_type)) { + if (_PyUnion_Check(field_type)) { + // optional field + if (PySet_Discard(expecting, field_name) < 0) { + Py_DECREF(expecting); + Py_DECREF(field_types); + return -1; + } + } + } + Py_DECREF(field_types); + } + // Now 'expecting' contains the fields or attributes // that would not be filled inside ast_type_replace(). Py_ssize_t m = PySet_GET_SIZE(expecting); @@ -5770,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth) for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -5794,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth) } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -5898,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth) } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/_warnings.c b/Python/_warnings.c index 39bf1b225cc..12e6172b0cf 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -6,7 +6,6 @@ #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_pylifecycle.h" // _Py_IsInterpreterFinalizing() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttr() #include "pycore_traceback.h" // _Py_DisplaySourceLine() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() @@ -678,7 +677,7 @@ show_warning(PyThreadState *tstate, PyObject *filename, int lineno, goto error; } - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &f_stderr) <= 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &f_stderr) <= 0) { fprintf(stderr, "lost sys.stderr\n"); goto error; } diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index 0a3265dfeee..616752459ba 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -9,6 +9,9 @@ # } _Py_trampoline_func_start: #ifdef __x86_64__ +#if defined(__CET__) && (__CET__ & 1) + endbr64 +#endif sub $8, %rsp call *%rcx add $8, %rsp @@ -34,3 +37,22 @@ _Py_trampoline_func_start: .globl _Py_trampoline_func_end _Py_trampoline_func_end: .section .note.GNU-stack,"",@progbits +# Note for indicating the assembly code supports CET +#if defined(__x86_64__) && defined(__CET__) && (__CET__ & 1) + .section .note.gnu.property,"a" + .align 8 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .string "GNU" +1: + .align 8 + .long 0xc0000002 + .long 3f - 2f +2: + .long 0x3 +3: + .align 8 +4: +#endif // __x86_64__ diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index c121ec096ae..557c12cfda6 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -702,6 +702,13 @@ append_templatestr(PyUnicodeWriter *writer, expr_ty e) Py_ssize_t last_idx = 0; 
Py_ssize_t len = asdl_seq_LEN(e->v.TemplateStr.values); + if (len == 0) { + int result = _write_values_subarray(writer, e->v.TemplateStr.values, + 0, len - 1, 't', arena); + _PyArena_Free(arena); + return result; + } + for (Py_ssize_t i = 0; i < len; i++) { expr_ty value = asdl_seq_GET(e->v.TemplateStr.values, i); @@ -742,6 +749,7 @@ append_templatestr(PyUnicodeWriter *writer, expr_ty e) goto error; } } + _PyArena_Free(arena); return 0; @@ -774,33 +782,38 @@ append_joinedstr(PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) } static int -append_interpolation_value(PyUnicodeWriter *writer, expr_ty e) +append_interpolation_str(PyUnicodeWriter *writer, PyObject *str) { const char *outer_brace = "{"; - /* Grammar allows PR_TUPLE, but use >PR_TEST for adding parenthesis - around a lambda with ':' */ - PyObject *temp_fv_str = expr_as_unicode(e, PR_TEST + 1); - if (!temp_fv_str) { - return -1; - } - if (PyUnicode_Find(temp_fv_str, _Py_LATIN1_CHR('{'), 0, 1, 1) == 0) { + if (PyUnicode_Find(str, _Py_LATIN1_CHR('{'), 0, 1, 1) == 0) { /* Expression starts with a brace, split it with a space from the outer one. */ outer_brace = "{ "; } if (-1 == append_charp(writer, outer_brace)) { - Py_DECREF(temp_fv_str); return -1; } - if (-1 == PyUnicodeWriter_WriteStr(writer, temp_fv_str)) { - Py_DECREF(temp_fv_str); + if (-1 == PyUnicodeWriter_WriteStr(writer, str)) { return -1; } - Py_DECREF(temp_fv_str); return 0; } static int +append_interpolation_value(PyUnicodeWriter *writer, expr_ty e) +{ + /* Grammar allows PR_TUPLE, but use >PR_TEST for adding parenthesis + around a lambda with ':' */ + PyObject *temp_fv_str = expr_as_unicode(e, PR_TEST + 1); + if (!temp_fv_str) { + return -1; + } + int result = append_interpolation_str(writer, temp_fv_str); + Py_DECREF(temp_fv_str); + return result; +} + +static int append_interpolation_conversion(PyUnicodeWriter *writer, int conversion) { if (conversion < 0) { @@ -843,7 +856,7 @@ append_interpolation_format_spec(PyUnicodeWriter *writer, expr_ty e) static int append_interpolation(PyUnicodeWriter *writer, expr_ty e) { - if (-1 == append_interpolation_value(writer, e->v.Interpolation.value)) { + if (-1 == append_interpolation_str(writer, e->v.Interpolation.str)) { return -1; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 3d0295ee388..51d7297ec24 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -14,7 +14,6 @@ #include "pycore_pyerrors.h" // _PyErr_NoMemory() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_pythonrun.h" // _Py_SourceAsString() -#include "pycore_sysmodule.h" // _PySys_GetRequiredAttr() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_cell.h" // PyCell_GetRef() @@ -465,7 +464,7 @@ builtin_callable(PyObject *module, PyObject *obj) static PyObject * builtin_breakpoint(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *keywords) { - PyObject *hook = _PySys_GetRequiredAttrString("breakpointhook"); + PyObject *hook = PySys_GetAttrString("breakpointhook"); if (hook == NULL) { return NULL; } @@ -958,6 +957,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals) /*[clinic end generated code: output=0a0824aa70093116 input=7c7bce5299a89062]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *result = NULL, *source_copy; const char *str; @@ -971,35 +971,46 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, : "globals must be a dict"); return NULL; } - if (globals == Py_None) { + + int fromframe = 0; + if 
(globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); - } + assert(globals != NULL); + Py_INCREF(globals); } - else if (locals == Py_None) - locals = Py_NewRef(globals); else { - Py_INCREF(locals); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_TypeError, + "eval must be given globals and locals " + "when called without a frame"); + } + return NULL; + } + Py_INCREF(globals); } - if (globals == NULL || locals == NULL) { - PyErr_SetString(PyExc_TypeError, - "eval must be given globals and locals " - "when called without a frame"); - goto error; + if (locals != Py_None) { + Py_INCREF(locals); } - - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); + return NULL; + } + } + else { + locals = Py_NewRef(globals); } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1040,6 +1051,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, } error: + Py_XDECREF(globals); Py_XDECREF(locals); return result; } @@ -1070,29 +1082,44 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals, PyObject *closure) /*[clinic end generated code: output=7579eb4e7646743d input=25e989b6d87a3a21]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *v; - if (globals == Py_None) { + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); + assert(globals != NULL); + Py_INCREF(globals); + } + else { + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_SystemError, + "globals and locals cannot be NULL"); + } + goto error; } - if (!globals || !locals) { - PyErr_SetString(PyExc_SystemError, - "globals and locals cannot be NULL"); + Py_INCREF(globals); + } + + if (locals != Py_None) { + Py_INCREF(locals); + } + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); return NULL; } } - else if (locals == Py_None) { - locals = Py_NewRef(globals); - } else { - Py_INCREF(locals); + locals = Py_NewRef(globals); } if (!PyDict_Check(globals)) { @@ -1106,11 +1133,8 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, Py_TYPE(locals)->tp_name); goto error; } - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); - } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1187,11 +1211,13 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, } if (v == NULL) goto error; + Py_DECREF(globals); Py_DECREF(locals); Py_DECREF(v); Py_RETURN_NONE; error: + Py_XDECREF(globals); Py_XDECREF(locals); return NULL; } @@ -1241,10 +1267,21 
@@ static PyObject * builtin_globals_impl(PyObject *module) /*[clinic end generated code: output=e5dd1527067b94d2 input=9327576f92bb48ba]*/ { - PyObject *d; - - d = PyEval_GetGlobals(); - return Py_XNewRef(d); + PyObject *globals; + if (_PyEval_GetFrame() != NULL) { + globals = PyEval_GetGlobals(); + assert(globals != NULL); + return Py_NewRef(globals); + } + PyThreadState *tstate = _PyThreadState_GET(); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(globals); } @@ -1888,7 +1925,21 @@ static PyObject * builtin_locals_impl(PyObject *module) /*[clinic end generated code: output=b46c94015ce11448 input=7874018d478d5c4b]*/ { - return _PyEval_GetFrameLocals(); + PyObject *locals; + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + assert(locals != NULL || PyErr_Occurred()); + return locals; + } + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(locals); } @@ -2164,7 +2215,7 @@ builtin_print_impl(PyObject *module, PyObject * const *args, int i, err; if (file == Py_None) { - file = _PySys_GetRequiredAttr(&_Py_ID(stdout)); + file = PySys_GetAttr(&_Py_ID(stdout)); if (file == NULL) { return NULL; } @@ -2270,7 +2321,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt) int tty; /* Check that stdin/out/err are intact */ - fin = _PySys_GetRequiredAttr(&_Py_ID(stdin)); + fin = PySys_GetAttr(&_Py_ID(stdin)); if (fin == NULL) { goto error; } @@ -2278,7 +2329,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt) PyErr_SetString(PyExc_RuntimeError, "lost sys.stdin"); goto error; } - fout = _PySys_GetRequiredAttr(&_Py_ID(stdout)); + fout = PySys_GetAttr(&_Py_ID(stdout)); if (fout == NULL) { goto error; } @@ -2286,7 +2337,7 @@ builtin_input_impl(PyObject *module, PyObject *prompt) PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); goto error; } - ferr = _PySys_GetRequiredAttr(&_Py_ID(stderr)); + ferr = PySys_GetAttr(&_Py_ID(stderr)); if (ferr == NULL) { goto error; } @@ -2624,7 +2675,22 @@ builtin_vars(PyObject *self, PyObject *args) if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v)) return NULL; if (v == NULL) { - d = _PyEval_GetFrameLocals(); + if (_PyEval_GetFrame() != NULL) { + d = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + d = _PyEval_GetGlobalsFromRunningMain(tstate); + if (d == NULL) { + if (!_PyErr_Occurred(tstate)) { + d = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(d); + } + } } else { if (PyObject_GetOptionalAttr(v, &_Py_ID(__dict__), &d) == 0) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6a076662640..1a5a9ff13a2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -295,55 +295,18 @@ dummy_func( value2 = PyStackRef_Borrow(GETLOCAL(oparg2)); } - family(LOAD_CONST, 0) = { - LOAD_CONST_MORTAL, - LOAD_CONST_IMMORTAL, - }; - inst(LOAD_CONST, (-- value)) { - /* We can't do this in the bytecode compiler as - * marshalling can intern strings and make them immortal. */ - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); -#if ENABLE_SPECIALIZATION_FT -#ifdef Py_GIL_DISABLED - uint8_t expected = LOAD_CONST; - if (!_Py_atomic_compare_exchange_uint8( - &this_instr->op.code, &expected, - _Py_IsImmortal(obj) ? 
LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL)) { - // We might lose a race with instrumentation, which we don't care about. - assert(expected >= MIN_INSTRUMENTED_OPCODE); - } -#else - if (this_instr->op.code == LOAD_CONST) { - this_instr->op.code = _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL; - } -#endif -#endif - } - - inst(LOAD_CONST_MORTAL, (-- value)) { - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNewMortal(obj); - } - - inst(LOAD_CONST_IMMORTAL, (-- value)) { PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - assert(_Py_IsImmortal(obj)); - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); } replicate(4) inst(LOAD_SMALL_INT, (-- value)) { assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); } replicate(8) inst(STORE_FAST, (value --)) { - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; DEAD(value); @@ -355,10 +318,6 @@ dummy_func( }; inst(STORE_FAST_LOAD_FAST, (value1 -- value2)) { - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -369,14 +328,6 @@ dummy_func( } inst(STORE_FAST_STORE_FAST, (value2, value1 --)) { - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value2) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -390,7 +341,33 @@ dummy_func( } pure inst(POP_TOP, (value --)) { - PyStackRef_CLOSE(value); + PyStackRef_XCLOSE(value); + } + + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + + tier2 op(_POP_TWO, (nos, tos --)) { + PyStackRef_CLOSE(tos); + PyStackRef_CLOSE(nos); } pure inst(PUSH_NULL, (-- res)) { @@ -406,9 +383,14 @@ dummy_func( PyStackRef_CLOSE(value); } - macro(POP_ITER) = POP_TOP; - no_save_ip tier1 inst(INSTRUMENTED_END_FOR, (receiver, value -- receiver)) { + inst(POP_ITER, (iter, index_or_null -- )) { + (void)index_or_null; + DEAD(index_or_null); + PyStackRef_CLOSE(iter); + } + + no_save_ip tier1 inst(INSTRUMENTED_END_FOR, (receiver, index_or_null, value -- receiver, index_or_null)) { /* Need to create a fake StopIteration error here, * to conform to PEP 380 */ if (PyStackRef_GenCheck(receiver)) { @@ -420,7 +402,9 @@ dummy_func( PyStackRef_CLOSE(value); } - tier1 inst(INSTRUMENTED_POP_ITER, (iter -- )) { + tier1 inst(INSTRUMENTED_POP_ITER, 
(iter, index_or_null -- )) { + (void)index_or_null; + DEAD(index_or_null); INSTRUMENTED_JUMP(prev_instr, this_instr+1, PY_MONITORING_EVENT_BRANCH_RIGHT); PyStackRef_CLOSE(iter); } @@ -606,12 +590,24 @@ dummy_func( op(_GUARD_NOS_INT, (left, unused -- left, unused)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - EXIT_IF(!PyLong_CheckExact(left_o)); + EXIT_IF(!_PyLong_CheckExactAndCompact(left_o)); } op(_GUARD_TOS_INT, (value -- value)) { PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - EXIT_IF(!PyLong_CheckExact(value_o)); + EXIT_IF(!_PyLong_CheckExactAndCompact(value_o)); + } + + op(_GUARD_NOS_OVERFLOWED, (left, unused -- left, unused)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + assert(Py_TYPE(left_o) == &PyLong_Type); + EXIT_IF(!_PyLong_IsCompact((PyLongObject *)left_o)); + } + + op(_GUARD_TOS_OVERFLOWED, (value -- value)) { + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + assert(Py_TYPE(value_o) == &PyLong_Type); + EXIT_IF(!_PyLong_IsCompact((PyLongObject *)value_o)); } pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { @@ -619,14 +615,14 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + EXIT_IF(PyStackRef_IsNull(res)); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); } pure op(_BINARY_OP_ADD_INT, (left, right -- res)) { @@ -634,14 +630,14 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + EXIT_IF(PyStackRef_IsNull(res)); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); } pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { @@ -649,20 +645,22 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + EXIT_IF(PyStackRef_IsNull(res)); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); } macro(BINARY_OP_MULTIPLY_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_MULTIPLY_INT; + macro(BINARY_OP_ADD_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_ADD_INT; + macro(BINARY_OP_SUBTRACT_INT) = _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT; @@ -721,6 +719,52 @@ 
dummy_func( ERROR_IF(PyStackRef_IsNull(res)); } + + pure op(_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsNull(res)); + } + + pure op(_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsNull(res)); + } + + pure op(_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsNull(res)); + } + macro(BINARY_OP_MULTIPLY_FLOAT) = _GUARD_TOS_FLOAT + _GUARD_NOS_FLOAT + unused/5 + _BINARY_OP_MULTIPLY_FLOAT; macro(BINARY_OP_ADD_FLOAT) = @@ -806,7 +850,7 @@ dummy_func( DEOPT_IF(!res); } - pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); @@ -942,7 +986,7 @@ dummy_func( PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectImmortal(res_o); + res = PyStackRef_FromPyObjectBorrow(res_o); } op(_GUARD_NOS_TUPLE, (nos, unused -- nos, unused)) { @@ -1022,12 +1066,13 @@ dummy_func( STAT_INC(BINARY_OP, hit); } - op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _PyInterpreterFrame* )) { - new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; + op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) { + _PyInterpreterFrame* pushed_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); + pushed_frame->localsplus[0] = container; + pushed_frame->localsplus[1] = sub; INPUTS_DEAD(); frame->return_offset = INSTRUCTION_SIZE; + new_frame = PyStackRef_Wrap(pushed_frame); } macro(BINARY_OP_SUBSCR_GETITEM) = @@ -1333,20 +1378,21 @@ dummy_func( macro(SEND) = _SPECIALIZE_SEND + _SEND; - op(_SEND_GEN_FRAME, (receiver, v -- receiver, gen_frame: _PyInterpreterFrame *)) { + op(_SEND_GEN_FRAME, (receiver, v -- receiver, gen_frame)) { PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(receiver); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && Py_TYPE(gen) != &PyCoro_Type); DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(SEND, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); 
+ _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_MakeHeapSafe(v)); DEAD(v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; assert(INSTRUCTION_SIZE + oparg <= UINT16_MAX); frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg); - gen_frame->previous = frame; + pushed_frame->previous = frame; + gen_frame = PyStackRef_Wrap(pushed_frame); } macro(SEND_GEN) = @@ -2500,7 +2546,7 @@ dummy_func( _LOAD_ATTR_CLASS + _PUSH_NULL_CONDITIONAL; - op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame: _PyInterpreterFrame *)) { + op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) { assert((oparg & 1) == 0); assert(Py_IS_TYPE(fget, &PyFunction_Type)); PyFunctionObject *f = (PyFunctionObject *)fget; @@ -2510,9 +2556,10 @@ dummy_func( DEOPT_IF(code->co_argcount != 1); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(LOAD_ATTR, hit); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); - new_frame->localsplus[0] = owner; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); + pushed_frame->localsplus[0] = owner; DEAD(owner); + new_frame = PyStackRef_Wrap(pushed_frame); } macro(LOAD_ATTR_PROPERTY) = @@ -2722,8 +2769,8 @@ dummy_func( PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left_o)); - DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right_o)); + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && _PyLong_DigitCount((PyLongObject *)right_o) <= 1); @@ -3085,15 +3132,24 @@ dummy_func( values_or_none = PyStackRef_FromPyObjectSteal(values_or_none_o); } - inst(GET_ITER, (iterable -- iter)) { + inst(GET_ITER, (iterable -- iter, index_or_null)) { #ifdef Py_STATS _Py_GatherStats_GetIter(iterable); #endif /* before: [obj]; after [getiter(obj)] */ - PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); - PyStackRef_CLOSE(iterable); - ERROR_IF(iter_o == NULL); - iter = PyStackRef_FromPyObjectSteal(iter_o); + PyTypeObject *tp = PyStackRef_TYPE(iterable); + if (tp == &PyTuple_Type || tp == &PyList_Type) { + iter = iterable; + DEAD(iterable); + index_or_null = PyStackRef_TagInt(0); + } + else { + PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); + PyStackRef_CLOSE(iterable); + ERROR_IF(iter_o == NULL); + iter = PyStackRef_FromPyObjectSteal(iter_o); + index_or_null = PyStackRef_NULL; + } } inst(GET_YIELD_FROM_ITER, (iterable -- iter)) { @@ -3140,11 +3196,11 @@ dummy_func( FOR_ITER_GEN, }; - specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) { + specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter, null_or_index -- iter, null_or_index)) { #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; - _Py_Specialize_ForIter(iter, next_instr, oparg); + _Py_Specialize_ForIter(iter, null_or_index, next_instr, oparg); DISPATCH_SAME_OPARG(); } OPCODE_DEFERRED_INC(FOR_ITER); @@ -3152,111 +3208,71 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION_FT */ } - replaced op(_FOR_ITER, (iter -- iter, next)) { - /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) 
*/ - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); - } - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); + replaced op(_FOR_ITER, (iter, null_or_index -- iter, null_or_index, next)) { + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + ERROR_NO_POP(); } - /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - /* Jump forward oparg, then skip following END_FOR */ + // Jump forward by oparg and skip the following END_FOR JUMPBY(oparg + 1); DISPATCH(); } - next = PyStackRef_FromPyObjectSteal(next_o); - // Common case: no jump, leave it to the code generator + next = item; } - op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { - /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); - } - _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); - _PyErr_Clear(tstate); + op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) { + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + ERROR_NO_POP(); } /* iterator ended normally */ /* The translator sets the deopt target just past the matching END_FOR */ EXIT_IF(true); } - next = PyStackRef_FromPyObjectSteal(next_o); - // Common case: no jump, leave it to the code generator + next = item; } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; - inst(INSTRUMENTED_FOR_ITER, (unused/1, iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); - if (next_o != NULL) { - next = PyStackRef_FromPyObjectSteal(next_o); - INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); - } - else { - if (_PyErr_Occurred(tstate)) { - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - if (!matches) { - ERROR_NO_POP(); - } - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); + inst(INSTRUMENTED_FOR_ITER, (unused/1, iter, null_or_index -- iter, null_or_index, next)) { + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + ERROR_NO_POP(); } - /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - /* Skip END_FOR */ + // Jump forward by oparg and skip the following END_FOR JUMPBY(oparg + 1); DISPATCH(); } + next = item; + INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); } - - op(_ITER_CHECK_LIST, (iter -- iter)) { + op(_ITER_CHECK_LIST, (iter, null_or_index -- iter, null_or_index)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - EXIT_IF(Py_TYPE(iter_o) != &PyListIter_Type); + EXIT_IF(Py_TYPE(iter_o) != &PyList_Type); + 
assert(PyStackRef_IsTaggedInt(null_or_index)); #ifdef Py_GIL_DISABLED - EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o)); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - EXIT_IF(!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq)); + EXIT_IF(!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)); #endif } - replaced op(_ITER_JUMP_LIST, (iter -- iter)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - assert(Py_TYPE(iter_o) == &PyListIter_Type); -// For free-threaded Python, the loop exit can happen at any point during -// item retrieval, so it doesn't make much sense to check and jump -// separately before item retrieval. Any length check we do here can be -// invalid by the time we actually try to fetch the item. + replaced op(_ITER_JUMP_LIST, (iter, null_or_index -- iter, null_or_index)) { #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - (void)iter_o; + // For free-threaded Python, the loop exit can happen at any point during + // item retrieval, so it doesn't make much sense to check and jump + // separately before item retrieval. Any length check we do here can be + // invalid by the time we actually try to fetch the item. #else - _PyListIterObject *it = (_PyListIterObject *)iter_o; + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); STAT_INC(FOR_ITER, hit); - PyListObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - if (seq != NULL) { - it->it_seq = NULL; - Py_DECREF(seq); - } + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { + null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); @@ -3265,73 +3281,54 @@ dummy_func( } // Only used by Tier 2 - op(_GUARD_NOT_EXHAUSTED_LIST, (iter -- iter)) { + op(_GUARD_NOT_EXHAUSTED_LIST, (iter, null_or_index -- iter, null_or_index)) { #ifndef Py_GIL_DISABLED - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - EXIT_IF(seq == NULL); - if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - EXIT_IF(1); - } + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); + EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)); #endif } - replaced op(_ITER_NEXT_LIST, (iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + replaced op(_ITER_NEXT_LIST, (iter, null_or_index -- iter, null_or_index, next)) { + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread(list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); // A negative result means we lost a race with another thread // and we need to take 
the slow path. DEOPT_IF(result < 0); if (result == 0) { - it->it_index = -1; + null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); } - it->it_index++; #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } // Only used by Tier 2 - op(_ITER_NEXT_LIST_TIER_TWO, (iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + op(_ITER_NEXT_LIST_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) { + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread((PyObject *)list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); // A negative result means we lost a race with another thread // and we need to take the slow path. - EXIT_IF(result < 0); - if (result == 0) { - it->it_index = -1; - EXIT_IF(1); - } - it->it_index++; + DEOPT_IF(result <= 0); #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + assert(PyStackRef_UntagInt(null_or_index) < PyList_GET_SIZE(list_o)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } macro(FOR_ITER_LIST) = @@ -3340,31 +3337,19 @@ dummy_func( _ITER_JUMP_LIST + _ITER_NEXT_LIST; - op(_ITER_CHECK_TUPLE, (iter -- iter)) { + op(_ITER_CHECK_TUPLE, (iter, null_or_index -- iter, null_or_index)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - EXIT_IF(Py_TYPE(iter_o) != &PyTupleIter_Type); -#ifdef Py_GIL_DISABLED - EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o)); -#endif + EXIT_IF(Py_TYPE(iter_o) != &PyTuple_Type); + assert(PyStackRef_IsTaggedInt(null_or_index)); } - replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - (void)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); -#ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); -#endif - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; + replaced op(_ITER_JUMP_TUPLE, (iter, null_or_index -- iter, null_or_index)) { + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + (void)tuple_o; + assert(Py_TYPE(tuple_o) == &PyTuple_Type); STAT_INC(FOR_ITER, hit); - PyTupleObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) { -#ifndef Py_GIL_DISABLED - if (seq != NULL) { - it->it_seq = NULL; - Py_DECREF(seq); - } -#endif + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { + null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); @@ -3372,29 +3357,19 @@ 
dummy_func( } // Only used by Tier 2 - op(_GUARD_NOT_EXHAUSTED_TUPLE, (iter -- iter)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); -#ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); -#endif - PyTupleObject *seq = it->it_seq; - EXIT_IF(seq == NULL); - EXIT_IF(it->it_index >= PyTuple_GET_SIZE(seq)); + op(_GUARD_NOT_EXHAUSTED_TUPLE, (iter, null_or_index -- iter, null_or_index)) { + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)); } - op(_ITER_NEXT_TUPLE, (iter -- iter, next)) { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - PyTupleObject *seq = it->it_seq; -#ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); -#endif - assert(seq); - assert(it->it_index < PyTuple_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + op(_ITER_NEXT_TUPLE, (iter, null_or_index -- iter, null_or_index, next)) { + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + uintptr_t i = PyStackRef_UntagInt(null_or_index); + assert((size_t)i < (size_t)PyTuple_GET_SIZE(tuple_o)); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(tuple_o, i)); + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } macro(FOR_ITER_TUPLE) = @@ -3403,7 +3378,7 @@ dummy_func( _ITER_JUMP_TUPLE + _ITER_NEXT_TUPLE; - op(_ITER_CHECK_RANGE, (iter -- iter)) { + op(_ITER_CHECK_RANGE, (iter, null_or_index -- iter, null_or_index)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); EXIT_IF(Py_TYPE(r) != &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -3411,7 +3386,7 @@ dummy_func( #endif } - replaced op(_ITER_JUMP_RANGE, (iter -- iter)) { + replaced op(_ITER_JUMP_RANGE, (iter, null_or_index -- iter, null_or_index)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -3426,13 +3401,13 @@ dummy_func( } // Only used by Tier 2 - op(_GUARD_NOT_EXHAUSTED_RANGE, (iter -- iter)) { + op(_GUARD_NOT_EXHAUSTED_RANGE, (iter, null_or_index -- iter, null_or_index)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); EXIT_IF(r->len <= 0); } - op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + op(_ITER_NEXT_RANGE, (iter, null_or_index -- iter, null_or_index, next)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -3453,7 +3428,7 @@ dummy_func( _ITER_JUMP_RANGE + _ITER_NEXT_RANGE; - op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) { + op(_FOR_ITER_GEN_FRAME, (iter, null -- iter, null, gen_frame)) { PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); #ifdef Py_GIL_DISABLED @@ -3465,14 +3440,15 @@ dummy_func( #endif DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(FOR_ITER, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_None); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_None); 
gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - gen_frame->previous = frame; + pushed_frame->previous = frame; // oparg is the return offset from the next instruction. frame->return_offset = (uint16_t)(INSTRUCTION_SIZE + oparg); + gen_frame = PyStackRef_Wrap(pushed_frame); } macro(FOR_ITER_GEN) = @@ -3824,7 +3800,7 @@ dummy_func( macro(CALL) = _SPECIALIZE_CALL + unused/2 + _MAYBE_EXPAND_METHOD + _DO_CALL + _CHECK_PERIODIC; macro(INSTRUMENTED_CALL) = unused/3 + _MAYBE_EXPAND_METHOD + _MONITOR_CALL + _DO_CALL + _CHECK_PERIODIC; - op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); // oparg counts all of the args, but *not* self: @@ -3846,7 +3822,7 @@ dummy_func( if (temp == NULL) { ERROR_NO_POP(); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { @@ -3983,27 +3959,26 @@ dummy_func( DEOPT_IF(tstate->py_recursion_remaining <= 1); } - replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame)) { int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } INPUTS_DEAD(); + new_frame = PyStackRef_Wrap(pushed_frame); } - op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) { - // Write it out explicitly because it's subtly different. - // Eventually this should be the only occurrence of this code. 
+ op(_PUSH_FRAME, (new_frame -- )) { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); DEAD(new_frame); SYNC_SP(); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -4041,6 +4016,15 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); } + op(_GUARD_NOS_NOT_NULL, (nos, unused -- nos, unused)) { + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + EXIT_IF(o == NULL); + } + + op(_GUARD_THIRD_NULL, (null, unused, unused -- null, unused, unused)) { + DEOPT_IF(!PyStackRef_IsNull(null)); + } + op(_GUARD_CALLABLE_TYPE_1, (callable, unused, unused -- callable, unused, unused)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); DEOPT_IF(callable_o != (PyObject *)&PyType_Type); @@ -4146,7 +4130,7 @@ dummy_func( PyStackRef_CLOSE(temp); } - op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame: _PyInterpreterFrame *)) { + op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) { _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); @@ -4163,12 +4147,12 @@ dummy_func( _PyEval_FrameClearAndPop(tstate, shim); ERROR_NO_POP(); } - init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + init_frame = PyStackRef_Wrap(temp); } macro(CALL_ALLOC_AND_ENTER_INIT) = @@ -4359,41 +4343,57 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - inst(CALL_ISINSTANCE, (unused/1, unused/2, callable, self_or_null, args[oparg] -- res)) { - /* isinstance(o, o2) */ + op(_GUARD_CALLABLE_ISINSTANCE, (callable, unused, unused, unused -- callable, unused, unused, unused)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - - int total_args = oparg; - _PyStackRef *arguments = args; - if (!PyStackRef_IsNull(self_or_null)) { - arguments--; - total_args++; - } - DEOPT_IF(total_args != 2); PyInterpreterState *interp = tstate->interp; DEOPT_IF(callable_o != interp->callable_cache.isinstance); + } + + op(_CALL_ISINSTANCE, (callable, null, instance, cls -- res)) { + /* isinstance(o, o2) */ STAT_INC(CALL, hit); - _PyStackRef cls_stackref = arguments[1]; - _PyStackRef inst_stackref = arguments[0]; - int retval = PyObject_IsInstance(PyStackRef_AsPyObjectBorrow(inst_stackref), PyStackRef_AsPyObjectBorrow(cls_stackref)); + PyObject *inst_o = PyStackRef_AsPyObjectBorrow(instance); + PyObject *cls_o = PyStackRef_AsPyObjectBorrow(cls); + int retval = PyObject_IsInstance(inst_o, cls_o); if (retval < 0) { ERROR_NO_POP(); } + (void)null; // Silence compiler warnings about unused variables + PyStackRef_CLOSE(cls); + PyStackRef_CLOSE(instance); + DEAD(null); + PyStackRef_CLOSE(callable); res = retval ? 
PyStackRef_True : PyStackRef_False; assert((!PyStackRef_IsNull(res)) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + } + + macro(CALL_ISINSTANCE) = + unused/1 + + unused/2 + + _GUARD_THIRD_NULL + + _GUARD_CALLABLE_ISINSTANCE + + _CALL_ISINSTANCE; + + macro(CALL_LIST_APPEND) = + unused/1 + + unused/2 + + _GUARD_CALLABLE_LIST_APPEND + + _GUARD_NOS_NOT_NULL + + _GUARD_NOS_LIST + + _CALL_LIST_APPEND; + + op(_GUARD_CALLABLE_LIST_APPEND, (callable, unused, unused -- callable, unused, unused)){ + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + DEOPT_IF(callable_o != interp->callable_cache.list_append); } // This is secretly a super-instruction - inst(CALL_LIST_APPEND, (unused/1, unused/2, callable, self, arg -- )) { + op(_CALL_LIST_APPEND, (callable, self, arg -- )) { assert(oparg == 1); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); - PyInterpreterState *interp = tstate->interp; - DEOPT_IF(callable_o != interp->callable_cache.list_append); - DEOPT_IF(self_o == NULL); - DEOPT_IF(!PyList_Check(self_o)); + DEOPT_IF(!PyList_CheckExact(self_o)); DEOPT_IF(!LOCK_OBJECT(self_o)); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); @@ -4678,7 +4678,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _PyInterpreterFrame*)) { + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); // oparg counts all of the args, but *not* self: @@ -4705,7 +4705,7 @@ dummy_func( DEAD(callable); SYNC_SP(); ERROR_IF(temp == NULL); - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable, unused, unused[oparg], unused -- callable, unused, unused[oparg], unused)) { @@ -5024,8 +5024,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { - assert(oparg > 0); + pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { top = PyStackRef_DUP(bottom); } @@ -5058,12 +5057,11 @@ dummy_func( macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; - pure inst(SWAP, (bottom, unused[oparg-2], top -- + pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); } inst(INSTRUMENTED_LINE, ( -- )) { @@ -5303,18 +5301,75 @@ dummy_func( } tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + tier2 op(_POP_CALL, (callable, null --)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_ONE, (callable, null, pop --)) { PyStackRef_CLOSE(pop); - value = PyStackRef_FromPyObjectImmortal(ptr); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); } - tier2 pure op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { + tier2 op(_POP_CALL_TWO, (callable, null, pop1, pop2 --)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); - value = 
PyStackRef_FromPyObjectImmortal(ptr); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + PyStackRef_CLOSE(pop); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { + PyStackRef_CLOSE(pop2); + PyStackRef_CLOSE(pop1); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null -- value)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop -- value)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { + PyStackRef_CLOSE(pop2); + PyStackRef_CLOSE(pop1); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_LOAD_CONST_UNDER_INLINE, (ptr/4, old -- value, new)) { + new = old; + DEAD(old); + value = PyStackRef_FromPyObjectNew(ptr); + } + + tier2 op(_LOAD_CONST_UNDER_INLINE_BORROW, (ptr/4, old -- value, new)) { + new = old; + DEAD(old); + value = PyStackRef_FromPyObjectBorrow(ptr); } tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { diff --git a/Python/ceval.c b/Python/ceval.c index 490b653f132..d1de4875656 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,7 +8,7 @@ #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL___ENTER__ #include "pycore_code.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS @@ -139,6 +139,19 @@ #endif +static void +check_invalid_reentrancy(void) +{ +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) + // In the free-threaded build, the interpreter must not be re-entered if + // the world-is-stopped. If so, that's a bug somewhere (quite likely in + // the painfully complex typeobject code). 
+ PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!interp->stoptheworld.world_stopped); +#endif +} + + #ifdef Py_DEBUG static void dump_item(_PyStackRef item) @@ -360,9 +373,6 @@ _Py_EnterRecursiveCallUnchecked(PyThreadState *tstate) # define Py_C_STACK_SIZE 1200000 #elif defined(__sparc__) # define Py_C_STACK_SIZE 1600000 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really the stack depth */ -# define Py_C_STACK_SIZE 131072 // wasi-libc DEFAULT_STACK_SIZE #elif defined(__hppa__) || defined(__powerpc64__) # define Py_C_STACK_SIZE 2000000 #else @@ -999,6 +1009,7 @@ PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { _Py_EnsureTstateNotNULL(tstate); + check_invalid_reentrancy(); CALL_STAT_INC(pyeval_calls); #if USE_COMPUTED_GOTOS && !Py_TAIL_CALL_INTERP @@ -2735,10 +2746,9 @@ _PyEval_GetFrameLocals(void) return locals; } -PyObject * -PyEval_GetGlobals(void) +static PyObject * +_PyEval_GetGlobals(PyThreadState *tstate) { - PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = _PyThreadState_GetFrame(tstate); if (current_frame == NULL) { return NULL; @@ -2746,6 +2756,120 @@ PyEval_GetGlobals(void) return current_frame->f_globals; } +PyObject * +PyEval_GetGlobals(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + return _PyEval_GetGlobals(tstate); +} + +PyObject * +_PyEval_GetGlobalsFromRunningMain(PyThreadState *tstate) +{ + if (!_PyInterpreterState_IsRunningMain(tstate->interp)) { + return NULL; + } + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + Py_XDECREF(mod); + return NULL; + } + PyObject *globals = PyModule_GetDict(mod); // borrowed + Py_DECREF(mod); + return globals; +} + +static PyObject * +get_globals_builtins(PyObject *globals) +{ + PyObject *builtins = NULL; + if (PyDict_Check(globals)) { + if (PyDict_GetItemRef(globals, &_Py_ID(__builtins__), &builtins) < 0) { + return NULL; + } + } + else { + if (PyMapping_GetOptionalItem( + globals, &_Py_ID(__builtins__), &builtins) < 0) + { + return NULL; + } + } + return builtins; +} + +static int +set_globals_builtins(PyObject *globals, PyObject *builtins) +{ + if (PyDict_Check(globals)) { + if (PyDict_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + else { + if (PyObject_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + return 0; +} + +int +_PyEval_EnsureBuiltins(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyEval_GetBuiltins(); // borrowed + if (builtins == NULL) { + assert(_PyErr_Occurred(tstate)); + return -1; + } + Py_INCREF(builtins); + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + +int +_PyEval_EnsureBuiltinsWithModule(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyImport_ImportModuleLevel("builtins", NULL, NULL, NULL, 0); + if (builtins == NULL) { + return -1; + } + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { 
+ *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + PyObject* PyEval_GetFrameLocals(void) { @@ -2968,7 +3092,7 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) int is_possibly_shadowing_stdlib = 0; if (is_possibly_shadowing) { PyObject *stdlib_modules; - if (_PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { + if (PySys_GetOptionalAttrString("stdlib_module_names", &stdlib_modules) < 0) { goto done; } if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { @@ -3179,7 +3303,7 @@ _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwarg else if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { PyObject *exc = _PyErr_GetRaisedException(tstate); PyObject *args = PyException_GetArgs(exc); - if (exc && PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1) { + if (PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1) { _PyErr_Clear(tstate); PyObject *funcstr = _PyObject_FunctionStr(func); if (funcstr != NULL) { @@ -3428,6 +3552,50 @@ _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject *na return value; } +static _PyStackRef +foriter_next(PyObject *seq, _PyStackRef index) +{ + assert(PyStackRef_IsTaggedInt(index)); + assert(PyTuple_CheckExact(seq) || PyList_CheckExact(seq)); + intptr_t i = PyStackRef_UntagInt(index); + if (PyTuple_CheckExact(seq)) { + size_t size = PyTuple_GET_SIZE(seq); + if ((size_t)i >= size) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, i)); + } + PyObject *item = _PyList_GetItemRef((PyListObject *)seq, i); + if (item == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectSteal(item); +} + +_PyStackRef _PyForIter_VirtualIteratorNext(PyThreadState* tstate, _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef* index_ptr) +{ + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + _PyStackRef index = *index_ptr; + if (PyStackRef_IsTaggedInt(index)) { + *index_ptr = PyStackRef_IncrementTaggedIntNoOverflow(index); + return foriter_next(iter_o, index); + } + PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + if (next_o == NULL) { + if (_PyErr_Occurred(tstate)) { + if (_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); + _PyErr_Clear(tstate); + } + else { + return PyStackRef_ERROR; + } + } + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectSteal(next_o); +} + /* Check if a 'cls' provides the given special method. */ static inline int type_has_special_method(PyTypeObject *cls, PyObject *name) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 5b5018a6373..aa68371ac8f 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1218,30 +1218,30 @@ static inline int run_remote_debugger_source(PyObject *source) // Note that this function is inline to avoid creating a PLT entry // that would be an easy target for a ROP gadget. -static inline void run_remote_debugger_script(const char *path) +static inline void run_remote_debugger_script(PyObject *path) { - if (0 != PySys_Audit("remote_debugger_script", "s", path)) { + if (0 != PySys_Audit("cpython.remote_debugger_script", "O", path)) { PyErr_FormatUnraisable( - "Audit hook failed for remote debugger script %s", path); + "Audit hook failed for remote debugger script %U", path); return; } // Open the debugger script with the open code hook, and reopen the // resulting file object to get a C FILE* object. 
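The pair of _PyEval_EnsureBuiltins() helpers added above guarantee that a globals mapping carries a usable "__builtins__" entry before code runs against it, preferring the thread's current builtins and importing the builtins module only as a last resort. A minimal sketch of the same invariant using only public C API calls; the helper name is hypothetical and error handling is trimmed:

#include <Python.h>

/* Hypothetical helper mirroring what _PyEval_EnsureBuiltins() maintains:
 * before running code against 'globals' (a dict), make sure it has a
 * "__builtins__" entry, falling back to the current builtins. */
static int
ensure_builtins_sketch(PyObject *globals)
{
    int has = PyDict_ContainsString(globals, "__builtins__");
    if (has < 0) {
        return -1;
    }
    if (!has) {
        PyObject *builtins = PyEval_GetBuiltins();  // borrowed; needs a tstate
        if (builtins == NULL
            || PyDict_SetItemString(globals, "__builtins__", builtins) < 0)
        {
            return -1;
        }
    }
    return 0;
}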
- PyObject* fileobj = PyFile_OpenCode(path); + PyObject* fileobj = PyFile_OpenCodeObject(path); if (!fileobj) { - PyErr_FormatUnraisable("Can't open debugger script %s", path); + PyErr_FormatUnraisable("Can't open debugger script %U", path); return; } PyObject* source = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(read)); if (!source) { - PyErr_FormatUnraisable("Error reading debugger script %s", path); + PyErr_FormatUnraisable("Error reading debugger script %U", path); } PyObject* res = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(close)); if (!res) { - PyErr_FormatUnraisable("Error closing debugger script %s", path); + PyErr_FormatUnraisable("Error closing debugger script %U", path); } else { Py_DECREF(res); } @@ -1249,7 +1249,7 @@ static inline void run_remote_debugger_script(const char *path) if (source) { if (0 != run_remote_debugger_source(source)) { - PyErr_FormatUnraisable("Error executing debugger script %s", path); + PyErr_FormatUnraisable("Error executing debugger script %U", path); } Py_DECREF(source); } @@ -1278,7 +1278,14 @@ int _PyRunRemoteDebugger(PyThreadState *tstate) pathsz); path[pathsz - 1] = '\0'; if (*path) { - run_remote_debugger_script(path); + PyObject *path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) { + PyErr_FormatUnraisable("Can't decode debugger script"); + } + else { + run_remote_debugger_script(path_obj); + Py_DECREF(path_obj); + } } PyMem_Free(path); } @@ -1380,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate) _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } + /* Process deferred memory frees held by QSBR */ + if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { + _PyMem_ProcessDelayed(tstate); + } #endif /* GC scheduled to run */ diff --git a/Python/codegen.c b/Python/codegen.c index 683601103ec..27fe8e1957b 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -28,6 +28,7 @@ #include "pycore_pystate.h" // _Py_GetConfig() #include "pycore_symtable.h" // PySTEntryObject #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString +#include "pycore_ceval.h" // SPECIAL___ENTER__ #define NEED_OPCODE_METADATA #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed @@ -527,6 +528,15 @@ codegen_unwind_fblock(compiler *c, location *ploc, case COMPILE_FBLOCK_FOR_LOOP: /* Pop the iterator */ if (preserve_tos) { + ADDOP_I(c, *ploc, SWAP, 3); + } + ADDOP(c, *ploc, POP_TOP); + ADDOP(c, *ploc, POP_TOP); + return SUCCESS; + + case COMPILE_FBLOCK_ASYNC_FOR_LOOP: + /* Pop the iterator */ + if (preserve_tos) { ADDOP_I(c, *ploc, SWAP, 2); } ADDOP(c, *ploc, POP_TOP); @@ -629,7 +639,8 @@ codegen_unwind_fblock_stack(compiler *c, location *ploc, c, *ploc, "'break', 'continue' and 'return' cannot appear in an except* block"); } if (loop != NULL && (top->fb_type == COMPILE_FBLOCK_WHILE_LOOP || - top->fb_type == COMPILE_FBLOCK_FOR_LOOP)) { + top->fb_type == COMPILE_FBLOCK_FOR_LOOP || + top->fb_type == COMPILE_FBLOCK_ASYNC_FOR_LOOP)) { *loop = top; return SUCCESS; } @@ -2125,7 +2136,7 @@ codegen_async_for(compiler *c, stmt_ty s) ADDOP(c, LOC(s->v.AsyncFor.iter), GET_AITER); USE_LABEL(c, start); - RETURN_IF_ERROR(_PyCompile_PushFBlock(c, loc, COMPILE_FBLOCK_FOR_LOOP, start, end, NULL)); + RETURN_IF_ERROR(_PyCompile_PushFBlock(c, loc, COMPILE_FBLOCK_ASYNC_FOR_LOOP, start, end, NULL)); /* SETUP_FINALLY to guard the __anext__ call */ ADDOP_JUMP(c, loc, SETUP_FINALLY, except); @@ -2142,7 +2153,7 @@ codegen_async_for(compiler *c, stmt_ty s) /* Mark jump as 
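Since the audit event is now raised as "cpython.remote_debugger_script" with the path passed as a str object (decoded via PyUnicode_DecodeFSDefault), a C-level audit hook can inspect it without worrying about encodings. A small sketch with the public PySys_AddAuditHook() API; the hook body and logging policy are illustrative only:

#include <Python.h>
#include <string.h>

static int
debugger_script_hook(const char *event, PyObject *args, void *data)
{
    if (strcmp(event, "cpython.remote_debugger_script") == 0
        && PyTuple_GET_SIZE(args) == 1)
    {
        PyObject *path = PyTuple_GET_ITEM(args, 0);  // borrowed str
        const char *utf8 = PyUnicode_Check(path)
                               ? PyUnicode_AsUTF8(path) : NULL;
        if (utf8 != NULL) {
            fprintf(stderr, "remote debugger script: %s\n", utf8);
        }
        else {
            PyErr_Clear();
        }
    }
    return 0;  // return -1 with an exception set to veto the event
}

/* Installed once, typically before Py_Initialize():
 *     PySys_AddAuditHook(debugger_script_hook, NULL); */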
artificial */ ADDOP_JUMP(c, NO_LOCATION, JUMP, start); - _PyCompile_PopFBlock(c, COMPILE_FBLOCK_FOR_LOOP, start); + _PyCompile_PopFBlock(c, COMPILE_FBLOCK_ASYNC_FOR_LOOP, start); /* Except block for __anext__ */ USE_LABEL(c, except); @@ -3895,10 +3906,11 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) NEW_JUMP_TARGET_LABEL(c, loop); NEW_JUMP_TARGET_LABEL(c, cleanup); + ADDOP(c, loc, PUSH_NULL); // Push NULL index for loop USE_LABEL(c, loop); ADDOP_JUMP(c, loc, FOR_ITER, cleanup); if (const_oparg == CONSTANT_BUILTIN_TUPLE) { - ADDOP_I(c, loc, LIST_APPEND, 2); + ADDOP_I(c, loc, LIST_APPEND, 3); ADDOP_JUMP(c, loc, JUMP, loop); } else { @@ -4442,13 +4454,12 @@ codegen_sync_comprehension_generator(compiler *c, location loc, } if (IS_JUMP_TARGET_LABEL(start)) { VISIT(c, expr, gen->iter); - ADDOP(c, LOC(gen->iter), GET_ITER); } } } if (IS_JUMP_TARGET_LABEL(start)) { - depth++; + depth += 2; ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); @@ -4543,9 +4554,9 @@ codegen_async_comprehension_generator(compiler *c, location loc, else { /* Sub-iter - calculate on the fly */ VISIT(c, expr, gen->iter); - ADDOP(c, LOC(gen->iter), GET_AITER); } } + ADDOP(c, LOC(gen->iter), GET_AITER); USE_LABEL(c, start); /* Runtime will push a block here, so we need to account for that */ @@ -4757,19 +4768,6 @@ pop_inlined_comprehension_state(compiler *c, location loc, return SUCCESS; } -static inline int -codegen_comprehension_iter(compiler *c, comprehension_ty comp) -{ - VISIT(c, expr, comp->iter); - if (comp->is_async) { - ADDOP(c, LOC(comp->iter), GET_AITER); - } - else { - ADDOP(c, LOC(comp->iter), GET_ITER); - } - return SUCCESS; -} - static int codegen_comprehension(compiler *c, expr_ty e, int type, identifier name, asdl_comprehension_seq *generators, expr_ty elt, @@ -4789,9 +4787,7 @@ codegen_comprehension(compiler *c, expr_ty e, int type, outermost = (comprehension_ty) asdl_seq_GET(generators, 0); if (is_inlined) { - if (codegen_comprehension_iter(c, outermost)) { - goto error; - } + VISIT(c, expr, outermost->iter); if (push_inlined_comprehension_state(c, loc, entry, &inline_state)) { goto error; } diff --git a/Python/context.c b/Python/context.c index dceaae9b429..9927cab915c 100644 --- a/Python/context.c +++ b/Python/context.c @@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op) return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) { @@ -987,7 +987,7 @@ contextvar_tp_repr(PyObject *op) } if (self->var_default != NULL) { - if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) { @@ -1182,15 +1182,15 @@ token_tp_repr(PyObject *op) if (writer == NULL) { return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) { goto error; } if (self->tok_used) { - if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) { goto error; } } - if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { diff --git a/Python/crossinterp.c 
b/Python/crossinterp.c index 74ce02f1a26..16a23f0351c 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -6,8 +6,14 @@ #include "osdefs.h" // MAXPATHLEN #include "pycore_ceval.h" // _Py_simple_func #include "pycore_crossinterp.h" // _PyXIData_t +#include "pycore_function.h" // _PyFunction_VerifyStateless() +#include "pycore_global_strings.h" // _Py_ID() +#include "pycore_import.h" // _PyImport_SetModule() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_namespace.h" // _PyNamespace_New() +#include "pycore_pythonrun.h" // _Py_SourceAsString() +#include "pycore_runtime.h" // _PyRuntime +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_typeobject.h" // _PyStaticType_InitBuiltin() @@ -19,6 +25,7 @@ _Py_GetMainfile(char *buffer, size_t maxlen) PyThreadState *tstate = _PyThreadState_GET(); PyObject *module = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(module) < 0) { + Py_XDECREF(module); return -1; } Py_ssize_t size = _PyModule_GetFilenameUTF8(module, buffer, maxlen); @@ -28,27 +35,6 @@ _Py_GetMainfile(char *buffer, size_t maxlen) static PyObject * -import_get_module(PyThreadState *tstate, const char *modname) -{ - PyObject *module = NULL; - if (strcmp(modname, "__main__") == 0) { - module = _Py_GetMainModule(tstate); - if (_Py_CheckMainModule(module) < 0) { - assert(_PyErr_Occurred(tstate)); - return NULL; - } - } - else { - module = PyImport_ImportModule(modname); - if (module == NULL) { - return NULL; - } - } - return module; -} - - -static PyObject * runpy_run_path(const char *filename, const char *modname) { PyObject *run_path = PyImport_ImportModuleAttrString("runpy", "run_path"); @@ -67,97 +53,192 @@ runpy_run_path(const char *filename, const char *modname) } -static PyObject * -pyerr_get_message(PyObject *exc) +static void +set_exc_with_cause(PyObject *exctype, const char *msg) { - assert(!PyErr_Occurred()); - PyObject *args = PyException_GetArgs(exc); - if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { - return NULL; - } - if (PyUnicode_Check(args)) { - return args; - } - PyObject *msg = PySequence_GetItem(args, 0); - Py_DECREF(args); - if (msg == NULL) { - PyErr_Clear(); - return NULL; - } - if (!PyUnicode_Check(msg)) { - Py_DECREF(msg); - return NULL; - } - return msg; + PyObject *cause = PyErr_GetRaisedException(); + PyErr_SetString(exctype, msg); + PyObject *exc = PyErr_GetRaisedException(); + PyException_SetCause(exc, cause); + PyErr_SetRaisedException(exc); } -#define MAX_MODNAME (255) -#define MAX_ATTRNAME (255) -struct attributeerror_info { - char modname[MAX_MODNAME+1]; - char attrname[MAX_ATTRNAME+1]; +/****************************/ +/* module duplication utils */ +/****************************/ + +struct sync_module_result { + PyObject *module; + PyObject *loaded; + PyObject *failed; +}; + +struct sync_module { + const char *filename; + char _filename[MAXPATHLEN+1]; + struct sync_module_result cached; }; +static void +sync_module_clear(struct sync_module *data) +{ + data->filename = NULL; + Py_CLEAR(data->cached.module); + Py_CLEAR(data->cached.loaded); + Py_CLEAR(data->cached.failed); +} + +static void +sync_module_capture_exc(PyThreadState *tstate, struct sync_module *data) +{ + assert(_PyErr_Occurred(tstate)); + PyObject *context = data->cached.failed; + PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyErr_SetRaisedException(tstate, Py_NewRef(exc)); + if (context != NULL) { + PyException_SetContext(exc, context); + } + data->cached.failed = exc; +} + + static int -_parse_attributeerror(PyObject 
*exc, struct attributeerror_info *info) +ensure_isolated_main(PyThreadState *tstate, struct sync_module *main) { - assert(exc != NULL); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - int res = -1; + // Load the module from the original file (or from a cache). - PyObject *msgobj = pyerr_get_message(exc); - if (msgobj == NULL) { + // First try the local cache. + if (main->cached.failed != NULL) { + // We'll deal with this in apply_isolated_main(). + assert(main->cached.module == NULL); + assert(main->cached.loaded == NULL); + return 0; + } + else if (main->cached.loaded != NULL) { + assert(main->cached.module != NULL); + return 0; + } + assert(main->cached.module == NULL); + + if (main->filename == NULL) { + _PyErr_SetString(tstate, PyExc_NotImplementedError, ""); return -1; } - const char *err = PyUnicode_AsUTF8(msgobj); - if (strncmp(err, "module '", 8) != 0) { - goto finally; + // It wasn't in the local cache so we'll need to populate it. + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + // This is probably unrecoverable, so don't bother caching the error. + assert(_PyErr_Occurred(tstate)); + Py_XDECREF(mod); + return -1; } - err += 8; + PyObject *loaded = NULL; - const char *matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; + // Try the per-interpreter cache for the loaded module. + // XXX Store it in sys.modules? + PyObject *interpns = PyInterpreterState_GetDict(tstate->interp); + assert(interpns != NULL); + PyObject *key = PyUnicode_FromString("CACHED_MODULE_NS___main__"); + if (key == NULL) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + return -1; } - Py_ssize_t len = matched - err; - if (len > MAX_MODNAME) { - goto finally; + else if (PyDict_GetItemRef(interpns, key, &loaded) < 0) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + Py_DECREF(key); + return -1; } - (void)strncpy(info->modname, err, len); - info->modname[len] = '\0'; - err = matched; + else if (loaded == NULL) { + // It wasn't already loaded from file. + loaded = PyModule_NewObject(&_Py_ID(__main__)); + if (loaded == NULL) { + goto error; + } + PyObject *ns = _PyModule_GetDict(loaded); - if (strncmp(err, "' has no attribute '", 20) != 0) { - goto finally; - } - err += 20; + // We don't want to trigger "if __name__ == '__main__':", + // so we use a bogus module name. + PyObject *loaded_ns = + runpy_run_path(main->filename, "<fake __main__>"); + if (loaded_ns == NULL) { + goto error; + } + int res = PyDict_Update(ns, loaded_ns); + Py_DECREF(loaded_ns); + if (res < 0) { + goto error; + } - matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; - } - len = matched - err; - if (len > MAX_ATTRNAME) { - goto finally; + // Set the per-interpreter cache entry. 
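The "CACHED_MODULE_NS___main__" entry above lives in the interpreter's private dict, which is also reachable through the public API. A sketch of the same look-up-or-create caching pattern; the key and the 'make' factory are hypothetical:

#include <Python.h>

/* Hypothetical cache accessor: 'make' builds the value on a miss. */
static PyObject *
interp_cached(PyThreadState *tstate, const char *key, PyObject *(*make)(void))
{
    PyObject *ns = PyInterpreterState_GetDict(
                            PyThreadState_GetInterpreter(tstate));
    if (ns == NULL) {  // allocation failure; no exception is guaranteed
        return PyErr_NoMemory();
    }
    PyObject *value = NULL;
    if (PyDict_GetItemStringRef(ns, key, &value) < 0) {
        return NULL;
    }
    if (value == NULL) {
        value = make();
        if (value == NULL || PyDict_SetItemString(ns, key, value) < 0) {
            Py_XDECREF(value);
            return NULL;
        }
    }
    return value;  // strong reference
}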
+ if (PyDict_SetItem(interpns, key, loaded) < 0) { + goto error; + } } - if (strncmp(err, "' has no attribute '", 20) != 0) { - goto finally; - } - err += 20; + Py_DECREF(key); + main->cached = (struct sync_module_result){ + .module = mod, + .loaded = loaded, + }; + return 0; + +error: + sync_module_capture_exc(tstate, main); + Py_XDECREF(loaded); + Py_DECREF(mod); + Py_XDECREF(key); + return -1; +} + +#ifndef NDEBUG +static int +main_mod_matches(PyObject *expected) +{ + PyObject *mod = PyImport_GetModule(&_Py_ID(__main__)); + Py_XDECREF(mod); + return mod == expected; +} +#endif + +static int +apply_isolated_main(PyThreadState *tstate, struct sync_module *main) +{ + assert((main->cached.module == NULL) == (main->cached.loaded == NULL)); + if (main->cached.failed != NULL) { + // It must have failed previously. + assert(main->cached.loaded == NULL); + _PyErr_SetRaisedException(tstate, main->cached.failed); + return -1; } - res = 0; + assert(main->cached.loaded != NULL); -finally: - Py_DECREF(msgobj); - return res; + assert(main_mod_matches(main->cached.module)); + if (_PyImport_SetModule(&_Py_ID(__main__), main->cached.loaded) < 0) { + sync_module_capture_exc(tstate, main); + return -1; + } + return 0; } -#undef MAX_MODNAME -#undef MAX_ATTRNAME +static void +restore_main(PyThreadState *tstate, struct sync_module *main) +{ + assert(main->cached.failed == NULL); + assert(main->cached.module != NULL); + assert(main->cached.loaded != NULL); + PyObject *exc = _PyErr_GetRaisedException(tstate); + assert(main_mod_matches(main->cached.loaded)); + int res = _PyImport_SetModule(&_Py_ID(__main__), main->cached.module); + assert(res == 0); + if (res < 0) { + PyErr_FormatUnraisable("Exception ignored while restoring __main__"); + } + _PyErr_SetRaisedException(tstate, exc); +} /**************/ @@ -207,16 +288,16 @@ _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, /* cross-interpreter data */ /**************************/ -/* registry of {type -> xidatafunc} */ +/* registry of {type -> _PyXIData_getdata_t} */ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. 
*/ static void xid_lookup_init(_PyXIData_lookup_t *); static void xid_lookup_fini(_PyXIData_lookup_t *); struct _dlcontext; -static xidatafunc lookup_getdata(struct _dlcontext *, PyObject *); +static _PyXIData_getdata_t lookup_getdata(struct _dlcontext *, PyObject *); #include "crossinterp_data_lookup.h" @@ -340,7 +421,7 @@ _set_xid_lookup_failure(PyThreadState *tstate, PyObject *obj, const char *msg, set_notshareableerror(tstate, cause, 0, msg); } else { - msg = "%S does not support cross-interpreter data"; + msg = "%R does not support cross-interpreter data"; format_notshareableerror(tstate, cause, 0, msg, obj); } } @@ -353,8 +434,8 @@ _PyObject_CheckXIData(PyThreadState *tstate, PyObject *obj) if (get_lookup_context(tstate, &ctx) < 0) { return -1; } - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (!_PyErr_Occurred(tstate)) { _set_xid_lookup_failure(tstate, obj, NULL, NULL); } @@ -385,9 +466,9 @@ _check_xidata(PyThreadState *tstate, _PyXIData_t *xidata) return 0; } -int -_PyObject_GetXIData(PyThreadState *tstate, - PyObject *obj, _PyXIData_t *xidata) +static int +_get_xidata(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, _PyXIData_t *xidata) { PyInterpreterState *interp = tstate->interp; @@ -395,6 +476,7 @@ _PyObject_GetXIData(PyThreadState *tstate, assert(xidata->obj == NULL); if (xidata->data != NULL || xidata->obj != NULL) { _PyErr_SetString(tstate, PyExc_ValueError, "xidata not cleared"); + return -1; } // Call the "getdata" func for the object. @@ -403,8 +485,8 @@ _PyObject_GetXIData(PyThreadState *tstate, return -1; } Py_INCREF(obj); - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (PyErr_Occurred()) { Py_DECREF(obj); return -1; @@ -416,7 +498,9 @@ _PyObject_GetXIData(PyThreadState *tstate, } return -1; } - int res = getdata(tstate, obj, xidata); + int res = getdata.basic != NULL + ? getdata.basic(tstate, obj, xidata) + : getdata.fallback(tstate, obj, fallback, xidata); Py_DECREF(obj); if (res != 0) { PyObject *cause = _PyErr_GetRaisedException(tstate); @@ -436,6 +520,51 @@ _PyObject_GetXIData(PyThreadState *tstate, return 0; } +int +_PyObject_GetXIDataNoFallback(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return _get_xidata(tstate, obj, _PyXIDATA_XIDATA_ONLY, xidata); +} + +int +_PyObject_GetXIData(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) +{ + switch (fallback) { + case _PyXIDATA_XIDATA_ONLY: + return _get_xidata(tstate, obj, fallback, xidata); + case _PyXIDATA_FULL_FALLBACK: + if (_get_xidata(tstate, obj, fallback, xidata) == 0) { + return 0; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (PyFunction_Check(obj)) { + if (_PyFunction_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + _PyErr_Clear(tstate); + } + // We could try _PyMarshal_GetXIData() but we won't for now. + if (_PyPickle_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + // Raise the original exception. 
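With the registry now storing a _PyXIData_getdata_t, _PyObject_GetXIData() dispatches to whichever of the two function pointers is set, and the full-fallback path stashes the first exception so it can be re-raised if every alternative conversion also fails. That stash-and-restore pattern in isolation, with hypothetical convert_a()/convert_b() standing in for the real conversions:

#include <Python.h>

static int convert_a(PyObject *obj, void *out);  // hypothetical; sets an
static int convert_b(PyObject *obj, void *out);  // exception on failure

static int
convert_with_fallback(PyObject *obj, void *out)
{
    if (convert_a(obj, out) == 0) {
        return 0;
    }
    PyObject *exc = PyErr_GetRaisedException();  // own the first error
    if (convert_b(obj, out) == 0) {
        Py_XDECREF(exc);                         // the fallback succeeded
        return 0;
    }
    PyErr_SetRaisedException(exc);               // re-raise the original
    return -1;
}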
+ _PyErr_SetRaisedException(tstate, exc); + return -1; + default: +#ifdef Py_DEBUG + Py_FatalError("unsupported xidata fallback option"); +#endif + _PyErr_SetString(tstate, PyExc_SystemError, + "unsupported xidata fallback option"); + return -1; + } +} + /* pickle C-API */ @@ -456,28 +585,6 @@ _PyPickle_Dumps(struct _pickle_context *ctx, PyObject *obj) } -struct sync_module_result { - PyObject *module; - PyObject *loaded; - PyObject *failed; -}; - -struct sync_module { - const char *filename; - char _filename[MAXPATHLEN+1]; - struct sync_module_result cached; -}; - -static void -sync_module_clear(struct sync_module *data) -{ - data->filename = NULL; - Py_CLEAR(data->cached.module); - Py_CLEAR(data->cached.loaded); - Py_CLEAR(data->cached.failed); -} - - struct _unpickle_context { PyThreadState *tstate; // We only special-case the __main__ module, @@ -491,142 +598,88 @@ _unpickle_context_clear(struct _unpickle_context *ctx) sync_module_clear(&ctx->main); } -static struct sync_module_result -_unpickle_context_get_module(struct _unpickle_context *ctx, - const char *modname) +static int +check_missing___main___attr(PyObject *exc) { - if (strcmp(modname, "__main__") == 0) { - return ctx->main.cached; + assert(!PyErr_Occurred()); + if (!PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)) { + return 0; } - else { - return (struct sync_module_result){ - .failed = PyExc_NotImplementedError, - }; + + // Get the error message. + PyObject *args = PyException_GetArgs(exc); + if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { + assert(!PyErr_Occurred()); + return 0; + } + PyObject *msgobj = args; + if (!PyUnicode_Check(msgobj)) { + msgobj = PySequence_GetItem(args, 0); + Py_DECREF(args); + if (msgobj == NULL) { + PyErr_Clear(); + return 0; + } } + const char *err = PyUnicode_AsUTF8(msgobj); + + // Check if it's a missing __main__ attr. + int cmp = strncmp(err, "module '__main__' has no attribute '", 36); + Py_DECREF(msgobj); + return cmp == 0; } -static struct sync_module_result -_unpickle_context_set_module(struct _unpickle_context *ctx, - const char *modname) +static PyObject * +_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) { - struct sync_module_result res = {0}; - struct sync_module_result *cached = NULL; - const char *filename = NULL; - const char *run_modname = modname; - if (strcmp(modname, "__main__") == 0) { - cached = &ctx->main.cached; - filename = ctx->main.filename; - // We don't want to trigger "if __name__ == '__main__':". - run_modname = "<fake __main__>"; - } - else { - res.failed = PyExc_NotImplementedError; - goto finally; - } + PyThreadState *tstate = ctx->tstate; - res.module = import_get_module(ctx->tstate, modname); - if (res.module == NULL) { - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); - goto finally; + PyObject *exc = NULL; + PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); + if (loads == NULL) { + return NULL; } - if (filename == NULL) { - Py_CLEAR(res.module); - res.failed = PyExc_NotImplementedError; + // Make an initial attempt to unpickle. 
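The loads callable here comes from PyImport_ImportModuleAttrString(), the same one-shot module-attribute import used elsewhere in this commit. For reference, a self-contained pickle round trip from C built on the same calls (error handling trimmed):

#include <Python.h>

static PyObject *
pickle_roundtrip(PyObject *obj)
{
    PyObject *result = NULL;
    PyObject *dumps = PyImport_ImportModuleAttrString("pickle", "dumps");
    if (dumps == NULL) {
        return NULL;
    }
    PyObject *blob = PyObject_CallOneArg(dumps, obj);
    Py_DECREF(dumps);
    if (blob == NULL) {
        return NULL;
    }
    PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads");
    if (loads != NULL) {
        result = PyObject_CallOneArg(loads, blob);
        Py_DECREF(loads);
    }
    Py_DECREF(blob);
    return result;
}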
+ PyObject *obj = PyObject_CallOneArg(loads, pickled); + if (obj != NULL) { goto finally; } - res.loaded = runpy_run_path(filename, run_modname); - if (res.loaded == NULL) { - Py_CLEAR(res.module); - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); + assert(_PyErr_Occurred(tstate)); + if (ctx == NULL) { goto finally; } - -finally: - if (cached != NULL) { - assert(cached->module == NULL); - assert(cached->loaded == NULL); - assert(cached->failed == NULL); - *cached = res; - } - return res; -} - - -static int -_handle_unpickle_missing_attr(struct _unpickle_context *ctx, PyObject *exc) -{ - // The caller must check if an exception is set or not when -1 is returned. - assert(!_PyErr_Occurred(ctx->tstate)); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - struct attributeerror_info info; - if (_parse_attributeerror(exc, &info) < 0) { - return -1; + exc = _PyErr_GetRaisedException(tstate); + if (!check_missing___main___attr(exc)) { + goto finally; } - // Get the module. - struct sync_module_result mod = _unpickle_context_get_module(ctx, info.modname); - if (mod.failed != NULL) { - // It must have failed previously. - return -1; - } - if (mod.module == NULL) { - mod = _unpickle_context_set_module(ctx, info.modname); - if (mod.failed != NULL) { - return -1; - } - assert(mod.module != NULL); + // Temporarily swap in a fake __main__ loaded from the original + // file and cached. Note that functions will use the cached ns + // for __globals__, not the actual module. + if (ensure_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - - // Bail out if it is unexpectedly set already. - if (PyObject_HasAttrString(mod.module, info.attrname)) { - return -1; + if (apply_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - // Try setting the attribute. - PyObject *value = NULL; - if (PyDict_GetItemStringRef(mod.loaded, info.attrname, &value) <= 0) { - return -1; - } - assert(value != NULL); - int res = PyObject_SetAttrString(mod.module, info.attrname, value); - Py_DECREF(value); - if (res < 0) { - return -1; + // Try to unpickle once more. + obj = PyObject_CallOneArg(loads, pickled); + restore_main(tstate, &ctx->main); + if (obj == NULL) { + goto finally; } + Py_CLEAR(exc); - return 0; -} - -static PyObject * -_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) -{ - PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); - if (loads == NULL) { - return NULL; - } - PyObject *obj = PyObject_CallOneArg(loads, pickled); - if (ctx != NULL) { - while (obj == NULL) { - assert(_PyErr_Occurred(ctx->tstate)); - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { - // We leave other failures unhandled. - break; - } - // Try setting the attr if not set. - PyObject *exc = _PyErr_GetRaisedException(ctx->tstate); - if (_handle_unpickle_missing_attr(ctx, exc) < 0) { - // Any resulting exceptions are ignored - // in favor of the original. - _PyErr_SetRaisedException(ctx->tstate, exc); - break; - } - Py_CLEAR(exc); - // Retry with the attribute set. - obj = PyObject_CallOneArg(loads, pickled) +finally: + if (exc != NULL) { + if (_PyErr_Occurred(tstate)) { + sync_module_capture_exc(tstate, &ctx->main); } + // We restore the original exception. + // It might make sense to chain it (__context__). 
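The chaining that comment contemplates is what sync_module_capture_exc() already does for cached failures: raise a new exception while keeping the in-flight one reachable as __context__. Reduced to a sketch:

#include <Python.h>

static void
raise_with_context(PyObject *exctype, const char *msg)
{
    PyObject *context = PyErr_GetRaisedException();  // may be NULL
    PyErr_SetString(exctype, msg);
    if (context != NULL) {
        PyObject *exc = PyErr_GetRaisedException();
        PyException_SetContext(exc, context);  // steals 'context'
        PyErr_SetRaisedException(exc);
    }
}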
+ _PyErr_SetRaisedException(tstate, exc); } Py_DECREF(loads); return obj; @@ -784,6 +837,138 @@ _PyMarshal_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) } +/* script wrapper */ + +static int +verify_script(PyThreadState *tstate, PyCodeObject *co, int checked, int pure) +{ + // Make sure it isn't a closure and (optionally) doesn't use globals. + PyObject *builtins = NULL; + if (pure) { + builtins = _PyEval_GetBuiltins(tstate); + assert(builtins != NULL); + } + if (checked) { + assert(_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) == 0); + } + else if (_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) < 0) { + return -1; + } + // Make sure it doesn't have args. + if (co->co_argcount > 0 + || co->co_posonlyargcount > 0 + || co->co_kwonlyargcount > 0 + || co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) + { + _PyErr_SetString(tstate, PyExc_ValueError, + "code with args not supported"); + return -1; + } + // Make sure it doesn't return anything. + if (!_PyCode_ReturnsOnlyNone(co)) { + _PyErr_SetString(tstate, PyExc_ValueError, + "code that returns a value is not a script"); + return -1; + } + return 0; +} + +static int +get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure, + _PyXIData_t *xidata) +{ + // Get the corresponding code object. + PyObject *code = NULL; + int checked = 0; + if (PyCode_Check(obj)) { + code = obj; + Py_INCREF(code); + } + else if (PyFunction_Check(obj)) { + code = PyFunction_GET_CODE(obj); + assert(code != NULL); + Py_INCREF(code); + if (pure) { + if (_PyFunction_VerifyStateless(tstate, obj) < 0) { + goto error; + } + checked = 1; + } + } + else { + const char *filename = "<script>"; + int optimize = 0; + PyCompilerFlags cf = _PyCompilerFlags_INIT; + cf.cf_flags = PyCF_SOURCE_IS_UTF8; + PyObject *ref = NULL; + const char *script = _Py_SourceAsString(obj, "???", "???", &cf, &ref); + if (script == NULL) { + if (!_PyObject_SupportedAsScript(obj)) { + // We discard the raised exception. + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported script %R", obj); + } + goto error; + } +#ifdef Py_GIL_DISABLED + // Don't immortalize code constants to avoid memory leaks. + ((_PyThreadStateImpl *)tstate)->suppress_co_const_immortalization++; +#endif + code = Py_CompileStringExFlags( + script, filename, Py_file_input, &cf, optimize); +#ifdef Py_GIL_DISABLED + ((_PyThreadStateImpl *)tstate)->suppress_co_const_immortalization--; +#endif + Py_XDECREF(ref); + if (code == NULL) { + goto error; + } + // Compiled text can't have args or any return statements, + // nor be a closure. It can use globals though. + if (!pure) { + // We don't need to check for globals either. + checked = 1; + } + } + + // Make sure it's actually a script. + if (verify_script(tstate, (PyCodeObject *)code, checked, pure) < 0) { + goto error; + } + + // Convert the code object. 
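verify_script() above rejects code that takes arguments or returns a value; the arity half of that check can be reproduced against the public compile API. A sketch, noting that direct PyCodeObject field access assumes the unstable cpython/code.h layout:

#include <Python.h>

static int
looks_like_script(const char *source)
{
    PyObject *code = Py_CompileString(source, "<script>", Py_file_input);
    if (code == NULL) {
        return -1;
    }
    PyCodeObject *co = (PyCodeObject *)code;
    int ok = (co->co_argcount == 0
              && co->co_posonlyargcount == 0
              && co->co_kwonlyargcount == 0
              && (co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) == 0);
    Py_DECREF(code);
    return ok;
}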
+ int res = _PyCode_GetXIData(tstate, code, xidata); + Py_DECREF(code); + if (res < 0) { + return -1; + } + return 0; + +error: + Py_XDECREF(code); + PyObject *cause = _PyErr_GetRaisedException(tstate); + assert(cause != NULL); + _set_xid_lookup_failure( + tstate, NULL, "object not a valid script", cause); + Py_DECREF(cause); + return -1; +} + +int +_PyCode_GetScriptXIData(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return get_script_xidata(tstate, obj, 0, xidata); +} + +int +_PyCode_GetPureScriptXIData(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return get_script_xidata(tstate, obj, 1, xidata); +} + + /* using cross-interpreter data */ PyObject * @@ -1130,8 +1315,14 @@ _excinfo_normalize_type(struct _excinfo_type *info, *p_module = module; } +static int +excinfo_is_set(_PyXI_excinfo *info) +{ + return info->type.name != NULL || info->msg != NULL; +} + static void -_PyXI_excinfo_Clear(_PyXI_excinfo *info) +_PyXI_excinfo_clear(_PyXI_excinfo *info) { _excinfo_clear_type(&info->type); if (info->msg != NULL) { @@ -1181,7 +1372,7 @@ _PyXI_excinfo_InitFromException(_PyXI_excinfo *info, PyObject *exc) assert(exc != NULL); if (PyErr_GivenExceptionMatches(exc, PyExc_MemoryError)) { - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return NULL; } const char *failure = NULL; @@ -1227,7 +1418,7 @@ _PyXI_excinfo_InitFromException(_PyXI_excinfo *info, PyObject *exc) error: assert(failure != NULL); - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return failure; } @@ -1278,7 +1469,7 @@ _PyXI_excinfo_InitFromObject(_PyXI_excinfo *info, PyObject *obj) error: assert(failure != NULL); - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return failure; } @@ -1291,6 +1482,11 @@ _PyXI_excinfo_Apply(_PyXI_excinfo *info, PyObject *exctype) if (tbexc == NULL) { PyErr_Clear(); } + else { + PyErr_SetObject(exctype, tbexc); + Py_DECREF(tbexc); + return; + } } PyObject *formatted = _PyXI_excinfo_format(info); @@ -1436,13 +1632,17 @@ error: } -int -_PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc) +_PyXI_excinfo * +_PyXI_NewExcInfo(PyObject *exc) { assert(!PyErr_Occurred()); if (exc == NULL || exc == Py_None) { PyErr_SetString(PyExc_ValueError, "missing exc"); - return -1; + return NULL; + } + _PyXI_excinfo *info = PyMem_RawCalloc(1, sizeof(_PyXI_excinfo)); + if (info == NULL) { + return NULL; } const char *failure; if (PyExceptionInstance_Check(exc) || PyExceptionClass_Check(exc)) { @@ -1452,10 +1652,18 @@ _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc) failure = _PyXI_excinfo_InitFromObject(info, exc); } if (failure != NULL) { - PyErr_SetString(PyExc_Exception, failure); - return -1; + PyMem_RawFree(info); + set_exc_with_cause(PyExc_Exception, failure); + return NULL; } - return 0; + return info; +} + +void +_PyXI_FreeExcInfo(_PyXI_excinfo *info) +{ + _PyXI_excinfo_clear(info); + PyMem_RawFree(info); } PyObject * @@ -1470,12 +1678,6 @@ _PyXI_ExcInfoAsObject(_PyXI_excinfo *info) return _PyXI_excinfo_AsObject(info); } -void -_PyXI_ClearExcInfo(_PyXI_excinfo *info) -{ - _PyXI_excinfo_Clear(info); -} - /***************************/ /* short-term data sharing */ @@ -1489,14 +1691,9 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) PyThreadState *tstate = _PyThreadState_GET(); assert(!PyErr_Occurred()); + assert(code != _PyXI_ERR_NO_ERROR); + assert(code != _PyXI_ERR_UNCAUGHT_EXCEPTION); switch (code) { - case _PyXI_ERR_NO_ERROR: _Py_FALLTHROUGH; - case _PyXI_ERR_UNCAUGHT_EXCEPTION: - // There is nothing to apply. 
-#ifdef Py_DEBUG - Py_UNREACHABLE(); -#endif - return 0; case _PyXI_ERR_OTHER: // XXX msg? PyErr_SetNone(PyExc_InterpreterError); @@ -1516,12 +1713,20 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) PyErr_SetString(PyExc_InterpreterError, "failed to apply namespace to __main__"); break; + case _PyXI_ERR_PRESERVE_FAILURE: + PyErr_SetString(PyExc_InterpreterError, + "failed to preserve objects across session"); + break; + case _PyXI_ERR_EXC_PROPAGATION_FAILURE: + PyErr_SetString(PyExc_InterpreterError, + "failed to transfer exception between interpreters"); + break; case _PyXI_ERR_NOT_SHAREABLE: _set_xid_lookup_failure(tstate, NULL, NULL, NULL); break; default: #ifdef Py_DEBUG - Py_UNREACHABLE(); + Py_FatalError("unsupported error code"); #else PyErr_Format(PyExc_RuntimeError, "unsupported error code %d", code); #endif @@ -1530,70 +1735,267 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) return -1; } +/* basic failure info */ + +struct xi_failure { + // The kind of error to propagate. + _PyXI_errcode code; + // The propagated message. + const char *msg; + int msg_owned; +}; // _PyXI_failure + +#define XI_FAILURE_INIT (_PyXI_failure){ .code = _PyXI_ERR_NO_ERROR } + +static void +clear_xi_failure(_PyXI_failure *failure) +{ + if (failure->msg != NULL && failure->msg_owned) { + PyMem_RawFree((void*)failure->msg); + } + *failure = XI_FAILURE_INIT; +} + +static void +copy_xi_failure(_PyXI_failure *dest, _PyXI_failure *src) +{ + *dest = *src; + dest->msg_owned = 0; +} + +_PyXI_failure * +_PyXI_NewFailure(void) +{ + _PyXI_failure *failure = PyMem_RawMalloc(sizeof(_PyXI_failure)); + if (failure == NULL) { + PyErr_NoMemory(); + return NULL; + } + *failure = XI_FAILURE_INIT; + return failure; +} + +void +_PyXI_FreeFailure(_PyXI_failure *failure) +{ + clear_xi_failure(failure); + PyMem_RawFree(failure); +} + +_PyXI_errcode +_PyXI_GetFailureCode(_PyXI_failure *failure) +{ + if (failure == NULL) { + return _PyXI_ERR_NO_ERROR; + } + return failure->code; +} + +void +_PyXI_InitFailureUTF8(_PyXI_failure *failure, + _PyXI_errcode code, const char *msg) +{ + *failure = (_PyXI_failure){ + .code = code, + .msg = msg, + .msg_owned = 0, + }; +} + +int +_PyXI_InitFailure(_PyXI_failure *failure, _PyXI_errcode code, PyObject *obj) +{ + PyObject *msgobj = PyObject_Str(obj); + if (msgobj == NULL) { + return -1; + } + // This will leak if not paired with clear_xi_failure(). + // That happens automatically in _capture_current_exception(). + const char *msg = _copy_string_obj_raw(msgobj, NULL); + Py_DECREF(msgobj); + if (PyErr_Occurred()) { + return -1; + } + *failure = (_PyXI_failure){ + .code = code, + .msg = msg, + .msg_owned = 1, + }; + return 0; +} + /* shared exceptions */ -static const char * -_PyXI_InitError(_PyXI_error *error, PyObject *excobj, _PyXI_errcode code) +typedef struct { + // The originating interpreter. + PyInterpreterState *interp; + // The error to propagate, if different from the uncaught exception. + _PyXI_failure *override; + _PyXI_failure _override; + // The exception information to propagate, if applicable. + // This is populated only for some error codes, + // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. 
+ _PyXI_excinfo uncaught; +} _PyXI_error; + +static void +xi_error_clear(_PyXI_error *err) { - if (error->interp == NULL) { - error->interp = PyInterpreterState_Get(); + err->interp = NULL; + if (err->override != NULL) { + clear_xi_failure(err->override); } + _PyXI_excinfo_clear(&err->uncaught); +} - const char *failure = NULL; - if (code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // There is an unhandled exception we need to propagate. - failure = _PyXI_excinfo_InitFromException(&error->uncaught, excobj); - if (failure != NULL) { - // We failed to initialize error->uncaught. - // XXX Print the excobj/traceback? Emit a warning? - // XXX Print the current exception/traceback? - if (PyErr_ExceptionMatches(PyExc_MemoryError)) { - error->code = _PyXI_ERR_NO_MEMORY; - } - else { - error->code = _PyXI_ERR_OTHER; - } - PyErr_Clear(); +static int +xi_error_is_set(_PyXI_error *error) +{ + if (error->override != NULL) { + assert(error->override->code != _PyXI_ERR_NO_ERROR); + assert(error->override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION + || excinfo_is_set(&error->uncaught)); + return 1; + } + return excinfo_is_set(&error->uncaught); +} + +static int +xi_error_has_override(_PyXI_error *err) +{ + if (err->override == NULL) { + return 0; + } + return (err->override->code != _PyXI_ERR_NO_ERROR + && err->override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); +} + +static PyObject * +xi_error_resolve_current_exc(PyThreadState *tstate, + _PyXI_failure *override) +{ + assert(override == NULL || override->code != _PyXI_ERR_NO_ERROR); + + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (exc == NULL) { + assert(override == NULL + || override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); + } + else if (override == NULL) { + // This is equivalent to _PyXI_ERR_UNCAUGHT_EXCEPTION. + } + else if (override->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + // We want to actually capture the current exception. + } + else if (exc != NULL) { + // It might make sense to do similarly for other codes. + if (override->code == _PyXI_ERR_ALREADY_RUNNING) { + // We don't need the exception info. + Py_CLEAR(exc); + } + // ...else we want to actually capture the current exception. + } + return exc; +} + +static void +xi_error_set_override(PyThreadState *tstate, _PyXI_error *err, + _PyXI_failure *override) +{ + assert(err->override == NULL); + assert(override != NULL); + assert(override->code != _PyXI_ERR_NO_ERROR); + // Use xi_error_set_exc() instead of setting _PyXI_ERR_UNCAUGHT_EXCEPTION.. + assert(override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); + err->override = &err->_override; + // The caller still owns override->msg. + copy_xi_failure(&err->_override, override); + err->interp = tstate->interp; +} + +static void +xi_error_set_override_code(PyThreadState *tstate, _PyXI_error *err, + _PyXI_errcode code) +{ + _PyXI_failure override = XI_FAILURE_INIT; + override.code = code; + xi_error_set_override(tstate, err, &override); +} + +static const char * +xi_error_set_exc(PyThreadState *tstate, _PyXI_error *err, PyObject *exc) +{ + assert(!_PyErr_Occurred(tstate)); + assert(!xi_error_is_set(err)); + assert(err->override == NULL); + assert(err->interp == NULL); + assert(exc != NULL); + const char *failure = + _PyXI_excinfo_InitFromException(&err->uncaught, exc); + if (failure != NULL) { + // We failed to initialize err->uncaught. + // XXX Print the excobj/traceback? Emit a warning? + // XXX Print the current exception/traceback? 
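_PyXI_excinfo exists because exception objects cannot cross interpreters directly, so the type name and message are flattened into plain C data first. A much-simplified sketch of that capture step; the real code also records tracebacks and downgrades to an error code on MemoryError, and strdup() is used here only for brevity:

#include <Python.h>
#include <stdlib.h>
#include <string.h>

static int
snapshot_exc(PyObject *exc, char **type_name, char **msg)
{
    PyObject *name = PyType_GetName(Py_TYPE(exc));
    PyObject *text = PyObject_Str(exc);
    int rc = -1;
    if (name != NULL && text != NULL) {
        const char *n = PyUnicode_AsUTF8(name);
        const char *t = PyUnicode_AsUTF8(text);
        if (n != NULL && t != NULL) {
            *type_name = strdup(n);  // interpreter-independent copies
            *msg = strdup(t);
            rc = (*type_name != NULL && *msg != NULL) ? 0 : -1;
        }
    }
    Py_XDECREF(name);
    Py_XDECREF(text);
    return rc;
}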
+ if (_PyErr_ExceptionMatches(tstate, PyExc_MemoryError)) { + xi_error_set_override_code(tstate, err, _PyXI_ERR_NO_MEMORY); } else { - error->code = code; + xi_error_set_override_code(tstate, err, _PyXI_ERR_OTHER); } - assert(error->code != _PyXI_ERR_NO_ERROR); - } - else { - // There is an error code we need to propagate. - assert(excobj == NULL); - assert(code != _PyXI_ERR_NO_ERROR); - error->code = code; - _PyXI_excinfo_Clear(&error->uncaught); + PyErr_Clear(); } return failure; } -PyObject * -_PyXI_ApplyError(_PyXI_error *error) +static PyObject * +_PyXI_ApplyError(_PyXI_error *error, const char *failure) { PyThreadState *tstate = PyThreadState_Get(); - if (error->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // Raise an exception that proxies the propagated exception. + + if (failure != NULL) { + xi_error_clear(error); + return NULL; + } + + _PyXI_errcode code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + if (error->override != NULL) { + code = error->override->code; + assert(code != _PyXI_ERR_NO_ERROR); + } + + if (code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + // We will raise an exception that proxies the propagated exception. return _PyXI_excinfo_AsObject(&error->uncaught); } - else if (error->code == _PyXI_ERR_NOT_SHAREABLE) { + else if (code == _PyXI_ERR_NOT_SHAREABLE) { // Propagate the exception directly. assert(!_PyErr_Occurred(tstate)); - _set_xid_lookup_failure(tstate, NULL, error->uncaught.msg, NULL); + PyObject *cause = NULL; + if (excinfo_is_set(&error->uncaught)) { + // Maybe instead set a PyExc_ExceptionSnapshot as __cause__? + // That type doesn't exist currently + // but would look like interpreters.ExecutionFailed. + _PyXI_excinfo_Apply(&error->uncaught, PyExc_Exception); + cause = _PyErr_GetRaisedException(tstate); + } + const char *msg = error->override != NULL + ? error->override->msg + : error->uncaught.msg; + _set_xid_lookup_failure(tstate, NULL, msg, cause); + Py_XDECREF(cause); } else { // Raise an exception corresponding to the code. - assert(error->code != _PyXI_ERR_NO_ERROR); - (void)_PyXI_ApplyErrorCode(error->code, error->interp); - if (error->uncaught.type.name != NULL || error->uncaught.msg != NULL) { + (void)_PyXI_ApplyErrorCode(code, error->interp); + assert(error->override == NULL || error->override->msg == NULL); + if (excinfo_is_set(&error->uncaught)) { // __context__ will be set to a proxy of the propagated exception. - PyObject *exc = PyErr_GetRaisedException(); + // (or use PyExc_ExceptionSnapshot like _PyXI_ERR_NOT_SHAREABLE?) + PyObject *exc = _PyErr_GetRaisedException(tstate); _PyXI_excinfo_Apply(&error->uncaught, PyExc_InterpreterError); - PyObject *exc2 = PyErr_GetRaisedException(); + PyObject *exc2 = _PyErr_GetRaisedException(tstate); PyException_SetContext(exc, exc2); - PyErr_SetRaisedException(exc); + _PyErr_SetRaisedException(tstate, exc); } } assert(PyErr_Occurred()); @@ -1624,6 +2026,7 @@ typedef struct _sharednsitem { // in a different interpreter to release the XI data. 
} _PyXI_namespace_item; +#ifndef NDEBUG static int _sharednsitem_is_initialized(_PyXI_namespace_item *item) { @@ -1632,6 +2035,7 @@ _sharednsitem_is_initialized(_PyXI_namespace_item *item) } return 0; } +#endif static int _sharednsitem_init(_PyXI_namespace_item *item, PyObject *key) @@ -1659,7 +2063,8 @@ _sharednsitem_has_value(_PyXI_namespace_item *item, int64_t *p_interpid) } static int -_sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) +_sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value, + xidata_fallback_t fallback) { assert(_sharednsitem_is_initialized(item)); assert(item->xidata == NULL); @@ -1668,7 +2073,7 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) return -1; } PyThreadState *tstate = PyThreadState_Get(); - if (_PyObject_GetXIData(tstate, value, item->xidata) != 0) { + if (_PyObject_GetXIData(tstate, value, fallback, item->xidata) < 0) { PyMem_RawFree(item->xidata); item->xidata = NULL; // The caller may want to propagate PyExc_NotShareableError @@ -1700,7 +2105,8 @@ _sharednsitem_clear(_PyXI_namespace_item *item) } static int -_sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns) +_sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns, + xidata_fallback_t fallback) { assert(item->name != NULL); assert(item->xidata == NULL); @@ -1712,7 +2118,7 @@ _sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns) // When applied, this item will be set to the default (or fail). return 0; } - if (_sharednsitem_set_value(item, value) < 0) { + if (_sharednsitem_set_value(item, value, fallback) < 0) { return -1; } return 0; @@ -1742,156 +2148,212 @@ _sharednsitem_apply(_PyXI_namespace_item *item, PyObject *ns, PyObject *dflt) return res; } -struct _sharedns { - Py_ssize_t len; - _PyXI_namespace_item *items; -}; -static _PyXI_namespace * -_sharedns_new(void) -{ - _PyXI_namespace *ns = PyMem_RawCalloc(sizeof(_PyXI_namespace), 1); - if (ns == NULL) { - PyErr_NoMemory(); - return NULL; - } - *ns = (_PyXI_namespace){ 0 }; - return ns; -} +typedef struct { + Py_ssize_t maxitems; + Py_ssize_t numnames; + Py_ssize_t numvalues; + _PyXI_namespace_item items[1]; +} _PyXI_namespace; +#ifndef NDEBUG static int -_sharedns_is_initialized(_PyXI_namespace *ns) +_sharedns_check_counts(_PyXI_namespace *ns) { - if (ns->len == 0) { - assert(ns->items == NULL); + if (ns->maxitems <= 0) { + return 0; + } + if (ns->numnames < 0) { + return 0; + } + if (ns->numnames > ns->maxitems) { + return 0; + } + if (ns->numvalues < 0) { + return 0; + } + if (ns->numvalues > ns->numnames) { return 0; } - - assert(ns->len > 0); - assert(ns->items != NULL); - assert(_sharednsitem_is_initialized(&ns->items[0])); - assert(ns->len == 1 - || _sharednsitem_is_initialized(&ns->items[ns->len - 1])); return 1; } -#define HAS_COMPLETE_DATA 1 -#define HAS_PARTIAL_DATA 2 - static int -_sharedns_has_xidata(_PyXI_namespace *ns, int64_t *p_interpid) +_sharedns_check_consistency(_PyXI_namespace *ns) { - // We expect _PyXI_namespace to always be initialized. 
- assert(_sharedns_is_initialized(ns)); - int res = 0; - _PyXI_namespace_item *item0 = &ns->items[0]; - if (!_sharednsitem_is_initialized(item0)) { + if (!_sharedns_check_counts(ns)) { return 0; } - int64_t interpid0 = -1; - if (!_sharednsitem_has_value(item0, &interpid0)) { - return 0; + + Py_ssize_t i = 0; + _PyXI_namespace_item *item; + if (ns->numvalues > 0) { + item = &ns->items[0]; + if (!_sharednsitem_is_initialized(item)) { + return 0; + } + int64_t interpid0 = -1; + if (!_sharednsitem_has_value(item, &interpid0)) { + return 0; + } + i += 1; + for (; i < ns->numvalues; i++) { + item = &ns->items[i]; + if (!_sharednsitem_is_initialized(item)) { + return 0; + } + int64_t interpid = -1; + if (!_sharednsitem_has_value(item, &interpid)) { + return 0; + } + if (interpid != interpid0) { + return 0; + } + } } - if (ns->len > 1) { - // At this point we know it is has at least partial data. - _PyXI_namespace_item *itemN = &ns->items[ns->len-1]; - if (!_sharednsitem_is_initialized(itemN)) { - res = HAS_PARTIAL_DATA; - goto finally; + for (; i < ns->numnames; i++) { + item = &ns->items[i]; + if (!_sharednsitem_is_initialized(item)) { + return 0; } - int64_t interpidN = -1; - if (!_sharednsitem_has_value(itemN, &interpidN)) { - res = HAS_PARTIAL_DATA; - goto finally; + if (_sharednsitem_has_value(item, NULL)) { + return 0; } - assert(interpidN == interpid0); } - res = HAS_COMPLETE_DATA; - *p_interpid = interpid0; - -finally: - return res; + for (; i < ns->maxitems; i++) { + item = &ns->items[i]; + if (_sharednsitem_is_initialized(item)) { + return 0; + } + if (_sharednsitem_has_value(item, NULL)) { + return 0; + } + } + return 1; } +#endif -static void -_sharedns_clear(_PyXI_namespace *ns) +static _PyXI_namespace * +_sharedns_alloc(Py_ssize_t maxitems) { - if (!_sharedns_is_initialized(ns)) { - return; + if (maxitems < 0) { + if (!PyErr_Occurred()) { + PyErr_BadInternalCall(); + } + return NULL; + } + else if (maxitems == 0) { + PyErr_SetString(PyExc_ValueError, "empty namespaces not allowed"); + return NULL; + } + + // Check for overflow. + size_t fixedsize = sizeof(_PyXI_namespace) - sizeof(_PyXI_namespace_item); + if ((size_t)maxitems > + ((size_t)PY_SSIZE_T_MAX - fixedsize) / sizeof(_PyXI_namespace_item)) + { + PyErr_NoMemory(); + return NULL; } - // If the cross-interpreter data were allocated as part of - // _PyXI_namespace_item (instead of dynamically), this is where - // we would need verify that we are clearing the items in the - // correct interpreter, to avoid a race with releasing the XI data - // via a pending call. See _sharedns_has_xidata(). - for (Py_ssize_t i=0; i < ns->len; i++) { - _sharednsitem_clear(&ns->items[i]); + // Allocate the value, including items. + size_t size = fixedsize + sizeof(_PyXI_namespace_item) * maxitems; + + _PyXI_namespace *ns = PyMem_RawCalloc(size, 1); + if (ns == NULL) { + PyErr_NoMemory(); + return NULL; } - PyMem_RawFree(ns->items); - ns->items = NULL; - ns->len = 0; + ns->maxitems = maxitems; + assert(_sharedns_check_consistency(ns)); + return ns; } static void _sharedns_free(_PyXI_namespace *ns) { - _sharedns_clear(ns); + // If we weren't always dynamically allocating the cross-interpreter + // data in each item then we would need to use a pending call + // to call _sharedns_free(), to avoid the race between freeing + // the shared namespace and releasing the XI data. + assert(_sharedns_check_counts(ns)); + Py_ssize_t i = 0; + _PyXI_namespace_item *item; + if (ns->numvalues > 0) { + // One or more items may have interpreter-specific data. 
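_sharedns_alloc() above sizes the header and the trailing item array as one allocation, checking that the multiplication cannot overflow before it happens. The same pattern shown generically:

#include <Python.h>

typedef struct {
    Py_ssize_t len;
    int items[1];  // trailing array, as in _PyXI_namespace
} intvec;

static intvec *
intvec_alloc(Py_ssize_t n)
{
    size_t fixed = sizeof(intvec) - sizeof(int);
    if (n <= 0) {
        PyErr_SetString(PyExc_ValueError, "need a positive length");
        return NULL;
    }
    if ((size_t)n > ((size_t)PY_SSIZE_T_MAX - fixed) / sizeof(int)) {
        PyErr_NoMemory();
        return NULL;
    }
    intvec *v = PyMem_RawCalloc(fixed + (size_t)n * sizeof(int), 1);
    if (v == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    v->len = n;
    return v;
}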
+#ifndef NDEBUG + int64_t interpid = PyInterpreterState_GetID(PyInterpreterState_Get()); + int64_t interpid_i; +#endif + for (; i < ns->numvalues; i++) { + item = &ns->items[i]; + assert(_sharednsitem_is_initialized(item)); + // While we do want to ensure consistency across items, + // technically they don't need to match the current + // interpreter. However, we keep the constraint for + // simplicity, by giving _PyXI_FreeNamespace() the exclusive + // responsibility of dealing with the owning interpreter. + assert(_sharednsitem_has_value(item, &interpid_i)); + assert(interpid_i == interpid); + _sharednsitem_clear(item); + } + } + for (; i < ns->numnames; i++) { + item = &ns->items[i]; + assert(_sharednsitem_is_initialized(item)); + assert(!_sharednsitem_has_value(item, NULL)); + _sharednsitem_clear(item); + } +#ifndef NDEBUG + for (; i < ns->maxitems; i++) { + item = &ns->items[i]; + assert(!_sharednsitem_is_initialized(item)); + assert(!_sharednsitem_has_value(item, NULL)); + } +#endif + PyMem_RawFree(ns); } -static int -_sharedns_init(_PyXI_namespace *ns, PyObject *names) +static _PyXI_namespace * +_create_sharedns(PyObject *names) { - assert(!_sharedns_is_initialized(ns)); assert(names != NULL); - Py_ssize_t len = PyDict_CheckExact(names) + Py_ssize_t numnames = PyDict_CheckExact(names) ? PyDict_Size(names) : PySequence_Size(names); - if (len < 0) { - return -1; - } - if (len == 0) { - PyErr_SetString(PyExc_ValueError, "empty namespaces not allowed"); - return -1; - } - assert(len > 0); - // Allocate the items. - _PyXI_namespace_item *items = - PyMem_RawCalloc(sizeof(struct _sharednsitem), len); - if (items == NULL) { - PyErr_NoMemory(); - return -1; + _PyXI_namespace *ns = _sharedns_alloc(numnames); + if (ns == NULL) { + return NULL; } + _PyXI_namespace_item *items = ns->items; // Fill in the names. - Py_ssize_t i = -1; if (PyDict_CheckExact(names)) { + Py_ssize_t i = 0; Py_ssize_t pos = 0; - for (i=0; i < len; i++) { - PyObject *key; - if (!PyDict_Next(names, &pos, &key, NULL)) { - // This should not be possible. 
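The comment at the top of _sharedns_free() alludes to pending calls, and _destroy_sharedns() below does route frees through _Py_CallInInterpreter() when another interpreter owns the values. The public-API equivalent of that deferral, limited to the main interpreter:

#include <Python.h>

static int
free_later(void *data)
{
    PyMem_RawFree(data);
    return 0;  // pending callbacks report success with 0
}

/* Schedule free_later(data) to run in the main interpreter between
 * bytecodes; callable from any thread. Returns -1 if the queue is full. */
static int
schedule_free(void *data)
{
    return Py_AddPendingCall(free_later, data);
}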
- assert(0); - goto error; - } - if (_sharednsitem_init(&items[i], key) < 0) { + PyObject *name; + while(PyDict_Next(names, &pos, &name, NULL)) { + if (_sharednsitem_init(&items[i], name) < 0) { goto error; } + ns->numnames += 1; + i += 1; } } else if (PySequence_Check(names)) { - for (i=0; i < len; i++) { - PyObject *key = PySequence_GetItem(names, i); - if (key == NULL) { + for (Py_ssize_t i = 0; i < numnames; i++) { + PyObject *name = PySequence_GetItem(names, i); + if (name == NULL) { goto error; } - int res = _sharednsitem_init(&items[i], key); - Py_DECREF(key); + int res = _sharednsitem_init(&items[i], name); + Py_DECREF(name); if (res < 0) { goto error; } + ns->numnames += 1; } } else { @@ -1899,140 +2361,84 @@ _sharedns_init(_PyXI_namespace *ns, PyObject *names) "non-sequence namespace not supported"); goto error; } - - ns->items = items; - ns->len = len; - assert(_sharedns_is_initialized(ns)); - return 0; + assert(ns->numnames == ns->maxitems); + return ns; error: - for (Py_ssize_t j=0; j < i; j++) { - _sharednsitem_clear(&items[j]); - } - PyMem_RawFree(items); - assert(!_sharedns_is_initialized(ns)); - return -1; + _sharedns_free(ns); + return NULL; } -void -_PyXI_FreeNamespace(_PyXI_namespace *ns) -{ - if (!_sharedns_is_initialized(ns)) { - return; - } +static void _propagate_not_shareable_error(PyThreadState *, + _PyXI_failure *); - int64_t interpid = -1; - if (!_sharedns_has_xidata(ns, &interpid)) { - _sharedns_free(ns); - return; - } - - if (interpid == PyInterpreterState_GetID(PyInterpreterState_Get())) { - _sharedns_free(ns); - } - else { - // If we weren't always dynamically allocating the cross-interpreter - // data in each item then we would need to using a pending call - // to call _sharedns_free(), to avoid the race between freeing - // the shared namespace and releasing the XI data. - _sharedns_free(ns); - } -} - -_PyXI_namespace * -_PyXI_NamespaceFromNames(PyObject *names) +static int +_fill_sharedns(_PyXI_namespace *ns, PyObject *nsobj, + xidata_fallback_t fallback, _PyXI_failure *p_err) { - if (names == NULL || names == Py_None) { - return NULL; - } - - _PyXI_namespace *ns = _sharedns_new(); - if (ns == NULL) { - return NULL; - } - - if (_sharedns_init(ns, names) < 0) { - PyMem_RawFree(ns); - if (PySequence_Size(names) == 0) { - PyErr_Clear(); - } - return NULL; - } - - return ns; -} - -#ifndef NDEBUG -static int _session_is_active(_PyXI_session *); -#endif -static void _propagate_not_shareable_error(_PyXI_session *); - -int -_PyXI_FillNamespaceFromDict(_PyXI_namespace *ns, PyObject *nsobj, - _PyXI_session *session) -{ - // session must be entered already, if provided. - assert(session == NULL || _session_is_active(session)); - assert(_sharedns_is_initialized(ns)); - for (Py_ssize_t i=0; i < ns->len; i++) { - _PyXI_namespace_item *item = &ns->items[i]; - if (_sharednsitem_copy_from_ns(item, nsobj) < 0) { - _propagate_not_shareable_error(session); + // All items are expected to be shareable. + assert(_sharedns_check_counts(ns)); + assert(ns->numnames == ns->maxitems); + assert(ns->numvalues == 0); + PyThreadState *tstate = PyThreadState_Get(); + for (Py_ssize_t i=0; i < ns->maxitems; i++) { + if (_sharednsitem_copy_from_ns(&ns->items[i], nsobj, fallback) < 0) { + if (p_err != NULL) { + _propagate_not_shareable_error(tstate, p_err); + } // Clear out the ones we set so far. for (Py_ssize_t j=0; j < i; j++) { _sharednsitem_clear_value(&ns->items[j]); + ns->numvalues -= 1; } return -1; } + ns->numvalues += 1; } return 0; } -// All items are expected to be shareable. 
-static _PyXI_namespace * -_PyXI_NamespaceFromDict(PyObject *nsobj, _PyXI_session *session) +static int +_sharedns_free_pending(void *data) { - // session must be entered already, if provided. - assert(session == NULL || _session_is_active(session)); - if (nsobj == NULL || nsobj == Py_None) { - return NULL; - } - if (!PyDict_CheckExact(nsobj)) { - PyErr_SetString(PyExc_TypeError, "expected a dict"); - return NULL; - } + _sharedns_free((_PyXI_namespace *)data); + return 0; +} - _PyXI_namespace *ns = _sharedns_new(); - if (ns == NULL) { - return NULL; +static void +_destroy_sharedns(_PyXI_namespace *ns) +{ + assert(_sharedns_check_counts(ns)); + assert(ns->numnames == ns->maxitems); + if (ns->numvalues == 0) { + _sharedns_free(ns); + return; } - if (_sharedns_init(ns, nsobj) < 0) { - if (PyDict_Size(nsobj) == 0) { - PyMem_RawFree(ns); - PyErr_Clear(); - return NULL; - } - goto error; + int64_t interpid0; + if (!_sharednsitem_has_value(&ns->items[0], &interpid0)) { + // This shouldn't have been possible. + // We can deal with it in _sharedns_free(). + _sharedns_free(ns); + return; } - - if (_PyXI_FillNamespaceFromDict(ns, nsobj, session) < 0) { - goto error; + PyInterpreterState *interp = _PyInterpreterState_LookUpID(interpid0); + if (interp == PyInterpreterState_Get()) { + _sharedns_free(ns); + return; } - return ns; - -error: - assert(PyErr_Occurred() - || (session != NULL && session->error_override != NULL)); - _sharedns_free(ns); - return NULL; + // One or more items may have interpreter-specific data. + // Currently the xidata for each value is dynamically allocated, + // so technically we don't need to worry about that. + // However, explicitly adding a pending call here is simpler. + (void)_Py_CallInInterpreter(interp, _sharedns_free_pending, ns); } -int -_PyXI_ApplyNamespace(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) +static int +_apply_sharedns(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) { - for (Py_ssize_t i=0; i < ns->len; i++) { + for (Py_ssize_t i=0; i < ns->maxitems; i++) { if (_sharednsitem_apply(&ns->items[i], nsobj, dflt) != 0) { return -1; } @@ -2041,9 +2447,69 @@ _PyXI_ApplyNamespace(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) } -/**********************/ -/* high-level helpers */ -/**********************/ +/*********************************/ +/* switched-interpreter sessions */ +/*********************************/ + +struct xi_session { +#define SESSION_UNUSED 0 +#define SESSION_ACTIVE 1 + int status; + int switched; + + // Once a session has been entered, this is the tstate that was + // current before the session. If it is different from cur_tstate + // then we must have switched interpreters. Either way, this will + // be the current tstate once we exit the session. + PyThreadState *prev_tstate; + // Once a session has been entered, this is the current tstate. + // It must be current when the session exits. + PyThreadState *init_tstate; + // This is true if init_tstate needs cleanup during exit. + int own_init_tstate; + + // This is true if, while entering the session, init_thread took + // "ownership" of the interpreter's __main__ module. This means + // it is the only thread that is allowed to run code there. + // (Caveat: for now, users may still run exec() against the + // __main__ module's dict, though that isn't advisable.) + int running; + // This is a cached reference to the __dict__ of the entered + // interpreter's __main__ module. It is looked up when at the + // beginning of the session as a convenience. 
+ PyObject *main_ns; + + // This is a dict of objects that will be available (via sharing) + // once the session exits. Do not access this directly; use + // _PyXI_Preserve() and _PyXI_GetPreserved() instead; + PyObject *_preserved; +}; + +_PyXI_session * +_PyXI_NewSession(void) +{ + _PyXI_session *session = PyMem_RawCalloc(1, sizeof(_PyXI_session)); + if (session == NULL) { + PyErr_NoMemory(); + return NULL; + } + return session; +} + +void +_PyXI_FreeSession(_PyXI_session *session) +{ + assert(session->status == SESSION_UNUSED); + PyMem_RawFree(session); +} + + +static inline int +_session_is_active(_PyXI_session *session) +{ + return session->status == SESSION_ACTIVE; +} + /* enter/exit a cross-interpreter session */ @@ -2051,29 +2517,33 @@ static void _enter_session(_PyXI_session *session, PyInterpreterState *interp) { // Set here and cleared in _exit_session(). + assert(session->status == SESSION_UNUSED); assert(!session->own_init_tstate); assert(session->init_tstate == NULL); assert(session->prev_tstate == NULL); // Set elsewhere and cleared in _exit_session(). assert(!session->running); assert(session->main_ns == NULL); - // Set elsewhere and cleared in _capture_current_exception(). - assert(session->error_override == NULL); - // Set elsewhere and cleared in _PyXI_ApplyCapturedException(). - assert(session->error == NULL); // Switch to interpreter. PyThreadState *tstate = PyThreadState_Get(); PyThreadState *prev = tstate; - if (interp != tstate->interp) { + int same_interp = (interp == tstate->interp); + if (!same_interp) { tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_EXEC); // XXX Possible GILState issues? - session->prev_tstate = PyThreadState_Swap(tstate); - assert(session->prev_tstate == prev); - session->own_init_tstate = 1; + PyThreadState *swapped = PyThreadState_Swap(tstate); + assert(swapped == prev); + (void)swapped; } - session->init_tstate = tstate; - session->prev_tstate = prev; + + *session = (_PyXI_session){ + .status = SESSION_ACTIVE, + .switched = !same_interp, + .init_tstate = tstate, + .prev_tstate = prev, + .own_init_tstate = !same_interp, + }; } static void @@ -2082,16 +2552,16 @@ _exit_session(_PyXI_session *session) PyThreadState *tstate = session->init_tstate; assert(tstate != NULL); assert(PyThreadState_Get() == tstate); + assert(!_PyErr_Occurred(tstate)); // Release any of the entered interpreters resources. - if (session->main_ns != NULL) { - Py_CLEAR(session->main_ns); - } + Py_CLEAR(session->main_ns); + Py_CLEAR(session->_preserved); // Ensure this thread no longer owns __main__. 
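/* A rough sketch of the session lifecycle defined above, as a
   hypothetical caller might drive it ('interp' is assumed to be a
   valid interpreter; the work done inside the session is elided): */
static int
_run_in_interp_sketch(PyInterpreterState *interp)
{
    _PyXI_session *session = _PyXI_NewSession();
    if (session == NULL) {
        return -1;
    }
    _PyXI_session_result result = {0};
    if (_PyXI_Enter(session, interp, NULL, &result) < 0) {
        _PyXI_FreeSession(session);
        _PyXI_ClearResult(&result);
        return -1;
    }
    // ...run code in the entered interpreter...
    int res = _PyXI_Exit(session, NULL, &result);
    _PyXI_FreeSession(session);
    _PyXI_ClearResult(&result);
    return res;
}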
if (session->running) { _PyInterpreterState_SetNotRunningMain(tstate->interp); - assert(!PyErr_Occurred()); + assert(!_PyErr_Occurred(tstate)); session->running = 0; } @@ -2107,25 +2577,15 @@ _exit_session(_PyXI_session *session) else { assert(!session->own_init_tstate); } - session->prev_tstate = NULL; - session->init_tstate = NULL; -} -#ifndef NDEBUG -static int -_session_is_active(_PyXI_session *session) -{ - return (session->init_tstate != NULL); + *session = (_PyXI_session){0}; } -#endif static void -_propagate_not_shareable_error(_PyXI_session *session) +_propagate_not_shareable_error(PyThreadState *tstate, + _PyXI_failure *override) { - if (session == NULL) { - return; - } - PyThreadState *tstate = PyThreadState_Get(); + assert(override != NULL); PyObject *exctype = get_notshareableerror_type(tstate); if (exctype == NULL) { PyErr_FormatUnraisable( @@ -2134,166 +2594,463 @@ _propagate_not_shareable_error(_PyXI_session *session) } if (PyErr_ExceptionMatches(exctype)) { // We want to propagate the exception directly. - session->_error_override = _PyXI_ERR_NOT_SHAREABLE; - session->error_override = &session->_error_override; + *override = (_PyXI_failure){ + .code = _PyXI_ERR_NOT_SHAREABLE, + }; } } -static void -_capture_current_exception(_PyXI_session *session) + +static int _ensure_main_ns(_PyXI_session *, _PyXI_failure *); +static const char * capture_session_error(_PyXI_session *, _PyXI_error *, + _PyXI_failure *); + +int +_PyXI_Enter(_PyXI_session *session, + PyInterpreterState *interp, PyObject *nsupdates, + _PyXI_session_result *result) { - assert(session->error == NULL); - if (!PyErr_Occurred()) { - assert(session->error_override == NULL); - return; +#ifndef NDEBUG + PyThreadState *tstate = _PyThreadState_GET(); // Only used for asserts +#endif + + // Convert the attrs for cross-interpreter use. + _PyXI_namespace *sharedns = NULL; + if (nsupdates != NULL) { + assert(PyDict_Check(nsupdates)); + Py_ssize_t len = PyDict_Size(nsupdates); + if (len < 0) { + if (result != NULL) { + result->errcode = _PyXI_ERR_APPLY_NS_FAILURE; + } + return -1; + } + if (len > 0) { + sharedns = _create_sharedns(nsupdates); + if (sharedns == NULL) { + if (result != NULL) { + result->errcode = _PyXI_ERR_APPLY_NS_FAILURE; + } + return -1; + } + // For now we limit it to shareable objects. + xidata_fallback_t fallback = _PyXIDATA_XIDATA_ONLY; + _PyXI_failure _err = XI_FAILURE_INIT; + if (_fill_sharedns(sharedns, nsupdates, fallback, &_err) < 0) { + assert(_PyErr_Occurred(tstate)); + if (_err.code == _PyXI_ERR_NO_ERROR) { + _err.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } + _destroy_sharedns(sharedns); + if (result != NULL) { + assert(_err.msg == NULL); + result->errcode = _err.code; + } + return -1; + } + } } - // Handle the exception override. - _PyXI_errcode *override = session->error_override; - session->error_override = NULL; - _PyXI_errcode errcode = override != NULL - ? *override - : _PyXI_ERR_UNCAUGHT_EXCEPTION; + // Switch to the requested interpreter (if necessary). + _enter_session(session, interp); + _PyXI_failure override = XI_FAILURE_INIT; + override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; +#ifndef NDEBUG + tstate = _PyThreadState_GET(); +#endif - // Pop the exception object. - PyObject *excval = NULL; - if (errcode == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // We want to actually capture the current exception. - excval = PyErr_GetRaisedException(); + // Ensure this thread owns __main__. 
+ if (_PyInterpreterState_SetRunningMain(interp) < 0) { + // In the case where we didn't switch interpreters, it would + // be more efficient to leave the exception in place and return + // immediately. However, life is simpler if we don't. + override.code = _PyXI_ERR_ALREADY_RUNNING; + goto error; } - else if (errcode == _PyXI_ERR_ALREADY_RUNNING) { - // We don't need the exception info. - PyErr_Clear(); + session->running = 1; + + // Apply the cross-interpreter data. + if (sharedns != NULL) { + if (_ensure_main_ns(session, &override) < 0) { + goto error; + } + if (_apply_sharedns(sharedns, session->main_ns, NULL) < 0) { + override.code = _PyXI_ERR_APPLY_NS_FAILURE; + goto error; + } + _destroy_sharedns(sharedns); } - else { - // We could do a variety of things here, depending on errcode. - // However, for now we simply capture the exception and save - // the errcode. - excval = PyErr_GetRaisedException(); + + override.code = _PyXI_ERR_NO_ERROR; + assert(!_PyErr_Occurred(tstate)); + return 0; + +error: + // We want to propagate all exceptions here directly (best effort). + assert(override.code != _PyXI_ERR_NO_ERROR); + _PyXI_error err = {0}; + const char *failure = capture_session_error(session, &err, &override); + + // Exit the session. + _exit_session(session); +#ifndef NDEBUG + tstate = _PyThreadState_GET(); +#endif + + if (sharedns != NULL) { + _destroy_sharedns(sharedns); } - // Capture the exception. - _PyXI_error *err = &session->_error; - *err = (_PyXI_error){ - .interp = session->init_tstate->interp, - }; - const char *failure; - if (excval == NULL) { - failure = _PyXI_InitError(err, NULL, errcode); + // Apply the error from the other interpreter. + PyObject *excinfo = _PyXI_ApplyError(&err, failure); + xi_error_clear(&err); + if (excinfo != NULL) { + if (result != NULL) { + result->excinfo = excinfo; + } + else { +#ifdef Py_DEBUG + fprintf(stderr, "_PyXI_Enter(): uncaught exception discarded"); +#endif + Py_DECREF(excinfo); + } + } + assert(_PyErr_Occurred(tstate)); + + return -1; +} + +static int _pop_preserved(_PyXI_session *, _PyXI_namespace **, PyObject **, + _PyXI_failure *); +static int _finish_preserved(_PyXI_namespace *, PyObject **); + +int +_PyXI_Exit(_PyXI_session *session, _PyXI_failure *override, + _PyXI_session_result *result) +{ + PyThreadState *tstate = _PyThreadState_GET(); + int res = 0; + + // Capture the raised exception, if any. + _PyXI_error err = {0}; + const char *failure = NULL; + if (override != NULL && override->code == _PyXI_ERR_NO_ERROR) { + assert(override->msg == NULL); + override = NULL; + } + if (_PyErr_Occurred(tstate)) { + failure = capture_session_error(session, &err, override); } else { - failure = _PyXI_InitError(err, excval, _PyXI_ERR_UNCAUGHT_EXCEPTION); - Py_DECREF(excval); - if (failure == NULL && override != NULL) { - err->code = errcode; - } + assert(override == NULL); } - // Handle capture failure. - if (failure != NULL) { - // XXX Make this error message more generic. - fprintf(stderr, - "RunFailedError: script raised an uncaught exception (%s)", - failure); - err = NULL; + // Capture the preserved namespace. + _PyXI_namespace *preserved = NULL; + PyObject *preservedobj = NULL; + if (result != NULL) { + assert(!_PyErr_Occurred(tstate)); + _PyXI_failure _override = XI_FAILURE_INIT; + if (_pop_preserved( + session, &preserved, &preservedobj, &_override) < 0) + { + assert(preserved == NULL); + assert(preservedobj == NULL); + if (xi_error_is_set(&err)) { + // XXX Chain the exception (i.e. set __context__)? 
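/* Sketch: what a hypothetical caller finds in the _PyXI_session_result
   it passed to _PyXI_Enter()/_PyXI_Exit() once one of them fails: */
static void
_inspect_result_sketch(_PyXI_session_result *result)
{
    if (result->errcode != _PyXI_ERR_NO_ERROR) {
        // Identifies the failing step, e.g. _PyXI_ERR_NOT_SHAREABLE
        // or _PyXI_ERR_APPLY_NS_FAILURE.
    }
    if (result->excinfo != NULL) {
        // Snapshot of the exception raised in the other interpreter,
        // as produced by _PyXI_ApplyError().
    }
    _PyXI_ClearResult(result);
}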
+ PyErr_FormatUnraisable( + "Exception ignored while capturing preserved objects"); + clear_xi_failure(&_override); + } + else { + if (_override.code == _PyXI_ERR_NO_ERROR) { + _override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } + failure = capture_session_error(session, &err, &_override); + } + } } - // Finished! - assert(!PyErr_Occurred()); - session->error = err; -} + // Exit the session. + assert(!_PyErr_Occurred(tstate)); + _exit_session(session); + tstate = _PyThreadState_GET(); + + // Restore the preserved namespace. + assert(preserved == NULL || preservedobj == NULL); + if (_finish_preserved(preserved, &preservedobj) < 0) { + assert(preservedobj == NULL); + if (xi_error_is_set(&err)) { + // XXX Chain the exception (i.e. set __context__)? + PyErr_FormatUnraisable( + "Exception ignored while capturing preserved objects"); + } + else { + xi_error_set_override_code( + tstate, &err, _PyXI_ERR_PRESERVE_FAILURE); + _propagate_not_shareable_error(tstate, err.override); + } + } + if (result != NULL) { + result->preserved = preservedobj; + result->errcode = err.override != NULL + ? err.override->code + : _PyXI_ERR_NO_ERROR; + } -PyObject * -_PyXI_ApplyCapturedException(_PyXI_session *session) -{ - assert(!PyErr_Occurred()); - assert(session->error != NULL); - PyObject *res = _PyXI_ApplyError(session->error); - assert((res == NULL) != (PyErr_Occurred() == NULL)); - session->error = NULL; + // Apply the error from the other interpreter, if any. + if (xi_error_is_set(&err)) { + res = -1; + assert(!_PyErr_Occurred(tstate)); + PyObject *excinfo = _PyXI_ApplyError(&err, failure); + if (excinfo == NULL) { + assert(_PyErr_Occurred(tstate)); + if (result != NULL && !xi_error_has_override(&err)) { + _PyXI_ClearResult(result); + *result = (_PyXI_session_result){ + .errcode = _PyXI_ERR_EXC_PROPAGATION_FAILURE, + }; + } + } + else if (result != NULL) { + result->excinfo = excinfo; + } + else { +#ifdef Py_DEBUG + fprintf(stderr, "_PyXI_Exit(): uncaught exception discarded"); +#endif + Py_DECREF(excinfo); + } + xi_error_clear(&err); + } return res; } -int -_PyXI_HasCapturedException(_PyXI_session *session) -{ - return session->error != NULL; -} -int -_PyXI_Enter(_PyXI_session *session, - PyInterpreterState *interp, PyObject *nsupdates) +/* in an active cross-interpreter session */ + +static const char * +capture_session_error(_PyXI_session *session, _PyXI_error *err, + _PyXI_failure *override) { - // Convert the attrs for cross-interpreter use. - _PyXI_namespace *sharedns = NULL; - if (nsupdates != NULL) { - sharedns = _PyXI_NamespaceFromDict(nsupdates, NULL); - if (sharedns == NULL && PyErr_Occurred()) { - assert(session->error == NULL); - return -1; + assert(_session_is_active(session)); + assert(!xi_error_is_set(err)); + PyThreadState *tstate = session->init_tstate; + + // Normalize the exception override. + if (override != NULL) { + if (override->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + assert(override->msg == NULL); + override = NULL; + } + else { + assert(override->code != _PyXI_ERR_NO_ERROR); } } - // Switch to the requested interpreter (if necessary). - _enter_session(session, interp); - PyThreadState *session_tstate = session->init_tstate; - _PyXI_errcode errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + // Handle the exception, if any. + const char *failure = NULL; + PyObject *exc = xi_error_resolve_current_exc(tstate, override); + if (exc != NULL) { + // There is an unhandled exception we need to preserve. 
+ failure = xi_error_set_exc(tstate, err, exc); + Py_DECREF(exc); + if (_PyErr_Occurred(tstate)) { + PyErr_FormatUnraisable(failure); + } + } - // Ensure this thread owns __main__. - if (_PyInterpreterState_SetRunningMain(interp) < 0) { - // In the case where we didn't switch interpreters, it would - // be more efficient to leave the exception in place and return - // immediately. However, life is simpler if we don't. - errcode = _PyXI_ERR_ALREADY_RUNNING; - goto error; + // Handle the override. + if (override != NULL && failure == NULL) { + xi_error_set_override(tstate, err, override); } - session->running = 1; + // Finished! + assert(!_PyErr_Occurred(tstate)); + return failure; +} + +static int +_ensure_main_ns(_PyXI_session *session, _PyXI_failure *failure) +{ + assert(_session_is_active(session)); + PyThreadState *tstate = session->init_tstate; + if (session->main_ns != NULL) { + return 0; + } // Cache __main__.__dict__. - PyObject *main_mod = _Py_GetMainModule(session_tstate); + PyObject *main_mod = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(main_mod) < 0) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; - goto error; + Py_XDECREF(main_mod); + if (failure != NULL) { + *failure = (_PyXI_failure){ + .code = _PyXI_ERR_MAIN_NS_FAILURE, + }; + } + return -1; } PyObject *ns = PyModule_GetDict(main_mod); // borrowed Py_DECREF(main_mod); if (ns == NULL) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; - goto error; + if (failure != NULL) { + *failure = (_PyXI_failure){ + .code = _PyXI_ERR_MAIN_NS_FAILURE, + }; + } + return -1; } session->main_ns = Py_NewRef(ns); + return 0; +} - // Apply the cross-interpreter data. - if (sharedns != NULL) { - if (_PyXI_ApplyNamespace(sharedns, ns, NULL) < 0) { - errcode = _PyXI_ERR_APPLY_NS_FAILURE; +PyObject * +_PyXI_GetMainNamespace(_PyXI_session *session, _PyXI_failure *failure) +{ + if (!_session_is_active(session)) { + PyErr_SetString(PyExc_RuntimeError, "session not active"); + return NULL; + } + if (_ensure_main_ns(session, failure) < 0) { + return NULL; + } + return session->main_ns; +} + + +static int +_pop_preserved(_PyXI_session *session, + _PyXI_namespace **p_xidata, PyObject **p_obj, + _PyXI_failure *p_failure) +{ + _PyXI_failure failure = XI_FAILURE_INIT; + _PyXI_namespace *xidata = NULL; + assert(_PyThreadState_GET() == session->init_tstate); // active session + + if (session->_preserved == NULL) { + *p_xidata = NULL; + *p_obj = NULL; + return 0; + } + if (session->init_tstate == session->prev_tstate) { + // We did not switch interpreters. + *p_xidata = NULL; + *p_obj = session->_preserved; + session->_preserved = NULL; + return 0; + } + *p_obj = NULL; + + // We did switch interpreters. 
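/* Sketch: fetching the entered interpreter's __main__ namespace from
   inside an active session (hypothetical caller; the returned dict is
   cached on, and owned by, the session): */
static int
_set_flag_in_main_sketch(_PyXI_session *session)
{
    _PyXI_failure failure = XI_FAILURE_INIT;
    PyObject *main_ns = _PyXI_GetMainNamespace(session, &failure);
    if (main_ns == NULL) {
        // Either the session is not active (RuntimeError was set) or
        // the lookup failed with failure.code == _PyXI_ERR_MAIN_NS_FAILURE.
        return -1;
    }
    return PyDict_SetItemString(main_ns, "ready", Py_True);
}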
+    Py_ssize_t len = PyDict_Size(session->_preserved);
+    if (len < 0) {
+        failure.code = _PyXI_ERR_PRESERVE_FAILURE;
+        goto error;
+    }
+    else if (len == 0) {
+        *p_xidata = NULL;
+    }
+    else {
+        xidata = _create_sharedns(session->_preserved);
+        if (xidata == NULL) {
+            failure.code = _PyXI_ERR_PRESERVE_FAILURE;
             goto error;
         }
-        _PyXI_FreeNamespace(sharedns);
+        if (_fill_sharedns(xidata, session->_preserved,
+                           _PyXIDATA_FULL_FALLBACK, &failure) < 0)
+        {
+            if (failure.code != _PyXI_ERR_NOT_SHAREABLE) {
+                assert(failure.msg != NULL);
+                failure.code = _PyXI_ERR_PRESERVE_FAILURE;
+            }
+            goto error;
+        }
+        *p_xidata = xidata;
     }
+    Py_CLEAR(session->_preserved);
+    return 0;
 
-    errcode = _PyXI_ERR_NO_ERROR;
-    assert(!PyErr_Occurred());
+error:
+    if (p_failure != NULL) {
+        *p_failure = failure;
+    }
+    if (xidata != NULL) {
+        _destroy_sharedns(xidata);
+    }
+    return -1;
+}
+
+static int
+_finish_preserved(_PyXI_namespace *xidata, PyObject **p_preserved)
+{
+    if (xidata == NULL) {
+        return 0;
+    }
+    int res = -1;
+    if (p_preserved != NULL) {
+        PyObject *ns = PyDict_New();
+        if (ns == NULL) {
+            goto finally;
+        }
+        if (_apply_sharedns(xidata, ns, NULL) < 0) {
+            Py_CLEAR(ns);
+            goto finally;
+        }
+        *p_preserved = ns;
+    }
+    res = 0;
+
+finally:
+    _destroy_sharedns(xidata);
+    return res;
+}
+
+int
+_PyXI_Preserve(_PyXI_session *session, const char *name, PyObject *value,
+               _PyXI_failure *p_failure)
+{
+    _PyXI_failure failure = XI_FAILURE_INIT;
+    if (!_session_is_active(session)) {
+        PyErr_SetString(PyExc_RuntimeError, "session not active");
+        return -1;
+    }
+    if (session->_preserved == NULL) {
+        session->_preserved = PyDict_New();
+        if (session->_preserved == NULL) {
+            set_exc_with_cause(PyExc_RuntimeError,
+                               "failed to initialize preserved objects");
+            failure.code = _PyXI_ERR_PRESERVE_FAILURE;
+            goto error;
+        }
+    }
+    if (PyDict_SetItemString(session->_preserved, name, value) < 0) {
+        set_exc_with_cause(PyExc_RuntimeError, "failed to preserve object");
+        failure.code = _PyXI_ERR_PRESERVE_FAILURE;
+        goto error;
+    }
     return 0;
 
 error:
-    assert(PyErr_Occurred());
-    // We want to propagate all exceptions here directly (best effort).
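/* Sketch of the preserve round-trip implemented above (hypothetical
   usage; 'session', 'value', and 'result' come from an enclosing
   _PyXI_Enter()/_PyXI_Exit() pair): */
    // While the session is active:
    if (_PyXI_Preserve(session, "answer", value, NULL) < 0) {
        // handle the error
    }
    // After a successful _PyXI_Exit(session, NULL, &result):
    PyObject *answer = _PyXI_GetPreserved(&result, "answer");
    // 'answer' is a new reference, or NULL if nothing was preserved
    // under that name.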
- assert(errcode != _PyXI_ERR_UNCAUGHT_EXCEPTION); - session->error_override = &errcode; - _capture_current_exception(session); - _exit_session(session); - if (sharedns != NULL) { - _PyXI_FreeNamespace(sharedns); + if (p_failure != NULL) { + *p_failure = failure; } return -1; } +PyObject * +_PyXI_GetPreserved(_PyXI_session_result *result, const char *name) +{ + PyObject *value = NULL; + if (result->preserved != NULL) { + (void)PyDict_GetItemStringRef(result->preserved, name, &value); + } + return value; +} + void -_PyXI_Exit(_PyXI_session *session) +_PyXI_ClearResult(_PyXI_session_result *result) { - _capture_current_exception(session); - _exit_session(session); + Py_CLEAR(result->preserved); + Py_CLEAR(result->excinfo); } diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index 231537c66d7..c3c76ae8d9a 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -12,7 +12,8 @@ typedef _PyXIData_regitem_t dlregitem_t; // forward static void _xidregistry_init(dlregistry_t *); static void _xidregistry_fini(dlregistry_t *); -static xidatafunc _lookup_getdata_from_registry(dlcontext_t *, PyObject *); +static _PyXIData_getdata_t _lookup_getdata_from_registry( + dlcontext_t *, PyObject *); /* used in crossinterp.c */ @@ -49,7 +50,7 @@ get_lookup_context(PyThreadState *tstate, dlcontext_t *res) return 0; } -static xidatafunc +static _PyXIData_getdata_t lookup_getdata(dlcontext_t *ctx, PyObject *obj) { /* Cross-interpreter objects are looked up by exact match on the class. @@ -87,25 +88,52 @@ _PyXIData_FormatNotShareableError(PyThreadState *tstate, va_end(vargs); } +int +_PyXI_UnwrapNotShareableError(PyThreadState * tstate, _PyXI_failure *failure) +{ + PyObject *exctype = get_notshareableerror_type(tstate); + assert(exctype != NULL); + if (!_PyErr_ExceptionMatches(tstate, exctype)) { + return -1; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (failure != NULL) { + _PyXI_errcode code = _PyXI_ERR_NOT_SHAREABLE; + if (_PyXI_InitFailure(failure, code, exc) < 0) { + return -1; + } + } + PyObject *cause = PyException_GetCause(exc); + if (cause != NULL) { + Py_DECREF(exc); + exc = cause; + } + else { + assert(PyException_GetContext(exc) == NULL); + } + _PyErr_SetRaisedException(tstate, exc); + return 0; +} + -xidatafunc +_PyXIData_getdata_t _PyXIData_Lookup(PyThreadState *tstate, PyObject *obj) { dlcontext_t ctx; if (get_lookup_context(tstate, &ctx) < 0) { - return NULL; + return (_PyXIData_getdata_t){0}; } return lookup_getdata(&ctx, obj); } /***********************************************/ -/* a registry of {type -> xidatafunc} */ +/* a registry of {type -> _PyXIData_getdata_t} */ /***********************************************/ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. 
*/ /* registry lifecycle */ @@ -200,7 +228,7 @@ _xidregistry_find_type(dlregistry_t *xidregistry, PyTypeObject *cls) return NULL; } -static xidatafunc +static _PyXIData_getdata_t _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) { PyTypeObject *cls = Py_TYPE(obj); @@ -209,10 +237,12 @@ _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) _xidregistry_lock(xidregistry); dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); - xidatafunc func = matched != NULL ? matched->getdata : NULL; + _PyXIData_getdata_t getdata = matched != NULL + ? matched->getdata + : (_PyXIData_getdata_t){0}; _xidregistry_unlock(xidregistry); - return func; + return getdata; } @@ -220,12 +250,13 @@ _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) static int _xidregistry_add_type(dlregistry_t *xidregistry, - PyTypeObject *cls, xidatafunc getdata) + PyTypeObject *cls, _PyXIData_getdata_t getdata) { dlregitem_t *newhead = PyMem_RawMalloc(sizeof(dlregitem_t)); if (newhead == NULL) { return -1; } + assert((getdata.basic == NULL) != (getdata.fallback == NULL)); *newhead = (dlregitem_t){ // We do not keep a reference, to avoid keeping the class alive. .cls = cls, @@ -283,13 +314,13 @@ _xidregistry_clear(dlregistry_t *xidregistry) int _PyXIData_RegisterClass(PyThreadState *tstate, - PyTypeObject *cls, xidatafunc getdata) + PyTypeObject *cls, _PyXIData_getdata_t getdata) { if (!PyType_Check(cls)) { PyErr_Format(PyExc_ValueError, "only classes may be registered"); return -1; } - if (getdata == NULL) { + if (getdata.basic == NULL && getdata.fallback == NULL) { PyErr_Format(PyExc_ValueError, "missing 'getdata' func"); return -1; } @@ -304,7 +335,8 @@ _PyXIData_RegisterClass(PyThreadState *tstate, dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); if (matched != NULL) { - assert(matched->getdata == getdata); + assert(matched->getdata.basic == getdata.basic); + assert(matched->getdata.fallback == getdata.fallback); matched->refcount += 1; goto finally; } @@ -608,7 +640,8 @@ _tuple_shared_free(void* data) } static int -_tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) +_tuple_shared(PyThreadState *tstate, PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) { Py_ssize_t len = PyTuple_GET_SIZE(obj); if (len < 0) { @@ -636,7 +669,7 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) int res = -1; if (!_Py_EnterRecursiveCallTstate(tstate, " while sharing a tuple")) { - res = _PyObject_GetXIData(tstate, item, xidata_i); + res = _PyObject_GetXIData(tstate, item, fallback, xidata_i); _Py_LeaveRecursiveCallTstate(tstate); } if (res < 0) { @@ -677,44 +710,126 @@ _PyCode_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) return 0; } +// function + +PyObject * +_PyFunction_FromXIData(_PyXIData_t *xidata) +{ + // For now "stateless" functions are the only ones we must accommodate. + + PyObject *code = _PyMarshal_ReadObjectFromXIData(xidata); + if (code == NULL) { + return NULL; + } + // Create a new function. + // For stateless functions (no globals) we use __main__ as __globals__, + // just like we do for builtins like exec(). 
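/* For context: _PyXIData_getdata_t is now a small struct holding either
   a 'basic' or a 'fallback' conversion function (exactly one must be
   set, per the assert in _xidregistry_add_type()).  A sketch of
   registering a hypothetical type, assuming 'MyType' and 'my_getdata'
   exist with the basic signature: */
    _PyXIData_getdata_t getdata = {.basic = my_getdata};
    if (_PyXIData_RegisterClass(tstate, &MyType, getdata) < 0) {
        // registration failed
    }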
+ assert(PyCode_Check(code)); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *globals = _PyEval_GetGlobalsFromRunningMain(tstate); // borrowed + if (globals == NULL) { + if (_PyErr_Occurred(tstate)) { + Py_DECREF(code); + return NULL; + } + globals = PyDict_New(); + if (globals == NULL) { + Py_DECREF(code); + return NULL; + } + } + else { + Py_INCREF(globals); + } + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { + Py_DECREF(code); + Py_DECREF(globals); + return NULL; + } + PyObject *func = PyFunction_New(code, globals); + Py_DECREF(code); + Py_DECREF(globals); + return func; +} + +int +_PyFunction_GetXIData(PyThreadState *tstate, PyObject *func, + _PyXIData_t *xidata) +{ + if (!PyFunction_Check(func)) { + const char *msg = "expected a function, got %R"; + format_notshareableerror(tstate, NULL, 0, msg, func); + return -1; + } + if (_PyFunction_VerifyStateless(tstate, func) < 0) { + PyObject *cause = _PyErr_GetRaisedException(tstate); + assert(cause != NULL); + const char *msg = "only stateless functions are shareable"; + set_notshareableerror(tstate, cause, 0, msg); + Py_DECREF(cause); + return -1; + } + PyObject *code = PyFunction_GET_CODE(func); + + // Ideally code objects would be immortal and directly shareable. + // In the meantime, we use marshal. + if (_PyMarshal_GetXIData(tstate, code, xidata) < 0) { + return -1; + } + // Replace _PyMarshal_ReadObjectFromXIData. + // (_PyFunction_FromXIData() will call it.) + _PyXIData_SET_NEW_OBJECT(xidata, _PyFunction_FromXIData); + return 0; +} + // registration static void _register_builtins_for_crossinterpreter_data(dlregistry_t *xidregistry) { +#define REGISTER(TYPE, GETDATA) \ + _xidregistry_add_type(xidregistry, (PyTypeObject *)TYPE, \ + ((_PyXIData_getdata_t){.basic=(GETDATA)})) +#define REGISTER_FALLBACK(TYPE, GETDATA) \ + _xidregistry_add_type(xidregistry, (PyTypeObject *)TYPE, \ + ((_PyXIData_getdata_t){.fallback=(GETDATA)})) // None - if (_xidregistry_add_type(xidregistry, (PyTypeObject *)PyObject_Type(Py_None), _none_shared) != 0) { + if (REGISTER(Py_TYPE(Py_None), _none_shared) != 0) { Py_FatalError("could not register None for cross-interpreter sharing"); } // int - if (_xidregistry_add_type(xidregistry, &PyLong_Type, _long_shared) != 0) { + if (REGISTER(&PyLong_Type, _long_shared) != 0) { Py_FatalError("could not register int for cross-interpreter sharing"); } // bytes - if (_xidregistry_add_type(xidregistry, &PyBytes_Type, _PyBytes_GetXIData) != 0) { + if (REGISTER(&PyBytes_Type, _PyBytes_GetXIData) != 0) { Py_FatalError("could not register bytes for cross-interpreter sharing"); } // str - if (_xidregistry_add_type(xidregistry, &PyUnicode_Type, _str_shared) != 0) { + if (REGISTER(&PyUnicode_Type, _str_shared) != 0) { Py_FatalError("could not register str for cross-interpreter sharing"); } // bool - if (_xidregistry_add_type(xidregistry, &PyBool_Type, _bool_shared) != 0) { + if (REGISTER(&PyBool_Type, _bool_shared) != 0) { Py_FatalError("could not register bool for cross-interpreter sharing"); } // float - if (_xidregistry_add_type(xidregistry, &PyFloat_Type, _float_shared) != 0) { + if (REGISTER(&PyFloat_Type, _float_shared) != 0) { Py_FatalError("could not register float for cross-interpreter sharing"); } // tuple - if (_xidregistry_add_type(xidregistry, &PyTuple_Type, _tuple_shared) != 0) { + if (REGISTER_FALLBACK(&PyTuple_Type, _tuple_shared) != 0) { Py_FatalError("could not register tuple for cross-interpreter sharing"); } + + // For now, we do not register PyCode_Type or PyFunction_Type. 
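/* Sketch of the function-sharing path added above (hypothetical
   caller): a stateless function is reduced to its code object via
   marshal and rebuilt on the receiving side with fresh __globals__. */
static int
_share_function_sketch(PyThreadState *tstate, PyObject *func,
                       _PyXIData_t *xidata)
{
    // Raises NotShareableError unless 'func' is a stateless function.
    if (_PyFunction_GetXIData(tstate, func, xidata) < 0) {
        return -1;
    }
    // The receiving interpreter rebuilds the function through the
    // xidata's new-object hook, which the call above points at
    // _PyFunction_FromXIData().
    return 0;
}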
+#undef REGISTER +#undef REGISTER_FALLBACK } diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h index ca4ca1cf123..98411adc5eb 100644 --- a/Python/crossinterp_exceptions.h +++ b/Python/crossinterp_exceptions.h @@ -7,13 +7,6 @@ _ensure_current_cause(PyThreadState *tstate, PyObject *cause) } PyObject *exc = _PyErr_GetRaisedException(tstate); assert(exc != NULL); - PyObject *ctx = PyException_GetContext(exc); - if (ctx == NULL) { - PyException_SetContext(exc, Py_NewRef(cause)); - } - else { - Py_DECREF(ctx); - } assert(PyException_GetCause(exc) == NULL); PyException_SetCause(exc, Py_NewRef(cause)); _PyErr_SetRaisedException(tstate, exc); @@ -24,7 +17,7 @@ _ensure_current_cause(PyThreadState *tstate, PyObject *cause) static PyTypeObject _PyExc_InterpreterError = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "interpreters.InterpreterError", + .tp_name = "concurrent.interpreters.InterpreterError", .tp_doc = PyDoc_STR("A cross-interpreter operation failed"), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, //.tp_traverse = ((PyTypeObject *)PyExc_Exception)->tp_traverse, @@ -37,7 +30,7 @@ PyObject *PyExc_InterpreterError = (PyObject *)&_PyExc_InterpreterError; static PyTypeObject _PyExc_InterpreterNotFoundError = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "interpreters.InterpreterNotFoundError", + .tp_name = "concurrent.interpreters.InterpreterNotFoundError", .tp_doc = PyDoc_STR("An interpreter was not found"), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, //.tp_traverse = ((PyTypeObject *)PyExc_Exception)->tp_traverse, @@ -51,7 +44,7 @@ PyObject *PyExc_InterpreterNotFoundError = (PyObject *)&_PyExc_InterpreterNotFou static int _init_notshareableerror(exceptions_t *state) { - const char *name = "interpreters.NotShareableError"; + const char *name = "concurrent.interpreters.NotShareableError"; PyObject *base = PyExc_TypeError; PyObject *ns = NULL; PyObject *exctype = PyErr_NewException(name, base, ns); diff --git a/Python/dynload_win.c b/Python/dynload_win.c index 6324063401e..de9b0a77817 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -108,7 +108,7 @@ static char *GetPythonImport (HINSTANCE hModule) char *pch; /* Don't claim that python3.dll is a Python DLL. 
*/ -#ifdef _DEBUG +#ifdef Py_DEBUG if (strcmp(import_name, "python3_d.dll") == 0) { #else if (strcmp(import_name, "python3.dll") == 0) { @@ -120,7 +120,7 @@ static char *GetPythonImport (HINSTANCE hModule) /* Ensure python prefix is followed only by numbers to the end of the basename */ pch = import_name + 6; -#ifdef _DEBUG +#ifdef Py_DEBUG while (*pch && pch[0] != '_' && pch[1] != 'd' && pch[2] != '.') { #else while (*pch && *pch != '.') { @@ -300,7 +300,7 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, char buffer[256]; PyOS_snprintf(buffer, sizeof(buffer), -#ifdef _DEBUG +#ifdef Py_DEBUG "python%d%d_d.dll", #else "python%d%d.dll", diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index a7bb685bf3d..75b98a04723 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -35,7 +35,7 @@ EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { // (type $type1 (func (param i32) (result i32))) // (type $type2 (func (param i32 i32) (result i32))) // (type $type3 (func (param i32 i32 i32) (result i32))) -// (type $blocktype (func (param i32) (result))) +// (type $blocktype (func (param) (result))) // (table $funcs (import "e" "t") 0 funcref) // (export "f" (func $f)) // (func $f (param $fptr i32) (result i32) @@ -44,42 +44,43 @@ EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { // table.get $funcs // local.tee $fref // ref.test $type3 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 3 // return -// ) +// end // local.get $fref // ref.test $type2 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 2 // return -// ) +// end // local.get $fref // ref.test $type1 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 1 // return -// ) +// end // local.get $fref // ref.test $type0 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b +// if $blocktype // i32.const 0 // return -// ) +// end // i32.const -1 // ) // ) function getPyEMCountArgsPtr() { - let isIOS = globalThis.navigator && /iPad|iPhone|iPod/.test(navigator.platform); + // Starting with iOS 18.3.1, WebKit on iOS has an issue with the garbage + // collector that breaks the call trampoline. See #130418 and + // https://bugs.webkit.org/show_bug.cgi?id=293113 for details. + let isIOS = globalThis.navigator && ( + /iPad|iPhone|iPod/.test(navigator.userAgent) || + // Starting with iPadOS 13, iPads might send a platform string that looks like a desktop Mac. 
+ // To differentiate, we check if the platform is 'MacIntel' (common for Macs and newer iPads) + // AND if the device has multi-touch capabilities (navigator.maxTouchPoints > 1) + (navigator.platform === 'MacIntel' && typeof navigator.maxTouchPoints !== 'undefined' && navigator.maxTouchPoints > 1) + ); if (isIOS) { return 0; } @@ -88,13 +89,13 @@ function getPyEMCountArgsPtr() { const code = new Uint8Array([ 0x00, 0x61, 0x73, 0x6d, // \0asm magic number 0x01, 0x00, 0x00, 0x00, // version 1 - 0x01, 0x1b, // Type section, body is 0x1b bytes + 0x01, 0x1a, // Type section, body is 0x1a bytes 0x05, // 6 entries - 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) - 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) - 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) - 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) - 0x60, 0x01, 0x7f, 0x00, // (type $blocktype (func (param i32) (result))) + 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) + 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) + 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) + 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) + 0x60, 0x00, 0x00, // (type $blocktype (func (param) (result))) 0x02, 0x09, // Import section, 0x9 byte body 0x01, // 1 import (table $funcs (import "e" "t") 0 funcref) 0x01, 0x65, // "e" @@ -110,44 +111,36 @@ function getPyEMCountArgsPtr() { 0x00, // a function 0x00, // at index 0 - 0x0a, 0x44, // Code section, - 0x01, 0x42, // one entry of length 50 + 0x0a, 56, // Code section, + 0x01, 54, // one entry of length 54 0x01, 0x01, 0x70, // one local of type funcref // Body of the function 0x20, 0x00, // local.get $fptr 0x25, 0x00, // table.get $funcs 0x22, 0x01, // local.tee $fref 0xfb, 0x14, 0x03, // ref.test $type3 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x03, // i32.const 3 0x0f, // return 0x0b, // end block 0x20, 0x01, // local.get $fref 0xfb, 0x14, 0x02, // ref.test $type2 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x02, // i32.const 2 0x0f, // return 0x0b, // end block 0x20, 0x01, // local.get $fref 0xfb, 0x14, 0x01, // ref.test $type1 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x01, // i32.const 1 0x0f, // return 0x0b, // end block 0x20, 0x01, // local.get $fref 0xfb, 0x14, 0x00, // ref.test $type0 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b + 0x04, 0x04, // if (type $blocktype) 0x41, 0x00, // i32.const 0 0x0f, // return 0x0b, // end block diff --git a/Python/errors.c b/Python/errors.c index 81f267b043a..a3122f76bdd 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -10,7 +10,6 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_runtime.h" // _Py_ID() #include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin() -#include "pycore_sysmodule.h" // _PySys_GetOptionalAttr() #include "pycore_traceback.h" // _PyTraceBack_FromFrame() #include "pycore_unicodeobject.h" // _PyUnicode_Equal() @@ -1570,7 +1569,7 @@ write_unraisable_exc(PyThreadState *tstate, PyObject *exc_type, PyObject *obj) { PyObject 
*file; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { return -1; } if (file == NULL || file == Py_None) { @@ -1677,7 +1676,7 @@ format_unraisable_v(const char *format, va_list va, PyObject *obj) } PyObject *hook; - if (_PySys_GetOptionalAttr(&_Py_ID(unraisablehook), &hook) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(unraisablehook), &hook) < 0) { Py_DECREF(hook_args); err_msg_str = NULL; obj = NULL; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3e51ac41fa3..46fc164a5b3 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -319,25 +319,11 @@ break; } - /* _LOAD_CONST is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ - - case _LOAD_CONST_MORTAL: { - _PyStackRef value; - oparg = CURRENT_OPARG(); - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNewMortal(obj); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_CONST_IMMORTAL: { + case _LOAD_CONST: { _PyStackRef value; oparg = CURRENT_OPARG(); PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - assert(_Py_IsImmortal(obj)); - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -350,7 +336,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -363,7 +349,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -376,7 +362,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -389,7 +375,7 @@ assert(oparg == CURRENT_OPARG()); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -401,7 +387,7 @@ oparg = CURRENT_OPARG(); assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -413,10 +399,6 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -432,10 +414,6 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - 
PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -451,10 +429,6 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -470,10 +444,6 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -489,10 +459,6 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -508,10 +474,6 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -527,10 +489,6 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -546,10 +504,6 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -564,10 +518,6 @@ _PyStackRef value; oparg = CURRENT_OPARG(); value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -584,7 +534,65 @@ stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(value); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + 
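/* For context: these specialized pops let the tier-2 optimizer emit the
   cheapest close for a value whose type it has proven, e.g. (sketch):
       value = stack_pointer[-1];
       PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc);
   instead of the generic PyStackRef_XCLOSE() used by _POP_TOP, which
   must handle any object (including NULL). */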
} + + case _POP_TWO: { + _PyStackRef tos; + _PyStackRef nos; + tos = stack_pointer[-1]; + nos = stack_pointer[-2]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(tos); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(nos); stack_pointer = _PyFrame_GetStackPointer(frame); break; } @@ -609,6 +617,20 @@ break; } + case _POP_ITER: { + _PyStackRef index_or_null; + _PyStackRef iter; + index_or_null = stack_pointer[-1]; + iter = stack_pointer[-2]; + (void)index_or_null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(iter); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _END_SEND: { _PyStackRef value; _PyStackRef receiver; @@ -870,7 +892,7 @@ _PyStackRef left; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -881,7 +903,31 @@ _PyStackRef value; value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_NOS_OVERFLOWED: { + _PyStackRef left; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + assert(Py_TYPE(left_o) == &PyLong_Type); + if (!_PyLong_IsCompact((PyLongObject *)left_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_TOS_OVERFLOWED: { + _PyStackRef value; + value = stack_pointer[-1]; + PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); + assert(Py_TYPE(value_o) == &PyLong_Type); + if (!_PyLong_IsCompact((PyLongObject *)value_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -898,18 +944,15 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -926,18 +969,15 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if 
(PyStackRef_IsNull(res)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -954,18 +994,15 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1075,6 +1112,87 @@ break; } + case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: { + _PyStackRef right; + _PyStackRef left; + _PyStackRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + if (PyStackRef_IsNull(res)) { + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_ERROR(); + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: { + _PyStackRef right; + _PyStackRef left; + _PyStackRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + if (PyStackRef_IsNull(res)) { + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_ERROR(); + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: { + _PyStackRef right; + _PyStackRef left; + _PyStackRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres)); + if 
(PyStackRef_IsNull(res)) { + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_ERROR(); + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _BINARY_OP_ADD_UNICODE: { _PyStackRef right; _PyStackRef left; @@ -1403,7 +1521,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str_st); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectImmortal(res_o); + res = PyStackRef_FromPyObjectBorrow(res_o); stack_pointer[0] = res; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -1569,15 +1687,16 @@ _PyStackRef getitem; _PyStackRef sub; _PyStackRef container; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; getitem = stack_pointer[-1]; sub = stack_pointer[-2]; container = stack_pointer[-3]; - new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; + _PyInterpreterFrame* pushed_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); + pushed_frame->localsplus[0] = container; + pushed_frame->localsplus[1] = sub; frame->return_offset = 6 ; - stack_pointer[-3].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-3] = new_frame; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; @@ -1925,7 +2044,7 @@ case _SEND_GEN_FRAME: { _PyStackRef v; _PyStackRef receiver; - _PyInterpreterFrame *gen_frame; + _PyStackRef gen_frame; oparg = CURRENT_OPARG(); v = stack_pointer[-1]; receiver = stack_pointer[-2]; @@ -1939,15 +2058,16 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(SEND, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; assert( 2 + oparg <= UINT16_MAX); frame->return_offset = (uint16_t)( 2 + oparg); - gen_frame->previous = frame; - stack_pointer[-1].bits = (uintptr_t)gen_frame; + pushed_frame->previous = frame; + gen_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-1] = gen_frame; break; } @@ -3489,7 +3609,7 @@ case _LOAD_ATTR_PROPERTY_FRAME: { _PyStackRef owner; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); owner = stack_pointer[-1]; PyObject *fget = (PyObject *)CURRENT_OPERAND0(); @@ -3514,9 +3634,10 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); - new_frame->localsplus[0] = owner; - stack_pointer[-1].bits = (uintptr_t)new_frame; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); + pushed_frame->localsplus[0] = owner; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-1] = new_frame; break; } @@ -3735,14 +3856,8 @@ left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (!_PyLong_IsCompact((PyLongObject *)right_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); 
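/* For context: the compact-int checks that previously lived in the
   compare/arithmetic uops moved into _GUARD_TOS_INT/_GUARD_NOS_INT (via
   _PyLong_CheckExactAndCompact), so the uops may now assert compactness.
   The new _PyCompactLong_Add/_Subtract/_Multiply helpers return a null
   stackref on overflow, which is treated as a deopt rather than an
   error, e.g. (sketch of the generated pattern above):
       res = _PyCompactLong_Add((PyLongObject *)left_o,
                                (PyLongObject *)right_o);
       if (PyStackRef_IsNull(res)) {
           UOP_STAT_INC(uopcode, miss);
           JUMP_TO_JUMP_TARGET();   // fall back to the generic path
       }
*/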
assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && _PyLong_DigitCount((PyLongObject *)right_o) <= 1); @@ -4204,25 +4319,37 @@ case _GET_ITER: { _PyStackRef iterable; _PyStackRef iter; + _PyStackRef index_or_null; iterable = stack_pointer[-1]; #ifdef Py_STATS _PyFrame_SetStackPointer(frame, stack_pointer); _Py_GatherStats_GetIter(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); #endif - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(iterable); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (iter_o == NULL) { - JUMP_TO_ERROR(); + + PyTypeObject *tp = PyStackRef_TYPE(iterable); + if (tp == &PyTuple_Type || tp == &PyList_Type) { + iter = iterable; + index_or_null = PyStackRef_TagInt(0); } - iter = PyStackRef_FromPyObjectSteal(iter_o); - stack_pointer[0] = iter; + else { + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(iterable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (iter_o == NULL) { + JUMP_TO_ERROR(); + } + iter = PyStackRef_FromPyObjectSteal(iter_o); + index_or_null = PyStackRef_NULL; + stack_pointer += 1; + } + stack_pointer[-1] = iter; + stack_pointer[0] = index_or_null; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -4269,32 +4396,25 @@ /* _FOR_ITER is not a viable micro-op for tier 2 because it is replaced */ case _FOR_ITER_TIER_TWO: { + _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_ERROR(); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + JUMP_TO_ERROR(); } if (true) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } } - next = PyStackRef_FromPyObjectSteal(next_o); + next = item; + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -4304,21 +4424,18 @@ /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 because it is instrumented */ case _ITER_CHECK_LIST: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyListIter_Type) { + if (Py_TYPE(iter_o) != &PyList_Type) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + 
assert(PyStackRef_IsTaggedInt(null_or_index)); #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq)) { + if (!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -4329,24 +4446,17 @@ /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 because it is replaced */ case _GUARD_NOT_EXHAUSTED_LIST: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; #ifndef Py_GIL_DISABLED - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - if (seq == NULL) { + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - if (1) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } #endif break; } @@ -4354,38 +4464,30 @@ /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 because it is replaced */ case _ITER_NEXT_LIST_TIER_TWO: { + _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread((PyObject *)list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); stack_pointer = _PyFrame_GetStackPointer(frame); - if (result < 0) { + if (result <= 0) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (result == 0) { - it->it_index = -1; - if (1) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } - it->it_index++; #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + assert(PyStackRef_UntagInt(null_or_index) < PyList_GET_SIZE(list_o)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -4393,39 +4495,29 @@ } case _ITER_CHECK_TUPLE: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != 
&PyTupleIter_Type) { + if (Py_TYPE(iter_o) != &PyTuple_Type) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - #endif + assert(PyStackRef_IsTaggedInt(null_or_index)); break; } /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 because it is replaced */ case _GUARD_NOT_EXHAUSTED_TUPLE: { + _PyStackRef null_or_index; _PyStackRef iter; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - PyTupleObject *seq = it->it_seq; - if (seq == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (it->it_index >= PyTuple_GET_SIZE(seq)) { + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -4433,19 +4525,18 @@ } case _ITER_NEXT_TUPLE: { + _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - PyTupleObject *seq = it->it_seq; - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - assert(seq); - assert(it->it_index < PyTuple_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + uintptr_t i = PyStackRef_UntagInt(null_or_index); + assert((size_t)i < (size_t)PyTuple_GET_SIZE(tuple_o)); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(tuple_o, i)); + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -4454,7 +4545,7 @@ case _ITER_CHECK_RANGE: { _PyStackRef iter; - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(r) != &PyRangeIter_Type) { UOP_STAT_INC(uopcode, miss); @@ -4473,7 +4564,7 @@ case _GUARD_NOT_EXHAUSTED_RANGE: { _PyStackRef iter; - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); if (r->len <= 0) { @@ -4486,7 +4577,7 @@ case _ITER_NEXT_RANGE: { _PyStackRef iter; _PyStackRef next; - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); #ifdef Py_GIL_DISABLED @@ -4509,9 +4600,9 @@ case _FOR_ITER_GEN_FRAME: { _PyStackRef iter; - _PyInterpreterFrame *gen_frame; + _PyStackRef gen_frame; oparg = CURRENT_OPARG(); - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(gen) != &PyGen_Type) { UOP_STAT_INC(uopcode, miss); @@ -4529,14 +4620,15 @@ JUMP_TO_JUMP_TARGET(); } 
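/*
 * Sketch (editor's annotation, not part of the patch): the iterator hunks
 * above stop pushing a PyListIter/PyTupleIter object and instead keep the
 * sequence itself on the stack beside a tagged integer index
 * (PyStackRef_TagInt / PyStackRef_UntagInt / PyStackRef_IncrementTaggedIntNoOverflow),
 * and the frame hunks below store _PyInterpreterFrame pointers through
 * PyStackRef_Wrap/PyStackRef_Unwrap rather than raw `.bits` stores.  A
 * standalone model of one plausible encoding -- low bit set for small ints,
 * aligned pointers stored as-is; CPython's actual bit layout may differ:
 */
#include <assert.h>
#include <stdint.h>

typedef struct { uintptr_t bits; } RefSketch;
typedef struct { int dummy; } FrameSketch;

static RefSketch  tag_int(uintptr_t i)    { return (RefSketch){(i << 1) | 1}; }
static uintptr_t  untag_int(RefSketch r)  { assert(r.bits & 1); return r.bits >> 1; }

/* Assumes frame pointers are at least 2-byte aligned, so the tag bit is clear. */
static RefSketch     wrap(FrameSketch *f) { return (RefSketch){(uintptr_t)f}; }
static FrameSketch  *unwrap(RefSketch r)  { assert(!(r.bits & 1)); return (FrameSketch *)r.bits; }

int main(void)
{
    FrameSketch frame;
    RefSketch slot = wrap(&frame);       /* what the frame-pushing uop stores */
    assert(unwrap(slot) == &frame);      /* what _PUSH_FRAME reads back */

    RefSketch idx = tag_int(3);          /* an iteration index in a stack slot */
    assert(untag_int(idx) == 3);
    idx = tag_int(untag_int(idx) + 1);   /* the increment step, staying tagged */
    assert(untag_int(idx) == 4);
    return 0;
}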
STAT_INC(FOR_ITER, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_None); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - gen_frame->previous = frame; + pushed_frame->previous = frame; frame->return_offset = (uint16_t)( 2 + oparg); - stack_pointer[0].bits = (uintptr_t)gen_frame; + gen_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[0] = gen_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -4817,7 +4909,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; @@ -4842,8 +4934,8 @@ if (temp == NULL) { JUMP_TO_ERROR(); } - new_frame = temp; - stack_pointer[0].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(temp); + stack_pointer[0] = new_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -5109,7 +5201,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 0; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5117,13 +5209,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5133,7 +5226,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 1; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5141,13 +5234,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5157,7 +5251,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 2; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5165,13 +5259,14 @@ callable = stack_pointer[-2 - oparg]; int 
has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5181,7 +5276,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 3; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5189,13 +5284,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5205,7 +5301,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = 4; assert(oparg == CURRENT_OPARG()); args = &stack_pointer[-oparg]; @@ -5213,13 +5309,14 @@ callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -5229,34 +5326,35 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + 
_PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } - stack_pointer[-2 - oparg].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(pushed_frame); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_FRAME: { - _PyInterpreterFrame *new_frame; - new_frame = (_PyInterpreterFrame *)stack_pointer[-1].bits; + _PyStackRef new_frame; + new_frame = stack_pointer[-1]; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -5276,6 +5374,27 @@ break; } + case _GUARD_NOS_NOT_NULL: { + _PyStackRef nos; + nos = stack_pointer[-2]; + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + if (o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_THIRD_NULL: { + _PyStackRef null; + null = stack_pointer[-3]; + if (!PyStackRef_IsNull(null)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _GUARD_CALLABLE_TYPE_1: { _PyStackRef callable; callable = stack_pointer[-3]; @@ -5450,7 +5569,7 @@ _PyStackRef *args; _PyStackRef self; _PyStackRef init; - _PyInterpreterFrame *init_frame; + _PyStackRef init_frame; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self = stack_pointer[-1 - oparg]; @@ -5474,10 +5593,10 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } - init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; tstate->py_recursion_remaining--; - stack_pointer[0].bits = (uintptr_t)init_frame; + init_frame = PyStackRef_Wrap(temp); + stack_pointer[0] = init_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -5855,59 +5974,70 @@ break; } - case _CALL_ISINSTANCE: { - _PyStackRef *args; - _PyStackRef self_or_null; + case _GUARD_CALLABLE_ISINSTANCE: { _PyStackRef callable; - _PyStackRef res; - oparg = CURRENT_OPARG(); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; + callable = stack_pointer[-4]; PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; - _PyStackRef *arguments = args; - if (!PyStackRef_IsNull(self_or_null)) { - arguments--; - total_args++; - } - if (total_args != 2) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } PyInterpreterState *interp = tstate->interp; if (callable_o != interp->callable_cache.isinstance) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + break; + } + + case _CALL_ISINSTANCE: { + _PyStackRef cls; + _PyStackRef instance; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef res; + cls = stack_pointer[-1]; + instance = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; STAT_INC(CALL, hit); - _PyStackRef cls_stackref = arguments[1]; - _PyStackRef inst_stackref = arguments[0]; + PyObject *inst_o = PyStackRef_AsPyObjectBorrow(instance); + PyObject *cls_o = PyStackRef_AsPyObjectBorrow(cls); _PyFrame_SetStackPointer(frame, stack_pointer); - int 
retval = PyObject_IsInstance(PyStackRef_AsPyObjectBorrow(inst_stackref), PyStackRef_AsPyObjectBorrow(cls_stackref)); + int retval = PyObject_IsInstance(inst_o, cls_o); stack_pointer = _PyFrame_GetStackPointer(frame); if (retval < 0) { JUMP_TO_ERROR(); } - res = retval ? PyStackRef_True : PyStackRef_False; - assert((!PyStackRef_IsNull(res)) ^ (_PyErr_Occurred(tstate) != NULL)); + (void)null; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = callable; - callable = res; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); + PyStackRef_CLOSE(cls); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1 - oparg; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(instance); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + res = retval ? PyStackRef_True : PyStackRef_False; + assert((!PyStackRef_IsNull(res)) ^ (_PyErr_Occurred(tstate) != NULL)); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_CALLABLE_LIST_APPEND: { + _PyStackRef callable; + callable = stack_pointer[-3]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + if (callable_o != interp->callable_cache.list_append) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } break; } @@ -5920,18 +6050,8 @@ self = stack_pointer[-2]; callable = stack_pointer[-3]; assert(oparg == 1); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); - PyInterpreterState *interp = tstate->interp; - if (callable_o != interp->callable_cache.list_append) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (self_o == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (!PyList_Check(self_o)) { + if (!PyList_CheckExact(self_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -6329,7 +6449,7 @@ _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; oparg = CURRENT_OPARG(); kwnames = stack_pointer[-1]; args = &stack_pointer[-1 - oparg]; @@ -6363,8 +6483,8 @@ if (temp == NULL) { JUMP_TO_ERROR(); } - new_frame = temp; - stack_pointer[0].bits = (uintptr_t)new_frame; + new_frame = PyStackRef_Wrap(temp); + stack_pointer[0] = new_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; @@ -6767,12 +6887,44 @@ break; } + case _COPY_1: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-1]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_2: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-2]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_3: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-3]; + top = PyStackRef_DUP(bottom); + 
stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _COPY: { _PyStackRef bottom; _PyStackRef top; oparg = CURRENT_OPARG(); bottom = stack_pointer[-1 - (oparg-1)]; - assert(oparg > 0); top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; @@ -6812,6 +6964,32 @@ break; } + case _SWAP_2: { + _PyStackRef top; + _PyStackRef bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2]; + _PyStackRef temp = bottom; + bottom = top; + top = temp; + stack_pointer[-2] = bottom; + stack_pointer[-1] = top; + break; + } + + case _SWAP_3: { + _PyStackRef top; + _PyStackRef bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-3]; + _PyStackRef temp = bottom; + bottom = top; + top = temp; + stack_pointer[-3] = bottom; + stack_pointer[-1] = top; + break; + } + case _SWAP: { _PyStackRef top; _PyStackRef bottom; @@ -6821,7 +6999,6 @@ _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); stack_pointer[-2 - (oparg-2)] = bottom; stack_pointer[-1] = top; break; @@ -7033,13 +7210,76 @@ case _LOAD_CONST_INLINE_BORROW: { _PyStackRef value; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } + case _POP_CALL: { + _PyStackRef null; + _PyStackRef callable; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_ONE: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_TWO: { + _PyStackRef pop2; + _PyStackRef pop1; + _PyStackRef null; + _PyStackRef callable; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop2); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop1); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop; _PyStackRef value; @@ -7050,7 +7290,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop); stack_pointer = _PyFrame_GetStackPointer(frame); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -7074,13 +7314,124 @@ _PyFrame_SetStackPointer(frame, stack_pointer); 
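/*
 * Sketch (editor's annotation, not part of the patch): _LOAD_CONST_INLINE_BORROW
 * and the _POP_*_LOAD_CONST_INLINE_BORROW family now build the constant with
 * PyStackRef_FromPyObjectBorrow instead of PyStackRef_FromPyObjectImmortal --
 * presumably because the executor keeps the constant alive, the stack slot can
 * hold a borrowed reference that closing must not decref.  A toy model of the
 * borrowed/owned distinction (all names here are illustrative, not CPython's):
 */
#include <assert.h>
#include <stdbool.h>

typedef struct { int refcnt; } Obj;
typedef struct { Obj *obj; bool owned; } Ref;

static Ref ref_own(Obj *o)    { o->refcnt++; return (Ref){o, true}; }
static Ref ref_borrow(Obj *o) { return (Ref){o, false}; }   /* no refcount traffic */

static void ref_close(Ref r)
{
    if (r.owned && --r.obj->refcnt == 0) {
        /* a real implementation would free r.obj here */
    }
}

int main(void)
{
    Obj constant = { .refcnt = 1 };   /* kept alive by its owner (e.g. the trace) */
    Ref b = ref_borrow(&constant);    /* cheap: nothing to increment */
    ref_close(b);                     /* no-op for borrowed refs */
    Ref o = ref_own(&constant);
    ref_close(o);
    assert(constant.refcnt == 1);     /* the owner's reference is untouched */
    return 0;
}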
PyStackRef_CLOSE(pop1); stack_pointer = _PyFrame_GetStackPointer(frame); - value = PyStackRef_FromPyObjectImmortal(ptr); + value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { + _PyStackRef pop2; + _PyStackRef pop1; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop2); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop1); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE: { + _PyStackRef old; + _PyStackRef value; + _PyStackRef new; + old = stack_pointer[-1]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + new = old; + value = PyStackRef_FromPyObjectNew(ptr); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE_BORROW: { + _PyStackRef old; + _PyStackRef value; + _PyStackRef new; + old = stack_pointer[-1]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + new = old; + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND0(); assert(PyStackRef_FunctionCheck(frame->f_funcobj)); diff --git a/Python/fileutils.c b/Python/fileutils.c index 
78603d40704..2a3f12d4e87 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2784,6 +2784,43 @@ error: return -1; } #else /* MS_WINDOWS */ + +// The Windows Games API family doesn't expose GetNamedPipeHandleStateW, so attempt +// to load it directly from Kernel32.dll. +#if !defined(MS_WINDOWS_APP) && !defined(MS_WINDOWS_SYSTEM) +BOOL +GetNamedPipeHandleStateW(HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount, + LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize) +{ + static int initialized = 0; + typedef BOOL(__stdcall* PGetNamedPipeHandleStateW) ( + HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount, + LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize); + static PGetNamedPipeHandleStateW _GetNamedPipeHandleStateW; + + if (initialized == 0) { + HMODULE api = LoadLibraryExW(L"Kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if (api) { + _GetNamedPipeHandleStateW = (PGetNamedPipeHandleStateW)GetProcAddress( + api, "GetNamedPipeHandleStateW"); + } + else { + _GetNamedPipeHandleStateW = NULL; + } + initialized = 1; + } + + if (!_GetNamedPipeHandleStateW) { + SetLastError(E_NOINTERFACE); + return FALSE; + } + + return _GetNamedPipeHandleStateW( + hNamedPipe, lpState, lpCurInstances, lpMaxCollectionCount, lpCollectDataTimeout, lpUserName, nMaxUserNameSize + ); +} +#endif /* !MS_WINDOWS_APP && !MS_WINDOWS_SYSTEM */ + int _Py_get_blocking(int fd) { diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 78ef02a911a..2adc8c84d83 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -299,26 +299,34 @@ basicblock_returns(const basicblock *b) { } static void -dump_basicblock(const basicblock *b) +dump_basicblock(const basicblock *b, bool highlight) { const char *b_return = basicblock_returns(b) ? "return " : ""; + if (highlight) { + fprintf(stderr, ">>> "); + } fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n", b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused, b->b_startdepth, b->b_predecessors, b_return); + int depth = b->b_startdepth; if (b->b_instr) { int i; for (i = 0; i < b->b_iused; i++) { - fprintf(stderr, " [%02d] ", i); + fprintf(stderr, " [%02d] depth: %d ", i, depth); dump_instr(b->b_instr + i); + + int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg); + int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg); + depth += (pushed - popped); } } } void -_PyCfgBuilder_DumpGraph(const basicblock *entryblock) +_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark) { for (const basicblock *b = entryblock; b != NULL; b = b->b_next) { - dump_basicblock(b); + dump_basicblock(b, b == mark); } } @@ -2862,8 +2870,11 @@ optimize_load_fast(cfg_builder *g) // how many inputs should be left on the stack.
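/*
 * Sketch (editor's annotation, not part of the patch): dump_basicblock above
 * now prints a running stack depth, accumulated per instruction as
 * depth += pushed - popped from the _PyOpcode_num_popped/_PyOpcode_num_pushed
 * tables.  A minimal standalone version of that bookkeeping over a made-up
 * instruction table (the table values are illustrative, not CPython's):
 */
#include <stddef.h>
#include <stdio.h>

typedef struct { const char *name; int popped; int pushed; } InstrSketch;

int main(void)
{
    InstrSketch block[] = {
        {"LOAD_FAST",  0, 1},
        {"LOAD_CONST", 0, 1},
        {"BINARY_OP",  2, 1},
        {"POP_TOP",    1, 0},
    };
    int depth = 0;   /* b->b_startdepth in the real dump */
    for (size_t i = 0; i < sizeof(block) / sizeof(block[0]); i++) {
        printf("  [%02zu] depth: %d %s\n", i, depth, block[i].name);
        depth += block[i].pushed - block[i].popped;
    }
    printf("exit depth: %d\n", depth);
    return 0;
}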
// Opcodes that consume no inputs + case FORMAT_SIMPLE: case GET_ANEXT: + case GET_ITER: case GET_LEN: + case GET_YIELD_FROM_ITER: case IMPORT_FROM: case MATCH_KEYS: case MATCH_MAPPING: @@ -2898,6 +2909,16 @@ optimize_load_fast(cfg_builder *g) break; } + case END_SEND: + case SET_FUNCTION_ATTRIBUTE: { + assert(_PyOpcode_num_popped(opcode, oparg) == 2); + assert(_PyOpcode_num_pushed(opcode, oparg) == 1); + ref tos = ref_stack_pop(&refs); + ref_stack_pop(&refs); + PUSH_REF(tos.instr, tos.local); + break; + } + // Opcodes that consume some inputs and push new values case CHECK_EXC_MATCH: { ref_stack_pop(&refs); @@ -2927,6 +2948,14 @@ optimize_load_fast(cfg_builder *g) break; } + case LOAD_SPECIAL: + case PUSH_EXC_INFO: { + ref tos = ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + PUSH_REF(tos.instr, tos.local); + break; + } + case SEND: { load_fast_push_block(&sp, instr->i_target, refs.size); ref_stack_pop(&refs); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 757e9cb3227..5aaa68c5b51 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1927,8 +1927,7 @@ get_process_mem_usage(void) } #elif __linux__ - // Linux, use smaps_rollup (Kernel >= 4.4) for RSS + Swap - FILE* fp = fopen("/proc/self/smaps_rollup", "r"); + FILE* fp = fopen("/proc/self/status", "r"); if (fp == NULL) { return -1; } @@ -1938,11 +1937,11 @@ get_process_mem_usage(void) long long swap_kb = -1; while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) { - if (rss_kb == -1 && strncmp(line_buffer, "Rss:", 4) == 0) { - sscanf(line_buffer + 4, "%lld", &rss_kb); + if (rss_kb == -1 && strncmp(line_buffer, "VmRSS:", 6) == 0) { + sscanf(line_buffer + 6, "%lld", &rss_kb); } - else if (swap_kb == -1 && strncmp(line_buffer, "Swap:", 5) == 0) { - sscanf(line_buffer + 5, "%lld", &swap_kb); + else if (swap_kb == -1 && strncmp(line_buffer, "VmSwap:", 7) == 0) { + sscanf(line_buffer + 7, "%lld", &swap_kb); } if (rss_kb != -1 && swap_kb != -1) { break; // Found both @@ -2063,7 +2062,7 @@ gc_should_collect_mem_usage(GCState *gcstate) // 70,000 new container objects. return true; } - Py_ssize_t last_mem = gcstate->last_mem; + Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed(&gcstate->last_mem); Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128); if ((mem - last_mem) > mem_threshold) { // The process memory usage has increased too much, do a collection. @@ -2246,7 +2245,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Store the current memory usage, can be smaller now if breaking cycles // freed some memory. - state->gcstate->last_mem = get_process_mem_usage(); + Py_ssize_t last_mem = get_process_mem_usage(); + _Py_atomic_store_ssize_relaxed(&state->gcstate->last_mem, last_mem); // Append objects with legacy finalizers to the "gc.garbage" list. 
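/*
 * Sketch (editor's annotation, not part of the patch): the gc_free_threading.c
 * hunk above switches get_process_mem_usage() from reading Rss:/Swap: out of
 * /proc/self/smaps_rollup to reading VmRSS:/VmSwap: out of /proc/self/status,
 * which is generally much cheaper since smaps_rollup has to aggregate every
 * mapping.  A standalone, Linux-only sketch of the same parse:
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
    FILE *fp = fopen("/proc/self/status", "r");
    if (fp == NULL) {
        return 1;
    }
    char line[256];
    long long rss_kb = -1, swap_kb = -1;
    while (fgets(line, sizeof(line), fp) != NULL) {
        if (rss_kb == -1 && strncmp(line, "VmRSS:", 6) == 0) {
            sscanf(line + 6, "%lld", &rss_kb);   /* value is reported in kB */
        }
        else if (swap_kb == -1 && strncmp(line, "VmSwap:", 7) == 0) {
            sscanf(line + 7, "%lld", &swap_kb);
        }
        if (rss_kb != -1 && swap_kb != -1) {
            break;   /* found both fields */
        }
    }
    fclose(fp);
    printf("rss=%lld kB swap=%lld kB\n", rss_kb, swap_kb);
    return 0;
}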
handle_legacy_finalizers(state); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7e8b05b747e..8f7932f0033 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -158,7 +158,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -168,7 +168,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -182,16 +182,16 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - JUMP_TO_LABEL(pop_2_error); - } - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; @@ -483,7 +483,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -493,7 +493,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -507,16 +507,16 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - JUMP_TO_LABEL(pop_2_error); - } - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; @@ -604,7 +604,7 @@ _PyStackRef container; _PyStackRef getitem; _PyStackRef sub; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 5 cache entries */ // _CHECK_PEP_523 { @@ -650,19 +650,20 @@ // _BINARY_OP_SUBSCR_INIT_CALL { sub = stack_pointer[-1]; - new_frame = 
_PyFrame_PushUnchecked(tstate, getitem, 2, frame); - new_frame->localsplus[0] = container; - new_frame->localsplus[1] = sub; + _PyInterpreterFrame* pushed_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); + pushed_frame->localsplus[0] = container; + pushed_frame->localsplus[1] = sub; frame->return_offset = 6 ; + new_frame = PyStackRef_Wrap(pushed_frame); } // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -693,7 +694,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -855,7 +856,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -905,7 +906,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str_st); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectImmortal(res_o); + res = PyStackRef_FromPyObjectBorrow(res_o); } stack_pointer[0] = res; stack_pointer += 1; @@ -933,7 +934,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1063,7 +1064,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1073,7 +1074,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1087,16 +1088,16 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); + res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res)) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (res_o == NULL) { - JUMP_TO_LABEL(pop_2_error); - } - res = 
PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; @@ -1708,8 +1709,8 @@ _PyStackRef init; _PyStackRef self; _PyStackRef *args; - _PyInterpreterFrame *init_frame; - _PyInterpreterFrame *new_frame; + _PyStackRef init_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -1792,17 +1793,17 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } - init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; tstate->py_recursion_remaining--; + init_frame = PyStackRef_Wrap(temp); } // _PUSH_FRAME { new_frame = init_frame; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -1828,7 +1829,7 @@ _PyStackRef null; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -1921,12 +1922,13 @@ args = &stack_pointer[-oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } + new_frame = PyStackRef_Wrap(pushed_frame); } // _SAVE_RETURN_OFFSET { @@ -1940,11 +1942,11 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -1970,7 +1972,7 @@ _PyStackRef null; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -2056,7 +2058,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -2070,9 +2072,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -2777,60 +2779,67 @@ next_instr += 4; INSTRUCTION_STATS(CALL_ISINSTANCE); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); + _PyStackRef null; _PyStackRef callable; - _PyStackRef self_or_null; - 
_PyStackRef *args; + _PyStackRef instance; + _PyStackRef cls; _PyStackRef res; /* Skip 1 cache entry */ /* Skip 2 cache entries */ - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; - _PyStackRef *arguments = args; - if (!PyStackRef_IsNull(self_or_null)) { - arguments--; - total_args++; - } - if (total_args != 2) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - PyInterpreterState *interp = tstate->interp; - if (callable_o != interp->callable_cache.isinstance) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - STAT_INC(CALL, hit); - _PyStackRef cls_stackref = arguments[1]; - _PyStackRef inst_stackref = arguments[0]; - _PyFrame_SetStackPointer(frame, stack_pointer); - int retval = PyObject_IsInstance(PyStackRef_AsPyObjectBorrow(inst_stackref), PyStackRef_AsPyObjectBorrow(cls_stackref)); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (retval < 0) { - JUMP_TO_LABEL(error); + // _GUARD_THIRD_NULL + { + null = stack_pointer[-3]; + if (!PyStackRef_IsNull(null)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } } - res = retval ? PyStackRef_True : PyStackRef_False; - assert((!PyStackRef_IsNull(res)) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = callable; - callable = res; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); + // _GUARD_CALLABLE_ISINSTANCE + { + callable = stack_pointer[-4]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + if (callable_o != interp->callable_cache.isinstance) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1 - oparg; + // _CALL_ISINSTANCE + { + cls = stack_pointer[-1]; + instance = stack_pointer[-2]; + STAT_INC(CALL, hit); + PyObject *inst_o = PyStackRef_AsPyObjectBorrow(instance); + PyObject *cls_o = PyStackRef_AsPyObjectBorrow(cls); + _PyFrame_SetStackPointer(frame, stack_pointer); + int retval = PyObject_IsInstance(inst_o, cls_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (retval < 0) { + JUMP_TO_LABEL(error); + } + (void)null; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(cls); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(instance); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + res = retval ? 
PyStackRef_True : PyStackRef_False; + assert((!PyStackRef_IsNull(res)) ^ (_PyErr_Occurred(tstate) != NULL)); + } + stack_pointer[0] = res; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); DISPATCH(); } @@ -3033,7 +3042,7 @@ _PyStackRef self_or_null; _PyStackRef *args; _PyStackRef kwnames; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -3120,7 +3129,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -3134,9 +3143,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -3297,7 +3306,7 @@ _PyStackRef self_or_null; _PyStackRef *args; _PyStackRef kwnames; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -3357,7 +3366,7 @@ if (temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -3371,9 +3380,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -3468,58 +3477,79 @@ INSTRUCTION_STATS(CALL_LIST_APPEND); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); _PyStackRef callable; + _PyStackRef nos; _PyStackRef self; _PyStackRef arg; /* Skip 1 cache entry */ /* Skip 2 cache entries */ - arg = stack_pointer[-1]; - self = stack_pointer[-2]; - callable = stack_pointer[-3]; - assert(oparg == 1); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); - PyInterpreterState *interp = tstate->interp; - if (callable_o != interp->callable_cache.list_append) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - if (self_o == NULL) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - if (!PyList_Check(self_o)) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - if (!LOCK_OBJECT(self_o)) { - UPDATE_MISS_STATS(CALL); - assert(_PyOpcode_Deopt[opcode] == (CALL)); - JUMP_TO_PREDICTED(CALL); - } - STAT_INC(CALL, hit); - int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - UNLOCK_OBJECT(self_o); - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(self); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(callable); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (err) { - JUMP_TO_LABEL(error); + // _GUARD_CALLABLE_LIST_APPEND + { + 
callable = stack_pointer[-3]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyInterpreterState *interp = tstate->interp; + if (callable_o != interp->callable_cache.list_append) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } } - #if TIER_ONE + // _GUARD_NOS_NOT_NULL + { + nos = stack_pointer[-2]; + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + if (o == NULL) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + } + // _GUARD_NOS_LIST + { + PyObject *o = PyStackRef_AsPyObjectBorrow(nos); + if (!PyList_CheckExact(o)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + } + // _CALL_LIST_APPEND + { + arg = stack_pointer[-1]; + self = nos; + assert(oparg == 1); + PyObject *self_o = PyStackRef_AsPyObjectBorrow(self); + if (!PyList_CheckExact(self_o)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + if (!LOCK_OBJECT(self_o)) { + UPDATE_MISS_STATS(CALL); + assert(_PyOpcode_Deopt[opcode] == (CALL)); + JUMP_TO_PREDICTED(CALL); + } + STAT_INC(CALL, hit); + int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(self); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (err) { + JUMP_TO_LABEL(error); + } + #if TIER_ONE - assert(next_instr->op.code == POP_TOP); - SKIP_OVER(1); - #endif + assert(next_instr->op.code == POP_TOP); + SKIP_OVER(1); + #endif + } DISPATCH(); } @@ -4135,7 +4165,7 @@ _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -4199,12 +4229,13 @@ args = &stack_pointer[-oparg]; int has_self = !PyStackRef_IsNull(self_or_null); STAT_INC(CALL, hit); - new_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); - _PyStackRef *first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, callable, oparg + has_self, frame); + _PyStackRef *first_non_self_local = pushed_frame->localsplus + has_self; + pushed_frame->localsplus[0] = self_or_null; for (int i = 0; i < oparg; i++) { first_non_self_local[i] = args[i]; } + new_frame = PyStackRef_Wrap(pushed_frame); } // _SAVE_RETURN_OFFSET { @@ -4218,11 +4249,11 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -4247,7 +4278,7 @@ _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -4306,7 +4337,7 @@ if 
(temp == NULL) { JUMP_TO_LABEL(error); } - new_frame = temp; + new_frame = PyStackRef_Wrap(temp); } // _SAVE_RETURN_OFFSET { @@ -4320,9 +4351,9 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -4862,7 +4893,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -4872,7 +4903,7 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!PyLong_CheckExact(left_o)) { + if (!_PyLong_CheckExactAndCompact(left_o)) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -4884,16 +4915,8 @@ right = value; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { - UPDATE_MISS_STATS(COMPARE_OP); - assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); - JUMP_TO_PREDICTED(COMPARE_OP); - } - if (!_PyLong_IsCompact((PyLongObject *)right_o)) { - UPDATE_MISS_STATS(COMPARE_OP); - assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); - JUMP_TO_PREDICTED(COMPARE_OP); - } + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && _PyLong_DigitCount((PyLongObject *)right_o) <= 1); @@ -5188,7 +5211,6 @@ _PyStackRef bottom; _PyStackRef top; bottom = stack_pointer[-1 - (oparg-1)]; - assert(oparg > 0); top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; @@ -5703,17 +5725,19 @@ _Py_CODEUNIT* const this_instr = next_instr - 2; (void)this_instr; _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; // _SPECIALIZE_FOR_ITER { - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_Specialize_ForIter(iter, next_instr, oparg); + _Py_Specialize_ForIter(iter, null_or_index, next_instr, oparg); stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH_SAME_OPARG(); } @@ -5723,30 +5747,20 @@ } // _FOR_ITER { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o == NULL) { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_LABEL(error); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, this_instr); - 
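The COMPARE_OP_INT hunk above folds what were four deopt exits into two: _PyLong_CheckExactAndCompact() checks the exact type and the compact single-digit representation in one guard, so the specialized body can downgrade its own checks to asserts. A standalone toy of that combined predicate (field names invented; only the shape of the check is real):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model: a single guard that implies both properties the fast
       path needs.  "ndigits <= 1" stands in for the compact int
       representation; the real predicate is _PyLong_CheckExactAndCompact. */
    typedef struct { bool exact_int; int ndigits; } toy_value;

    static bool check_exact_and_compact(const toy_value *v)
    {
        return v->exact_int && v->ndigits <= 1;
    }

    int main(void)
    {
        toy_value small = { true, 1 };
        toy_value big = { true, 3 };
        assert(check_exact_and_compact(&small));   /* body may just assert */
        assert(!check_exact_and_compact(&big));    /* one exit: deoptimize */
        printf("ok\n");
        return 0;
    }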
_PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + JUMP_TO_LABEL(error); } - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } - next = PyStackRef_FromPyObjectSteal(next_o); + next = item; } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -5765,8 +5779,8 @@ INSTRUCTION_STATS(FOR_ITER_GEN); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); _PyStackRef iter; - _PyInterpreterFrame *gen_frame; - _PyInterpreterFrame *new_frame; + _PyStackRef gen_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -5778,7 +5792,7 @@ } // _FOR_ITER_GEN_FRAME { - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(gen) != &PyGen_Type) { UPDATE_MISS_STATS(FOR_ITER); @@ -5798,21 +5812,22 @@ JUMP_TO_PREDICTED(FOR_ITER); } STAT_INC(FOR_ITER, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_None); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - gen_frame->previous = frame; + pushed_frame->previous = frame; frame->return_offset = (uint16_t)( 2 + oparg); + gen_frame = PyStackRef_Wrap(pushed_frame); } // _PUSH_FRAME { new_frame = gen_frame; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -5835,26 +5850,22 @@ INSTRUCTION_STATS(FOR_ITER_LIST); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; /* Skip 1 cache entry */ // _ITER_CHECK_LIST { - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyListIter_Type) { + if (Py_TYPE(iter_o) != &PyList_Type) { UPDATE_MISS_STATS(FOR_ITER); assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); JUMP_TO_PREDICTED(FOR_ITER); } + assert(PyStackRef_IsTaggedInt(null_or_index)); #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UPDATE_MISS_STATS(FOR_ITER); - assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); - JUMP_TO_PREDICTED(FOR_ITER); - } - _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq)) { + if (!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)) { UPDATE_MISS_STATS(FOR_ITER); assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); JUMP_TO_PREDICTED(FOR_ITER); @@ -5863,42 +5874,30 @@ } // _ITER_JUMP_LIST { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - assert(Py_TYPE(iter_o) == &PyListIter_Type); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - (void)iter_o; + #else - _PyListIterObject *it = 
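FOR_ITER and its list/tuple specializations now drive a "virtual iterator": the sequence stays on the stack with a tagged integer index beside it, _PyForIter_VirtualIteratorNext advances the index, and a tagged -1 marks exhaustion before jumping past the loop body. A self-contained toy of that index protocol (plain C, no CPython API):

    #include <stdio.h>

    /* Toy of index-based list iteration: the sequence stays put while a
       side-slot index advances; -1 marks exhaustion, mirroring
       PyStackRef_TagInt(-1) in the diff. */
    static int list_virtual_next(const int *items, int size, int *index, int *out)
    {
        if (*index < 0 || *index >= size) {
            *index = -1;     /* exhausted: leave the marker behind   */
            return 0;        /* caller jumps past the loop body      */
        }
        *out = items[(*index)++];
        return 1;
    }

    int main(void)
    {
        int data[] = { 10, 20, 30 }, idx = 0, v;
        while (list_virtual_next(data, 3, &idx, &v)) {
            printf("%d\n", v);
        }
        printf("index after loop: %d\n", idx);   /* -1 */
        return 0;
    }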
(_PyListIterObject *)iter_o; + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(list_o) == &PyList_Type); STAT_INC(FOR_ITER, hit); - PyListObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { - it->it_index = -1; - if (seq != NULL) { - it->it_seq = NULL; - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_DECREF(seq); - stack_pointer = _PyFrame_GetStackPointer(frame); - } + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { + null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } #endif } // _ITER_NEXT_LIST { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); - PyListObject *seq = it->it_seq; - assert(seq); + PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter); + assert(PyList_CheckExact(list_o)); #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + assert(_Py_IsOwnedByCurrentThread(list_o) || + _PyObject_GC_IS_SHARED(list_o)); STAT_INC(FOR_ITER, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + int result = _PyList_GetItemRefNoLock((PyListObject *)list_o, PyStackRef_UntagInt(null_or_index), &next); stack_pointer = _PyFrame_GetStackPointer(frame); if (result < 0) { UPDATE_MISS_STATS(FOR_ITER); @@ -5906,16 +5905,17 @@ JUMP_TO_PREDICTED(FOR_ITER); } if (result == 0) { - it->it_index = -1; + null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } - it->it_index++; #else - assert(it->it_index < PyList_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(list_o, PyStackRef_UntagInt(null_or_index))); #endif + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -5938,7 +5938,7 @@ /* Skip 1 cache entry */ // _ITER_CHECK_RANGE { - iter = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); if (Py_TYPE(r) != &PyRangeIter_Type) { UPDATE_MISS_STATS(FOR_ITER); @@ -6001,63 +6001,44 @@ INSTRUCTION_STATS(FOR_ITER_TUPLE); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; /* Skip 1 cache entry */ // _ITER_CHECK_TUPLE { - iter = stack_pointer[-1]; + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - if (Py_TYPE(iter_o) != &PyTupleIter_Type) { + if (Py_TYPE(iter_o) != &PyTuple_Type) { UPDATE_MISS_STATS(FOR_ITER); assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); JUMP_TO_PREDICTED(FOR_ITER); } - #ifdef Py_GIL_DISABLED - if (!_PyObject_IsUniquelyReferenced(iter_o)) { - UPDATE_MISS_STATS(FOR_ITER); - assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); - JUMP_TO_PREDICTED(FOR_ITER); - } - #endif + assert(PyStackRef_IsTaggedInt(null_or_index)); } // _ITER_JUMP_TUPLE { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - (void)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - 
_PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + (void)tuple_o; + assert(Py_TYPE(tuple_o) == &PyTuple_Type); STAT_INC(FOR_ITER, hit); - PyTupleObject *seq = it->it_seq; - if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) { - #ifndef Py_GIL_DISABLED - if (seq != NULL) { - it->it_seq = NULL; - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_DECREF(seq); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - #endif - + if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { + null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } } // _ITER_NEXT_TUPLE { - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - PyTupleObject *seq = it->it_seq; - #ifdef Py_GIL_DISABLED - assert(_PyObject_IsUniquelyReferenced(iter_o)); - #endif - assert(seq); - assert(it->it_index < PyTuple_GET_SIZE(seq)); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + PyObject *tuple_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(tuple_o) == &PyTuple_Type); + uintptr_t i = PyStackRef_UntagInt(null_or_index); + assert((size_t)i < (size_t)PyTuple_GET_SIZE(tuple_o)); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(tuple_o, i)); + null_or_index = PyStackRef_IncrementTaggedIntNoOverflow(null_or_index); } + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -6188,25 +6169,37 @@ INSTRUCTION_STATS(GET_ITER); _PyStackRef iterable; _PyStackRef iter; + _PyStackRef index_or_null; iterable = stack_pointer[-1]; #ifdef Py_STATS _PyFrame_SetStackPointer(frame, stack_pointer); _Py_GatherStats_GetIter(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); #endif - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(iterable); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (iter_o == NULL) { - JUMP_TO_LABEL(error); + + PyTypeObject *tp = PyStackRef_TYPE(iterable); + if (tp == &PyTuple_Type || tp == &PyList_Type) { + iter = iterable; + index_or_null = PyStackRef_TagInt(0); } - iter = PyStackRef_FromPyObjectSteal(iter_o); - stack_pointer[0] = iter; + else { + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(iterable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (iter_o == NULL) { + JUMP_TO_LABEL(error); + } + iter = PyStackRef_FromPyObjectSteal(iter_o); + index_or_null = PyStackRef_NULL; + stack_pointer += 1; + } + stack_pointer[-1] = iter; + stack_pointer[0] = index_or_null; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); DISPATCH(); @@ -6971,7 +6964,7 @@ _PyStackRef receiver; _PyStackRef value; value = stack_pointer[-1]; - receiver = stack_pointer[-2]; + receiver = stack_pointer[-3]; if (PyStackRef_GenCheck(receiver)) { _PyFrame_SetStackPointer(frame, stack_pointer); int err = monitor_stop_iteration(tstate, frame, this_instr, 
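The GET_ITER rewrite above is what makes the virtual iteration possible: for exact lists and tuples it leaves the sequence itself on the stack and pushes tagged index 0; for everything else it pushes a real iterator plus a NULL side slot, which is why POP_ITER and the instrumented variants now consume two stack slots. A toy of the two layouts (structs invented; -1 models the NULL slot):

    #include <stdio.h>

    /* Toy of GET_ITER's two stack layouts after this change. */
    typedef enum { SEQ_LIST, SEQ_TUPLE, SEQ_OTHER } seq_kind;
    typedef struct {
        void *iter_or_seq;    /* stack_pointer[-2] */
        long  index_or_null;  /* stack_pointer[-1] */
    } iter_pair;

    static iter_pair get_iter(seq_kind kind, void *obj)
    {
        if (kind == SEQ_LIST || kind == SEQ_TUPLE) {
            return (iter_pair){ obj, 0 };   /* virtual: sequence + index 0 */
        }
        return (iter_pair){ obj, -1 };      /* real iterator + NULL marker */
    }

    int main(void)
    {
        int dummy = 0;
        iter_pair p = get_iter(SEQ_LIST, &dummy);
        printf("virtual? %s\n", p.index_or_null >= 0 ? "yes" : "no");
        return 0;
    }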
PyStackRef_AsPyObjectBorrow(value)); @@ -7033,35 +7026,25 @@ next_instr += 2; INSTRUCTION_STATS(INSTRUMENTED_FOR_ITER); _PyStackRef iter; + _PyStackRef null_or_index; _PyStackRef next; /* Skip 1 cache entry */ - iter = stack_pointer[-1]; - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + null_or_index = stack_pointer[-1]; + iter = stack_pointer[-2]; _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); stack_pointer = _PyFrame_GetStackPointer(frame); - if (next_o != NULL) { - next = PyStackRef_FromPyObjectSteal(next_o); - INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); - } - else { - if (_PyErr_Occurred(tstate)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (!matches) { - JUMP_TO_LABEL(error); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyEval_MonitorRaise(tstate, frame, this_instr); - _PyErr_Clear(tstate); - stack_pointer = _PyFrame_GetStackPointer(frame); + if (!PyStackRef_IsValid(item)) { + if (PyStackRef_IsError(item)) { + JUMP_TO_LABEL(error); } - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); JUMPBY(oparg + 1); + stack_pointer[-1] = null_or_index; DISPATCH(); } + next = item; + INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); + stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -7328,9 +7311,12 @@ next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_POP_ITER); _PyStackRef iter; - iter = stack_pointer[-1]; + _PyStackRef index_or_null; + index_or_null = stack_pointer[-1]; + iter = stack_pointer[-2]; + (void)index_or_null; INSTRUMENTED_JUMP(prev_instr, this_instr+1, PY_MONITORING_EVENT_BRANCH_RIGHT); - stack_pointer += -1; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iter); @@ -8659,7 +8645,7 @@ INSTRUCTION_STATS(LOAD_ATTR_PROPERTY); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; - _PyInterpreterFrame *new_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -8710,8 +8696,9 @@ JUMP_TO_PREDICTED(LOAD_ATTR); } STAT_INC(LOAD_ATTR, hit); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); - new_frame->localsplus[0] = owner; + _PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(fget), 1, frame); + pushed_frame->localsplus[0] = owner; + new_frame = PyStackRef_Wrap(pushed_frame); } // _SAVE_RETURN_OFFSET { @@ -8725,11 +8712,11 @@ // _PUSH_FRAME { assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -8974,63 +8961,9 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(LOAD_CONST); - PREDICTED_LOAD_CONST:; - _Py_CODEUNIT* const this_instr = next_instr - 
1; - (void)this_instr; _PyStackRef value; PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); - #if ENABLE_SPECIALIZATION_FT - #ifdef Py_GIL_DISABLED - uint8_t expected = LOAD_CONST; - if (!_Py_atomic_compare_exchange_uint8( - &this_instr->op.code, &expected, - _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL)) { - assert(expected >= MIN_INSTRUMENTED_OPCODE); - } - #else - if (this_instr->op.code == LOAD_CONST) { - this_instr->op.code = _Py_IsImmortal(obj) ? LOAD_CONST_IMMORTAL : LOAD_CONST_MORTAL; - } - #endif - #endif - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - DISPATCH(); - } - - TARGET(LOAD_CONST_IMMORTAL) { - #if Py_TAIL_CALL_INTERP - int opcode = LOAD_CONST_IMMORTAL; - (void)(opcode); - #endif - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(LOAD_CONST_IMMORTAL); - static_assert(0 == 0, "incorrect cache size"); - _PyStackRef value; - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - assert(_Py_IsImmortal(obj)); - value = PyStackRef_FromPyObjectImmortal(obj); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - DISPATCH(); - } - - TARGET(LOAD_CONST_MORTAL) { - #if Py_TAIL_CALL_INTERP - int opcode = LOAD_CONST_MORTAL; - (void)(opcode); - #endif - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(LOAD_CONST_MORTAL); - static_assert(0 == 0, "incorrect cache size"); - _PyStackRef value; - PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNewMortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -9565,7 +9498,7 @@ _PyStackRef value; assert(oparg < _PY_NSMALLPOSINTS); PyObject *obj = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + oparg]; - value = PyStackRef_FromPyObjectImmortal(obj); + value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -10148,12 +10081,15 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(POP_ITER); - _PyStackRef value; - value = stack_pointer[-1]; - stack_pointer += -1; + _PyStackRef iter; + _PyStackRef index_or_null; + index_or_null = stack_pointer[-1]; + iter = stack_pointer[-2]; + (void)index_or_null; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(value); + PyStackRef_CLOSE(iter); stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH(); } @@ -10301,7 +10237,7 @@ stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(value); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); DISPATCH(); } @@ -10721,8 +10657,8 @@ static_assert(INLINE_CACHE_ENTRIES_SEND == 1, "incorrect cache size"); _PyStackRef receiver; _PyStackRef v; - _PyInterpreterFrame *gen_frame; - _PyInterpreterFrame *new_frame; + _PyStackRef gen_frame; + _PyStackRef new_frame; /* Skip 1 cache entry */ // _CHECK_PEP_523 { @@ -10748,24 +10684,25 @@ JUMP_TO_PREDICTED(SEND); } STAT_INC(SEND, hit); - gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); + _PyInterpreterFrame *pushed_frame = &gen->gi_iframe; + _PyFrame_StackPush(pushed_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; assert( 2 + oparg <= UINT16_MAX); 
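The LOAD_CONST hunks above collapse the LOAD_CONST_MORTAL/LOAD_CONST_IMMORTAL split: since co_consts (and the immortal small-int cache behind LOAD_SMALL_INT) keep their objects alive for the lifetime of the frame, the stack slot can hold a borrowed reference and skip refcount traffic entirely. A toy contrast of owned versus borrowed loads (refcounts modeled as plain ints):

    #include <stdio.h>

    /* Toy: a borrowed load skips the incref/decref pair because the
       owner outlives the stack slot.  Nothing here is CPython API. */
    typedef struct { int refcnt; const char *name; } toy_obj;

    static toy_obj *load_owned(toy_obj *o)    { o->refcnt++; return o; }
    static toy_obj *load_borrowed(toy_obj *o) { return o; }  /* no traffic */

    int main(void)
    {
        toy_obj c = { 1, "co_consts[0]" };
        toy_obj *a = load_owned(&c);      /* its pop must decref */
        toy_obj *b = load_borrowed(&c);   /* its pop is free     */
        printf("%s rc=%d (%p, %p)\n", c.name, c.refcnt, (void *)a, (void *)b);
        return 0;
    }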
frame->return_offset = (uint16_t)( 2 + oparg); - gen_frame->previous = frame; + pushed_frame->previous = frame; + gen_frame = PyStackRef_Wrap(pushed_frame); } // _PUSH_FRAME { new_frame = gen_frame; assert(tstate->interp->eval_frame == NULL); - _PyInterpreterFrame *temp = new_frame; + _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - assert(new_frame->previous == frame || new_frame->previous->previous == frame); + assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = temp; tstate->py_recursion_remaining--; @@ -11212,10 +11149,6 @@ INSTRUCTION_STATS(STORE_FAST); _PyStackRef value; value = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value) - ); _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; @@ -11237,10 +11170,6 @@ _PyStackRef value1; _PyStackRef value2; value1 = stack_pointer[-1]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -11265,14 +11194,6 @@ _PyStackRef value1; value1 = stack_pointer[-1]; value2 = stack_pointer[-2]; - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value1) - ); - assert( - ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || - PyStackRef_IsHeapSafe(value2) - ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); @@ -11552,7 +11473,7 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!PyLong_CheckExact(value_o)) { + if (!_PyLong_CheckExactAndCompact(value_o)) { UPDATE_MISS_STATS(STORE_SUBSCR); assert(_PyOpcode_Deopt[opcode] == (STORE_SUBSCR)); JUMP_TO_PREDICTED(STORE_SUBSCR); @@ -11629,7 +11550,6 @@ _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); stack_pointer[-2 - (oparg-2)] = bottom; stack_pointer[-1] = top; DISPATCH(); diff --git a/Python/getversion.c b/Python/getversion.c index 226b2f999a6..8d8bc6ea700 100644 --- a/Python/getversion.c +++ b/Python/getversion.c @@ -15,7 +15,7 @@ void _Py_InitVersion(void) } initialized = 1; #ifdef Py_GIL_DISABLED - const char *buildinfo_format = "%.80s experimental free-threading build (%.80s) %.80s"; + const char *buildinfo_format = "%.80s free-threading build (%.80s) %.80s"; #else const char *buildinfo_format = "%.80s (%.80s) %.80s"; #endif diff --git a/Python/hamt.c b/Python/hamt.c index f9bbf63961d..906149cc6cd 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } if (key_or_null == NULL) { - if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) { goto error; } @@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } @@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } diff --git a/Python/import.c b/Python/import.c index 
afdc28eda31..73b94d0dd2a 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3369,11 +3369,11 @@ PyObject * PyImport_GetImporter(PyObject *path) { PyThreadState *tstate = _PyThreadState_GET(); - PyObject *path_importer_cache = _PySys_GetRequiredAttrString("path_importer_cache"); + PyObject *path_importer_cache = PySys_GetAttrString("path_importer_cache"); if (path_importer_cache == NULL) { return NULL; } - PyObject *path_hooks = _PySys_GetRequiredAttrString("path_hooks"); + PyObject *path_hooks = PySys_GetAttrString("path_hooks"); if (path_hooks == NULL) { Py_DECREF(path_importer_cache); return NULL; @@ -3434,8 +3434,10 @@ PyImport_ImportModule(const char *name) * ImportError instead of blocking. * * Returns the module object with incremented ref count. + * + * Removed in 3.15, but kept for stable ABI compatibility. */ -PyObject * +PyAPI_FUNC(PyObject *) PyImport_ImportModuleNoBlock(const char *name) { if (PyErr_WarnEx(PyExc_DeprecationWarning, @@ -3680,14 +3682,14 @@ import_find_and_load(PyThreadState *tstate, PyObject *abs_name) PyTime_t t1 = 0, accumulated_copy = accumulated; PyObject *sys_path, *sys_meta_path, *sys_path_hooks; - if (_PySys_GetOptionalAttrString("path", &sys_path) < 0) { + if (PySys_GetOptionalAttrString("path", &sys_path) < 0) { return NULL; } - if (_PySys_GetOptionalAttrString("meta_path", &sys_meta_path) < 0) { + if (PySys_GetOptionalAttrString("meta_path", &sys_meta_path) < 0) { Py_XDECREF(sys_path); return NULL; } - if (_PySys_GetOptionalAttrString("path_hooks", &sys_path_hooks) < 0) { + if (PySys_GetOptionalAttrString("path_hooks", &sys_path_hooks) < 0) { Py_XDECREF(sys_meta_path); Py_XDECREF(sys_path); return NULL; @@ -3852,15 +3854,17 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, } final_mod = import_get_module(tstate, to_return); - Py_DECREF(to_return); if (final_mod == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_Format(tstate, PyExc_KeyError, "%R not in sys.modules as expected", to_return); } + Py_DECREF(to_return); goto error; } + + Py_DECREF(to_return); } } else { @@ -3956,23 +3960,28 @@ PyImport_Import(PyObject *module_name) } /* Get the builtins from current globals */ - globals = PyEval_GetGlobals(); + globals = PyEval_GetGlobals(); // borrowed if (globals != NULL) { Py_INCREF(globals); + // XXX Use _PyEval_EnsureBuiltins()? builtins = PyObject_GetItem(globals, &_Py_ID(__builtins__)); - if (builtins == NULL) + if (builtins == NULL) { + // XXX Fall back to interp->builtins or sys.modules['builtins']? 
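One subtle fix above: PyImport_ImportModuleLevelObject used to Py_DECREF(to_return) before formatting it into the KeyError message, reading a possibly freed object; the decref now happens on each path only after the last use. A toy of that ordering hazard, with malloc/free standing in for refcounting (everything here is illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Toy of the ordering fix: release the name only after it has been
       used to build the error message. */
    typedef struct { char *s; } name_ref;

    static void report_missing(name_ref *n)
    {
        fprintf(stderr, "%s not in sys.modules as expected\n", n->s);
        free(n->s);            /* fixed: released after the last use */
        n->s = NULL;
    }

    int main(void)
    {
        name_ref n;
        n.s = malloc(8);
        if (n.s == NULL) {
            return 1;
        }
        strcpy(n.s, "pkg.mod");
        report_missing(&n);    /* freeing first would read dangling memory */
        return 0;
    }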
goto err; + } + } + else if (_PyErr_Occurred(tstate)) { + goto err; } else { /* No globals -- use standard builtins, and fake globals */ - builtins = PyImport_ImportModuleLevel("builtins", - NULL, NULL, NULL, 0); - if (builtins == NULL) { + globals = PyDict_New(); + if (globals == NULL) { goto err; } - globals = Py_BuildValue("{OO}", &_Py_ID(__builtins__), builtins); - if (globals == NULL) + if (_PyEval_EnsureBuiltinsWithModule(tstate, globals, &builtins) < 0) { goto err; + } } /* Get the __import__ function from the builtins */ @@ -4123,7 +4132,7 @@ _PyImport_FiniCore(PyInterpreterState *interp) static int init_zipimport(PyThreadState *tstate, int verbose) { - PyObject *path_hooks = _PySys_GetRequiredAttrString("path_hooks"); + PyObject *path_hooks = PySys_GetAttrString("path_hooks"); if (path_hooks == NULL) { return -1; } diff --git a/Python/index_pool.c b/Python/index_pool.c index 007c81a0fc1..520a65938ec 100644 --- a/Python/index_pool.c +++ b/Python/index_pool.c @@ -172,6 +172,9 @@ _PyIndexPool_AllocIndex(_PyIndexPool *pool) else { index = heap_pop(free_indices); } + + pool->tlbc_generation++; + UNLOCK_POOL(pool); return index; } @@ -180,6 +183,7 @@ void _PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index) { LOCK_POOL(pool); + pool->tlbc_generation++; heap_add(&pool->free_indices, index); UNLOCK_POOL(pool); } diff --git a/Python/initconfig.c b/Python/initconfig.c index e8270911721..71d7cfed5c4 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -2962,8 +2962,6 @@ config_parse_cmdline(PyConfig *config, PyWideStringList *warnoptions, /* option handled by _PyPreCmdline_Read() */ break; - /* case 'J': reserved for Jython */ - case 'O': config->optimization_level++; break; @@ -3649,7 +3647,7 @@ _Py_DumpPathConfig(PyThreadState *tstate) #define DUMP_SYS(NAME) \ do { \ PySys_FormatStderr(" sys.%s = ", #NAME); \ - if (_PySys_GetOptionalAttrString(#NAME, &obj) < 0) { \ + if (PySys_GetOptionalAttrString(#NAME, &obj) < 0) { \ PyErr_Clear(); \ } \ if (obj != NULL) { \ @@ -3673,7 +3671,7 @@ _Py_DumpPathConfig(PyThreadState *tstate) #undef DUMP_SYS PyObject *sys_path; - (void) _PySys_GetOptionalAttrString("path", &sys_path); + (void) PySys_GetOptionalAttrString("path", &sys_path); if (sys_path != NULL && PyList_Check(sys_path)) { PySys_WriteStderr(" sys.path = [\n"); Py_ssize_t len = PyList_GET_SIZE(sys_path); @@ -4296,7 +4294,7 @@ _PyConfig_CreateXOptionsDict(const PyConfig *config) static int config_get_sys_write_bytecode(const PyConfig *config, int *value) { - PyObject *attr = _PySys_GetRequiredAttrString("dont_write_bytecode"); + PyObject *attr = PySys_GetAttrString("dont_write_bytecode"); if (attr == NULL) { return -1; } @@ -4317,7 +4315,7 @@ config_get(const PyConfig *config, const PyConfigSpec *spec, { if (use_sys) { if (spec->sys.attr != NULL) { - return _PySys_GetRequiredAttrString(spec->sys.attr); + return PySys_GetAttrString(spec->sys.attr); } if (strcmp(spec->name, "write_bytecode") == 0) { diff --git a/Python/intrinsics.c b/Python/intrinsics.c index ff44ba0ee64..8ea920e690c 100644 --- a/Python/intrinsics.c +++ b/Python/intrinsics.c @@ -9,7 +9,6 @@ #include "pycore_intrinsics.h" // INTRINSIC_PRINT #include "pycore_pyerrors.h" // _PyErr_SetString() #include "pycore_runtime.h" // _Py_ID() -#include "pycore_sysmodule.h" // _PySys_GetRequiredAttr() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_typevarobject.h" // _Py_make_typevar() #include "pycore_unicodeobject.h" // _PyUnicode_FromASCII() @@ -27,7 +26,7 @@ no_intrinsic1(PyThreadState* tstate, PyObject 
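The index_pool.c change above bumps a tlbc_generation counter under the pool lock on both allocation and free, giving readers a cheap staleness check for anything they cached about index assignment. A toy of the pattern (locking elided; field names follow the diff, the rest is invented):

    #include <stdio.h>

    /* Toy generation counter: every mutation bumps it, so a reader can
       cheaply detect that its cached snapshot is stale. */
    typedef struct { unsigned generation; int next_index; } toy_pool;

    static int pool_alloc(toy_pool *p) { p->generation++; return p->next_index++; }
    static void pool_free(toy_pool *p, int idx) { (void)idx; p->generation++; }

    int main(void)
    {
        toy_pool pool = { 0, 0 };
        unsigned seen = pool.generation;     /* reader caches a snapshot */
        int i = pool_alloc(&pool);
        if (pool.generation != seen) {
            printf("snapshot stale after alloc of %d\n", i);
        }
        pool_free(&pool, i);
        return 0;
    }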
*unused) static PyObject * print_expr(PyThreadState* Py_UNUSED(ignored), PyObject *value) { - PyObject *hook = _PySys_GetRequiredAttr(&_Py_ID(displayhook)); + PyObject *hook = PySys_GetAttr(&_Py_ID(displayhook)); if (hook == NULL) { return NULL; } diff --git a/Python/lock.c b/Python/lock.c index 28a12ad1835..ea6ac00bfec 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -58,7 +58,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_ACQUIRED; } } - else if (timeout == 0) { + if (timeout == 0) { return PY_LOCK_FAILURE; } @@ -119,6 +119,9 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_INTR; } } + else if (ret == Py_PARK_INTR && (flags & _PY_FAIL_IF_INTERRUPTED)) { + return PY_LOCK_INTR; + } else if (ret == Py_PARK_TIMEOUT) { assert(timeout >= 0); return PY_LOCK_FAILURE; diff --git a/Python/marshal.c b/Python/marshal.c index b39c1a5b1ad..15dd25d6268 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -38,7 +38,7 @@ module marshal * On Windows PGO builds, the r_object function overallocates its stack and * can cause a stack overflow. We reduce the maximum depth for all Windows * releases to protect against this. - * #if defined(MS_WINDOWS) && defined(_DEBUG) + * #if defined(MS_WINDOWS) && defined(Py_DEBUG) */ #if defined(MS_WINDOWS) # define MAX_MARSHAL_STACK_DEPTH 1000 @@ -1656,6 +1656,9 @@ r_object(RFILE *p) case TYPE_SLICE: { Py_ssize_t idx = r_ref_reserve(flag, p); + if (idx < 0) { + break; + } PyObject *stop = NULL; PyObject *step = NULL; PyObject *start = r_object(p); diff --git a/Python/modsupport.c b/Python/modsupport.c index 2caf595949d..437ad412027 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -669,5 +669,5 @@ Py_PACK_FULL_VERSION(int x, int y, int z, int level, int serial) uint32_t Py_PACK_VERSION(int x, int y) { - return Py_PACK_FULL_VERSION(x, y, 0, 0, 0); + return _Py_PACK_VERSION(x, y); } diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 8af445d7d6a..1d6dcddab4b 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -190,8 +190,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_PROPERTY, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_CONST_IMMORTAL, - &&TARGET_LOAD_CONST_MORTAL, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_SUPER_ATTR_ATTR, @@ -234,6 +232,8 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, &&TARGET_INSTRUMENTED_END_FOR, &&TARGET_INSTRUMENTED_POP_ITER, &&TARGET_INSTRUMENTED_END_SEND, @@ -410,8 +410,6 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_ATTR_WITH_HINT(TAIL_CALL_PA Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_BUILD_CLASS(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_COMMON_CONSTANT(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST(TAIL_CALL_PARAMS); -Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_IMMORTAL(TAIL_CALL_PARAMS); -Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_MORTAL(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS); @@ -649,8 +647,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [LOAD_BUILD_CLASS] = _TAIL_CALL_LOAD_BUILD_CLASS, [LOAD_COMMON_CONSTANT] = 
_TAIL_CALL_LOAD_COMMON_CONSTANT, [LOAD_CONST] = _TAIL_CALL_LOAD_CONST, - [LOAD_CONST_IMMORTAL] = _TAIL_CALL_LOAD_CONST_IMMORTAL, - [LOAD_CONST_MORTAL] = _TAIL_CALL_LOAD_CONST_MORTAL, [LOAD_DEREF] = _TAIL_CALL_LOAD_DEREF, [LOAD_FAST] = _TAIL_CALL_LOAD_FAST, [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR, @@ -740,6 +736,8 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [125] = _TAIL_CALL_UNKNOWN_OPCODE, [126] = _TAIL_CALL_UNKNOWN_OPCODE, [127] = _TAIL_CALL_UNKNOWN_OPCODE, + [210] = _TAIL_CALL_UNKNOWN_OPCODE, + [211] = _TAIL_CALL_UNKNOWN_OPCODE, [212] = _TAIL_CALL_UNKNOWN_OPCODE, [213] = _TAIL_CALL_UNKNOWN_OPCODE, [214] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Python/optimizer.c b/Python/optimizer.c index dde3dd8ebe7..8d01d605ef4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1292,8 +1292,8 @@ uop_optimize( for (int pc = 0; pc < length; pc++) { int opcode = buffer[pc].opcode; int oparg = buffer[pc].oparg; - if (oparg < _PyUop_Replication[opcode]) { - buffer[pc].opcode = opcode + oparg + 1; + if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) { + buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start; assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0); } else if (is_terminator(&buffer[pc])) { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b0bd1e9518..fab6fef5ccd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -26,6 +26,8 @@ #include "pycore_function.h" #include "pycore_uop_ids.h" #include "pycore_range.h" +#include "pycore_unicodeobject.h" +#include "pycore_ceval.h" #include <stdarg.h> #include <stdbool.h> @@ -103,6 +105,10 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop) if ((int)index >= dict->ma_keys->dk_nentries) { return NULL; } + PyDictKeysObject *keys = dict->ma_keys; + if (keys->dk_version != inst->operand0) { + return NULL; + } PyObject *res = entries[index].me_value; if (res == NULL) { return NULL; @@ -317,7 +323,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_const _Py_uop_sym_is_safe_const #define sym_get_const _Py_uop_sym_get_const +#define sym_new_const_steal _Py_uop_sym_new_const_steal +#define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type @@ -333,6 +342,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) +#define sym_set_compact_int(SYM) _Py_uop_sym_set_compact_int(ctx, SYM) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness #define frame_new _Py_uop_frame_new @@ -340,15 +350,19 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal 
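In the optimizer.c hunk above, _PyUop_Replication[opcode] grows from a single count into a {start, stop} range, so only opargs inside the window map onto the specialized replicas laid out after the generic uop. A runnable toy of the selection arithmetic (struct and table invented for illustration):

    #include <assert.h>
    #include <stdio.h>

    /* Toy of replicated-uop selection: opargs in [start, stop) pick an
       oparg-specific copy placed right after the generic opcode. */
    typedef struct { int start, stop; } replication_range;

    static int specialize(int opcode, int oparg, replication_range r)
    {
        if (oparg >= r.start && oparg < r.stop) {
            return opcode + oparg + 1 - r.start;   /* specialized copy */
        }
        return opcode;                             /* generic form     */
    }

    int main(void)
    {
        replication_range r = { 1, 4 };
        assert(specialize(100, 0, r) == 100);   /* below range: generic */
        assert(specialize(100, 1, r) == 101);   /* first replica        */
        assert(specialize(100, 3, r) == 103);
        assert(specialize(100, 4, r) == 100);   /* past range: generic  */
        printf("ok\n");
        return 0;
    }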
_Py_uop_symbol_is_immortal +#define sym_is_compact_int _Py_uop_sym_is_compact_int +#define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness +#define JUMP_TO_LABEL(label) goto label; + static int optimize_to_bool( _PyUOpInstruction *this_instr, JitOptContext *ctx, - JitOptSymbol *value, - JitOptSymbol **result_ptr) + JitOptRef value, + JitOptRef *result_ptr) { if (sym_matches_type(value, &PyBool_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -375,6 +389,23 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) } } +static JitOptRef +lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr, + PyTypeObject *type, PyObject *name, uint16_t immortal, + uint16_t mortal) +{ + // The cached value may be dead, so we need to do the lookup again... :( + if (type && PyType_Check(type)) { + PyObject *lookup = _PyType_Lookup(type, name); + if (lookup) { + int opcode = _Py_IsImmortal(lookup) ? immortal : mortal; + REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup); + return sym_new_const(ctx, lookup); + } + } + return sym_new_not_null(ctx); +} + /* _PUSH_FRAME/_RETURN_VALUE's operand can be 0, a PyFunctionObject *, or a * PyCodeObject *. Retrieve the code object if possible. */ @@ -423,6 +454,13 @@ get_code_with_logging(_PyUOpInstruction *op) return co; } +// TODO (gh-134584) generate most of this table automatically +const uint16_t op_without_decref_inputs[MAX_UOP_ID + 1] = { + [_BINARY_OP_MULTIPLY_FLOAT] = _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS, + [_BINARY_OP_ADD_FLOAT] = _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS, + [_BINARY_OP_SUBTRACT_FLOAT] = _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS, +}; + /* 1 for success, 0 for not ready, cannot error at the moment. */ static int optimize_uops( @@ -460,7 +498,7 @@ optimize_uops( int oparg = this_instr->oparg; opcode = this_instr->opcode; - JitOptSymbol **stack_pointer = ctx->frame->stack_pointer; + JitOptRef *stack_pointer = ctx->frame->stack_pointer; #ifdef Py_DEBUG if (get_lltrace() >= 3) { @@ -523,6 +561,45 @@ error: } +const uint16_t op_without_push[MAX_UOP_ID + 1] = { + [_COPY] = _NOP, + [_LOAD_CONST_INLINE] = _NOP, + [_LOAD_CONST_INLINE_BORROW] = _NOP, + [_LOAD_CONST_UNDER_INLINE] = _POP_TOP_LOAD_CONST_INLINE, + [_LOAD_CONST_UNDER_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, + [_LOAD_FAST] = _NOP, + [_LOAD_FAST_BORROW] = _NOP, + [_LOAD_SMALL_INT] = _NOP, + [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_TWO, +}; + +const bool op_skip[MAX_UOP_ID + 1] = { + [_NOP] = true, + [_CHECK_VALIDITY] = true, + [_CHECK_PERIODIC] = true, + [_SET_IP] = true, +}; + +const uint16_t op_without_pop[MAX_UOP_ID + 1] = { + [_POP_TOP] = _NOP, + [_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, + [_POP_TWO] = _POP_TOP, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO] = _POP_CALL_ONE, + [_POP_CALL_ONE] = _POP_CALL, +}; + +const uint16_t op_without_pop_null[MAX_UOP_ID + 1] = { + [_POP_CALL] = _POP_TOP, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, +}; + static int remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) @@ -551,50 +628,37 @@ 
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _NOP; } break; - case _POP_TOP: - case _POP_TOP_LOAD_CONST_INLINE: - case _POP_TOP_LOAD_CONST_INLINE_BORROW: - case _POP_TWO_LOAD_CONST_INLINE_BORROW: - optimize_pop_top_again: + default: { - _PyUOpInstruction *last = &buffer[pc-1]; - while (last->opcode == _NOP) { - last--; - } - switch (last->opcode) { - case _POP_TWO_LOAD_CONST_INLINE_BORROW: - last->opcode = _POP_TOP; - break; - case _POP_TOP_LOAD_CONST_INLINE: - case _POP_TOP_LOAD_CONST_INLINE_BORROW: - last->opcode = _NOP; - goto optimize_pop_top_again; - case _COPY: - case _LOAD_CONST_INLINE: - case _LOAD_CONST_INLINE_BORROW: - case _LOAD_FAST: - case _LOAD_FAST_BORROW: - case _LOAD_SMALL_INT: - last->opcode = _NOP; - if (opcode == _POP_TOP) { - opcode = buffer[pc].opcode = _NOP; - } - else if (opcode == _POP_TOP_LOAD_CONST_INLINE) { - opcode = buffer[pc].opcode = _LOAD_CONST_INLINE; - } - else if (opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) { - opcode = buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW; - } - else { - assert(opcode == _POP_TWO_LOAD_CONST_INLINE_BORROW); - opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; - goto optimize_pop_top_again; + // Cancel out pushes and pops, repeatedly. So: + // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP + // ...becomes: + // _NOP + _POP_TOP + _NOP + while (op_without_pop[opcode] || op_without_pop_null[opcode]) { + _PyUOpInstruction *last = &buffer[pc - 1]; + while (op_skip[last->opcode]) { + last--; + } + if (op_without_push[last->opcode] && op_without_pop[opcode]) { + last->opcode = op_without_push[last->opcode]; + opcode = buffer[pc].opcode = op_without_pop[opcode]; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; } + } + else if (last->opcode == _PUSH_NULL) { + // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. + // This looks for a preceding _PUSH_NULL instruction and + // simplifies to _POP_TOP(_LOAD_CONST_INLINE_BORROW). 
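This default case replaces the old hand-written switch with the op_without_push/op_without_pop/op_without_pop_null tables declared earlier: a producer that only pushes, followed by a consumer that only pops, is rewritten to cheaper forms repeatedly until no rule matches. A minimal standalone toy of one cancellation step (opcodes and tables invented; only the table-driven shape is real):

    #include <stdio.h>

    /* Toy cancellation step.  OP_NONE (0) doubles as the "no rule"
       sentinel, like the zero entries in the real tables. */
    enum { OP_NONE, OP_NOP, OP_LOAD, OP_POP, N_OPS };

    static const int without_push[N_OPS] = { [OP_LOAD] = OP_NOP };
    static const int without_pop[N_OPS]  = { [OP_POP]  = OP_NOP };

    int main(void)
    {
        int trace[] = { OP_LOAD, OP_POP };
        if (without_push[trace[0]] && without_pop[trace[1]]) {
            trace[0] = without_push[trace[0]];   /* push side becomes a NOP */
            trace[1] = without_pop[trace[1]];    /* pop side becomes a NOP  */
        }
        printf("%d %d\n", trace[0], trace[1]);   /* 1 1: both are OP_NOP    */
        return 0;
    }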
+ last->opcode = _NOP; + opcode = buffer[pc].opcode = op_without_pop_null[opcode]; + assert(opcode); + } + else { + break; + } } - _Py_FALLTHROUGH; - } - default: - { /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ bool needs_ip = opcode == _PUSH_FRAME; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e99421a3aff..3182e8b3b70 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -27,13 +27,16 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) +#define sym_set_compact_int(SYM) _Py_uop_sym_set_compact_int(ctx, SYM) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new #define frame_pop _Py_uop_frame_pop #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal +#define sym_new_compact_int _Py_uop_sym_new_compact_int +#define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness extern int @@ -87,12 +90,12 @@ dummy_func(void) { } op(_LOAD_FAST_BORROW, (-- value)) { - value = GETLOCAL(oparg); + value = PyJitRef_Borrow(GETLOCAL(oparg)); } op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); - JitOptSymbol *temp = sym_new_null(ctx); + JitOptRef temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; } @@ -104,18 +107,40 @@ dummy_func(void) { res = sym_new_null(ctx); } - op(_GUARD_TOS_INT, (tos -- tos)) { - if (sym_matches_type(tos, &PyLong_Type)) { + op(_GUARD_TOS_INT, (value -- value)) { + if (sym_is_compact_int(value)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(tos, &PyLong_Type); + else { + if (sym_get_type(value) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_TOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(value); + } } - op(_GUARD_NOS_INT, (nos, unused -- nos, unused)) { - if (sym_matches_type(nos, &PyLong_Type)) { + op(_GUARD_NOS_INT, (left, unused -- left, unused)) { + if (sym_is_compact_int(left)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(nos, &PyLong_Type); + else { + if (sym_get_type(left) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_NOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(left); + } + } + + op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) { + PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); + if (type) { + if (type == sym_get_const(ctx, owner)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + sym_set_const(owner, type); + } + } } op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { @@ -141,25 +166,26 @@ dummy_func(void) { } } - op(_GUARD_TOS_FLOAT, (tos -- tos)) { - if (sym_matches_type(tos, &PyFloat_Type)) { + op(_GUARD_TOS_FLOAT, (value -- value)) { + if (sym_matches_type(value, &PyFloat_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(tos, &PyFloat_Type); + sym_set_type(value, &PyFloat_Type); } - op(_GUARD_NOS_FLOAT, (nos, unused -- nos, unused)) { - if (sym_matches_type(nos, &PyFloat_Type)) { + op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) { + if (sym_matches_type(left, &PyFloat_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(nos, &PyFloat_Type); + sym_set_type(left, 
&PyFloat_Type); } - op(_BINARY_OP, (left, right -- res)) { - bool lhs_int = sym_matches_type(left, &PyLong_Type); - bool rhs_int = sym_matches_type(right, &PyLong_Type); - bool lhs_float = sym_matches_type(left, &PyFloat_Type); - bool rhs_float = sym_matches_type(right, &PyFloat_Type); + op(_BINARY_OP, (lhs, rhs -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs); + bool lhs_int = sym_matches_type(lhs, &PyLong_Type); + bool rhs_int = sym_matches_type(rhs, &PyLong_Type); + bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); + bool rhs_float = sym_matches_type(rhs, &PyFloat_Type); if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) { // There's something other than an int or float involved: res = sym_new_unknown(ctx); @@ -185,11 +211,11 @@ dummy_func(void) { // Case C: res = sym_new_type(ctx, &PyFloat_Type); } - else if (!sym_is_const(ctx, right)) { + else if (!sym_is_const(ctx, rhs)) { // Case A or B... can't know without the sign of the RHS: res = sym_new_unknown(ctx); } - else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(ctx, right))) { + else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(ctx, rhs))) { // Case B: res = sym_new_type(ctx, &PyFloat_Type); } @@ -210,140 +236,54 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! - } - else { - res = sym_new_type(ctx, &PyLong_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_compact_int(ctx); } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! - } - else { - res = sym_new_type(ctx, &PyLong_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_compact_int(ctx); } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! 
- } - else { - res = sym_new_type(ctx, &PyLong_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_compact_int(ctx); } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); + // TODO (gh-134584): Refactor this to use another uop + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } } op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); + // TODO (gh-134584): Refactor this to use another uop + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } } op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! 
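The per-opcode constant folding that used to call _PyLong_Add and friends inline is gone: REPLACE_OPCODE_IF_EVALUATES_PURE now handles the all-inputs-constant case centrally, and otherwise the int ops simply type their result as a compact int via sym_new_compact_int. A toy of that fold-or-type split (the macro itself rewrites the instruction stream, which this sketch cannot show):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy: if every input is a known constant, evaluate now; otherwise
       record only the result type.  Illustrative, not CPython's code. */
    typedef struct { bool known; long value; } sym;

    static sym fold_add(sym a, sym b)
    {
        if (a.known && b.known) {
            return (sym){ true, a.value + b.value };  /* constant result  */
        }
        return (sym){ false, 0 };                     /* type-only result */
    }

    int main(void)
    {
        sym c = fold_add((sym){ true, 2 }, (sym){ true, 3 });
        printf("known=%d value=%ld\n", c.known, c.value);
        return 0;
    }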
- } - else { - res = sym_new_type(ctx, &PyFloat_Type); + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); + // TODO (gh-134584): Refactor this to use another uop + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } } op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyUnicode_Type); } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { - JitOptSymbol *res; + JitOptRef res; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); @@ -361,12 +301,12 @@ dummy_func(void) { GETLOCAL(this_instr->operand0) = res; } - op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _Py_UOpsAbstractFrame *)) { - new_frame = NULL; + op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) { + new_frame = PyJitRef_NULL; ctx->done = true; } - op(_BINARY_OP_SUBSCR_STR_INT, (left, right -- res)) { + op(_BINARY_OP_SUBSCR_STR_INT, (str_st, sub_st -- res)) { res = sym_new_type(ctx, &PyUnicode_Type); } @@ -398,11 +338,11 @@ dummy_func(void) { } } - op(_TO_BOOL_BOOL, (value -- res)) { - int already_bool = optimize_to_bool(this_instr, ctx, value, &res); + op(_TO_BOOL_BOOL, (value -- value)) { + int already_bool = optimize_to_bool(this_instr, ctx, value, &value); if (!already_bool) { sym_set_type(value, &PyBool_Type); - res = sym_new_truthiness(ctx, value, true); + value = sym_new_truthiness(ctx, value, true); } } @@ -451,10 +391,35 @@ dummy_func(void) { } op(_UNARY_NOT, (value -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(value); sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); } + op(_UNARY_NEGATIVE, (value -- res)) { + if (sym_is_compact_int(value)) { + res = sym_new_compact_int(ctx); + } + else { + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } + } + } + + op(_UNARY_INVERT, (value -- res)) { + if (sym_matches_type(value, &PyLong_Type)) { + res = sym_new_type(ctx, &PyLong_Type); + } + else { + res = sym_new_not_null(ctx); + } + } + op(_COMPARE_OP, (left, right -- res)) { if (oparg & 16) { res = sym_new_type(ctx, &PyBool_Type); @@ -493,45 +458,34 @@ dummy_func(void) { res = sym_new_type(ctx, &PyBool_Type); } - op(_IS_OP, (left, right -- res)) { - res = sym_new_type(ctx, &PyBool_Type); + op(_IS_OP, (left, right -- b)) { + b = sym_new_type(ctx, &PyBool_Type); } - op(_CONTAINS_OP, (left, right -- res)) { - res = sym_new_type(ctx, &PyBool_Type); + op(_CONTAINS_OP, (left, right -- b)) { + b = sym_new_type(ctx, &PyBool_Type); } - op(_CONTAINS_OP_SET, (left, right -- res)) { - res = sym_new_type(ctx, &PyBool_Type); + op(_CONTAINS_OP_SET, (left, right -- b)) { + b = sym_new_type(ctx, &PyBool_Type); } - op(_CONTAINS_OP_DICT, (left, right -- res)) { - res = sym_new_type(ctx, &PyBool_Type); + 
op(_CONTAINS_OP_DICT, (left, right -- b)) { + b = sym_new_type(ctx, &PyBool_Type); } op(_LOAD_CONST, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); - } - - op(_LOAD_CONST_MORTAL, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); - } - - op(_LOAD_CONST_IMMORTAL, (-- value)) { - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); } op(_LOAD_SMALL_INT, (-- value)) { - PyObject *val = PyLong_FromLong(this_instr->oparg); - value = sym_new_const(ctx, val); + PyObject *val = PyLong_FromLong(oparg); + assert(val); + assert(_Py_IsImmortal(val)); + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { @@ -539,7 +493,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { @@ -547,7 +501,37 @@ dummy_func(void) { } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + } + + op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused -- value)) { + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + } + + op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused -- value)) { + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + } + + op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) { + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + } + + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } } op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { @@ -556,7 +540,7 @@ dummy_func(void) { } op(_SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { - JitOptSymbol *temp = bottom; + JitOptRef temp = bottom; bottom = top; top = temp; assert(oparg >= 2); @@ -570,7 +554,7 @@ dummy_func(void) { op(_LOAD_ATTR_MODULE, (dict_version/2, index/1, owner -- attr)) { (void)dict_version; (void)index; - attr = NULL; + attr = PyJitRef_NULL; if (sym_is_const(ctx, owner)) { PyModuleObject *mod = (PyModuleObject *)sym_get_const(ctx, owner); if (PyModule_CheckExact(mod)) { @@ -580,11 +564,17 @@ dummy_func(void) { PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); PyObject *res = convert_global_to_const(this_instr, dict, true); - attr = sym_new_const(ctx, res); + if (res 
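The new _POP_TOP handler above picks a specialized pop from what the optimizer already knows: borrowed, immortal, or NULL values make the pop free (_POP_TOP_NOP), and a statically known int/float/str type selects _POP_TOP_INT, _POP_TOP_FLOAT, or _POP_TOP_UNICODE so the generic decref dispatch is skipped. A toy of the selection logic (uop names are quoted from the diff; the rest is invented):

    #include <stdio.h>

    /* Toy of _POP_TOP specialization by statically known facts. */
    typedef enum { T_UNKNOWN, T_INT, T_FLOAT, T_STR } toy_type;

    static const char *specialize_pop(toy_type t, int provably_no_decref)
    {
        if (provably_no_decref) {            /* borrowed, immortal, or NULL */
            return "_POP_TOP_NOP";
        }
        switch (t) {
            case T_INT:   return "_POP_TOP_INT";
            case T_FLOAT: return "_POP_TOP_FLOAT";
            case T_STR:   return "_POP_TOP_UNICODE";
            default:      return "_POP_TOP";   /* generic decref dispatch */
        }
    }

    int main(void)
    {
        printf("%s\n", specialize_pop(T_INT, 0));
        printf("%s\n", specialize_pop(T_UNKNOWN, 1));
        return 0;
    }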
== NULL) { + attr = sym_new_not_null(ctx); + } + else { + attr = sym_new_const(ctx, res); + } + } } } - if (attr == NULL) { + if (PyJitRef_IsNull(attr)) { /* No conversion made. We don't know what `attr` is. */ attr = sym_new_not_null(ctx); } @@ -603,7 +593,7 @@ dummy_func(void) { op(_LOAD_ATTR, (owner -- attr, self_or_null[oparg&1])) { (void)owner; attr = sym_new_not_null(ctx); - if (oparg &1) { + if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } } @@ -619,31 +609,65 @@ dummy_func(void) { } op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr)) { - attr = sym_new_not_null(ctx); (void)descr; + PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); + } + + op(_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, (descr/4, owner -- attr)) { + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); + } + + op(_LOAD_ATTR_NONDESCRIPTOR_NO_DICT, (descr/4, owner -- attr)) { + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); } op(_LOAD_ATTR_METHOD_WITH_VALUES, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } op(_LOAD_ATTR_METHOD_NO_DICT, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } op(_LOAD_ATTR_METHOD_LAZY_DICT, (descr/4, owner -- attr, self)) { (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; } - op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) { (void)fget; - new_frame = NULL; + new_frame = PyJitRef_NULL; ctx->done = true; } @@ -661,6 +685,16 @@ dummy_func(void) { sym_set_type(callable, &PyFunction_Type); } + op(_CHECK_METHOD_VERSION, (func_version/2, callable, null, unused[oparg] -- callable, null, unused[oparg])) { + if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) { + PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable); + assert(PyMethod_Check(method)); + REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version); + this_instr->operand1 = (uintptr_t)method->im_func; + } + sym_set_type(callable, &PyMethod_Type); + } + op(_CHECK_FUNCTION_EXACT_ARGS, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { assert(sym_matches_type(callable, &PyFunction_Type)); if (sym_is_const(ctx, callable)) { @@ -679,7 +713,7 @@ 
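Most of the mechanical churn in these hunks is the change of the optimizer's stack-slot type from JitOptSymbol * to JitOptRef, with PyJitRef_Wrap()/PyJitRef_Unwrap() at the frame boundaries, PyJitRef_Borrow() on loads that do not own their value, and PyJitRef_NULL standing in for bare NULL. The diff never shows the type itself, so the layout below is an assumption: a plausible reading is a tagged pointer whose low bit records "borrowed", which is the bit _POP_TOP consults through PyJitRef_IsBorrowed() before downgrading itself to _POP_TOP_NOP.

#include <stdbool.h>
#include <stdint.h>

typedef struct JitOptSymbol JitOptSymbol;   /* opaque in this sketch */

typedef struct {
    uintptr_t bits;     /* JitOptSymbol * plus a low "borrowed" tag bit */
} JitOptRef;

#define JIT_REF_BORROWED ((uintptr_t)1)
#define PyJitRef_NULL    ((JitOptRef){0})

static inline JitOptRef
PyJitRef_Wrap(JitOptSymbol *sym)
{
    return (JitOptRef){(uintptr_t)sym};     /* owned by default */
}

static inline JitOptSymbol *
PyJitRef_Unwrap(JitOptRef ref)
{
    return (JitOptSymbol *)(ref.bits & ~JIT_REF_BORROWED);
}

static inline JitOptRef
PyJitRef_Borrow(JitOptRef ref)
{
    /* A borrowed slot never holds the last reference, so popping it
       cannot deallocate; that is what lets _POP_TOP rewrite itself to
       _POP_TOP_NOP in the cases elsewhere in this diff. */
    return (JitOptRef){ref.bits | JIT_REF_BORROWED};
}

static inline bool
PyJitRef_IsBorrowed(JitOptRef ref)
{
    return (ref.bits & JIT_REF_BORROWED) != 0;
}

static inline bool
PyJitRef_IsNull(JitOptRef ref)
{
    return PyJitRef_Unwrap(ref) == NULL;
}

Under that reading, _LOAD_CONST and _LOAD_SMALL_INT can hand out borrowed refs because the objects they push are immortal, and the float fast paths can skip their input decrefs (the __NO_DECREF_INPUTS variants further down) whenever both inputs are borrowed.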
dummy_func(void) { sym_set_type(callable, &PyMethod_Type); } - op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame)) { int argcount = oparg; PyCodeObject *co = NULL; @@ -691,7 +725,7 @@ dummy_func(void) { } - assert(self_or_null != NULL); + assert(!PyJitRef_IsNull(self_or_null)); assert(args != NULL); if (sym_is_not_null(self_or_null)) { // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM @@ -700,20 +734,19 @@ dummy_func(void) { } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args, argcount)); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); - + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } } - op(_MAYBE_EXPAND_METHOD, (callable, self_or_null, args[oparg] -- func, maybe_self, args[oparg])) { + op(_MAYBE_EXPAND_METHOD, (callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { (void)args; - func = sym_new_not_null(ctx); - maybe_self = sym_new_not_null(ctx); + callable = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); } - op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) { PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); co = get_code_with_logging((this_instr + 2)); @@ -722,28 +755,30 @@ dummy_func(void) { break; } - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } - op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _Py_UOpsAbstractFrame *)) { - new_frame = NULL; + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) { + new_frame = PyJitRef_NULL; ctx->done = true; } - op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, null, args[oparg] -- self, init, args[oparg])) { + op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { (void)type_version; (void)args; - self = sym_new_not_null(ctx); - init = sym_new_not_null(ctx); + callable = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); } - op(_CREATE_INIT_FRAME, (self, init, args[oparg] -- init_frame: _Py_UOpsAbstractFrame *)) { - init_frame = NULL; + op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) { + init_frame = PyJitRef_NULL; ctx->done = true; } op(_RETURN_VALUE, (retval -- res)) { - JitOptSymbol *temp = retval; + // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe + // in bytecodes.c + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; @@ -789,21 +824,34 @@ dummy_func(void) { } } - op(_YIELD_VALUE, (unused -- res)) { - res = sym_new_unknown(ctx); + op(_YIELD_VALUE, (unused -- value)) { + value = sym_new_unknown(ctx); } - op(_FOR_ITER_GEN_FRAME, ( -- )) { + op(_GET_ITER, (iterable -- iter, index_or_null)) { + if (sym_matches_type(iterable, &PyTuple_Type) || sym_matches_type(iterable, &PyList_Type)) { + iter = iterable; + index_or_null = sym_new_not_null(ctx); + } + else { + iter = sym_new_not_null(ctx); + index_or_null = sym_new_unknown(ctx); + } + } + + op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, 
gen_frame)) { + gen_frame = PyJitRef_NULL; /* We are about to hit the end of the trace */ ctx->done = true; } - op(_SEND_GEN_FRAME, ( -- )) { + op(_SEND_GEN_FRAME, (unused, unused -- unused, gen_frame)) { + gen_frame = PyJitRef_NULL; // We are about to hit the end of the trace: ctx->done = true; } - op(_CHECK_STACK_SPACE, ( --)) { + op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) { assert(corresponding_check_stack == NULL); corresponding_check_stack = this_instr; } @@ -815,12 +863,12 @@ dummy_func(void) { Py_UNREACHABLE(); } - op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- )) { + op(_PUSH_FRAME, (new_frame -- )) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame); ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ctx->frame->stack_pointer; co = get_code(this_instr); if (co == NULL) { // should be about to _EXIT_TRACE anyway @@ -848,14 +896,16 @@ dummy_func(void) { corresponding_check_stack = NULL; } - op(_UNPACK_SEQUENCE, (seq -- values[oparg])) { + op(_UNPACK_SEQUENCE, (seq -- values[oparg], top[0])) { + (void)top; /* This has to be done manually */ for (int i = 0; i < oparg; i++) { values[i] = sym_new_unknown(ctx); } } - op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) { + op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8], top[0])) { + (void)top; /* This has to be done manually */ int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; for (int i = 0; i < totalargs; i++) { @@ -863,13 +913,23 @@ dummy_func(void) { } } - op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + op(_ITER_CHECK_TUPLE, (iter, null_or_index -- iter, null_or_index)) { + if (sym_matches_type(iter, &PyTuple_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(iter, &PyTuple_Type); + } + + op(_ITER_NEXT_RANGE, (iter, null_or_index -- iter, null_or_index, next)) { next = sym_new_type(ctx, &PyLong_Type); } op(_CALL_TYPE_1, (unused, unused, arg -- res)) { - if (sym_has_type(arg)) { - res = sym_new_const(ctx, (PyObject *)sym_get_type(arg)); + PyObject* type = (PyObject *)sym_get_type(arg); + if (type) { + res = sym_new_const(ctx, type); + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, + (uintptr_t)type); } else { res = sym_new_not_null(ctx); @@ -886,6 +946,26 @@ dummy_func(void) { } } + op(_CALL_ISINSTANCE, (unused, unused, instance, cls -- res)) { + // the result is always a bool, but sometimes we can + // narrow it down to True or False + res = sym_new_type(ctx, &PyBool_Type); + PyTypeObject *inst_type = sym_get_type(instance); + PyTypeObject *cls_o = (PyTypeObject *)sym_get_const(ctx, cls); + if (inst_type && cls_o && sym_matches_type(cls, &PyType_Type)) { + // isinstance(inst, cls) where both inst and cls have + // known types, meaning we can deduce either True or False + + // The below check is equivalent to PyObject_TypeCheck(inst, cls) + PyObject *out = Py_False; + if (inst_type == cls_o || PyType_IsSubtype(inst_type, cls_o)) { + out = Py_True; + } + sym_set_const(res, out); + REPLACE_OP(this_instr, _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)out); + } + } + op(_GUARD_IS_TRUE_POP, (flag -- )) { if (sym_is_const(ctx, flag)) { PyObject *value = sym_get_const(ctx, flag); @@ -904,27 +984,27 @@ dummy_func(void) { sym_set_const(flag, Py_False); } - op(_GUARD_IS_NONE_POP, (flag -- )) { - if (sym_is_const(ctx, flag)) { - PyObject *value = sym_get_const(ctx, flag); + 
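The comment in the _CALL_ISINSTANCE hunk above deserves unpacking: the result symbol is always a bool, but when both the concrete type of instance and the constant class argument are known, the call is decidable during trace optimization and the whole callable/self/args group collapses into a single _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW pushing Py_True or Py_False. Restated on its own (the helper name is made up for this sketch; no such function is defined in the diff):

#include <Python.h>

/* Mirrors PyObject_TypeCheck() with both types known statically, which
   is exactly the check the _CALL_ISINSTANCE hunk performs. */
static PyObject *
fold_isinstance_result(PyTypeObject *inst_type, PyTypeObject *cls)
{
    if (inst_type == cls || PyType_IsSubtype(inst_type, cls)) {
        return Py_True;     /* immortal, so a borrowed push is safe */
    }
    return Py_False;
}

So a trace that tests isinstance(x, dict) against an x already narrowed to exact dict pushes a constant True, and a following _GUARD_IS_TRUE_POP is then discarded by eliminate_pop_guard() just like the other constant-flag guards in this stretch.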
op(_GUARD_IS_NONE_POP, (val -- )) { + if (sym_is_const(ctx, val)) { + PyObject *value = sym_get_const(ctx, val); assert(value != NULL); eliminate_pop_guard(this_instr, !Py_IsNone(value)); } - else if (sym_has_type(flag)) { - assert(!sym_matches_type(flag, &_PyNone_Type)); + else if (sym_has_type(val)) { + assert(!sym_matches_type(val, &_PyNone_Type)); eliminate_pop_guard(this_instr, true); } - sym_set_const(flag, Py_None); + sym_set_const(val, Py_None); } - op(_GUARD_IS_NOT_NONE_POP, (flag -- )) { - if (sym_is_const(ctx, flag)) { - PyObject *value = sym_get_const(ctx, flag); + op(_GUARD_IS_NOT_NONE_POP, (val -- )) { + if (sym_is_const(ctx, val)) { + PyObject *value = sym_get_const(ctx, val); assert(value != NULL); eliminate_pop_guard(this_instr, Py_IsNone(value)); } - else if (sym_has_type(flag)) { - assert(!sym_matches_type(flag, &_PyNone_Type)); + else if (sym_has_type(val)) { + assert(!sym_matches_type(val, &_PyNone_Type)); eliminate_pop_guard(this_instr, false); } } @@ -969,7 +1049,7 @@ dummy_func(void) { list = sym_new_type(ctx, &PyList_Type); } - op(_BUILD_SLICE, (values[oparg] -- slice)) { + op(_BUILD_SLICE, (args[oparg] -- slice)) { slice = sym_new_type(ctx, &PySlice_Type); } @@ -977,7 +1057,7 @@ dummy_func(void) { map = sym_new_type(ctx, &PyDict_Type); } - op(_BUILD_STRING, (values[oparg] -- str)) { + op(_BUILD_STRING, (pieces[oparg] -- str)) { str = sym_new_type(ctx, &PyUnicode_Type); } @@ -1063,6 +1143,20 @@ dummy_func(void) { sym_set_null(null); } + op(_GUARD_NOS_NOT_NULL, (nos, unused -- nos, unused)) { + if (sym_is_not_null(nos)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_non_null(nos); + } + + op(_GUARD_THIRD_NULL, (null, unused, unused -- null, unused, unused)) { + if (sym_is_null(null)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_null(null); + } + op(_GUARD_CALLABLE_TYPE_1, (callable, unused, unused -- callable, unused, unused)) { if (sym_get_const(ctx, callable) == (PyObject *)&PyType_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1084,8 +1178,40 @@ dummy_func(void) { sym_set_const(callable, (PyObject *)&PyUnicode_Type); } - op(_CALL_LEN, (unused, unused, unused -- res)) { + op(_CALL_LEN, (callable, null, arg -- res)) { res = sym_new_type(ctx, &PyLong_Type); + int tuple_length = sym_tuple_length(arg); + if (tuple_length >= 0) { + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + 0, (uintptr_t)temp); + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + } + } + + op(_GET_LEN, (obj -- obj, len)) { + int tuple_length = sym_tuple_length(obj); + if (tuple_length == -1) { + len = sym_new_type(ctx, &PyLong_Type); + } + else { + assert(tuple_length >= 0); + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)temp); + } + len = sym_new_const(ctx, temp); + Py_DECREF(temp); + } } op(_GUARD_CALLABLE_LEN, (callable, unused, unused -- callable, unused, unused)) { @@ -1096,6 +1222,36 @@ dummy_func(void) { sym_set_const(callable, len); } + op(_GUARD_CALLABLE_ISINSTANCE, (callable, unused, unused, unused -- callable, unused, unused, unused)) { + PyObject *isinstance = _PyInterpreterState_GET()->callable_cache.isinstance; + if (sym_get_const(ctx, callable) == isinstance) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_const(callable, isinstance); + } + + op(_GUARD_CALLABLE_LIST_APPEND, 
(callable, unused, unused -- callable, unused, unused)) { + PyObject *list_append = _PyInterpreterState_GET()->callable_cache.list_append; + if (sym_get_const(ctx, callable) == list_append) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_const(callable, list_append); + } + + op(_BINARY_SLICE, (container, start, stop -- res)) { + // Slicing a string/list/tuple always returns the same type. + PyTypeObject *type = sym_get_type(container); + if (type == &PyUnicode_Type || + type == &PyList_Type || + type == &PyTuple_Type) + { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } + } + // END BYTECODES // } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 56b4b9983fb..8d30df3aa7d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -26,7 +26,7 @@ /* _MONITOR_RESUME is not a viable micro-op for tier 2 */ case _LOAD_FAST_CHECK: { - JitOptSymbol *value; + JitOptRef value; value = GETLOCAL(oparg); if (sym_is_null(value)) { ctx->done = true; @@ -38,7 +38,7 @@ } case _LOAD_FAST: { - JitOptSymbol *value; + JitOptRef value; value = GETLOCAL(oparg); stack_pointer[0] = value; stack_pointer += 1; @@ -47,8 +47,8 @@ } case _LOAD_FAST_BORROW: { - JitOptSymbol *value; - value = GETLOCAL(oparg); + JitOptRef value; + value = PyJitRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -56,9 +56,9 @@ } case _LOAD_FAST_AND_CLEAR: { - JitOptSymbol *value; + JitOptRef value; value = GETLOCAL(oparg); - JitOptSymbol *temp = sym_new_null(ctx); + JitOptRef temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; stack_pointer[0] = value; stack_pointer += 1; @@ -66,25 +66,11 @@ break; } - /* _LOAD_CONST is not a viable micro-op for tier 2 */ - - case _LOAD_CONST_MORTAL: { - JitOptSymbol *value; - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? 
_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_CONST_IMMORTAL: { - JitOptSymbol *value; - PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + case _LOAD_CONST: { + JitOptRef value; + PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); - value = sym_new_const(ctx, val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -92,9 +78,12 @@ } case _LOAD_SMALL_INT: { - JitOptSymbol *value; - PyObject *val = PyLong_FromLong(this_instr->oparg); - value = sym_new_const(ctx, val); + JitOptRef value; + PyObject *val = PyLong_FromLong(oparg); + assert(val); + assert(_Py_IsImmortal(val)); + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); + value = PyJitRef_Borrow(sym_new_const(ctx, val)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -102,7 +91,7 @@ } case _STORE_FAST: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; GETLOCAL(oparg) = value; stack_pointer += -1; @@ -111,13 +100,60 @@ } case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_NOP: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TWO: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _PUSH_NULL: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -131,8 +167,14 @@ break; } + case _POP_ITER: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _END_SEND: { - JitOptSymbol *val; + JitOptRef val; val = sym_new_not_null(ctx); stack_pointer[-2] = val; stack_pointer += -1; @@ -141,16 +183,44 @@ } case _UNARY_NEGATIVE: { - JitOptSymbol *res; - res = sym_new_not_null(ctx); + JitOptRef value; + JitOptRef res; + value = stack_pointer[-1]; + if (sym_is_compact_int(value)) { + res = sym_new_compact_int(ctx); + } + else { + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } + } stack_pointer[-1] = res; break; } case _UNARY_NOT: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, value) + ) { + JitOptRef value_sym = value; + _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym); + _PyStackRef res_stackref; + /* Start of uop 
copied from bytecodes for constant evaluation */ + assert(PyStackRef_BoolCheck(value)); + res_stackref = PyStackRef_IsFalse(value) + ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); stack_pointer[-1] = res; @@ -158,8 +228,8 @@ } case _TO_BOOL: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -170,21 +240,20 @@ } case _TO_BOOL_BOOL: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; value = stack_pointer[-1]; - int already_bool = optimize_to_bool(this_instr, ctx, value, &res); + int already_bool = optimize_to_bool(this_instr, ctx, value, &value); if (!already_bool) { sym_set_type(value, &PyBool_Type); - res = sym_new_truthiness(ctx, value, true); + value = sym_new_truthiness(ctx, value, true); } - stack_pointer[-1] = res; + stack_pointer[-1] = value; break; } case _TO_BOOL_INT: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -196,7 +265,7 @@ } case _GUARD_NOS_LIST: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyList_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -206,7 +275,7 @@ } case _GUARD_TOS_LIST: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PyList_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -220,8 +289,8 @@ } case _TO_BOOL_LIST: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -232,8 +301,8 @@ } case _TO_BOOL_NONE: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -245,7 +314,7 @@ } case _GUARD_NOS_UNICODE: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyUnicode_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -255,7 +324,7 @@ } case _GUARD_TOS_UNICODE: { - JitOptSymbol *value; + JitOptRef value; value = stack_pointer[-1]; if (sym_matches_type(value, &PyUnicode_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -265,8 +334,8 @@ } case _TO_BOOL_STR: { - JitOptSymbol *value; - JitOptSymbol *res; + JitOptRef value; + JitOptRef res; value = stack_pointer[-1]; int already_bool = optimize_to_bool(this_instr, ctx, value, &res); if (!already_bool) { @@ -277,7 +346,7 @@ } case _REPLACE_WITH_TRUE: { - JitOptSymbol *res; + JitOptRef res; REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True); res = sym_new_const(ctx, Py_True); stack_pointer[-1] = res; @@ -285,256 +354,416 @@ } case _UNARY_INVERT: { - JitOptSymbol *res; - res = sym_new_not_null(ctx); + JitOptRef value; + JitOptRef res; + value = stack_pointer[-1]; + if (sym_matches_type(value, &PyLong_Type)) { + res = sym_new_type(ctx, &PyLong_Type); + } + else { + res = sym_new_not_null(ctx); + } stack_pointer[-1] = res; break; } case _GUARD_NOS_INT: { - JitOptSymbol *nos; - nos = stack_pointer[-2]; - if (sym_matches_type(nos, 
&PyLong_Type)) { + JitOptRef left; + left = stack_pointer[-2]; + if (sym_is_compact_int(left)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(nos, &PyLong_Type); + else { + if (sym_get_type(left) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_NOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(left); + } break; } case _GUARD_TOS_INT: { - JitOptSymbol *tos; - tos = stack_pointer[-1]; - if (sym_matches_type(tos, &PyLong_Type)) { + JitOptRef value; + value = stack_pointer[-1]; + if (sym_is_compact_int(value)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(tos, &PyLong_Type); + else { + if (sym_get_type(value) == &PyLong_Type) { + REPLACE_OP(this_instr, _GUARD_TOS_OVERFLOWED, 0, 0); + } + sym_set_compact_int(value); + } + break; + } + + case _GUARD_NOS_OVERFLOWED: { + break; + } + + case _GUARD_TOS_OVERFLOWED: { break; } case _BINARY_OP_MULTIPLY_INT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; } - res = sym_new_const(ctx, temp); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyLong_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_compact_int(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_ADD_INT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = 
sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; } - res = sym_new_const(ctx, temp); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyLong_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_compact_int(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_SUBTRACT_INT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(ctx, left), - (PyLongObject *)sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; } - res = sym_new_const(ctx, temp); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyLong_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_compact_int(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _GUARD_NOS_FLOAT: { - JitOptSymbol *nos; - nos = stack_pointer[-2]; - if (sym_matches_type(nos, &PyFloat_Type)) { + JitOptRef left; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyFloat_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - 
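The float guards being renamed here follow the same check-then-narrow shape that nearly every _GUARD_* case in this file shares: if the symbolic state already proves the property, the runtime check is dead and the uop becomes a _NOP; otherwise the guard stays and the property is recorded so downstream uops can rely on it. The generated cases inline that pattern each time; as a generic restatement (the helper and its signature are inventions of this sketch):

/* Sketch of the recurring guard shape; the generated code inlines this
   logic per case rather than calling a helper like this one. */
static void
optimize_type_guard(_PyUOpInstruction *this_instr,
                    JitOptRef value, PyTypeObject *expected)
{
    if (sym_matches_type(value, expected)) {
        /* Already proven: the check can never fail, so drop it. */
        REPLACE_OP(this_instr, _NOP, 0, 0);
    }
    /* Whether or not the guard survives, later uops may assume the
       type, so narrow the symbol unconditionally. */
    sym_set_type(value, expected);
}

The int guards above add a third arm to this pattern: when the value is known to be an int but not known to be compact, the guard is weakened to _GUARD_NOS_OVERFLOWED / _GUARD_TOS_OVERFLOWED instead of being removed.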
sym_set_type(nos, &PyFloat_Type); + sym_set_type(left, &PyFloat_Type); break; } case _GUARD_TOS_FLOAT: { - JitOptSymbol *tos; - tos = stack_pointer[-1]; - if (sym_matches_type(tos, &PyFloat_Type)) { + JitOptRef value; + value = stack_pointer[-1]; + if (sym_matches_type(value, &PyFloat_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type(tos, &PyFloat_Type); + sym_set_type(value, &PyFloat_Type); break; } case _BINARY_OP_MULTIPLY_FLOAT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); + break; } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + res = sym_new_type(ctx, &PyFloat_Type); + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_ADD_FLOAT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + 
((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); + break; } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + res = sym_new_type(ctx, &PyFloat_Type); + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_SUBTRACT_FLOAT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); + break; } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + res = sym_new_type(ctx, &PyFloat_Type); + if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { + REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: { + JitOptRef res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: { + JitOptRef res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: { + JitOptRef res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_ADD_UNICODE: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + 
JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + PyObject *res_o = PyUnicode_Concat(left_o, right_o); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + if (res_o == NULL) { goto error; } - res = sym_new_const(ctx, temp); + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyUnicode_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_OP_INPLACE_ADD_UNICODE: { - JitOptSymbol *right; - JitOptSymbol *left; + JitOptRef right; + JitOptRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - JitOptSymbol *res; + JitOptRef res; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); @@ -559,7 +788,7 @@ } case _BINARY_OP_EXTEND: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -568,8 +797,19 @@ } case _BINARY_SLICE: { - JitOptSymbol *res; - res = sym_new_not_null(ctx); + JitOptRef container; + JitOptRef res; + container = stack_pointer[-3]; + PyTypeObject *type = sym_get_type(container); + if (type == &PyUnicode_Type || + type == &PyList_Type || + type == &PyTuple_Type) + { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -583,7 +823,7 @@ } case _BINARY_OP_SUBSCR_LIST_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -592,7 +832,7 @@ } case _BINARY_OP_SUBSCR_LIST_SLICE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -601,7 +841,7 @@ } case _BINARY_OP_SUBSCR_STR_INT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyUnicode_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -610,7 +850,7 @@ } case _GUARD_NOS_TUPLE: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyTuple_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -620,7 +860,7 @@ } case _GUARD_TOS_TUPLE: { - 
JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PyTuple_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -630,9 +870,9 @@ } case _BINARY_OP_SUBSCR_TUPLE_INT: { - JitOptSymbol *sub_st; - JitOptSymbol *tuple_st; - JitOptSymbol *res; + JitOptRef sub_st; + JitOptRef tuple_st; + JitOptRef res; sub_st = stack_pointer[-1]; tuple_st = stack_pointer[-2]; assert(sym_matches_type(tuple_st, &PyTuple_Type)); @@ -659,7 +899,7 @@ } case _GUARD_NOS_DICT: { - JitOptSymbol *nos; + JitOptRef nos; nos = stack_pointer[-2]; if (sym_matches_type(nos, &PyDict_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -669,7 +909,7 @@ } case _GUARD_TOS_DICT: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PyDict_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -679,7 +919,7 @@ } case _BINARY_OP_SUBSCR_DICT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -688,7 +928,7 @@ } case _BINARY_OP_SUBSCR_CHECK_FUNC: { - JitOptSymbol *getitem; + JitOptRef getitem; getitem = sym_new_not_null(ctx); stack_pointer[0] = getitem; stack_pointer += 1; @@ -697,10 +937,10 @@ } case _BINARY_OP_SUBSCR_INIT_CALL: { - _Py_UOpsAbstractFrame *new_frame; - new_frame = NULL; + JitOptRef new_frame; + new_frame = PyJitRef_NULL; ctx->done = true; - stack_pointer[-3] = (JitOptSymbol *)new_frame; + stack_pointer[-3] = new_frame; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; @@ -743,14 +983,14 @@ } case _CALL_INTRINSIC_1: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _CALL_INTRINSIC_2: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -759,10 +999,10 @@ } case _RETURN_VALUE: { - JitOptSymbol *retval; - JitOptSymbol *res; + JitOptRef retval; + JitOptRef res; retval = stack_pointer[-1]; - JitOptSymbol *temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -786,14 +1026,14 @@ } case _GET_AITER: { - JitOptSymbol *iter; + JitOptRef iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; } case _GET_ANEXT: { - JitOptSymbol *awaitable; + JitOptRef awaitable; awaitable = sym_new_not_null(ctx); stack_pointer[0] = awaitable; stack_pointer += 1; @@ -802,7 +1042,7 @@ } case _GET_AWAITABLE: { - JitOptSymbol *iter; + JitOptRef iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -811,14 +1051,17 @@ /* _SEND is not a viable micro-op for tier 2 */ case _SEND_GEN_FRAME: { + JitOptRef gen_frame; + gen_frame = PyJitRef_NULL; ctx->done = true; + stack_pointer[-1] = gen_frame; break; } case _YIELD_VALUE: { - JitOptSymbol *res; - res = sym_new_unknown(ctx); - stack_pointer[-1] = res; + JitOptRef value; + value = sym_new_unknown(ctx); + stack_pointer[-1] = value; break; } @@ -829,7 +1072,7 @@ } case _LOAD_COMMON_CONSTANT: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -838,7 +1081,7 @@ } case _LOAD_BUILD_CLASS: { - JitOptSymbol *bc; + JitOptRef bc; bc = sym_new_not_null(ctx); stack_pointer[0] = bc; stack_pointer += 1; @@ -857,8 +1100,11 @@ } case _UNPACK_SEQUENCE: { - JitOptSymbol **values; + JitOptRef *values; + JitOptRef *top; values = &stack_pointer[-1]; + top = &stack_pointer[-1 + oparg]; + (void)top; for (int i = 0; i < oparg; i++) { values[i] = 
sym_new_unknown(ctx); } @@ -868,9 +1114,9 @@ } case _UNPACK_SEQUENCE_TWO_TUPLE: { - JitOptSymbol *seq; - JitOptSymbol *val1; - JitOptSymbol *val0; + JitOptRef seq; + JitOptRef val1; + JitOptRef val0; seq = stack_pointer[-1]; val0 = sym_tuple_getitem(ctx, seq, 0); val1 = sym_tuple_getitem(ctx, seq, 1); @@ -882,8 +1128,8 @@ } case _UNPACK_SEQUENCE_TUPLE: { - JitOptSymbol *seq; - JitOptSymbol **values; + JitOptRef seq; + JitOptRef *values; seq = stack_pointer[-1]; values = &stack_pointer[-1]; for (int i = 0; i < oparg; i++) { @@ -895,7 +1141,7 @@ } case _UNPACK_SEQUENCE_LIST: { - JitOptSymbol **values; + JitOptRef *values; values = &stack_pointer[-1]; for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); @@ -906,8 +1152,11 @@ } case _UNPACK_EX: { - JitOptSymbol **values; + JitOptRef *values; + JitOptRef *top; values = &stack_pointer[-1]; + top = &stack_pointer[(oparg & 0xFF) + (oparg >> 8)]; + (void)top; int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; for (int i = 0; i < totalargs; i++) { values[i] = sym_new_unknown(ctx); @@ -940,7 +1189,7 @@ } case _LOAD_LOCALS: { - JitOptSymbol *locals; + JitOptRef locals; locals = sym_new_not_null(ctx); stack_pointer[0] = locals; stack_pointer += 1; @@ -951,7 +1200,7 @@ /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 */ case _LOAD_NAME: { - JitOptSymbol *v; + JitOptRef v; v = sym_new_not_null(ctx); stack_pointer[0] = v; stack_pointer += 1; @@ -960,7 +1209,7 @@ } case _LOAD_GLOBAL: { - JitOptSymbol **res; + JitOptRef *res; res = &stack_pointer[0]; res[0] = sym_new_not_null(ctx); stack_pointer += 1; @@ -969,7 +1218,7 @@ } case _PUSH_NULL_CONDITIONAL: { - JitOptSymbol **null; + JitOptRef *null; null = &stack_pointer[0]; if (oparg & 1) { REPLACE_OP(this_instr, _PUSH_NULL, 0, 0); @@ -988,7 +1237,7 @@ } case _LOAD_GLOBAL_MODULE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -997,7 +1246,7 @@ } case _LOAD_GLOBAL_BUILTINS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1018,14 +1267,14 @@ } case _LOAD_FROM_DICT_OR_DEREF: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[-1] = value; break; } case _LOAD_DEREF: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -1044,7 +1293,7 @@ } case _BUILD_STRING: { - JitOptSymbol *str; + JitOptRef str; str = sym_new_type(ctx, &PyUnicode_Type); stack_pointer[-oparg] = str; stack_pointer += 1 - oparg; @@ -1053,7 +1302,7 @@ } case _BUILD_INTERPOLATION: { - JitOptSymbol *interpolation; + JitOptRef interpolation; interpolation = sym_new_not_null(ctx); stack_pointer[-2 - (oparg & 1)] = interpolation; stack_pointer += -1 - (oparg & 1); @@ -1062,7 +1311,7 @@ } case _BUILD_TEMPLATE: { - JitOptSymbol *template; + JitOptRef template; template = sym_new_not_null(ctx); stack_pointer[-2] = template; stack_pointer += -1; @@ -1071,8 +1320,8 @@ } case _BUILD_TUPLE: { - JitOptSymbol **values; - JitOptSymbol *tup; + JitOptRef *values; + JitOptRef tup; values = &stack_pointer[-oparg]; tup = sym_new_tuple(ctx, oparg, values); stack_pointer[-oparg] = tup; @@ -1082,7 +1331,7 @@ } case _BUILD_LIST: { - JitOptSymbol *list; + JitOptRef list; list = sym_new_type(ctx, &PyList_Type); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; @@ -1103,7 +1352,7 @@ } case _BUILD_SET: { - JitOptSymbol *set; + JitOptRef set; set = sym_new_type(ctx, &PySet_Type); 
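_BUILD_TUPLE above is the one constructor in this stretch that keeps per-element information: sym_new_tuple() receives the element symbols, whereas _BUILD_LIST and _BUILD_SET only record a result type. That is what _UNPACK_SEQUENCE_TWO_TUPLE cashes in through sym_tuple_getitem(), and what lets the _GET_LEN and _CALL_LEN hunks elsewhere in this diff fold len() of such a tuple to an inline constant through sym_tuple_length(). The symbol layout is not shown in the diff, so the representation below is assumed:

/* Assumed representation, for illustration only; the real JitOptSymbol
   layout is not part of this diff. */
typedef struct {
    int        length;     /* element count, or -1 when unknown */
    JitOptRef *items;      /* element symbols saved by _BUILD_TUPLE */
} TupleSymInfo;

static int
tuple_sym_length(const TupleSymInfo *info)
{
    /* _GET_LEN folds len(t) to a constant exactly when this is >= 0. */
    return info != NULL ? info->length : -1;
}

/* Worked example of the payoff (assumed trace):
 *
 *   t = (a, b)     _BUILD_TUPLE                length = 2, items = {a, b}
 *   x, y = t       _UNPACK_SEQUENCE_TWO_TUPLE  sym_tuple_getitem -> a, b
 *   n = len(t)     _GET_LEN                    sym_tuple_length -> 2, uop
 *                                              rewritten to
 *                                              _LOAD_CONST_INLINE_BORROW
 */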
stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; @@ -1112,7 +1361,7 @@ } case _BUILD_MAP: { - JitOptSymbol *map; + JitOptRef map; map = sym_new_type(ctx, &PyDict_Type); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; @@ -1143,7 +1392,7 @@ } case _LOAD_SUPER_ATTR_ATTR: { - JitOptSymbol *attr_st; + JitOptRef attr_st; attr_st = sym_new_not_null(ctx); stack_pointer[-3] = attr_st; stack_pointer += -2; @@ -1152,8 +1401,8 @@ } case _LOAD_SUPER_ATTR_METHOD: { - JitOptSymbol *attr; - JitOptSymbol *self_or_null; + JitOptRef attr; + JitOptRef self_or_null; attr = sym_new_not_null(ctx); self_or_null = sym_new_not_null(ctx); stack_pointer[-3] = attr; @@ -1164,14 +1413,14 @@ } case _LOAD_ATTR: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol **self_or_null; + JitOptRef owner; + JitOptRef attr; + JitOptRef *self_or_null; owner = stack_pointer[-1]; self_or_null = &stack_pointer[0]; (void)owner; attr = sym_new_not_null(ctx); - if (oparg &1) { + if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } stack_pointer[-1] = attr; @@ -1181,7 +1430,7 @@ } case _GUARD_TYPE_VERSION: { - JitOptSymbol *owner; + JitOptRef owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand0; assert(type_version); @@ -1208,7 +1457,7 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - JitOptSymbol *attr; + JitOptRef attr; uint16_t offset = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)offset; @@ -1217,14 +1466,14 @@ } case _LOAD_ATTR_MODULE: { - JitOptSymbol *owner; - JitOptSymbol *attr; + JitOptRef owner; + JitOptRef attr; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)this_instr->operand0; uint16_t index = (uint16_t)this_instr->operand0; (void)dict_version; (void)index; - attr = NULL; + attr = PyJitRef_NULL; if (sym_is_const(ctx, owner)) { PyModuleObject *mod = (PyModuleObject *)sym_get_const(ctx, owner); if (PyModule_CheckExact(mod)) { @@ -1235,11 +1484,16 @@ PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); PyObject *res = convert_global_to_const(this_instr, dict, true); - attr = sym_new_const(ctx, res); + if (res == NULL) { + attr = sym_new_not_null(ctx); + } + else { + attr = sym_new_const(ctx, res); + } } } } - if (attr == NULL) { + if (PyJitRef_IsNull(attr)) { attr = sym_new_not_null(ctx); } stack_pointer[-1] = attr; @@ -1247,7 +1501,7 @@ } case _LOAD_ATTR_WITH_HINT: { - JitOptSymbol *attr; + JitOptRef attr; uint16_t hint = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)hint; @@ -1256,7 +1510,7 @@ } case _LOAD_ATTR_SLOT: { - JitOptSymbol *attr; + JitOptRef attr; uint16_t index = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)index; @@ -1265,25 +1519,43 @@ } case _CHECK_ATTR_CLASS: { + JitOptRef owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand0; + PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); + if (type) { + if (type == sym_get_const(ctx, owner)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + sym_set_const(owner, type); + } + } break; } case _LOAD_ATTR_CLASS: { - JitOptSymbol *attr; + JitOptRef owner; + JitOptRef attr; + owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; - attr = sym_new_not_null(ctx); (void)descr; + PyTypeObject *type = (PyTypeObject *)sym_get_const(ctx, owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); 
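These _LOAD_ATTR_* rewrites all funnel into the new lookup_attr() helper: instead of settling for "some non-NULL object", the optimizer resolves the attribute on the type it has established for the owner and, on success, strength-reduces the load to one of the two inline-constant uops it was handed (the borrow variant for immortal results). The helper's body is not shown in this diff, and the context type name is taken on faith, so the following is only a plausible shape:

/* Plausible shape of lookup_attr() (assumption; its body is not in this
   diff). Caching the borrowed result pointer in the instruction is safe
   only because a version guard such as _CHECK_ATTR_CLASS pins the type. */
static JitOptRef
lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr,
            PyTypeObject *type, PyObject *name,
            uint16_t immortal_op, uint16_t mortal_op)
{
    if (type != NULL) {
        PyObject *res = _PyType_Lookup(type, name);   /* borrowed */
        if (res != NULL) {
            int opcode = _Py_IsImmortal(res) ? immortal_op : mortal_op;
            REPLACE_OP(this_instr, opcode, 0, (uintptr_t)res);
            return sym_new_const(ctx, res);
        }
    }
    return sym_new_not_null(ctx);   /* nothing provable; keep the load */
}

The method variants (_LOAD_ATTR_METHOD_*) pass _LOAD_CONST_UNDER_INLINE and its borrow twin instead of the _POP_TOP_* pair, since the owner has to stay on the stack as self.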
stack_pointer[-1] = attr; break; } case _LOAD_ATTR_PROPERTY_FRAME: { - _Py_UOpsAbstractFrame *new_frame; + JitOptRef new_frame; PyObject *fget = (PyObject *)this_instr->operand0; (void)fget; - new_frame = NULL; + new_frame = PyJitRef_NULL; ctx->done = true; - stack_pointer[-1] = (JitOptSymbol *)new_frame; + stack_pointer[-1] = new_frame; break; } @@ -1312,7 +1584,7 @@ } case _COMPARE_OP: { - JitOptSymbol *res; + JitOptRef res; if (oparg & 16) { res = sym_new_type(ctx, &PyBool_Type); } @@ -1326,7 +1598,7 @@ } case _COMPARE_OP_FLOAT: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -1335,9 +1607,9 @@ } case _COMPARE_OP_INT: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; + JitOptRef right; + JitOptRef left; + JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -1367,7 +1639,7 @@ } case _COMPARE_OP_STR: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -1376,25 +1648,25 @@ } case _IS_OP: { - JitOptSymbol *res; - res = sym_new_type(ctx, &PyBool_Type); - stack_pointer[-2] = res; + JitOptRef b; + b = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = b; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _CONTAINS_OP: { - JitOptSymbol *res; - res = sym_new_type(ctx, &PyBool_Type); - stack_pointer[-2] = res; + JitOptRef b; + b = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = b; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _GUARD_TOS_ANY_SET: { - JitOptSymbol *tos; + JitOptRef tos; tos = stack_pointer[-1]; if (sym_matches_type(tos, &PySet_Type) || sym_matches_type(tos, &PyFrozenSet_Type)) @@ -1405,26 +1677,26 @@ } case _CONTAINS_OP_SET: { - JitOptSymbol *res; - res = sym_new_type(ctx, &PyBool_Type); - stack_pointer[-2] = res; + JitOptRef b; + b = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = b; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _CONTAINS_OP_DICT: { - JitOptSymbol *res; - res = sym_new_type(ctx, &PyBool_Type); - stack_pointer[-2] = res; + JitOptRef b; + b = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = b; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _CHECK_EG_MATCH: { - JitOptSymbol *rest; - JitOptSymbol *match; + JitOptRef rest; + JitOptRef match; rest = sym_new_not_null(ctx); match = sym_new_not_null(ctx); stack_pointer[-2] = rest; @@ -1433,14 +1705,14 @@ } case _CHECK_EXC_MATCH: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; break; } case _IMPORT_NAME: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -1449,7 +1721,7 @@ } case _IMPORT_FROM: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1462,15 +1734,36 @@ /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ case _IS_NONE: { - JitOptSymbol *b; + JitOptRef b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; break; } case _GET_LEN: { - JitOptSymbol *len; - len = sym_new_not_null(ctx); + JitOptRef obj; + JitOptRef len; + obj = stack_pointer[-1]; + int tuple_length = sym_tuple_length(obj); + if (tuple_length == -1) { + len = sym_new_type(ctx, &PyLong_Type); + } + else { + assert(tuple_length >= 0); + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if 
(_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)temp); + } + len = sym_new_const(ctx, temp); + stack_pointer[0] = len; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + Py_DECREF(temp); + stack_pointer += -1; + } stack_pointer[0] = len; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -1478,7 +1771,7 @@ } case _MATCH_CLASS: { - JitOptSymbol *attrs; + JitOptRef attrs; attrs = sym_new_not_null(ctx); stack_pointer[-3] = attrs; stack_pointer += -2; @@ -1487,7 +1780,7 @@ } case _MATCH_MAPPING: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1496,7 +1789,7 @@ } case _MATCH_SEQUENCE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1505,7 +1798,7 @@ } case _MATCH_KEYS: { - JitOptSymbol *values_or_none; + JitOptRef values_or_none; values_or_none = sym_new_not_null(ctx); stack_pointer[0] = values_or_none; stack_pointer += 1; @@ -1514,14 +1807,27 @@ } case _GET_ITER: { - JitOptSymbol *iter; - iter = sym_new_not_null(ctx); + JitOptRef iterable; + JitOptRef iter; + JitOptRef index_or_null; + iterable = stack_pointer[-1]; + if (sym_matches_type(iterable, &PyTuple_Type) || sym_matches_type(iterable, &PyList_Type)) { + iter = iterable; + index_or_null = sym_new_not_null(ctx); + } + else { + iter = sym_new_not_null(ctx); + index_or_null = sym_new_unknown(ctx); + } stack_pointer[-1] = iter; + stack_pointer[0] = index_or_null; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } case _GET_YIELD_FROM_ITER: { - JitOptSymbol *iter; + JitOptRef iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -1530,7 +1836,7 @@ /* _FOR_ITER is not a viable micro-op for tier 2 */ case _FOR_ITER_TIER_TWO: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1553,7 +1859,7 @@ /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 */ case _ITER_NEXT_LIST_TIER_TWO: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1562,6 +1868,12 @@ } case _ITER_CHECK_TUPLE: { + JitOptRef iter; + iter = stack_pointer[-2]; + if (sym_matches_type(iter, &PyTuple_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(iter, &PyTuple_Type); break; } @@ -1572,7 +1884,7 @@ } case _ITER_NEXT_TUPLE: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1591,7 +1903,7 @@ } case _ITER_NEXT_RANGE: { - JitOptSymbol *next; + JitOptRef next; next = sym_new_type(ctx, &PyLong_Type); stack_pointer[0] = next; stack_pointer += 1; @@ -1600,13 +1912,18 @@ } case _FOR_ITER_GEN_FRAME: { + JitOptRef gen_frame; + gen_frame = PyJitRef_NULL; ctx->done = true; + stack_pointer[0] = gen_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } case _INSERT_NULL: { - JitOptSymbol *self; - JitOptSymbol **method_and_self; + JitOptRef self; + JitOptRef *method_and_self; self = stack_pointer[-1]; method_and_self = &stack_pointer[-1]; method_and_self[0] = sym_new_null(ctx); @@ -1617,7 +1934,7 @@ } case _LOAD_SPECIAL: { - JitOptSymbol **method_and_self; + JitOptRef *method_and_self; method_and_self = &stack_pointer[-2]; method_and_self[0] = sym_new_not_null(ctx); method_and_self[1] = sym_new_unknown(ctx); @@ -1625,7 +1942,7 @@ } case _WITH_EXCEPT_START: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[0] = 
res; stack_pointer += 1; @@ -1634,8 +1951,8 @@ } case _PUSH_EXC_INFO: { - JitOptSymbol *prev_exc; - JitOptSymbol *new_exc; + JitOptRef prev_exc; + JitOptRef new_exc; prev_exc = sym_new_not_null(ctx); new_exc = sym_new_not_null(ctx); stack_pointer[-1] = prev_exc; @@ -1654,13 +1971,17 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol *self; + JitOptRef owner; + JitOptRef attr; + JitOptRef self; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1670,13 +1991,17 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol *self; + JitOptRef owner; + JitOptRef attr; + JitOptRef self; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1686,15 +2011,31 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - JitOptSymbol *attr; - attr = sym_new_not_null(ctx); + JitOptRef owner; + JitOptRef attr; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand0; + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - JitOptSymbol *attr; - attr = sym_new_not_null(ctx); + JitOptRef owner; + JitOptRef attr; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand0; + (void)descr; + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _POP_TOP_LOAD_CONST_INLINE_BORROW, + _POP_TOP_LOAD_CONST_INLINE); stack_pointer[-1] = attr; break; } @@ -1704,13 +2045,17 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { - JitOptSymbol *owner; - JitOptSymbol *attr; - JitOptSymbol *self; + JitOptRef owner; + JitOptRef attr; + JitOptRef self; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand0; (void)descr; - attr = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(owner); + PyObject *name = PyTuple_GET_ITEM(co->co_names, oparg >> 1); + attr = lookup_attr(ctx, this_instr, type, name, + _LOAD_CONST_UNDER_INLINE_BORROW, + _LOAD_CONST_UNDER_INLINE); self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; @@ -1720,16 +2065,17 @@ } case _MAYBE_EXPAND_METHOD: { - JitOptSymbol **args; - JitOptSymbol *func; - JitOptSymbol *maybe_self; - args = &stack_pointer[-oparg]; + JitOptRef *args; + JitOptRef self_or_null; + JitOptRef callable; args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; (void)args; - func = sym_new_not_null(ctx); - maybe_self = sym_new_not_null(ctx); - stack_pointer[-2 - oparg] = func; - stack_pointer[-1 - oparg] = 
maybe_self; + callable = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = callable; + stack_pointer[-1 - oparg] = self_or_null; break; } @@ -1738,7 +2084,7 @@ /* _MONITOR_CALL is not a viable micro-op for tier 2 */ case _PY_FRAME_GENERAL: { - _Py_UOpsAbstractFrame *new_frame; + JitOptRef new_frame; PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); co = get_code_with_logging((this_instr + 2)); @@ -1746,15 +2092,15 @@ ctx->done = true; break; } - new_frame = frame_new(ctx, co, 0, NULL, 0); - stack_pointer[-2 - oparg] = (JitOptSymbol *)new_frame; + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _CHECK_FUNCTION_VERSION: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)this_instr->operand0; if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) { @@ -1771,6 +2117,16 @@ } case _CHECK_METHOD_VERSION: { + JitOptRef callable; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)this_instr->operand0; + if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) { + PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable); + assert(PyMethod_Check(method)); + REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version); + this_instr->operand1 = (uintptr_t)method->im_func; + } + sym_set_type(callable, &PyMethod_Type); break; } @@ -1783,7 +2139,7 @@ } case _CALL_NON_PY_GENERAL: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1792,8 +2148,8 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - JitOptSymbol *null; - JitOptSymbol *callable; + JitOptRef null; + JitOptRef callable; null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; sym_set_null(null); @@ -1802,8 +2158,8 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - JitOptSymbol *self_or_null; - JitOptSymbol *callable; + JitOptRef self_or_null; + JitOptRef callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; callable = sym_new_not_null(ctx); @@ -1821,8 +2177,8 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { - JitOptSymbol *self_or_null; - JitOptSymbol *callable; + JitOptRef self_or_null; + JitOptRef callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; assert(sym_matches_type(callable, &PyFunction_Type)); @@ -1849,9 +2205,9 @@ } case _INIT_CALL_PY_EXACT_ARGS: { - JitOptSymbol **args; - JitOptSymbol *self_or_null; - _Py_UOpsAbstractFrame *new_frame; + JitOptRef *args; + JitOptRef self_or_null; + JitOptRef new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; int argcount = oparg; @@ -1862,32 +2218,32 @@ ctx->done = true; break; } - assert(self_or_null != NULL); + assert(!PyJitRef_IsNull(self_or_null)); assert(args != NULL); if (sym_is_not_null(self_or_null)) { args--; argcount++; } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args, argcount)); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } - stack_pointer[-2 - oparg] = (JitOptSymbol *)new_frame; + stack_pointer[-2 - 
oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_FRAME: { - _Py_UOpsAbstractFrame *new_frame; - new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + JitOptRef new_frame; + new_frame = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame); ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ctx->frame->stack_pointer; co = get_code(this_instr); if (co == NULL) { ctx->done = true; @@ -1912,7 +2268,7 @@ } case _GUARD_NOS_NULL: { - JitOptSymbol *null; + JitOptRef null; null = stack_pointer[-2]; if (sym_is_null(null)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1921,8 +2277,28 @@ break; } + case _GUARD_NOS_NOT_NULL: { + JitOptRef nos; + nos = stack_pointer[-2]; + if (sym_is_not_null(nos)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_non_null(nos); + break; + } + + case _GUARD_THIRD_NULL: { + JitOptRef null; + null = stack_pointer[-3]; + if (sym_is_null(null)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_null(null); + break; + } + case _GUARD_CALLABLE_TYPE_1: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; if (sym_get_const(ctx, callable) == (PyObject *)&PyType_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1932,11 +2308,14 @@ } case _CALL_TYPE_1: { - JitOptSymbol *arg; - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; arg = stack_pointer[-1]; - if (sym_has_type(arg)) { - res = sym_new_const(ctx, (PyObject *)sym_get_type(arg)); + PyObject* type = (PyObject *)sym_get_type(arg); + if (type) { + res = sym_new_const(ctx, type); + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, + (uintptr_t)type); } else { res = sym_new_not_null(ctx); @@ -1948,7 +2327,7 @@ } case _GUARD_CALLABLE_STR_1: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; if (sym_get_const(ctx, callable) == (PyObject *)&PyUnicode_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1958,8 +2337,8 @@ } case _CALL_STR_1: { - JitOptSymbol *arg; - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyUnicode_Type)) { res = arg; @@ -1974,7 +2353,7 @@ } case _GUARD_CALLABLE_TUPLE_1: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; if (sym_get_const(ctx, callable) == (PyObject *)&PyTuple_Type) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -1984,8 +2363,8 @@ } case _CALL_TUPLE_1: { - JitOptSymbol *arg; - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyTuple_Type)) { res = arg; @@ -2000,26 +2379,27 @@ } case _CHECK_AND_ALLOCATE_OBJECT: { - JitOptSymbol **args; - JitOptSymbol *self; - JitOptSymbol *init; - args = &stack_pointer[-oparg]; + JitOptRef *args; + JitOptRef self_or_null; + JitOptRef callable; args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; uint32_t type_version = (uint32_t)this_instr->operand0; (void)type_version; (void)args; - self = sym_new_not_null(ctx); - init = sym_new_not_null(ctx); - stack_pointer[-2 - oparg] = self; - stack_pointer[-1 - oparg] = init; + callable = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = callable; + stack_pointer[-1 - oparg] = self_or_null; break; } case _CREATE_INIT_FRAME: { - _Py_UOpsAbstractFrame *init_frame; - init_frame = 
NULL; + JitOptRef init_frame; + init_frame = PyJitRef_NULL; ctx->done = true; - stack_pointer[-2 - oparg] = (JitOptSymbol *)init_frame; + stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -2032,7 +2412,7 @@ } case _CALL_BUILTIN_CLASS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2041,7 +2421,7 @@ } case _CALL_BUILTIN_O: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2050,7 +2430,7 @@ } case _CALL_BUILTIN_FAST: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2059,7 +2439,7 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2068,7 +2448,7 @@ } case _GUARD_CALLABLE_LEN: { - JitOptSymbol *callable; + JitOptRef callable; callable = stack_pointer[-3]; PyObject *len = _PyInterpreterState_GET()->callable_cache.len; if (sym_get_const(ctx, callable) == len) { @@ -2079,23 +2459,78 @@ } case _CALL_LEN: { - JitOptSymbol *res; + JitOptRef arg; + JitOptRef res; + arg = stack_pointer[-1]; res = sym_new_type(ctx, &PyLong_Type); + int tuple_length = sym_tuple_length(arg); + if (tuple_length >= 0) { + PyObject *temp = PyLong_FromLong(tuple_length); + if (temp == NULL) { + goto error; + } + if (_Py_IsImmortal(temp)) { + REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + 0, (uintptr_t)temp); + } + res = sym_new_const(ctx, temp); + stack_pointer[-3] = res; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + Py_DECREF(temp); + stack_pointer += 2; + } stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; } + case _GUARD_CALLABLE_ISINSTANCE: { + JitOptRef callable; + callable = stack_pointer[-4]; + PyObject *isinstance = _PyInterpreterState_GET()->callable_cache.isinstance; + if (sym_get_const(ctx, callable) == isinstance) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_const(callable, isinstance); + break; + } + case _CALL_ISINSTANCE: { - JitOptSymbol *res; - res = sym_new_not_null(ctx); - stack_pointer[-2 - oparg] = res; - stack_pointer += -1 - oparg; + JitOptRef cls; + JitOptRef instance; + JitOptRef res; + cls = stack_pointer[-1]; + instance = stack_pointer[-2]; + res = sym_new_type(ctx, &PyBool_Type); + PyTypeObject *inst_type = sym_get_type(instance); + PyTypeObject *cls_o = (PyTypeObject *)sym_get_const(ctx, cls); + if (inst_type && cls_o && sym_matches_type(cls, &PyType_Type)) { + PyObject *out = Py_False; + if (inst_type == cls_o || PyType_IsSubtype(inst_type, cls_o)) { + out = Py_True; + } + sym_set_const(res, out); + REPLACE_OP(this_instr, _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)out); + } + stack_pointer[-4] = res; + stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); break; } + case _GUARD_CALLABLE_LIST_APPEND: { + JitOptRef callable; + callable = stack_pointer[-3]; + PyObject *list_append = _PyInterpreterState_GET()->callable_cache.list_append; + if (sym_get_const(ctx, callable) == list_append) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_const(callable, list_append); + break; + } + case _CALL_LIST_APPEND: { stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); @@ -2103,7 +2538,7 @@ } case _CALL_METHOD_DESCRIPTOR_O: { - JitOptSymbol *res; + JitOptRef res; res = 
sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2112,7 +2547,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2121,7 +2556,7 @@ } case _CALL_METHOD_DESCRIPTOR_NOARGS: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2130,7 +2565,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2147,10 +2582,10 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 */ case _PY_FRAME_KW: { - _Py_UOpsAbstractFrame *new_frame; - new_frame = NULL; + JitOptRef new_frame; + new_frame = PyJitRef_NULL; ctx->done = true; - stack_pointer[-3 - oparg] = (JitOptSymbol *)new_frame; + stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -2173,7 +2608,7 @@ } case _CALL_KW_NON_PY: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-3 - oparg] = res; stack_pointer += -2 - oparg; @@ -2188,14 +2623,14 @@ /* _DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ case _MAKE_FUNCTION: { - JitOptSymbol *func; + JitOptRef func; func = sym_new_not_null(ctx); stack_pointer[-1] = func; break; } case _SET_FUNCTION_ATTRIBUTE: { - JitOptSymbol *func_out; + JitOptRef func_out; func_out = sym_new_not_null(ctx); stack_pointer[-2] = func_out; stack_pointer += -1; @@ -2204,7 +2639,7 @@ } case _RETURN_GENERATOR: { - JitOptSymbol *res; + JitOptRef res; ctx->frame->stack_pointer = stack_pointer; frame_pop(ctx); stack_pointer = ctx->frame->stack_pointer; @@ -2226,7 +2661,7 @@ } case _BUILD_SLICE: { - JitOptSymbol *slice; + JitOptRef slice; slice = sym_new_type(ctx, &PySlice_Type); stack_pointer[-oparg] = slice; stack_pointer += 1 - oparg; @@ -2235,21 +2670,21 @@ } case _CONVERT_VALUE: { - JitOptSymbol *result; + JitOptRef result; result = sym_new_not_null(ctx); stack_pointer[-1] = result; break; } case _FORMAT_SIMPLE: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { - JitOptSymbol *res; + JitOptRef res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -2258,8 +2693,8 @@ } case _COPY: { - JitOptSymbol *bottom; - JitOptSymbol *top; + JitOptRef bottom; + JitOptRef top; bottom = stack_pointer[-1 - (oparg-1)]; assert(oparg > 0); top = bottom; @@ -2270,15 +2705,40 @@ } case _BINARY_OP: { - JitOptSymbol *right; - JitOptSymbol *left; - JitOptSymbol *res; - right = stack_pointer[-1]; - left = stack_pointer[-2]; - bool lhs_int = sym_matches_type(left, &PyLong_Type); - bool rhs_int = sym_matches_type(right, &PyLong_Type); - bool lhs_float = sym_matches_type(left, &PyFloat_Type); - bool rhs_float = sym_matches_type(right, &PyFloat_Type); + JitOptRef rhs; + JitOptRef lhs; + JitOptRef res; + rhs = stack_pointer[-1]; + lhs = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, lhs) && + sym_is_safe_const(ctx, rhs) + ) { + JitOptRef lhs_sym = lhs; + JitOptRef rhs_sym = rhs; + _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); + _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); + PyObject *rhs_o = 
PyStackRef_AsPyObjectBorrow(rhs); + assert(_PyEval_BinaryOps[oparg]); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + break; + } + bool lhs_int = sym_matches_type(lhs, &PyLong_Type); + bool rhs_int = sym_matches_type(rhs, &PyLong_Type); + bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); + bool rhs_float = sym_matches_type(rhs, &PyFloat_Type); if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) { res = sym_new_unknown(ctx); } @@ -2289,10 +2749,10 @@ else if (lhs_float) { res = sym_new_type(ctx, &PyFloat_Type); } - else if (!sym_is_const(ctx, right)) { + else if (!sym_is_const(ctx, rhs)) { res = sym_new_unknown(ctx); } - else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(ctx, right))) { + else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(ctx, rhs))) { res = sym_new_type(ctx, &PyFloat_Type); } else { @@ -2315,11 +2775,11 @@ } case _SWAP: { - JitOptSymbol *top; - JitOptSymbol *bottom; + JitOptRef top; + JitOptRef bottom; top = stack_pointer[-1]; bottom = stack_pointer[-2 - (oparg-2)]; - JitOptSymbol *temp = bottom; + JitOptRef temp = bottom; bottom = top; top = temp; assert(oparg >= 2); @@ -2347,7 +2807,7 @@ /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ case _GUARD_IS_TRUE_POP: { - JitOptSymbol *flag; + JitOptRef flag; flag = stack_pointer[-1]; if (sym_is_const(ctx, flag)) { PyObject *value = sym_get_const(ctx, flag); @@ -2361,7 +2821,7 @@ } case _GUARD_IS_FALSE_POP: { - JitOptSymbol *flag; + JitOptRef flag; flag = stack_pointer[-1]; if (sym_is_const(ctx, flag)) { PyObject *value = sym_get_const(ctx, flag); @@ -2375,33 +2835,33 @@ } case _GUARD_IS_NONE_POP: { - JitOptSymbol *flag; - flag = stack_pointer[-1]; - if (sym_is_const(ctx, flag)) { - PyObject *value = sym_get_const(ctx, flag); + JitOptRef val; + val = stack_pointer[-1]; + if (sym_is_const(ctx, val)) { + PyObject *value = sym_get_const(ctx, val); assert(value != NULL); eliminate_pop_guard(this_instr, !Py_IsNone(value)); } - else if (sym_has_type(flag)) { - assert(!sym_matches_type(flag, &_PyNone_Type)); + else if (sym_has_type(val)) { + assert(!sym_matches_type(val, &_PyNone_Type)); eliminate_pop_guard(this_instr, true); } - sym_set_const(flag, Py_None); + sym_set_const(val, Py_None); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _GUARD_IS_NOT_NONE_POP: { - JitOptSymbol *flag; - flag = stack_pointer[-1]; - if (sym_is_const(ctx, flag)) { - PyObject *value = sym_get_const(ctx, flag); + JitOptRef val; + val = stack_pointer[-1]; + if (sym_is_const(ctx, val)) { + PyObject *value = sym_get_const(ctx, val); assert(value != NULL); eliminate_pop_guard(this_instr, Py_IsNone(value)); } - else if (sym_has_type(flag)) { - assert(!sym_matches_type(flag, &_PyNone_Type)); + else if (sym_has_type(val)) { + assert(!sym_matches_type(val, &_PyNone_Type)); eliminate_pop_guard(this_instr, false); } stack_pointer += -1; @@ -2441,7 +2901,7 @@ } case _LOAD_CONST_INLINE: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; value = sym_new_const(ctx, ptr); stack_pointer[0] = value; @@ -2451,7 +2911,7 @@ } case _POP_TOP_LOAD_CONST_INLINE: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject 
*)this_instr->operand0; value = sym_new_const(ctx, ptr); stack_pointer[-1] = value; @@ -2459,25 +2919,43 @@ } case _LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } + case _POP_CALL: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_TWO: { + stack_pointer += -4; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = sym_new_const(ctx, ptr); + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); stack_pointer[-1] = value; break; } case _POP_TWO_LOAD_CONST_INLINE_BORROW: { - JitOptSymbol *value; + JitOptRef value; value = sym_new_not_null(ctx); stack_pointer[-2] = value; stack_pointer += -1; @@ -2485,6 +2963,60 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + JitOptRef value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + stack_pointer[-2] = value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + JitOptRef value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + stack_pointer[-3] = value; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { + JitOptRef value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + stack_pointer[-4] = value; + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE: { + JitOptRef value; + JitOptRef new; + value = sym_new_not_null(ctx); + new = sym_new_not_null(ctx); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_UNDER_INLINE_BORROW: { + JitOptRef value; + JitOptRef new; + value = sym_new_not_null(ctx); + new = sym_new_not_null(ctx); + stack_pointer[-1] = value; + stack_pointer[0] = new; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _CHECK_FUNCTION: { break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e8a4f87031b..8a3df236c80 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -13,22 +13,50 @@ #include <stdint.h> #include <stddef.h> -/* Symbols - ======= - - See the diagram at - https://github.com/faster-cpython/ideas/blob/main/3.13/redundancy_eliminator.md - - We represent the nodes in the diagram as follows - (the flag bits are only defined in optimizer_symbols.c): - - Top: no flag bits, typ and const_val are NULL. - - NULL: IS_NULL flag set, type and const_val NULL. - - Not NULL: NOT_NULL flag set, type and const_val NULL. - - None/not None: not used. (None could be represented as any other constant.) - - Known type: NOT_NULL flag set and typ set; const_val is NULL. - - Known constant: NOT_NULL flag set, type set, const_val set. - - Bottom: IS_NULL and NOT_NULL flags set, type and const_val NULL. 
- */ +/* + +Symbols +======= + +https://github.com/faster-cpython/ideas/blob/main/3.13/redundancy_eliminator.md + +Logically, all symbols begin as UNKNOWN, and can transition downwards along the +edges of the lattice, but *never* upwards (see the diagram below). The UNKNOWN +state represents no information, and the BOTTOM state represents contradictory +information. Though symbols logically progress through all intermediate nodes, +we often skip in-between states for convenience: + + UNKNOWN + | | +NULL | +| | <- Anything below this level is an object. +| NON_NULL-+ +| | | <- Anything below this level has a known type version. +| TYPE_VERSION | +| | | <- Anything below this level has a known type. +| KNOWN_CLASS | +| | | | | | +| | | INT* | | +| | | | | | <- Anything below this level has a known truthiness. +| | | | | TRUTHINESS +| | | | | | +| TUPLE | | | | +| | | | | | <- Anything below this level is a known constant. +| KNOWN_VALUE--+ +| | <- Anything below this level is unreachable. +BOTTOM + +For example, after guarding that the type of an UNKNOWN local is int, we can +narrow the symbol to KNOWN_CLASS (logically progressing through NON_NULL and +TYPE_VERSION to get there). Later, we may learn that it is falsey based on the +result of a truth test, which would allow us to narrow the symbol to KNOWN_VALUE +(with a value of integer zero). If at any point we encounter a float guard on +the same symbol, that would be a contradiction, and the symbol would be set to +BOTTOM (indicating that the code is unreachable). + +INT* is a limited-range int, currently a "compact" int. + +*/ #ifdef Py_DEBUG static inline int get_lltrace(void) { @@ -64,6 +92,12 @@ out_of_space(JitOptContext *ctx) return &NO_SPACE_SYMBOL; } +JitOptRef +out_of_space_ref(JitOptContext *ctx) +{ + return PyJitRef_Wrap(out_of_space(ctx)); +} + static JitOptSymbol * sym_new(JitOptContext *ctx) { @@ -74,7 +108,7 @@ sym_new(JitOptContext *ctx) return NULL; } ctx->t_arena.ty_curr_number++; - self->tag = JIT_SYM_UNKNOWN_TAG; + self->tag = JIT_SYM_UNKNOWN_TAG;; return self; } @@ -93,25 +127,28 @@ sym_set_bottom(JitOptContext *ctx, JitOptSymbol *sym) } bool -_Py_uop_sym_is_bottom(JitOptSymbol *sym) +_Py_uop_sym_is_bottom(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); return sym->tag == JIT_SYM_BOTTOM_TAG; } bool -_Py_uop_sym_is_not_null(JitOptSymbol *sym) { +_Py_uop_sym_is_not_null(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); return sym->tag == JIT_SYM_NON_NULL_TAG || sym->tag > JIT_SYM_BOTTOM_TAG; } bool -_Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { return true; } if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, PyJitRef_Wrap(value)); if (truthiness < 0) { return false; } @@ -122,21 +159,22 @@ _Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym) } bool -_Py_uop_sym_is_null(JitOptSymbol *sym) +_Py_uop_sym_is_null(JitOptRef ref) { - return sym->tag == JIT_SYM_NULL_TAG; + return PyJitRef_Unwrap(ref)->tag == JIT_SYM_NULL_TAG; } PyObject * -_Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag
== JIT_SYM_TRUTHINESS_TAG) { JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, PyJitRef_Wrap(value)); if (truthiness < 0) { return NULL; } @@ -147,9 +185,41 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym) return NULL; } +_PyStackRef +_Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectBorrow(const_val); +} + +/* + Indicates whether the constant is safe to constant evaluate + (without side effects). + */ +bool +_Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return false; + } + if (_PyLong_CheckExactAndCompact(const_val)) { + return true; + } + PyTypeObject *typ = Py_TYPE(const_val); + return (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &_PyNone_Type) || + (typ == &PyBool_Type); +} + void -_Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ) +_Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -194,12 +264,22 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ) sym_set_bottom(ctx, sym); } return; + case JIT_SYM_COMPACT_INT: + if (typ != &PyLong_Type) { + sym_set_bottom(ctx, sym); + } + return; } } bool -_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version) +_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int version) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + PyTypeObject *type = _PyType_LookupByVersion(version); + if (type) { + _Py_uop_sym_set_type(ctx, ref, type); + } JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -215,18 +295,24 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int return true; } case JIT_SYM_KNOWN_VALUE_TAG: - Py_CLEAR(sym->value.value); - sym_set_bottom(ctx, sym); - return false; + if (Py_TYPE(sym->value.value)->tp_version_tag != version) { + Py_CLEAR(sym->value.value); + sym_set_bottom(ctx, sym); + return false; + }; + return true; case JIT_SYM_TUPLE_TAG: - sym_set_bottom(ctx, sym); - return false; + if (PyTuple_Type.tp_version_tag != version) { + sym_set_bottom(ctx, sym); + return false; + }; + return true; case JIT_SYM_TYPE_VERSION_TAG: - if (sym->version.version == version) { - return true; + if (sym->version.version != version) { + sym_set_bottom(ctx, sym); + return false; } - sym_set_bottom(ctx, sym); - return false; + return true; case JIT_SYM_BOTTOM_TAG: return false; case JIT_SYM_NON_NULL_TAG: @@ -240,13 +326,20 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int return false; } return true; + case JIT_SYM_COMPACT_INT: + if (version != PyLong_Type.tp_version_tag) { + sym_set_bottom(ctx, sym); + return false; + } + return true; } Py_UNREACHABLE(); } void -_Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val) +_Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -266,6 +359,18 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject 
*const_val } return; case JIT_SYM_TUPLE_TAG: + if (PyTuple_CheckExact(const_val)) { + Py_ssize_t len = _Py_uop_sym_tuple_length(ref); + if (len == PyTuple_GET_SIZE(const_val)) { + for (Py_ssize_t i = 0; i < len; i++) { + JitOptRef sym_item = _Py_uop_sym_tuple_getitem(ctx, ref, i); + PyObject *item = PyTuple_GET_ITEM(const_val, i); + _Py_uop_sym_set_const(ctx, sym_item, item); + } + make_const(sym, const_val); + return; + } + } sym_set_bottom(ctx, sym); return; case JIT_SYM_TYPE_VERSION_TAG: @@ -283,13 +388,14 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val return; case JIT_SYM_TRUTHINESS_TAG: if (!PyBool_Check(const_val) || - (_Py_uop_sym_is_const(ctx, sym) && - _Py_uop_sym_get_const(ctx, sym) != const_val)) + (_Py_uop_sym_is_const(ctx, ref) && + _Py_uop_sym_get_const(ctx, ref) != const_val)) { sym_set_bottom(ctx, sym); return; } - JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; + JitOptRef value = PyJitRef_Wrap( + allocation_base(ctx) + sym->truthiness.value); PyTypeObject *type = _Py_uop_sym_get_type(value); if (const_val == (sym->truthiness.invert ? Py_False : Py_True)) { // value is truthy. This is only useful for bool: @@ -310,12 +416,21 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val // TODO: More types (GH-130415)! make_const(sym, const_val); return; + case JIT_SYM_COMPACT_INT: + if (_PyLong_CheckExactAndCompact(const_val)) { + make_const(sym, const_val); + } + else { + sym_set_bottom(ctx, sym); + } + return; } } void -_Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_set_null(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_UNKNOWN_TAG) { sym->tag = JIT_SYM_NULL_TAG; } @@ -325,8 +440,9 @@ _Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym) } void -_Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_UNKNOWN_TAG) { sym->tag = JIT_SYM_NON_NULL_TAG; } @@ -335,70 +451,82 @@ _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym) } } - -JitOptSymbol * +JitOptRef _Py_uop_sym_new_unknown(JitOptContext *ctx) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - return res; + return PyJitRef_Wrap(res); } -JitOptSymbol * +JitOptRef _Py_uop_sym_new_not_null(JitOptContext *ctx) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } res->tag = JIT_SYM_NON_NULL_TAG; - return res; + return PyJitRef_Wrap(res); } -JitOptSymbol * +JitOptRef _Py_uop_sym_new_type(JitOptContext *ctx, PyTypeObject *typ) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - _Py_uop_sym_set_type(ctx, res, typ); - return res; + JitOptRef ref = PyJitRef_Wrap(res); + _Py_uop_sym_set_type(ctx, ref, typ); + return ref; } // Adds a new reference to const_val, owned by the symbol. 
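(Editorial sketch, not part of the patch.) The setters above are narrowing operations on the lattice documented at the top of optimizer_symbols.c: each call can only move a symbol downwards, and contradictory facts drive it to BOTTOM. A minimal illustration, assuming a live JitOptContext *ctx and the internal pycore_optimizer.h declarations; it mirrors the _Py_uop_symbols_test cases further down, and refcounting details are simplified:

    JitOptRef ref = _Py_uop_sym_new_unknown(ctx);   // UNKNOWN: no information
    _Py_uop_sym_set_type(ctx, ref, &PyLong_Type);   // narrow to KNOWN_CLASS (int)
    PyObject *v = PyLong_FromLong(42);              // hypothetical constant
    _Py_uop_sym_set_const(ctx, ref, v);             // narrow to KNOWN_VALUE (42)
    _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type);  // int vs. float: contradiction
    assert(_Py_uop_sym_is_bottom(ref));             // symbol is now BOTTOM
    Py_DECREF(v);                                   // drop our own reference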
-JitOptSymbol * +JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val) { assert(const_val != NULL); JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - _Py_uop_sym_set_const(ctx, res, const_val); + JitOptRef ref = PyJitRef_Wrap(res); + _Py_uop_sym_set_const(ctx, ref, const_val); + return ref; +} + +JitOptRef +_Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + JitOptRef res = _Py_uop_sym_new_const(ctx, const_val); + // Decref once because sym_new_const increfs it. + Py_DECREF(const_val); return res; } -JitOptSymbol * +JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx) { JitOptSymbol *null_sym = sym_new(ctx); if (null_sym == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - _Py_uop_sym_set_null(ctx, null_sym); - return null_sym; + JitOptRef ref = PyJitRef_Wrap(null_sym); + _Py_uop_sym_set_null(ctx, ref); + return ref; } PyTypeObject * -_Py_uop_sym_get_type(JitOptSymbol *sym) +_Py_uop_sym_get_type(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: - case JIT_SYM_TYPE_VERSION_TAG: case JIT_SYM_BOTTOM_TAG: case JIT_SYM_NON_NULL_TAG: case JIT_SYM_UNKNOWN_TAG: @@ -407,17 +535,23 @@ _Py_uop_sym_get_type(JitOptSymbol *sym) return sym->cls.type; case JIT_SYM_KNOWN_VALUE_TAG: return Py_TYPE(sym->value.value); + case JIT_SYM_TYPE_VERSION_TAG: + return _PyType_LookupByVersion(sym->version.version); case JIT_SYM_TUPLE_TAG: return &PyTuple_Type; case JIT_SYM_TRUTHINESS_TAG: return &PyBool_Type; + case JIT_SYM_COMPACT_INT: + return &PyLong_Type; + } Py_UNREACHABLE(); } unsigned int -_Py_uop_sym_get_type_version(JitOptSymbol *sym) +_Py_uop_sym_get_type_version(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -435,52 +569,42 @@ _Py_uop_sym_get_type_version(JitOptSymbol *sym) return PyTuple_Type.tp_version_tag; case JIT_SYM_TRUTHINESS_TAG: return PyBool_Type.tp_version_tag; + case JIT_SYM_COMPACT_INT: + return PyLong_Type.tp_version_tag; } Py_UNREACHABLE(); } bool -_Py_uop_sym_has_type(JitOptSymbol *sym) +_Py_uop_sym_has_type(JitOptRef sym) { - JitSymType tag = sym->tag; - switch(tag) { - case JIT_SYM_NULL_TAG: - case JIT_SYM_TYPE_VERSION_TAG: - case JIT_SYM_BOTTOM_TAG: - case JIT_SYM_NON_NULL_TAG: - case JIT_SYM_UNKNOWN_TAG: - return false; - case JIT_SYM_KNOWN_CLASS_TAG: - case JIT_SYM_KNOWN_VALUE_TAG: - case JIT_SYM_TUPLE_TAG: - case JIT_SYM_TRUTHINESS_TAG: - return true; - } - Py_UNREACHABLE(); + return _Py_uop_sym_get_type(sym) != NULL; } bool -_Py_uop_sym_matches_type(JitOptSymbol *sym, PyTypeObject *typ) +_Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ) { assert(typ != NULL && PyType_Check(typ)); return _Py_uop_sym_get_type(sym) == typ; } bool -_Py_uop_sym_matches_type_version(JitOptSymbol *sym, unsigned int version) +_Py_uop_sym_matches_type_version(JitOptRef sym, unsigned int version) { return _Py_uop_sym_get_type_version(sym) == version; } int -_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym) +_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); switch(sym->tag) { case JIT_SYM_NULL_TAG: case JIT_SYM_TYPE_VERSION_TAG: case JIT_SYM_BOTTOM_TAG: case JIT_SYM_NON_NULL_TAG: case JIT_SYM_UNKNOWN_TAG: + case JIT_SYM_COMPACT_INT: return -1; case JIT_SYM_KNOWN_CLASS_TAG: /* TODO : @@ -494,7 +618,8 @@ 
_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym) case JIT_SYM_TRUTHINESS_TAG: ; JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value; - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, + PyJitRef_Wrap(value)); if (truthiness < 0) { return truthiness; } @@ -520,12 +645,12 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym) return -1; } -JitOptSymbol * -_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args) +JitOptRef +_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef *args) { JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } if (size > MAX_SYMBOLIC_TUPLE_SIZE) { res->tag = JIT_SYM_KNOWN_CLASS_TAG; @@ -535,15 +660,16 @@ _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args) res->tag = JIT_SYM_TUPLE_TAG; res->tuple.length = size; for (int i = 0; i < size; i++) { - res->tuple.items[i] = (uint16_t)(args[i] - allocation_base(ctx)); + res->tuple.items[i] = (uint16_t)(PyJitRef_Unwrap(args[i]) - allocation_base(ctx)); } } - return res; + return PyJitRef_Wrap(res); } -JitOptSymbol * -_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item) +JitOptRef +_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef ref, int item) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); assert(item >= 0); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { PyObject *tuple = sym->value.value; @@ -552,14 +678,15 @@ _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item) } } else if (sym->tag == JIT_SYM_TUPLE_TAG && item < sym->tuple.length) { - return allocation_base(ctx) + sym->tuple.items[item]; + return PyJitRef_Wrap(allocation_base(ctx) + sym->tuple.items[item]); } - return _Py_uop_sym_new_unknown(ctx); + return _Py_uop_sym_new_not_null(ctx); } int -_Py_uop_sym_tuple_length(JitOptSymbol *sym) +_Py_uop_sym_tuple_length(JitOptRef ref) { + JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { PyObject *tuple = sym->value.value; if (PyTuple_CheckExact(tuple)) { @@ -574,7 +701,7 @@ _Py_uop_sym_tuple_length(JitOptSymbol *sym) // Return true if known to be immortal. 
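(Editorial sketch, not part of the patch.) A symbolic tuple stores its items as 16-bit offsets into the symbol arena, so per-item facts survive _Py_uop_sym_tuple_getitem; tuples longer than MAX_SYMBOLIC_TUPLE_SIZE degrade to a plain "known class: tuple" symbol. Under the same assumptions as the previous sketch (a live JitOptContext *ctx):

    JitOptRef items[2] = {
        _Py_uop_sym_new_type(ctx, &PyFloat_Type),   // item 0: type known, value unknown
        _Py_uop_sym_new_const(ctx, Py_True),        // item 1: known constant
    };
    JitOptRef tup = _Py_uop_sym_new_tuple(ctx, 2, items);
    assert(_Py_uop_sym_tuple_length(tup) == 2);
    assert(_Py_uop_sym_matches_type(
               _Py_uop_sym_tuple_getitem(ctx, tup, 0), &PyFloat_Type));
    // Out-of-range items now come back as non-NULL rather than fully unknown:
    assert(_Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, tup, 5)));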
bool -_Py_uop_sym_is_immortal(JitOptSymbol *sym) +_Py_uop_symbol_is_immortal(JitOptSymbol *sym) { if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { return _Py_IsImmortal(sym->value.value); @@ -582,25 +709,84 @@ _Py_uop_sym_is_immortal(JitOptSymbol *sym) if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { return sym->cls.type == &PyBool_Type; } - if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { - return true; - } return false; } -JitOptSymbol * -_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy) +bool +_Py_uop_sym_is_compact_int(JitOptRef ref) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { + return (bool)_PyLong_CheckExactAndCompact(sym->value.value); + } + return sym->tag == JIT_SYM_COMPACT_INT; +} + +bool +_Py_uop_sym_is_immortal(JitOptRef ref) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + return _Py_uop_symbol_is_immortal(sym); +} + +void +_Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref) +{ + JitOptSymbol *sym = PyJitRef_Unwrap(ref); + JitSymType tag = sym->tag; + switch(tag) { + case JIT_SYM_NULL_TAG: + sym_set_bottom(ctx, sym); + return; + case JIT_SYM_KNOWN_CLASS_TAG: + if (sym->cls.type == &PyLong_Type) { + sym->tag = JIT_SYM_COMPACT_INT; + } else { + sym_set_bottom(ctx, sym); + } + return; + case JIT_SYM_TYPE_VERSION_TAG: + if (sym->version.version == PyLong_Type.tp_version_tag) { + sym->tag = JIT_SYM_COMPACT_INT; + } + else { + sym_set_bottom(ctx, sym); + } + return; + case JIT_SYM_KNOWN_VALUE_TAG: + if (!_PyLong_CheckExactAndCompact(sym->value.value)) { + Py_CLEAR(sym->value.value); + sym_set_bottom(ctx, sym); + } + return; + case JIT_SYM_TUPLE_TAG: + case JIT_SYM_TRUTHINESS_TAG: + sym_set_bottom(ctx, sym); + return; + case JIT_SYM_BOTTOM_TAG: + case JIT_SYM_COMPACT_INT: + return; + case JIT_SYM_NON_NULL_TAG: + case JIT_SYM_UNKNOWN_TAG: + sym->tag = JIT_SYM_COMPACT_INT; + return; + } +} + +JitOptRef +_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef ref, bool truthy) { + JitOptSymbol *value = PyJitRef_Unwrap(ref); // It's clearer to invert this in the signature: bool invert = !truthy; if (value->tag == JIT_SYM_TRUTHINESS_TAG && value->truthiness.invert == invert) { - return value; + return ref; } JitOptSymbol *res = sym_new(ctx); if (res == NULL) { - return out_of_space(ctx); + return out_of_space_ref(ctx); } - int truthiness = _Py_uop_sym_truthiness(ctx, value); + int truthiness = _Py_uop_sym_truthiness(ctx, ref); if (truthiness < 0) { res->tag = JIT_SYM_TRUTHINESS_TAG; res->truthiness.invert = invert; @@ -609,7 +795,18 @@ _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy) else { make_const(res, (truthiness ^ invert) ? Py_True : Py_False); } - return res; + return PyJitRef_Wrap(res); +} + +JitOptRef +_Py_uop_sym_new_compact_int(JitOptContext *ctx) +{ + JitOptSymbol *sym = sym_new(ctx); + if (sym == NULL) { + return out_of_space_ref(ctx); + } + sym->tag = JIT_SYM_COMPACT_INT; + return PyJitRef_Wrap(sym); } // 0 on success, -1 on error. 
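(Editorial sketch, not part of the patch.) The new JIT_SYM_COMPACT_INT state sits between "known class: int" and a known value: it promises an exact int small enough for CPython's "compact" representation without fixing the value. The narrowing rules, mirrored from the compact-int tests added to _Py_uop_symbols_test below (ctx assumed as before):

    JitOptRef n = _Py_uop_sym_new_compact_int(ctx);
    assert(_Py_uop_sym_is_compact_int(n));
    assert(_Py_uop_sym_matches_type(n, &PyLong_Type));  // compact implies exact int
    _Py_uop_sym_set_type(ctx, n, &PyLong_Type);         // no-op: adds no information
    assert(_Py_uop_sym_is_compact_int(n));
    _Py_uop_sym_set_type(ctx, n, &PyFloat_Type);        // contradiction
    assert(_Py_uop_sym_is_bottom(n));                   // narrowed to BOTTOM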
@@ -618,7 +815,7 @@ _Py_uop_frame_new( JitOptContext *ctx, PyCodeObject *co, int curr_stackentries, - JitOptSymbol **args, + JitOptRef *args, int arg_len) { assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); @@ -643,14 +840,14 @@ _Py_uop_frame_new( } for (int i = arg_len; i < co->co_nlocalsplus; i++) { - JitOptSymbol *local = _Py_uop_sym_new_unknown(ctx); + JitOptRef local = _Py_uop_sym_new_unknown(ctx); frame->locals[i] = local; } // Initialize the stack as well for (int i = 0; i < curr_stackentries; i++) { - JitOptSymbol *stackvar = _Py_uop_sym_new_unknown(ctx); + JitOptRef stackvar = _Py_uop_sym_new_unknown(ctx); frame->stack[i] = stackvar; } @@ -676,12 +873,12 @@ _Py_uop_abstractcontext_fini(JitOptContext *ctx) void _Py_uop_abstractcontext_init(JitOptContext *ctx) { - static_assert(sizeof(JitOptSymbol) <= 2 * sizeof(uint64_t), "JitOptSymbol has grown"); + static_assert(sizeof(JitOptSymbol) <= 3 * sizeof(uint64_t), "JitOptSymbol has grown"); ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; ctx->n_consumed = ctx->locals_and_stack; #ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { - ctx->locals_and_stack[i] = NULL; + ctx->locals_and_stack[i] = PyJitRef_NULL; } #endif @@ -731,47 +928,48 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) _Py_uop_abstractcontext_init(ctx); PyObject *val_42 = NULL; PyObject *val_43 = NULL; + PyObject *val_big = NULL; PyObject *tuple = NULL; // Use a single 'sym' variable so copy-pasting tests is easier. - JitOptSymbol *sym = _Py_uop_sym_new_unknown(ctx); - if (sym == NULL) { + JitOptRef ref = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(ref)) { goto fail; } - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "top is NULL"); - TEST_PREDICATE(!_Py_uop_sym_is_not_null(sym), "top is not NULL"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyLong_Type), "top matches a type"); - TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "top is a constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "top as constant is not NULL"); - TEST_PREDICATE(!_Py_uop_sym_is_bottom(sym), "top is bottom"); - - sym = make_bottom(ctx); - if (sym == NULL) { + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "top is NULL"); + TEST_PREDICATE(!_Py_uop_sym_is_not_null(ref), "top is not NULL"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyLong_Type), "top matches a type"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "top is a constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "top as constant is not NULL"); + TEST_PREDICATE(!_Py_uop_sym_is_bottom(ref), "top is bottom"); + + ref = PyJitRef_Wrap(make_bottom(ctx)); + if (PyJitRef_IsNull(ref)) { goto fail; } - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "bottom is NULL is not false"); - TEST_PREDICATE(!_Py_uop_sym_is_not_null(sym), "bottom is not NULL is not false"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyLong_Type), "bottom matches a type"); - TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "bottom is a constant is not false"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "bottom as constant is not NULL"); - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "bottom isn't bottom"); - - sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "bottom is NULL is not false"); + TEST_PREDICATE(!_Py_uop_sym_is_not_null(ref), "bottom is not NULL is not false"); + 
TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyLong_Type), "bottom matches a type"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "bottom is a constant is not false"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "bottom as constant is not NULL"); + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "bottom isn't bottom"); + + ref = _Py_uop_sym_new_type(ctx, &PyLong_Type); + if (PyJitRef_IsNull(ref)) { goto fail; } - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "int is NULL"); - TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "int isn't not NULL"); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "int isn't int"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyFloat_Type), "int matches float"); - TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "int is a constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "int as constant is not NULL"); + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "int is NULL"); + TEST_PREDICATE(_Py_uop_sym_is_not_null(ref), "int isn't not NULL"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "int isn't int"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyFloat_Type), "int matches float"); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "int is a constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "int as constant is not NULL"); - _Py_uop_sym_set_type(ctx, sym, &PyLong_Type); // Should be a no-op - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "(int and int) isn't int"); + _Py_uop_sym_set_type(ctx, ref, &PyLong_Type); // Should be a no-op + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "(int and int) isn't int"); - _Py_uop_sym_set_type(ctx, sym, &PyFloat_Type); // Should make it bottom - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(int and float) isn't bottom"); + _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(int and float) isn't bottom"); val_42 = PyLong_FromLong(42); assert(val_42 != NULL); @@ -781,84 +979,118 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) assert(val_43 != NULL); assert(_Py_IsImmortal(val_43)); - sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + ref = _Py_uop_sym_new_type(ctx, &PyLong_Type); + if (PyJitRef_IsNull(ref)) { goto fail; } - _Py_uop_sym_set_const(ctx, sym, val_42); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 1, "bool(42) is not True"); - TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "42 is NULL"); - TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "42 isn't not NULL"); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "42 isn't an int"); - TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyFloat_Type), "42 matches float"); - TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym), "42 is not a constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) != NULL, "42 as constant is NULL"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == val_42, "42 as constant isn't 42"); - TEST_PREDICATE(_Py_uop_sym_is_immortal(sym), "42 is not immortal"); - - _Py_uop_sym_set_type(ctx, sym, &PyLong_Type); // Should be a no-op - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "(42 and 42) isn't an int"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == val_42, "(42 and 42) as constant isn't 42"); - - _Py_uop_sym_set_type(ctx, sym, &PyFloat_Type); // Should make it bottom - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and float) isn't bottom"); - - sym = _Py_uop_sym_new_type(ctx, &PyBool_Type); - 
TEST_PREDICATE(_Py_uop_sym_is_immortal(sym), "a bool is not immortal"); - - sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + _Py_uop_sym_set_const(ctx, ref, val_42); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 1, "bool(42) is not True"); + TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "42 is NULL"); + TEST_PREDICATE(_Py_uop_sym_is_not_null(ref), "42 isn't not NULL"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "42 isn't an int"); + TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyFloat_Type), "42 matches float"); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref), "42 is not a constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) != NULL, "42 as constant is NULL"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == val_42, "42 as constant isn't 42"); + TEST_PREDICATE(_Py_uop_sym_is_immortal(ref), "42 is not immortal"); + + _Py_uop_sym_set_type(ctx, ref, &PyLong_Type); // Should be a no-op + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "(42 and 42) isn't an int"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == val_42, "(42 and 42) as constant isn't 42"); + + _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(42 and float) isn't bottom"); + + ref = _Py_uop_sym_new_type(ctx, &PyBool_Type); + TEST_PREDICATE(_Py_uop_sym_is_immortal(ref), "a bool is not immortal"); + + ref = _Py_uop_sym_new_type(ctx, &PyLong_Type); + if (PyJitRef_IsNull(ref)) { goto fail; } - _Py_uop_sym_set_const(ctx, sym, val_42); - _Py_uop_sym_set_const(ctx, sym, val_43); // Should make it bottom - TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and 43) isn't bottom"); - - - sym = _Py_uop_sym_new_const(ctx, Py_None); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(None) is not False"); - sym = _Py_uop_sym_new_const(ctx, Py_False); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(False) is not False"); - sym = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0)); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(0) is not False"); - - JitOptSymbol *i1 = _Py_uop_sym_new_type(ctx, &PyFloat_Type); - JitOptSymbol *i2 = _Py_uop_sym_new_const(ctx, val_43); - JitOptSymbol *array[2] = { i1, i2 }; - sym = _Py_uop_sym_new_tuple(ctx, 2, array); + _Py_uop_sym_set_const(ctx, ref, val_42); + _Py_uop_sym_set_const(ctx, ref, val_43); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(42 and 43) isn't bottom"); + + + ref = _Py_uop_sym_new_const(ctx, Py_None); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(None) is not False"); + ref = _Py_uop_sym_new_const(ctx, Py_False); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(False) is not False"); + ref = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0)); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(0) is not False"); + + JitOptRef i1 = _Py_uop_sym_new_type(ctx, &PyFloat_Type); + JitOptRef i2 = _Py_uop_sym_new_const(ctx, val_43); + JitOptRef array[2] = { i1, i2 }; + ref = _Py_uop_sym_new_tuple(ctx, 2, array); TEST_PREDICATE( - _Py_uop_sym_matches_type(_Py_uop_sym_tuple_getitem(ctx, sym, 0), &PyFloat_Type), + _Py_uop_sym_matches_type(_Py_uop_sym_tuple_getitem(ctx, ref, 0), &PyFloat_Type), "tuple item does not match value used to create tuple" ); TEST_PREDICATE( - _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43, + _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, ref, 1)) == val_43, "tuple item does not match value used to create 
tuple" ); PyObject *pair[2] = { val_42, val_43 }; tuple = _PyTuple_FromArray(pair, 2); - sym = _Py_uop_sym_new_const(ctx, tuple); + ref = _Py_uop_sym_new_const(ctx, tuple); TEST_PREDICATE( - _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43, + _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, ref, 1)) == val_43, "tuple item does not match value used to create tuple" ); - JitOptSymbol *value = _Py_uop_sym_new_type(ctx, &PyBool_Type); - sym = _Py_uop_sym_new_truthiness(ctx, value, false); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean"); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == -1, "truthiness is not unknown"); - TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym) == false, "truthiness is constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "truthiness is not NULL"); + ref = _Py_uop_sym_new_type(ctx, &PyTuple_Type); + TEST_PREDICATE( + _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, ref, 42)), + "Unknown tuple item is not narrowed to non-NULL" + ); + JitOptRef value = _Py_uop_sym_new_type(ctx, &PyBool_Type); + ref = _Py_uop_sym_new_truthiness(ctx, value, false); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyBool_Type), "truthiness is not boolean"); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == -1, "truthiness is not unknown"); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref) == false, "truthiness is constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "truthiness is not NULL"); TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == false, "value is constant"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == NULL, "value is not NULL"); - _Py_uop_sym_set_const(ctx, sym, Py_False); - TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean"); - TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "truthiness is not True"); - TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym) == true, "truthiness is not constant"); - TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == Py_False, "truthiness is not False"); + _Py_uop_sym_set_const(ctx, ref, Py_False); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyBool_Type), "truthiness is not boolean"); + TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "truthiness is not True"); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref) == true, "truthiness is not constant"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == Py_False, "truthiness is not False"); TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == true, "value is not constant"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == Py_True, "value is not True"); + + + val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(66)); + if (val_big == NULL) { + goto fail; + } + + JitOptRef ref_42 = _Py_uop_sym_new_const(ctx, val_42); + JitOptRef ref_big = _Py_uop_sym_new_const(ctx, val_big); + JitOptRef ref_int = _Py_uop_sym_new_compact_int(ctx); + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_42), "42 is not a compact int"); + TEST_PREDICATE(!_Py_uop_sym_is_compact_int(ref_big), "(1 << 66) is a compact int"); + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact int is not an int"); + + _Py_uop_sym_set_type(ctx, ref_int, &PyLong_Type); // Should have no effect + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int after cast"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact 
int is not an int after cast"); + + _Py_uop_sym_set_type(ctx, ref_int, &PyFloat_Type); // Should make it bottom + TEST_PREDICATE(_Py_uop_sym_is_bottom(ref_int), "compact int cast to float isn't bottom"); + + ref_int = _Py_uop_sym_new_compact_int(ctx); + _Py_uop_sym_set_const(ctx, ref_int, val_43); + TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "43 is not a compact int"); + TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "43 is not an int"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref_int) == val_43, "43 isn't 43"); + _Py_uop_abstractcontext_fini(ctx); Py_DECREF(val_42); Py_DECREF(val_43); + Py_DECREF(val_big); Py_DECREF(tuple); Py_RETURN_NONE; @@ -866,6 +1098,7 @@ fail: _Py_uop_abstractcontext_fini(ctx); Py_XDECREF(val_42); Py_XDECREF(val_43); + Py_XDECREF(val_big); Py_DECREF(tuple); return NULL; } diff --git a/Python/parking_lot.c b/Python/parking_lot.c index 8edf4323594..e896dea0271 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -112,17 +112,27 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout) } } - // NOTE: we wait on the sigint event even in non-main threads to match the - // behavior of the other platforms. Non-main threads will ignore the - // Py_PARK_INTR result. - HANDLE sigint_event = _PyOS_SigintEvent(); - HANDLE handles[2] = { sema->platform_sem, sigint_event }; - DWORD count = sigint_event != NULL ? 2 : 1; + HANDLE handles[2] = { sema->platform_sem, NULL }; + HANDLE sigint_event = NULL; + DWORD count = 1; + if (_Py_IsMainThread()) { + // gh-135099: Wait on the SIGINT event only in the main thread. Other + // threads would ignore the result anyway, and accessing + // `_PyOS_SigintEvent()` from non-main threads may race with + // interpreter shutdown, which closes the event handle. Note that + // non-main interpreters will ignore the result. 
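As a standalone sketch (not part of the patch), the two-handle wait built up in this hunk is easier to read in isolation; `sem` and `evt` are hypothetical handles:

    #include <windows.h>

    /* Wait on a semaphore, plus an optional wake event when one exists. */
    static DWORD wait_sem_or_event(HANDLE sem, HANDLE evt, DWORD millis)
    {
        HANDLE handles[2] = { sem, NULL };
        DWORD count = 1;
        if (evt != NULL) {          /* only expose the event slot when valid */
            handles[1] = evt;
            count = 2;
        }
        /* WAIT_OBJECT_0     -> semaphore signaled
         * WAIT_OBJECT_0 + 1 -> event signaled
         * WAIT_TIMEOUT      -> timed out */
        return WaitForMultipleObjects(count, handles, FALSE, millis);
    }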
+ sigint_event = _PyOS_SigintEvent(); + if (sigint_event != NULL) { + handles[1] = sigint_event; + count = 2; + } + } wait = WaitForMultipleObjects(count, handles, FALSE, millis); if (wait == WAIT_OBJECT_0) { res = Py_PARK_OK; } else if (wait == WAIT_OBJECT_0 + 1) { + assert(sigint_event != NULL); ResetEvent(sigint_event); res = Py_PARK_INTR; } diff --git a/Python/pathconfig.c b/Python/pathconfig.c index 92360c1bb02..d034562c43f 100644 --- a/Python/pathconfig.c +++ b/Python/pathconfig.c @@ -272,7 +272,8 @@ Py_SetProgramName(const wchar_t *program_name) } -wchar_t * +/* removed in 3.15, but kept for stable ABI compatibility */ +PyAPI_FUNC(wchar_t *) Py_GetPath(void) { /* If the user has provided a path, return that */ @@ -284,7 +285,7 @@ Py_GetPath(void) } -wchar_t * +PyAPI_FUNC(wchar_t *) _Py_GetStdlibDir(void) { wchar_t *stdlib_dir = _Py_path_config.stdlib_dir; @@ -295,35 +296,40 @@ _Py_GetStdlibDir(void) } -wchar_t * +/* removed in 3.15, but kept for stable ABI compatibility */ +PyAPI_FUNC(wchar_t *) Py_GetPrefix(void) { return _Py_path_config.prefix; } -wchar_t * +/* removed in 3.15, but kept for stable ABI compatibility */ +PyAPI_FUNC(wchar_t *) Py_GetExecPrefix(void) { return _Py_path_config.exec_prefix; } -wchar_t * +/* removed in 3.15, but kept for stable ABI compatibility */ +PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void) { return _Py_path_config.program_full_path; } -wchar_t* +/* removed in 3.15, but kept for stable ABI compatibility */ +PyAPI_FUNC(wchar_t *) Py_GetPythonHome(void) { return _Py_path_config.home; } -wchar_t * +/* removed in 3.15, but kept for stable ABI compatibility */ +PyAPI_FUNC(wchar_t *) Py_GetProgramName(void) { return _Py_path_config.program_name; diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 1211e0e9f11..2ca18c23593 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -1,241 +1,354 @@ +/* + * Python Perf Trampoline Support - JIT Dump Implementation + * + * This file implements the perf jitdump API for Python's performance profiling + * integration. It allows perf (Linux performance analysis tool) to understand + * and profile dynamically generated Python bytecode by creating JIT dump files + * that perf can inject into its analysis. + * + * + * IMPORTANT: This file exports specific callback functions that are part of + * Python's internal API. Do not modify the function signatures or behavior + * of exported functions without coordinating with the Python core team. + * + * Usually the binary and libraries are mapped in separate regions like below: + * + * address -> + * --+---------------------+--//--+---------------------+-- + * | .text | .data | ... | | .text | .data | ... | + * --+---------------------+--//--+---------------------+-- + * myprog libc.so + * + * So it'd be easy and straightforward to find a mapped binary or library from an + * address. + * + * But for JIT code, the code arena only cares about the code section. But the + * resulting DSOs (which are generated by perf inject -j) contain ELF headers and + * unwind info too. Then it'd generate the following address space with synthesized + * MMAP events. Let's say it has a sample between address B and C. + * + * sample + * | + * address -> A B v C + * --------------------------------------------------------------------------------------------------- + * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | + * ... 
+ * --------------------------------------------------------------------------------------------------- + * + * If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see + * the unwind info. If it maps both .text section and unwind sections, the sample + * could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing + * which one is right. So to make perf happy we have non-overlapping ranges for each + * DSO: + * + * address -> + * ------------------------------------------------------------------------------------------------------- + * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | + * ... + * ------------------------------------------------------------------------------------------------------- + * + * As the trampolines are constant, we add a constant padding but in general the padding needs to have the + * size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 + */ + + + #include "Python.h" #include "pycore_ceval.h" // _PyPerf_Callbacks #include "pycore_frame.h" #include "pycore_interp.h" #include "pycore_runtime.h" // _PyRuntime - #ifdef PY_HAVE_PERF_TRAMPOLINE -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> // mmap() -#include <sys/types.h> -#include <unistd.h> // sysconf() -#include <sys/time.h> // gettimeofday() -#include <sys/syscall.h> - -// ---------------------------------- -// Perf jitdump API -// ---------------------------------- - -typedef struct { - FILE* perf_map; - PyThread_type_lock map_lock; - void* mapped_buffer; - size_t mapped_size; - int code_id; -} PerfMapJitState; - -static PerfMapJitState perf_jit_map_state; +/* Standard library includes for perf jitdump implementation */ +#include <elf.h> // ELF architecture constants +#include <fcntl.h> // File control operations +#include <stdio.h> // Standard I/O operations +#include <stdlib.h> // Standard library functions +#include <sys/mman.h> // Memory mapping functions (mmap) +#include <sys/types.h> // System data types +#include <unistd.h> // System calls (sysconf, getpid) +#include <sys/time.h> // Time functions (gettimeofday) +#include <sys/syscall.h> // System call interface + +// ============================================================================= +// CONSTANTS AND CONFIGURATION +// ============================================================================= /* -Usually the binary and libraries are mapped in separate region like below: - - address -> - --+---------------------+--//--+---------------------+-- - | .text | .data | ... | | .text | .data | ... | - --+---------------------+--//--+---------------------+-- - myprog libc.so - -So it'd be easy and straight-forward to find a mapped binary or library from an -address. - -But for JIT code, the code arena only cares about the code section. But the -resulting DSOs (which is generated by perf inject -j) contain ELF headers and -unwind info too. Then it'd generate following address space with synthesized -MMAP events. Let's say it has a sample between address B and C. - - sample - | - address -> A B v C - --------------------------------------------------------------------------------------------------- - /tmp/jitted-PID-0.so | (headers) | .text | unwind info | - /tmp/jitted-PID-1.so | (headers) | .text | unwind info | - /tmp/jitted-PID-2.so | (headers) | .text | unwind info | - ... 
- --------------------------------------------------------------------------------------------------- - -If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see -the unwind info. If it maps both .text section and unwind sections, the sample -could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing -which one is right. So to make perf happy we have non-overlapping ranges for each -DSO: - - address -> - ------------------------------------------------------------------------------------------------------- - /tmp/jitted-PID-0.so | (headers) | .text | unwind info | - /tmp/jitted-PID-1.so | (headers) | .text | unwind info | - /tmp/jitted-PID-2.so | (headers) | .text | unwind info | - ... - ------------------------------------------------------------------------------------------------------- - -As the trampolines are constant, we add a constant padding but in general the padding needs to have the -size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 + * Memory layout considerations for perf jitdump: + * + * Perf expects non-overlapping memory regions for each JIT-compiled function. + * When perf processes the jitdump file, it creates synthetic DSO (Dynamic + * Shared Object) files that contain: + * - ELF headers + * - .text section (actual machine code) + * - Unwind information (for stack traces) + * + * To ensure proper address space layout, we add padding between code regions. + * This prevents address conflicts when perf maps the synthesized DSOs. + * + * Memory layout example: + * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] + * /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding] + * + * The padding size (0x100) is chosen to accommodate typical unwind info sizes + * while maintaining 16-byte alignment requirements. */ - #define PERF_JIT_CODE_PADDING 0x100 -#define trampoline_api _PyRuntime.ceval.perf.trampoline_api - -typedef uint64_t uword; -typedef const char* CodeComments; -#define Pd "d" -#define MB (1024 * 1024) - -#define EM_386 3 -#define EM_X86_64 62 -#define EM_ARM 40 -#define EM_AARCH64 183 -#define EM_RISCV 243 +/* Convenient access to the global trampoline API state */ +#define trampoline_api _PyRuntime.ceval.perf.trampoline_api -#define TARGET_ARCH_IA32 0 -#define TARGET_ARCH_X64 0 -#define TARGET_ARCH_ARM 0 -#define TARGET_ARCH_ARM64 0 -#define TARGET_ARCH_RISCV32 0 -#define TARGET_ARCH_RISCV64 0 +/* Type aliases for clarity and portability */ +typedef uint64_t uword; // Word-sized unsigned integer +typedef const char* CodeComments; // Code comment strings -#define FLAG_generate_perf_jitdump 0 -#define FLAG_write_protect_code 0 -#define FLAG_write_protect_vm_isolate 0 -#define FLAG_code_comments 0 +/* Memory size constants */ +#define MB (1024 * 1024) // 1 Megabyte for buffer sizing -#define UNREACHABLE() +// ============================================================================= +// ARCHITECTURE-SPECIFIC DEFINITIONS +// ============================================================================= -static uword GetElfMachineArchitecture(void) { -#if TARGET_ARCH_IA32 - return EM_386; -#elif TARGET_ARCH_X64 +/* + * Returns the ELF machine architecture constant for the current platform. + * This is required for the jitdump header to correctly identify the target + * architecture for perf processing. 
+ * + */ +static uint64_t GetElfMachineArchitecture(void) { +#if defined(__x86_64__) || defined(_M_X64) return EM_X86_64; -#elif TARGET_ARCH_ARM - return EM_ARM; -#elif TARGET_ARCH_ARM64 +#elif defined(__i386__) || defined(_M_IX86) + return EM_386; +#elif defined(__aarch64__) return EM_AARCH64; -#elif TARGET_ARCH_RISCV32 || TARGET_ARCH_RISCV64 +#elif defined(__arm__) || defined(_M_ARM) + return EM_ARM; +#elif defined(__riscv) return EM_RISCV; #else - UNREACHABLE(); + Py_UNREACHABLE(); // Unsupported architecture - should never reach here return 0; #endif } +// ============================================================================= +// PERF JITDUMP DATA STRUCTURES +// ============================================================================= + +/* + * Perf jitdump file format structures + * + * These structures define the binary format that perf expects for JIT dump files. + * The format is documented in the Linux perf tools source code and must match + * exactly for proper perf integration. + */ + +/* + * Jitdump file header - written once at the beginning of each jitdump file + * Contains metadata about the process and jitdump format version + */ typedef struct { - uint32_t magic; - uint32_t version; - uint32_t size; - uint32_t elf_mach_target; - uint32_t reserved; - uint32_t process_id; - uint64_t time_stamp; - uint64_t flags; + uint32_t magic; // Magic number (0x4A695444 = "JiTD") + uint32_t version; // Jitdump format version (currently 1) + uint32_t size; // Size of this header structure + uint32_t elf_mach_target; // Target architecture (from GetElfMachineArchitecture) + uint32_t reserved; // Reserved field (must be 0) + uint32_t process_id; // Process ID of the JIT compiler + uint64_t time_stamp; // Timestamp when jitdump was created + uint64_t flags; // Feature flags (currently unused) } Header; - enum PerfEvent { - PerfLoad = 0, - PerfMove = 1, - PerfDebugInfo = 2, - PerfClose = 3, - PerfUnwindingInfo = 4 +/* + * Perf event types supported by the jitdump format + * Each event type has a corresponding structure format + */ +enum PerfEvent { + PerfLoad = 0, // Code load event (new JIT function) + PerfMove = 1, // Code move event (function relocated) + PerfDebugInfo = 2, // Debug information event + PerfClose = 3, // JIT session close event + PerfUnwindingInfo = 4 // Stack unwinding information event }; +/* + * Base event structure - common header for all perf events + * Every event in the jitdump file starts with this structure + */ struct BaseEvent { - uint32_t event; - uint32_t size; - uint64_t time_stamp; - }; + uint32_t event; // Event type (from PerfEvent enum) + uint32_t size; // Total size of this event including payload + uint64_t time_stamp; // Timestamp when event occurred +}; +/* + * Code load event - indicates a new JIT-compiled function is available + * This is the most important event type for Python profiling + */ typedef struct { - struct BaseEvent base; - uint32_t process_id; - uint32_t thread_id; - uint64_t vma; - uint64_t code_address; - uint64_t code_size; - uint64_t code_id; + struct BaseEvent base; // Common event header + uint32_t process_id; // Process ID where code was generated + uint32_t thread_id; // Thread ID where code was generated + uint64_t vma; // Virtual memory address where code is loaded + uint64_t code_address; // Address of the actual machine code + uint64_t code_size; // Size of the machine code in bytes + uint64_t code_id; // Unique identifier for this code region + /* Followed by: + * - null-terminated function name string + * - raw 
machine code bytes + */ } CodeLoadEvent; +/* + * Code unwinding information event - provides DWARF data for stack traces + * Essential for proper stack unwinding during profiling + */ typedef struct { - struct BaseEvent base; - uint64_t unwind_data_size; - uint64_t eh_frame_hdr_size; - uint64_t mapped_size; + struct BaseEvent base; // Common event header + uint64_t unwind_data_size; // Size of the unwinding data + uint64_t eh_frame_hdr_size; // Size of the EH frame header + uint64_t mapped_size; // Total mapped size (with padding) + /* Followed by: + * - EH frame header + * - DWARF unwinding information + * - Padding to alignment boundary + */ } CodeUnwindingInfoEvent; -static const intptr_t nanoseconds_per_second = 1000000000; - -// Dwarf encoding constants +// ============================================================================= +// GLOBAL STATE MANAGEMENT +// ============================================================================= -static const uint8_t DwarfUData4 = 0x03; -static const uint8_t DwarfSData4 = 0x0b; -static const uint8_t DwarfPcRel = 0x10; -static const uint8_t DwarfDataRel = 0x30; -// static uint8_t DwarfOmit = 0xff; +/* + * Global state for the perf jitdump implementation + * + * This structure maintains all the state needed for generating jitdump files. + * It's designed as a singleton since there's typically only one jitdump file + * per Python process. + */ typedef struct { - unsigned char version; - unsigned char eh_frame_ptr_enc; - unsigned char fde_count_enc; - unsigned char table_enc; - int32_t eh_frame_ptr; - int32_t eh_fde_count; - int32_t from; - int32_t to; -} EhFrameHeader; + FILE* perf_map; // File handle for the jitdump file + PyThread_type_lock map_lock; // Thread synchronization lock + void* mapped_buffer; // Memory-mapped region (signals perf we're active) + size_t mapped_size; // Size of the mapped region + int code_id; // Counter for unique code region identifiers +} PerfMapJitState; + +/* Global singleton instance */ +static PerfMapJitState perf_jit_map_state; + +// ============================================================================= +// TIME UTILITIES +// ============================================================================= +/* Time conversion constant */ +static const intptr_t nanoseconds_per_second = 1000000000; + +/* + * Get current monotonic time in nanoseconds + * + * Monotonic time is preferred for event timestamps because it's not affected + * by system clock adjustments. This ensures consistent timing relationships + * between events even if the system clock is changed. + * + * Returns: Current monotonic time in nanoseconds since an arbitrary epoch + */ static int64_t get_current_monotonic_ticks(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { - UNREACHABLE(); + Py_UNREACHABLE(); // Should never fail on supported systems return 0; } - // Convert to nanoseconds. + + /* Convert to nanoseconds for maximum precision */ int64_t result = ts.tv_sec; result *= nanoseconds_per_second; result += ts.tv_nsec; return result; } +/* + * Get current wall clock time in microseconds + * + * Used for the jitdump file header timestamp. Unlike monotonic time, + * this represents actual wall clock time that can be correlated with + * other system events. + * + * Returns: Current time in microseconds since Unix epoch + */ static int64_t get_current_time_microseconds(void) { - // gettimeofday has microsecond resolution. 
- struct timeval tv; - if (gettimeofday(&tv, NULL) < 0) { - UNREACHABLE(); - return 0; - } - return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; + struct timeval tv; + if (gettimeofday(&tv, NULL) < 0) { + Py_UNREACHABLE(); // Should never fail on supported systems + return 0; + } + return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; } +// ============================================================================= +// UTILITY FUNCTIONS +// ============================================================================= +/* + * Round up a value to the next multiple of a given number + * + * This is essential for maintaining proper alignment requirements in the + * jitdump format. Many structures need to be aligned to specific boundaries + * (typically 8 or 16 bytes) for efficient processing by perf. + * + * Args: + * value: The value to round up + * multiple: The multiple to round up to + * + * Returns: The smallest value >= input that is a multiple of 'multiple' + */ static size_t round_up(int64_t value, int64_t multiple) { if (multiple == 0) { - // Avoid division by zero - return value; + return value; // Avoid division by zero } int64_t remainder = value % multiple; if (remainder == 0) { - // Value is already a multiple of 'multiple' - return value; + return value; // Already aligned } - // Calculate the difference to the next multiple + /* Calculate how much to add to reach the next multiple */ int64_t difference = multiple - remainder; - - // Add the difference to the value int64_t rounded_up_value = value + difference; return rounded_up_value; } +// ============================================================================= +// FILE I/O UTILITIES +// ============================================================================= +/* + * Write data to the jitdump file with error handling + * + * This function ensures that all data is written to the file, handling + * partial writes that can occur with large buffers or when the system + * is under load. + * + * Args: + * buffer: Pointer to data to write + * size: Number of bytes to write + */ static void perf_map_jit_write_fully(const void* buffer, size_t size) { FILE* out_file = perf_jit_map_state.perf_map; const char* ptr = (const char*)(buffer); + while (size > 0) { const size_t written = fwrite(ptr, 1, size, out_file); if (written == 0) { - UNREACHABLE(); + Py_UNREACHABLE(); // Write failure - should be very rare break; } size -= written; @@ -243,284 +356,724 @@ static void perf_map_jit_write_fully(const void* buffer, size_t size) { } } +/* + * Write the jitdump file header + * + * The header must be written exactly once at the beginning of each jitdump + * file. It provides metadata that perf uses to parse the rest of the file. 
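The round_up() helper above is plain align-up arithmetic; a standalone sketch showing the values it produces:

    #include <assert.h>
    #include <stdint.h>

    /* Same behavior as round_up() above: align 'value' up to 'multiple'. */
    static int64_t align_up(int64_t value, int64_t multiple)
    {
        if (multiple == 0) {
            return value;                /* avoid division by zero */
        }
        int64_t remainder = value % multiple;
        return remainder == 0 ? value : value + (multiple - remainder);
    }

    int main(void)
    {
        assert(align_up(41, 8) == 48);   /* bumped to the next multiple of 8 */
        assert(align_up(48, 8) == 48);   /* already aligned: unchanged */
        assert(align_up(60, 16) == 64);  /* e.g. unwind data padded to 16 */
        return 0;
    }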
+ * + * Args: + * pid: Process ID to include in the header + * out_file: File handle to write to (currently unused, uses global state) + */ static void perf_map_jit_write_header(int pid, FILE* out_file) { Header header; - header.magic = 0x4A695444; - header.version = 1; - header.size = sizeof(Header); - header.elf_mach_target = GetElfMachineArchitecture(); - header.process_id = pid; - header.time_stamp = get_current_time_microseconds(); - header.flags = 0; - perf_map_jit_write_fully(&header, sizeof(header)); -} -static void* perf_map_jit_init(void) { - char filename[100]; - int pid = getpid(); - snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); - const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); - if (fd == -1) { - return NULL; - } + /* Initialize header with required values */ + header.magic = 0x4A695444; // "JiTD" magic number + header.version = 1; // Current jitdump version + header.size = sizeof(Header); // Header size for validation + header.elf_mach_target = GetElfMachineArchitecture(); // Target architecture + header.process_id = pid; // Process identifier + header.time_stamp = get_current_time_microseconds(); // Creation time + header.flags = 0; // No special flags currently used - const long page_size = sysconf(_SC_PAGESIZE); // NOLINT(runtime/int) - if (page_size == -1) { - close(fd); - return NULL; - } - - // The perf jit interface forces us to map the first page of the file - // to signal that we are using the interface. - perf_jit_map_state.mapped_buffer = mmap(NULL, page_size, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); - if (perf_jit_map_state.mapped_buffer == NULL) { - close(fd); - return NULL; - } - perf_jit_map_state.mapped_size = page_size; - perf_jit_map_state.perf_map = fdopen(fd, "w+"); - if (perf_jit_map_state.perf_map == NULL) { - close(fd); - return NULL; - } - setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); - perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); - - perf_jit_map_state.map_lock = PyThread_allocate_lock(); - if (perf_jit_map_state.map_lock == NULL) { - fclose(perf_jit_map_state.perf_map); - return NULL; - } - perf_jit_map_state.code_id = 0; - - trampoline_api.code_padding = PERF_JIT_CODE_PADDING; - return &perf_jit_map_state; + perf_map_jit_write_fully(&header, sizeof(header)); } -/* DWARF definitions. */ +// ============================================================================= +// DWARF CONSTANTS AND UTILITIES +// ============================================================================= + +/* + * DWARF (Debug With Arbitrary Record Formats) constants + * + * DWARF is a debugging data format used to provide stack unwinding information. + * These constants define the various encoding types and opcodes used in + * DWARF Call Frame Information (CFI) records. 
+ */ +/* DWARF Call Frame Information version */ #define DWRF_CIE_VERSION 1 +/* DWARF CFA (Call Frame Address) opcodes */ enum { - DWRF_CFA_nop = 0x0, - DWRF_CFA_offset_extended = 0x5, - DWRF_CFA_def_cfa = 0xc, - DWRF_CFA_def_cfa_offset = 0xe, - DWRF_CFA_offset_extended_sf = 0x11, - DWRF_CFA_advance_loc = 0x40, - DWRF_CFA_offset = 0x80 + DWRF_CFA_nop = 0x0, // No operation + DWRF_CFA_offset_extended = 0x5, // Extended offset instruction + DWRF_CFA_def_cfa = 0xc, // Define CFA rule + DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset + DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset + DWRF_CFA_advance_loc = 0x40, // Advance location counter + DWRF_CFA_offset = 0x80 // Simple offset instruction }; -enum - { - DWRF_EH_PE_absptr = 0x00, - DWRF_EH_PE_omit = 0xff, - - /* FDE data encoding. */ - DWRF_EH_PE_uleb128 = 0x01, - DWRF_EH_PE_udata2 = 0x02, - DWRF_EH_PE_udata4 = 0x03, - DWRF_EH_PE_udata8 = 0x04, - DWRF_EH_PE_sleb128 = 0x09, - DWRF_EH_PE_sdata2 = 0x0a, - DWRF_EH_PE_sdata4 = 0x0b, - DWRF_EH_PE_sdata8 = 0x0c, - DWRF_EH_PE_signed = 0x08, - - /* FDE flags. */ - DWRF_EH_PE_pcrel = 0x10, - DWRF_EH_PE_textrel = 0x20, - DWRF_EH_PE_datarel = 0x30, - DWRF_EH_PE_funcrel = 0x40, - DWRF_EH_PE_aligned = 0x50, - - DWRF_EH_PE_indirect = 0x80 - }; +/* DWARF Exception Handling pointer encodings */ +enum { + DWRF_EH_PE_absptr = 0x00, // Absolute pointer + DWRF_EH_PE_omit = 0xff, // Omitted value + + /* Data type encodings */ + DWRF_EH_PE_uleb128 = 0x01, // Unsigned LEB128 + DWRF_EH_PE_udata2 = 0x02, // Unsigned 2-byte + DWRF_EH_PE_udata4 = 0x03, // Unsigned 4-byte + DWRF_EH_PE_udata8 = 0x04, // Unsigned 8-byte + DWRF_EH_PE_sleb128 = 0x09, // Signed LEB128 + DWRF_EH_PE_sdata2 = 0x0a, // Signed 2-byte + DWRF_EH_PE_sdata4 = 0x0b, // Signed 4-byte + DWRF_EH_PE_sdata8 = 0x0c, // Signed 8-byte + DWRF_EH_PE_signed = 0x08, // Signed flag + + /* Reference type encodings */ + DWRF_EH_PE_pcrel = 0x10, // PC-relative + DWRF_EH_PE_textrel = 0x20, // Text-relative + DWRF_EH_PE_datarel = 0x30, // Data-relative + DWRF_EH_PE_funcrel = 0x40, // Function-relative + DWRF_EH_PE_aligned = 0x50, // Aligned + DWRF_EH_PE_indirect = 0x80 // Indirect +}; +/* Additional DWARF constants for debug information */ enum { DWRF_TAG_compile_unit = 0x11 }; - enum { DWRF_children_no = 0, DWRF_children_yes = 1 }; +enum { + DWRF_AT_name = 0x03, // Name attribute + DWRF_AT_stmt_list = 0x10, // Statement list + DWRF_AT_low_pc = 0x11, // Low PC address + DWRF_AT_high_pc = 0x12 // High PC address +}; +enum { + DWRF_FORM_addr = 0x01, // Address form + DWRF_FORM_data4 = 0x06, // 4-byte data + DWRF_FORM_string = 0x08 // String form +}; -enum { DWRF_AT_name = 0x03, DWRF_AT_stmt_list = 0x10, DWRF_AT_low_pc = 0x11, DWRF_AT_high_pc = 0x12 }; - -enum { DWRF_FORM_addr = 0x01, DWRF_FORM_data4 = 0x06, DWRF_FORM_string = 0x08 }; - -enum { DWRF_LNS_extended_op = 0, DWRF_LNS_copy = 1, DWRF_LNS_advance_pc = 2, DWRF_LNS_advance_line = 3 }; +/* Line number program opcodes */ +enum { + DWRF_LNS_extended_op = 0, // Extended opcode + DWRF_LNS_copy = 1, // Copy operation + DWRF_LNS_advance_pc = 2, // Advance program counter + DWRF_LNS_advance_line = 3 // Advance line number +}; -enum { DWRF_LNE_end_sequence = 1, DWRF_LNE_set_address = 2 }; +/* Line number extended opcodes */ +enum { + DWRF_LNE_end_sequence = 1, // End of sequence + DWRF_LNE_set_address = 2 // Set address +}; +/* + * Architecture-specific DWARF register numbers + * + * These constants define the register numbering scheme used by DWARF + * for each supported architecture. 
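These DWRF_EH_PE_* values are OR-combined into a single encoding byte: the low bits select the data type and the high bits select what the value is relative to. For instance, the `Augmentation data: 1b` shown in the readelf sample further down in this file is exactly pcrel combined with sdata4; a quick check:

    #include <assert.h>

    int main(void)
    {
        /* DWRF_EH_PE_pcrel (0x10) | DWRF_EH_PE_sdata4 (0x0b) == 0x1b */
        assert((0x10 | 0x0b) == 0x1b);
        return 0;
    }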
The numbers must match the ABI + * specification for proper stack unwinding. + */ enum { #ifdef __x86_64__ - /* Yes, the order is strange, but correct. */ - DWRF_REG_AX, - DWRF_REG_DX, - DWRF_REG_CX, - DWRF_REG_BX, - DWRF_REG_SI, - DWRF_REG_DI, - DWRF_REG_BP, - DWRF_REG_SP, - DWRF_REG_8, - DWRF_REG_9, - DWRF_REG_10, - DWRF_REG_11, - DWRF_REG_12, - DWRF_REG_13, - DWRF_REG_14, - DWRF_REG_15, - DWRF_REG_RA, + /* x86_64 register numbering (note: order is defined by x86_64 ABI) */ + DWRF_REG_AX, // RAX + DWRF_REG_DX, // RDX + DWRF_REG_CX, // RCX + DWRF_REG_BX, // RBX + DWRF_REG_SI, // RSI + DWRF_REG_DI, // RDI + DWRF_REG_BP, // RBP + DWRF_REG_SP, // RSP + DWRF_REG_8, // R8 + DWRF_REG_9, // R9 + DWRF_REG_10, // R10 + DWRF_REG_11, // R11 + DWRF_REG_12, // R12 + DWRF_REG_13, // R13 + DWRF_REG_14, // R14 + DWRF_REG_15, // R15 + DWRF_REG_RA, // Return address (RIP) #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - DWRF_REG_SP = 31, - DWRF_REG_RA = 30, + /* AArch64 register numbering */ + DWRF_REG_FP = 29, // Frame Pointer + DWRF_REG_RA = 30, // Link register (return address) + DWRF_REG_SP = 31, // Stack pointer #else # error "Unsupported target architecture" #endif }; -typedef struct ELFObjectContext -{ - uint8_t* p; /* Pointer to next address in obj.space. */ - uint8_t* startp; /* Pointer to start address in obj.space. */ - uint8_t* eh_frame_p; /* Pointer to start address in obj.space. */ - uint32_t code_size; /* Size of machine code. */ +/* DWARF encoding constants used in EH frame headers */ +static const uint8_t DwarfUData4 = 0x03; // Unsigned 4-byte data +static const uint8_t DwarfSData4 = 0x0b; // Signed 4-byte data +static const uint8_t DwarfPcRel = 0x10; // PC-relative encoding +static const uint8_t DwarfDataRel = 0x30; // Data-relative encoding + +// ============================================================================= +// ELF OBJECT CONTEXT +// ============================================================================= + +/* + * Context for building ELF/DWARF structures + * + * This structure maintains state while constructing DWARF unwind information. + * It acts as a simple buffer manager with pointers to track current position + * and important landmarks within the buffer. + */ +typedef struct ELFObjectContext { + uint8_t* p; // Current write position in buffer + uint8_t* startp; // Start of buffer (for offset calculations) + uint8_t* eh_frame_p; // Start of EH frame data (for relative offsets) + uint32_t code_size; // Size of the code being described } ELFObjectContext; -/* Append a null-terminated string. */ -static uint32_t -elfctx_append_string(ELFObjectContext* ctx, const char* str) -{ +/* + * EH Frame Header structure for DWARF unwinding + * + * This structure provides metadata about the DWARF unwinding information + * that follows. It's required by the perf jitdump format to enable proper + * stack unwinding during profiling. 
+ */ +typedef struct { + unsigned char version; // EH frame version (always 1) + unsigned char eh_frame_ptr_enc; // Encoding of EH frame pointer + unsigned char fde_count_enc; // Encoding of FDE count + unsigned char table_enc; // Encoding of table entries + int32_t eh_frame_ptr; // Pointer to EH frame data + int32_t eh_fde_count; // Number of FDEs (Frame Description Entries) + int32_t from; // Start address of code range + int32_t to; // End address of code range +} EhFrameHeader; + +// ============================================================================= +// DWARF GENERATION UTILITIES +// ============================================================================= + +/* + * Append a null-terminated string to the ELF context buffer + * + * Args: + * ctx: ELF object context + * str: String to append (must be null-terminated) + * + * Returns: Offset from start of buffer where string was written + */ +static uint32_t elfctx_append_string(ELFObjectContext* ctx, const char* str) { uint8_t* p = ctx->p; uint32_t ofs = (uint32_t)(p - ctx->startp); + + /* Copy string including null terminator */ do { *p++ = (uint8_t)*str; } while (*str++); + ctx->p = p; return ofs; } -/* Append a SLEB128 value. */ -static void -elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) -{ +/* + * Append a SLEB128 (Signed Little Endian Base 128) value + * + * SLEB128 is a variable-length encoding used extensively in DWARF. + * It efficiently encodes small numbers in fewer bytes. + * + * Args: + * ctx: ELF object context + * v: Signed value to encode + */ +static void elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) { uint8_t* p = ctx->p; + + /* Encode 7 bits at a time, with continuation bit in MSB */ for (; (uint32_t)(v + 0x40) >= 0x80; v >>= 7) { - *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)((v & 0x7f) | 0x80); // Set continuation bit } - *p++ = (uint8_t)(v & 0x7f); + *p++ = (uint8_t)(v & 0x7f); // Final byte without continuation bit + ctx->p = p; } -/* Append a ULEB128 to buffer. */ -static void -elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) -{ +/* + * Append a ULEB128 (Unsigned Little Endian Base 128) value + * + * Similar to SLEB128 but for unsigned values. + * + * Args: + * ctx: ELF object context + * v: Unsigned value to encode + */ +static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { uint8_t* p = ctx->p; + + /* Encode 7 bits at a time, with continuation bit in MSB */ for (; v >= 0x80; v >>= 7) { - *p++ = (char)((v & 0x7f) | 0x80); + *p++ = (char)((v & 0x7f) | 0x80); // Set continuation bit } - *p++ = (char)v; + *p++ = (char)v; // Final byte without continuation bit + ctx->p = p; } -/* Shortcuts to generate DWARF structures. */ -#define DWRF_U8(x) (*p++ = (x)) -#define DWRF_I8(x) (*(int8_t*)p = (x), p++) -#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) -#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) -#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) -#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) -#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) -#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) -#define DWRF_ALIGNNOP(s) \ - while ((uintptr_t)p & ((s)-1)) { \ - *p++ = DWRF_CFA_nop; \ +/* + * Macros for generating DWARF structures + * + * These macros provide a convenient way to write various data types + * to the DWARF buffer while automatically advancing the pointer. 
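The two LEB128 encoders above emit the standard DWARF varint format: 7 payload bits per byte, with the high bit set on every byte except the last. A self-contained sketch using the classic DWARF example value:

    #include <assert.h>
    #include <stdint.h>

    /* Same scheme as elfctx_append_uleb128() above. */
    static int uleb128(uint32_t v, uint8_t *out)
    {
        int n = 0;
        for (; v >= 0x80; v >>= 7) {
            out[n++] = (uint8_t)((v & 0x7f) | 0x80);   /* continuation bit */
        }
        out[n++] = (uint8_t)v;                         /* final byte */
        return n;
    }

    int main(void)
    {
        uint8_t buf[5];
        assert(uleb128(624485, buf) == 3);             /* encodes as e5 8e 26 */
        assert(buf[0] == 0xe5 && buf[1] == 0x8e && buf[2] == 0x26);
        /* The signed variant is analogous; e.g. the CIE's data alignment
         * factor of -8 encodes as the single SLEB128 byte 0x78. */
        return 0;
    }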
+ */ +#define DWRF_U8(x) (*p++ = (x)) // Write unsigned 8-bit +#define DWRF_I8(x) (*(int8_t*)p = (x), p++) // Write signed 8-bit +#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) // Write unsigned 16-bit +#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) // Write unsigned 32-bit +#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) // Write address +#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) // Write ULEB128 +#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) // Write SLEB128 +#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) // Write string + +/* Align to specified boundary with NOP instructions */ +#define DWRF_ALIGNNOP(s) \ + while ((uintptr_t)p & ((s)-1)) { \ + *p++ = DWRF_CFA_nop; \ } -#define DWRF_SECTION(name, stmt) \ - { \ - uint32_t* szp_##name = (uint32_t*)p; \ - p += 4; \ - stmt; \ - *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ + +/* Write a DWARF section with automatic size calculation */ +#define DWRF_SECTION(name, stmt) \ + { \ + uint32_t* szp_##name = (uint32_t*)p; \ + p += 4; \ + stmt; \ + *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ } -/* Initialize .eh_frame section. */ -static void -elf_init_ehframe(ELFObjectContext* ctx) -{ +// ============================================================================= +// DWARF EH FRAME GENERATION +// ============================================================================= + +/* + * Initialize DWARF .eh_frame section for a code region + * + * The .eh_frame section contains Call Frame Information (CFI) that describes + * how to unwind the stack at any point in the code. This is essential for + * proper profiling as it allows perf to generate accurate call graphs. + * + * The function generates two main components: + * 1. CIE (Common Information Entry) - describes calling conventions + * 2. FDE (Frame Description Entry) - describes specific function unwinding + * + * Args: + * ctx: ELF object context containing code size and buffer pointers + */ +static void elf_init_ehframe(ELFObjectContext* ctx) { uint8_t* p = ctx->p; - uint8_t* framep = p; - - /* Emit DWARF EH CIE. */ - DWRF_SECTION(CIE, DWRF_U32(0); /* Offset to CIE itself. */ - DWRF_U8(DWRF_CIE_VERSION); - DWRF_STR("zR"); /* Augmentation. */ - DWRF_UV(1); /* Code alignment factor. */ - DWRF_SV(-(int64_t)sizeof(uintptr_t)); /* Data alignment factor. */ - DWRF_U8(DWRF_REG_RA); /* Return address register. */ - DWRF_UV(1); - DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); /* Augmentation data. */ - DWRF_U8(DWRF_CFA_def_cfa); DWRF_UV(DWRF_REG_SP); DWRF_UV(sizeof(uintptr_t)); - DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); DWRF_UV(1); - DWRF_ALIGNNOP(sizeof(uintptr_t)); + uint8_t* framep = p; // Remember start of frame data + + /* + * DWARF Unwind Table for Trampoline Function + * + * This section defines DWARF Call Frame Information (CFI) using encoded macros + * like `DWRF_U8`, `DWRF_UV`, and `DWRF_SECTION` to describe how the trampoline function + * preserves and restores registers. This is used by profiling tools (e.g., `perf`) + * and debuggers for stack unwinding in JIT-compiled code. + * + * ------------------------------------------------- + * TO REGENERATE THIS TABLE FROM GCC OBJECTS: + * ------------------------------------------------- + * + * 1. 
Create a trampoline source file (e.g., `trampoline.c`): + * + * #include <Python.h> + * typedef PyObject* (*py_evaluator)(void*, void*, int); + * PyObject* trampoline(void *ts, void *f, int throwflag, py_evaluator evaluator) { + * return evaluator(ts, f, throwflag); + * } + * + * 2. Compile to an object file with frame pointer preservation: + * + * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c + * + * 3. Extract DWARF unwind info from the object file: + * + * readelf -w trampoline.o + * + * Example output from `.eh_frame`: + * + * 00000000 CIE + * Version: 1 + * Augmentation: "zR" + * Code alignment factor: 4 + * Data alignment factor: -8 + * Return address column: 30 + * DW_CFA_def_cfa: r31 (sp) ofs 0 + * + * 00000014 FDE cie=00000000 pc=0..14 + * DW_CFA_advance_loc: 4 + * DW_CFA_def_cfa_offset: 16 + * DW_CFA_offset: r29 at cfa-16 + * DW_CFA_offset: r30 at cfa-8 + * DW_CFA_advance_loc: 12 + * DW_CFA_restore: r30 + * DW_CFA_restore: r29 + * DW_CFA_def_cfa_offset: 0 + * + * -- These values can be verified by comparing with `readelf -w` or `llvm-dwarfdump --eh-frame`. + * + * ---------------------------------- + * HOW TO TRANSLATE TO DWRF_* MACROS: + * ---------------------------------- + * + * After compiling your trampoline with: + * + * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c + * + * run: + * + * readelf -w trampoline.o + * + * to inspect the generated `.eh_frame` data. You will see two main components: + * + * 1. A CIE (Common Information Entry): shared configuration used by all FDEs. + * 2. An FDE (Frame Description Entry): function-specific unwind instructions. + * + * --------------------- + * Translating the CIE: + * --------------------- + * From `readelf -w`, you might see: + * + * 00000000 0000000000000010 00000000 CIE + * Version: 1 + * Augmentation: "zR" + * Code alignment factor: 4 + * Data alignment factor: -8 + * Return address column: 30 + * Augmentation data: 1b + * DW_CFA_def_cfa: r31 (sp) ofs 0 + * + * Map this to: + * + * DWRF_SECTION(CIE, + * DWRF_U32(0); // CIE ID (always 0 for CIEs) + * DWRF_U8(DWRF_CIE_VERSION); // Version: 1 + * DWRF_STR("zR"); // Augmentation string "zR" + * DWRF_UV(4); // Code alignment factor = 4 + * DWRF_SV(-8); // Data alignment factor = -8 + * DWRF_U8(DWRF_REG_RA); // Return address register (e.g., x30 = 30) + * DWRF_UV(1); // Augmentation data length = 1 + * DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // Encoding for FDE pointers + * + * DWRF_U8(DWRF_CFA_def_cfa); // DW_CFA_def_cfa + * DWRF_UV(DWRF_REG_SP); // Register: SP (r31) + * DWRF_UV(0); // Offset = 0 + * + * DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer size boundary + * ) + * + * Notes: + * - Use `DWRF_UV` for unsigned LEB128, `DWRF_SV` for signed LEB128. + * - `DWRF_REG_RA` and `DWRF_REG_SP` are architecture-defined constants. 
+ * + * --------------------- + * Translating the FDE: + * --------------------- + * From `readelf -w`: + * + * 00000014 0000000000000020 00000018 FDE cie=00000000 pc=0000000000000000..0000000000000014 + * DW_CFA_advance_loc: 4 + * DW_CFA_def_cfa_offset: 16 + * DW_CFA_offset: r29 at cfa-16 + * DW_CFA_offset: r30 at cfa-8 + * DW_CFA_advance_loc: 12 + * DW_CFA_restore: r30 + * DW_CFA_restore: r29 + * DW_CFA_def_cfa_offset: 0 + * + * Map the FDE header and instructions to: + * + * DWRF_SECTION(FDE, + * DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here) + * DWRF_U32(-0x30); // Initial PC-relative location of the code + * DWRF_U32(ctx->code_size); // Code range covered by this FDE + * DWRF_U8(0); // Augmentation data length (none) + * + * DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 unit (1 * 4 = 4 bytes) + * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 + * DWRF_UV(16); + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Save x29 (frame pointer) + * DWRF_UV(2); // At offset 2 * 8 = 16 bytes + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Save x30 (return address) + * DWRF_UV(1); // At offset 1 * 8 = 8 bytes + * + * DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance location by 3 units (3 * 4 = 12 bytes) + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore x30 + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore x29 + * + * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + * DWRF_UV(0); + * ) + * + * To regenerate: + * 1. Get the `code alignment factor`, `data alignment factor`, and `RA column` from the CIE. + * 2. Note the range of the function from the FDE's `pc=...` line and map it to the JIT code as + * the code is in a different address space every time. + * 3. For each `DW_CFA_*` entry, use the corresponding `DWRF_*` macro: + * - `DW_CFA_def_cfa_offset` → DWRF_U8(DWRF_CFA_def_cfa_offset), DWRF_UV(value) + * - `DW_CFA_offset: rX` → DWRF_U8(DWRF_CFA_offset | reg), DWRF_UV(offset) + * - `DW_CFA_restore: rX` → DWRF_U8(DWRF_CFA_offset | reg) // restore is same as reusing offset + * - `DW_CFA_advance_loc: N` → DWRF_U8(DWRF_CFA_advance_loc | (N / code_alignment_factor)) + * 4. Use `DWRF_REG_FP`, `DWRF_REG_RA`, etc., for register numbers. + * 5. Use `sizeof(uintptr_t)` (typically 8) for pointer size calculations and alignment. + */ + + /* + * Emit DWARF EH CIE (Common Information Entry) + * + * The CIE describes the calling conventions and basic unwinding rules + * that apply to all functions in this compilation unit. + */ + DWRF_SECTION(CIE, + DWRF_U32(0); // CIE ID (0 indicates this is a CIE) + DWRF_U8(DWRF_CIE_VERSION); // CIE version (1) + DWRF_STR("zR"); // Augmentation string ("z" = augmentation data, "R" = FDE pointer encoding) + DWRF_UV(1); // Code alignment factor + DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative) + DWRF_U8(DWRF_REG_RA); // Return address register number + DWRF_UV(1); // Augmentation data length + DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // FDE pointer encoding + + /* Initial CFI instructions - describe default calling convention */ + DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Call Frame Address) + DWRF_UV(DWRF_REG_SP); // CFA = SP register + DWRF_UV(sizeof(uintptr_t)); // CFA = SP + pointer_size + DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); // Return address is saved + DWRF_UV(1); // At offset 1 from CFA + + DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary ) - ctx->eh_frame_p = p; - - /* Emit DWARF EH FDE. */ - DWRF_SECTION(FDE, DWRF_U32((uint32_t)(p - framep)); /* Offset to CIE. 
*/ - DWRF_U32(-0x30); /* Machine code offset relative to .text. */ - DWRF_U32(ctx->code_size); /* Machine code length. */ - DWRF_U8(0); /* Augmentation data. */ - /* Registers saved in CFRAME. */ + ctx->eh_frame_p = p; // Remember start of FDE data + + /* + * Emit DWARF EH FDE (Frame Description Entry) + * + * The FDE describes unwinding information specific to this function. + * It references the CIE and provides function-specific CFI instructions. + */ + DWRF_SECTION(FDE, + DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference) + DWRF_U32(-0x30); // Machine code offset relative to .text + DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length) + DWRF_U8(0); // Augmentation data length (none) + + /* + * Architecture-specific CFI instructions + * + * These instructions describe how registers are saved and restored + * during function calls. Each architecture has different calling + * conventions and register usage patterns. + */ #ifdef __x86_64__ - DWRF_U8(DWRF_CFA_advance_loc | 4); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); - DWRF_U8(DWRF_CFA_advance_loc | 6); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(8); - /* Extra registers saved for JIT-compiled code. */ + /* x86_64 calling convention unwinding rules */ +# if defined(__CET__) && (__CET__ & 1) + DWRF_U8(DWRF_CFA_advance_loc | 8); // Advance location by 8 bytes when CET protection is enabled +# else + DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance location by 4 bytes +# endif + DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_UV(16); // New offset: SP + 16 + DWRF_U8(DWRF_CFA_advance_loc | 6); // Advance location by 6 bytes + DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_UV(8); // New offset: SP + 8 #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - DWRF_U8(DWRF_CFA_advance_loc | 1); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); - DWRF_U8(DWRF_CFA_offset | 29); DWRF_UV(2); - DWRF_U8(DWRF_CFA_offset | 30); DWRF_UV(1); - DWRF_U8(DWRF_CFA_advance_loc | 3); - DWRF_U8(DWRF_CFA_offset | -(64 - 29)); - DWRF_U8(DWRF_CFA_offset | -(64 - 30)); - DWRF_U8(DWRF_CFA_def_cfa_offset); - DWRF_UV(0); + /* AArch64 calling convention unwinding rules */ + DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30) + DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset + DWRF_UV(16); // CFA = SP + 16 (stack pointer after push) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Frame pointer (x29) saved + DWRF_UV(2); // At offset 2 from CFA (2 * 8 = 16 bytes) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Link register (x30) saved + DWRF_UV(1); // At offset 1 from CFA (1 * 8 = 8 bytes) + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (mov x16, x3; mov x29, sp; ldp...) 
+ DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore frame pointer (x29) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore link register (x30) + DWRF_U8(DWRF_CFA_def_cfa_offset); // Final CFA adjustment + DWRF_UV(0); // CFA = SP + 0 (stack restored) + #else # error "Unsupported target architecture" #endif - DWRF_ALIGNNOP(sizeof(uintptr_t));) - ctx->p = p; + DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary + ) + + ctx->p = p; // Update context pointer to end of generated data +} + +// ============================================================================= +// JITDUMP INITIALIZATION +// ============================================================================= + +/* + * Initialize the perf jitdump interface + * + * This function sets up everything needed to generate jitdump files: + * 1. Creates the jitdump file with a unique name + * 2. Maps the first page to signal perf that we're using the interface + * 3. Writes the jitdump header + * 4. Initializes synchronization primitives + * + * The memory mapping is crucial - perf detects jitdump files by scanning + * for processes that have mapped files matching the pattern /tmp/jit-*.dump + * + * Returns: Pointer to initialized state, or NULL on failure + */ +static void* perf_map_jit_init(void) { + char filename[100]; + int pid = getpid(); + + /* Create unique filename based on process ID */ + snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); + + /* Create/open the jitdump file with appropriate permissions */ + const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); + if (fd == -1) { + return NULL; // Failed to create file + } + + /* Get system page size for memory mapping */ + const long page_size = sysconf(_SC_PAGESIZE); + if (page_size == -1) { + close(fd); + return NULL; // Failed to get page size + } + + /* + * Map the first page of the jitdump file + * + * This memory mapping serves as a signal to perf that this process + * is generating JIT code. Perf scans /proc/.../maps looking for mapped + * files that match the jitdump naming pattern. + * + * The mapping must be PROT_READ | PROT_EXEC to be detected by perf. + */ + perf_jit_map_state.mapped_buffer = mmap( + NULL, // Let kernel choose address + page_size, // Map one page + PROT_READ | PROT_EXEC, // Read and execute permissions (required by perf) + MAP_PRIVATE, // Private mapping + fd, // File descriptor + 0 // Offset 0 (first page) + ); + + if (perf_jit_map_state.mapped_buffer == NULL) { + close(fd); + return NULL; // Memory mapping failed + } + + perf_jit_map_state.mapped_size = page_size; + + /* Convert file descriptor to FILE* for easier I/O operations */ + perf_jit_map_state.perf_map = fdopen(fd, "w+"); + if (perf_jit_map_state.perf_map == NULL) { + close(fd); + return NULL; // Failed to create FILE* + } + + /* + * Set up file buffering for better performance + * + * We use a large buffer (2MB) because jitdump files can be written + * frequently during program execution. Buffering reduces system call + * overhead and improves overall performance. + */ + setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); + + /* Write the jitdump file header */ + perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); + + /* + * Initialize thread synchronization lock + * + * Multiple threads may attempt to write to the jitdump file + * simultaneously. This lock ensures thread-safe access to the + * global jitdump state. 
+ */ + perf_jit_map_state.map_lock = PyThread_allocate_lock(); + if (perf_jit_map_state.map_lock == NULL) { + fclose(perf_jit_map_state.perf_map); + return NULL; // Failed to create lock + } + + /* Initialize code ID counter */ + perf_jit_map_state.code_id = 0; + + /* Configure trampoline API with padding information */ + trampoline_api.code_padding = PERF_JIT_CODE_PADDING; + + return &perf_jit_map_state; } +// ============================================================================= +// MAIN JITDUMP ENTRY WRITING +// ============================================================================= + +/* + * Write a complete jitdump entry for a Python function + * + * This is the main function called by Python's trampoline system whenever + * a new piece of JIT-compiled code needs to be recorded. It writes both + * the unwinding information and the code load event to the jitdump file. + * + * The function performs these steps: + * 1. Initialize jitdump system if not already done + * 2. Extract function name and filename from Python code object + * 3. Generate DWARF unwinding information + * 4. Write unwinding info event to jitdump file + * 5. Write code load event to jitdump file + * + * Args: + * state: Jitdump state (currently unused, uses global state) + * code_addr: Address where the compiled code resides + * code_size: Size of the compiled code in bytes + * co: Python code object containing metadata + * + * IMPORTANT: This function signature is part of Python's internal API + * and must not be changed without coordinating with core Python development. + */ static void perf_map_jit_write_entry(void *state, const void *code_addr, - unsigned int code_size, PyCodeObject *co) + unsigned int code_size, PyCodeObject *co) { - + /* Initialize jitdump system on first use */ if (perf_jit_map_state.perf_map == NULL) { void* ret = perf_map_jit_init(); if(ret == NULL){ - return; + return; // Initialization failed, silently abort } } + /* + * Extract function information from Python code object + * + * We create a human-readable function name by combining the qualified + * name (includes class/module context) with the filename. This helps + * developers identify functions in perf reports. + */ const char *entry = ""; if (co->co_qualname != NULL) { entry = PyUnicode_AsUTF8(co->co_qualname); } + const char *filename = ""; if (co->co_filename != NULL) { filename = PyUnicode_AsUTF8(co->co_filename); } - + /* + * Create formatted function name for perf display + * + * Format: "py::<function_name>:<filename>" + * The "py::" prefix helps identify Python functions in mixed-language + * profiles (e.g., when profiling C extensions alongside Python code). + */ size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); if (perf_map_entry == NULL) { - return; + return; // Memory allocation failed } snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); @@ -528,90 +1081,185 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr, uword base = (uword)code_addr; uword size = code_size; - // Write the code unwinding info event. - - // Create unwinding information (eh frame) + /* + * Generate DWARF unwinding information + * + * DWARF data is essential for proper stack unwinding during profiling. + * Without it, perf cannot generate accurate call graphs, especially + * in optimized code where frame pointers may be omitted. 
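The name formatting earlier in this function uses the common two-pass snprintf idiom: a NULL-buffer call to measure, then a real call to format. In isolation, with hypothetical inputs:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        const char *entry = "func";       /* stand-in for co_qualname */
        const char *filename = "mod.py";  /* stand-in for co_filename */

        /* Pass 1: a NULL buffer returns the needed length (excluding NUL). */
        size_t n = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1;
        char *s = malloc(n);
        if (s == NULL) {
            return 1;
        }
        /* Pass 2: format into the exactly-sized buffer. */
        snprintf(s, n, "py::%s:%s", entry, filename);
        puts(s);                          /* prints: py::func:mod.py */
        free(s);
        return 0;
    }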
+ */ ELFObjectContext ctx; - char buffer[1024]; + char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) ctx.code_size = code_size; ctx.startp = ctx.p = (uint8_t*)buffer; + + /* Generate EH frame (Exception Handling frame) data */ elf_init_ehframe(&ctx); int eh_frame_size = ctx.p - ctx.startp; - // Populate the unwind info event for perf + /* + * Write Code Unwinding Information Event + * + * This event must be written before the code load event to ensure + * perf has the unwinding information available when it processes + * the code region. + */ CodeUnwindingInfoEvent ev2; ev2.base.event = PerfUnwindingInfo; ev2.base.time_stamp = get_current_monotonic_ticks(); ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; - // Ensure we have enough space between DSOs when perf maps them + + /* Verify we don't exceed our padding budget */ assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING); + ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); - ev2.mapped_size = round_up(ev2.unwind_data_size, 16); + ev2.mapped_size = round_up(ev2.unwind_data_size, 16); // 16-byte alignment + + /* Calculate total event size with padding */ int content_size = sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size; - int padding_size = round_up(content_size, 8) - content_size; + int padding_size = round_up(content_size, 8) - content_size; // 8-byte align ev2.base.size = content_size + padding_size; - perf_map_jit_write_fully(&ev2, sizeof(ev2)); + /* Write the unwinding info event header */ + perf_map_jit_write_fully(&ev2, sizeof(ev2)); - // Populate the eh Frame header + /* + * Write EH Frame Header + * + * The EH frame header provides metadata about the DWARF unwinding + * information that follows. It includes pointers and counts that + * help perf navigate the unwinding data efficiently. + */ EhFrameHeader f; f.version = 1; - f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; - f.fde_count_enc = DwarfUData4; - f.table_enc = DwarfSData4 | DwarfDataRel; + f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; // PC-relative signed 4-byte + f.fde_count_enc = DwarfUData4; // Unsigned 4-byte count + f.table_enc = DwarfSData4 | DwarfDataRel; // Data-relative signed 4-byte + + /* Calculate relative offsets for EH frame navigation */ f.eh_frame_ptr = -(eh_frame_size + 4 * sizeof(unsigned char)); - f.eh_fde_count = 1; + f.eh_fde_count = 1; // We generate exactly one FDE per function f.from = -(round_up(code_size, 8) + eh_frame_size); + int cie_size = ctx.eh_frame_p - ctx.startp; f.to = -(eh_frame_size - cie_size); + /* Write EH frame data and header */ perf_map_jit_write_fully(ctx.startp, eh_frame_size); perf_map_jit_write_fully(&f, sizeof(f)); + /* Write padding to maintain alignment */ char padding_bytes[] = "\0\0\0\0\0\0\0\0"; perf_map_jit_write_fully(&padding_bytes, padding_size); - // Write the code load event. + /* + * Write Code Load Event + * + * This event tells perf about the new code region. 
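The size bookkeeping in this event is easy to check by hand. A sketch with made-up sizes (the header and DWARF sizes below are illustrative stand-ins, not the real struct sizes):

    #include <stdio.h>

    static int align_up_int(int v, int m) { int r = v % m; return r ? v + (m - r) : v; }

    int main(void)
    {
        int eh_frame_hdr = 16;                           /* stand-in header size */
        int eh_frame_size = 44;                          /* made-up DWARF size */
        int unwind_data = eh_frame_hdr + eh_frame_size;  /* 60 */
        int mapped_size = align_up_int(unwind_data, 16); /* 64: 16-byte aligned */
        int event_hdr = 32;                              /* stand-in event size */
        int content = event_hdr + unwind_data;           /* 92 */
        int padding = align_up_int(content, 8) - content;/* 4: 8-byte aligned */
        printf("mapped=%d padding=%d total=%d\n", mapped_size, padding, content + padding);
        return 0;
    }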
It includes: + * - Memory addresses and sizes + * - Process and thread identification + * - Function name for symbol resolution + * - The actual machine code bytes + */ CodeLoadEvent ev; ev.base.event = PerfLoad; ev.base.size = sizeof(ev) + (name_length+1) + size; ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); - ev.thread_id = syscall(SYS_gettid); - ev.vma = base; - ev.code_address = base; + ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call + ev.vma = base; // Virtual memory address + ev.code_address = base; // Same as VMA for our use case ev.code_size = size; + + /* Assign unique code ID and increment counter */ perf_jit_map_state.code_id += 1; ev.code_id = perf_jit_map_state.code_id; + /* Write code load event and associated data */ perf_map_jit_write_fully(&ev, sizeof(ev)); - perf_map_jit_write_fully(perf_map_entry, name_length+1); - perf_map_jit_write_fully((void*)(base), size); - return; + perf_map_jit_write_fully(perf_map_entry, name_length+1); // Include null terminator + perf_map_jit_write_fully((void*)(base), size); // Copy actual machine code + + /* Clean up allocated memory */ + PyMem_RawFree(perf_map_entry); } +// ============================================================================= +// CLEANUP AND FINALIZATION +// ============================================================================= + +/* + * Finalize and cleanup the perf jitdump system + * + * This function is called when Python is shutting down or when the + * perf trampoline system is being disabled. It ensures all resources + * are properly released and all buffered data is flushed to disk. + * + * Args: + * state: Jitdump state (currently unused, uses global state) + * + * Returns: 0 on success + * + * IMPORTANT: This function signature is part of Python's internal API + * and must not be changed without coordinating with core Python development. + */ static int perf_map_jit_fini(void* state) { + /* + * Close jitdump file with proper synchronization + * + * We need to acquire the lock to ensure no other threads are + * writing to the file when we close it. This prevents corruption + * and ensures all data is properly flushed. + */ if (perf_jit_map_state.perf_map != NULL) { - // close the file PyThread_acquire_lock(perf_jit_map_state.map_lock, 1); - fclose(perf_jit_map_state.perf_map); + fclose(perf_jit_map_state.perf_map); // This also flushes buffers PyThread_release_lock(perf_jit_map_state.map_lock); - // clean up the lock and state + /* Clean up synchronization primitive */ PyThread_free_lock(perf_jit_map_state.map_lock); perf_jit_map_state.perf_map = NULL; } + + /* + * Unmap the memory region + * + * This removes the signal to perf that we were generating JIT code. + * After this point, perf will no longer detect this process as + * having JIT capabilities. + */ if (perf_jit_map_state.mapped_buffer != NULL) { munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size); + perf_jit_map_state.mapped_buffer = NULL; } + + /* Clear global state reference */ trampoline_api.state = NULL; - return 0; + + return 0; // Success } +// ============================================================================= +// PUBLIC API EXPORT +// ============================================================================= + +/* + * Python Perf Callbacks Structure + * + * This structure defines the callback interface that Python's trampoline + * system uses to integrate with perf profiling. 
It contains function + * pointers for initialization, event writing, and cleanup. + * + * CRITICAL: This structure and its contents are part of Python's internal + * API. The function signatures and behavior must remain stable to maintain + * compatibility with the Python interpreter's perf integration system. + * + * Used by: Python's _PyPerf_Callbacks system in pycore_ceval.h + */ _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { - &perf_map_jit_init, - &perf_map_jit_write_entry, - &perf_map_jit_fini, + &perf_map_jit_init, // Initialization function + &perf_map_jit_write_entry, // Event writing function + &perf_map_jit_fini, // Cleanup function }; -#endif +#endif /* PY_HAVE_PERF_TRAMPOLINE */
\ No newline at end of file diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index c4c1d9fd9e1..724fda63511 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1283,7 +1283,7 @@ init_interp_main(PyThreadState *tstate) if (is_main_interp) { /* Initialize warnings. */ PyObject *warnoptions; - if (_PySys_GetOptionalAttrString("warnoptions", &warnoptions) < 0) { + if (PySys_GetOptionalAttrString("warnoptions", &warnoptions) < 0) { return _PyStatus_ERR("can't initialize warnings"); } if (warnoptions != NULL && PyList_Check(warnoptions) && @@ -1806,7 +1806,7 @@ flush_std_files(void) PyObject *file; int status = 0; - if (_PySys_GetOptionalAttr(&_Py_ID(stdout), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stdout), &file) < 0) { status = -1; } else if (file != NULL && file != Py_None && !file_is_closed(file)) { @@ -1819,7 +1819,7 @@ flush_std_files(void) } Py_XDECREF(file); - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { PyErr_Clear(); status = -1; } @@ -3046,7 +3046,7 @@ _Py_FatalError_PrintExc(PyThreadState *tstate) } PyObject *ferr; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &ferr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &ferr) < 0) { _PyErr_Clear(tstate); } if (ferr == NULL || ferr == Py_None) { @@ -3144,7 +3144,7 @@ static inline void _Py_NO_RETURN fatal_error_exit(int status) { if (status < 0) { -#if defined(MS_WINDOWS) && defined(_DEBUG) +#if defined(MS_WINDOWS) && defined(Py_DEBUG) DebugBreak(); #endif abort(); diff --git a/Python/pystate.c b/Python/pystate.c index 1ac13440085..0d4c26f92ce 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -69,7 +69,12 @@ to avoid the expense of doing their own locking). #ifdef HAVE_THREAD_LOCAL +/* The attached thread state for the current thread. */ _Py_thread_local PyThreadState *_Py_tss_tstate = NULL; + +/* The "bound" thread state used by PyGILState_Ensure(), + also known as a "gilstate." */ +_Py_thread_local PyThreadState *_Py_tss_gilstate = NULL; #endif static inline PyThreadState * @@ -118,79 +123,9 @@ _PyThreadState_GetCurrent(void) } -//------------------------------------------------ -// the thread state bound to the current OS thread -//------------------------------------------------ - -static inline int -tstate_tss_initialized(Py_tss_t *key) -{ - return PyThread_tss_is_created(key); -} - -static inline int -tstate_tss_init(Py_tss_t *key) -{ - assert(!tstate_tss_initialized(key)); - return PyThread_tss_create(key); -} - -static inline void -tstate_tss_fini(Py_tss_t *key) -{ - assert(tstate_tss_initialized(key)); - PyThread_tss_delete(key); -} - -static inline PyThreadState * -tstate_tss_get(Py_tss_t *key) -{ - assert(tstate_tss_initialized(key)); - return (PyThreadState *)PyThread_tss_get(key); -} - -static inline int -tstate_tss_set(Py_tss_t *key, PyThreadState *tstate) -{ - assert(tstate != NULL); - assert(tstate_tss_initialized(key)); - return PyThread_tss_set(key, (void *)tstate); -} - -static inline int -tstate_tss_clear(Py_tss_t *key) -{ - assert(tstate_tss_initialized(key)); - return PyThread_tss_set(key, (void *)NULL); -} - -#ifdef HAVE_FORK -/* Reset the TSS key - called by PyOS_AfterFork_Child(). - * This should not be necessary, but some - buggy - pthread implementations - * don't reset TSS upon fork(), see issue #10517. 
- */ -static PyStatus -tstate_tss_reinit(Py_tss_t *key) -{ - if (!tstate_tss_initialized(key)) { - return _PyStatus_OK(); - } - PyThreadState *tstate = tstate_tss_get(key); - - tstate_tss_fini(key); - if (tstate_tss_init(key) != 0) { - return _PyStatus_NO_MEMORY(); - } - - /* If the thread had an associated auto thread state, reassociate it with - * the new key. */ - if (tstate && tstate_tss_set(key, tstate) != 0) { - return _PyStatus_ERR("failed to re-set autoTSSkey"); - } - return _PyStatus_OK(); -} -#endif - +//--------------------------------------------- +// The thread state used by PyGILState_Ensure() +//--------------------------------------------- /* The stored thread state is set by bind_tstate() (AKA PyThreadState_Bind()). @@ -198,36 +133,23 @@ tstate_tss_reinit(Py_tss_t *key) The GIL does not need to be held for these. */ -#define gilstate_tss_initialized(runtime) \ - tstate_tss_initialized(&(runtime)->autoTSSkey) -#define gilstate_tss_init(runtime) \ - tstate_tss_init(&(runtime)->autoTSSkey) -#define gilstate_tss_fini(runtime) \ - tstate_tss_fini(&(runtime)->autoTSSkey) -#define gilstate_tss_get(runtime) \ - tstate_tss_get(&(runtime)->autoTSSkey) -#define _gilstate_tss_set(runtime, tstate) \ - tstate_tss_set(&(runtime)->autoTSSkey, tstate) -#define _gilstate_tss_clear(runtime) \ - tstate_tss_clear(&(runtime)->autoTSSkey) -#define gilstate_tss_reinit(runtime) \ - tstate_tss_reinit(&(runtime)->autoTSSkey) +static inline PyThreadState * +gilstate_get(void) +{ + return _Py_tss_gilstate; +} static inline void -gilstate_tss_set(_PyRuntimeState *runtime, PyThreadState *tstate) +gilstate_set(PyThreadState *tstate) { - assert(tstate != NULL && tstate->interp->runtime == runtime); - if (_gilstate_tss_set(runtime, tstate) != 0) { - Py_FatalError("failed to set current tstate (TSS)"); - } + assert(tstate != NULL); + _Py_tss_gilstate = tstate; } static inline void -gilstate_tss_clear(_PyRuntimeState *runtime) +gilstate_clear(void) { - if (_gilstate_tss_clear(runtime) != 0) { - Py_FatalError("failed to clear current tstate (TSS)"); - } + _Py_tss_gilstate = NULL; } @@ -253,7 +175,7 @@ bind_tstate(PyThreadState *tstate) assert(tstate_is_alive(tstate) && !tstate->_status.bound); assert(!tstate->_status.unbound); // just in case assert(!tstate->_status.bound_gilstate); - assert(tstate != gilstate_tss_get(tstate->interp->runtime)); + assert(tstate != gilstate_get()); assert(!tstate->_status.active); assert(tstate->thread_id == 0); assert(tstate->native_thread_id == 0); @@ -328,14 +250,13 @@ bind_gilstate_tstate(PyThreadState *tstate) // XXX assert(!tstate->_status.active); assert(!tstate->_status.bound_gilstate); - _PyRuntimeState *runtime = tstate->interp->runtime; - PyThreadState *tcur = gilstate_tss_get(runtime); + PyThreadState *tcur = gilstate_get(); assert(tstate != tcur); if (tcur != NULL) { tcur->_status.bound_gilstate = 0; } - gilstate_tss_set(runtime, tstate); + gilstate_set(tstate); tstate->_status.bound_gilstate = 1; } @@ -347,9 +268,8 @@ unbind_gilstate_tstate(PyThreadState *tstate) assert(tstate_is_bound(tstate)); // XXX assert(!tstate->_status.active); assert(tstate->_status.bound_gilstate); - assert(tstate == gilstate_tss_get(tstate->interp->runtime)); - - gilstate_tss_clear(tstate->interp->runtime); + assert(tstate == gilstate_get()); + gilstate_clear(); tstate->_status.bound_gilstate = 0; } @@ -373,7 +293,7 @@ holds_gil(PyThreadState *tstate) // (and tstate->interp->runtime->ceval.gil.locked).
assert(tstate != NULL); /* Must be the tstate for this thread */ - assert(tstate == gilstate_tss_get(tstate->interp->runtime)); + assert(tstate == gilstate_get()); return tstate == current_fast_get(); } @@ -469,16 +389,6 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) return status; } - if (gilstate_tss_init(runtime) != 0) { - _PyRuntimeState_Fini(runtime); - return _PyStatus_NO_MEMORY(); - } - - if (PyThread_tss_create(&runtime->trashTSSkey) != 0) { - _PyRuntimeState_Fini(runtime); - return _PyStatus_NO_MEMORY(); - } - init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head, unicode_next_index); @@ -492,14 +402,7 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) /* The count is cleared by _Py_FinalizeRefTotal(). */ assert(runtime->object_state.interpreter_leaks == 0); #endif - - if (gilstate_tss_initialized(runtime)) { - gilstate_tss_fini(runtime); - } - - if (PyThread_tss_is_created(&runtime->trashTSSkey)) { - PyThread_tss_delete(&runtime->trashTSSkey); - } + gilstate_clear(); } #ifdef HAVE_FORK @@ -532,18 +435,6 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyTypes_AfterFork(); - PyStatus status = gilstate_tss_reinit(runtime); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - - if (PyThread_tss_is_created(&runtime->trashTSSkey)) { - PyThread_tss_delete(&runtime->trashTSSkey); - } - if (PyThread_tss_create(&runtime->trashTSSkey) != 0) { - return _PyStatus_NO_MEMORY(); - } - _PyThread_AfterFork(&runtime->threads); return _PyStatus_OK(); @@ -676,6 +567,7 @@ init_interpreter(PyInterpreterState *interp, } interp->sys_profile_initialized = false; interp->sys_trace_initialized = false; + interp->_code_object_generation = 0; interp->jit = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; @@ -886,6 +778,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) for (int t = 0; t < PY_MONITORING_TOOL_IDS; t++) { Py_CLEAR(interp->monitoring_tool_names[t]); } + interp->_code_object_generation = 0; +#ifdef Py_GIL_DISABLED + interp->tlbc_indices.tlbc_generation = 0; +#endif PyConfig_Clear(&interp->config); _PyCodec_Fini(interp); @@ -1246,6 +1142,7 @@ _Py_CheckMainModule(PyObject *module) PyObject *msg = PyUnicode_FromString("invalid __main__ module"); if (msg != NULL) { (void)PyErr_SetImportError(msg, &_Py_ID(__main__), NULL); + Py_DECREF(msg); } return -1; } @@ -1393,10 +1290,8 @@ interp_look_up_id(_PyRuntimeState *runtime, int64_t requested_id) { PyInterpreterState *interp = runtime->interpreters.head; while (interp != NULL) { - int64_t id = PyInterpreterState_GetID(interp); - if (id < 0) { - return NULL; - } + int64_t id = interp->id; + assert(id >= 0); if (requested_id == id) { return interp; } @@ -1457,9 +1352,6 @@ tstate_is_alive(PyThreadState *tstate) // lifecycle //---------- -/* Minimum size of data stack chunk */ -#define DATA_STACK_CHUNK_SIZE (16*1024) - static _PyStackChunk* allocate_chunk(int size_in_bytes, _PyStackChunk* previous) { @@ -1671,7 +1563,7 @@ _PyThreadState_NewBound(PyInterpreterState *interp, int whence) bind_tstate(tstate); // This makes sure there's a gilstate tstate bound // as soon as possible. 
- if (gilstate_tss_get(tstate->interp->runtime) == NULL) { + if (gilstate_get() == NULL) { bind_gilstate_tstate(tstate); } } @@ -1908,9 +1800,14 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) static void zapthreads(PyInterpreterState *interp) { + PyThreadState *tstate; /* No need to lock the mutex here because this should only happen - when the threads are all really dead (XXX famous last words). */ - _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { + when the threads are all really dead (XXX famous last words). + + Cannot use _Py_FOR_EACH_TSTATE_UNLOCKED because we are freeing + the thread states here. + */ + while ((tstate = interp->threads.head) != NULL) { tstate_verify_not_active(tstate); tstate_delete_common(tstate, 0); free_threadstate((_PyThreadStateImpl *)tstate); @@ -2092,7 +1989,7 @@ tstate_activate(PyThreadState *tstate) assert(!tstate->_status.active); assert(!tstate->_status.bound_gilstate || - tstate == gilstate_tss_get((tstate->interp->runtime))); + tstate == gilstate_get()); if (!tstate->_status.bound_gilstate) { bind_gilstate_tstate(tstate); } @@ -2560,7 +2457,7 @@ _PyThreadState_Bind(PyThreadState *tstate) bind_tstate(tstate); // This makes sure there's a gilstate tstate bound // as soon as possible. - if (gilstate_tss_get(tstate->interp->runtime) == NULL) { + if (gilstate_get() == NULL) { bind_gilstate_tstate(tstate); } } @@ -2762,7 +2659,7 @@ _PyGILState_Init(PyInterpreterState *interp) return _PyStatus_OK(); } _PyRuntimeState *runtime = interp->runtime; - assert(gilstate_tss_get(runtime) == NULL); + assert(gilstate_get() == NULL); assert(runtime->gilstate.autoInterpreterState == NULL); runtime->gilstate.autoInterpreterState = interp; return _PyStatus_OK(); @@ -2798,7 +2695,7 @@ _PyGILState_SetTstate(PyThreadState *tstate) _PyRuntimeState *runtime = tstate->interp->runtime; assert(runtime->gilstate.autoInterpreterState == tstate->interp); - assert(gilstate_tss_get(runtime) == tstate); + assert(gilstate_get() == tstate); assert(tstate->gilstate_counter == 1); #endif } @@ -2814,11 +2711,7 @@ _PyGILState_GetInterpreterStateUnsafe(void) PyThreadState * PyGILState_GetThisThreadState(void) { - _PyRuntimeState *runtime = &_PyRuntime; - if (!gilstate_tss_initialized(runtime)) { - return NULL; - } - return gilstate_tss_get(runtime); + return gilstate_get(); } int @@ -2829,16 +2722,12 @@ PyGILState_Check(void) return 1; } - if (!gilstate_tss_initialized(runtime)) { - return 1; - } - PyThreadState *tstate = current_fast_get(); if (tstate == NULL) { return 0; } - PyThreadState *tcur = gilstate_tss_get(runtime); + PyThreadState *tcur = gilstate_get(); return (tstate == tcur); } @@ -2853,12 +2742,17 @@ PyGILState_Ensure(void) called Py_Initialize(). */ /* Ensure that _PyEval_InitThreads() and _PyGILState_Init() have been - called by Py_Initialize() */ - assert(_PyEval_ThreadsInitialized()); - assert(gilstate_tss_initialized(runtime)); - assert(runtime->gilstate.autoInterpreterState != NULL); + called by Py_Initialize() - PyThreadState *tcur = gilstate_tss_get(runtime); + TODO: This isn't thread-safe. There's no protection here against + concurrent finalization of the interpreter; it's simply a guard + for *after* the interpreter has finalized. 
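+
+       For example, a daemon thread that calls PyGILState_Ensure() while the
+       main thread is finishing Py_Finalize() can arrive here after
+       autoInterpreterState has already been cleared; the check below then
+       parks it in PyThread_hang_thread() instead of letting it touch a
+       torn-down runtime.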
+ */ + if (!_PyEval_ThreadsInitialized() || runtime->gilstate.autoInterpreterState == NULL) { + PyThread_hang_thread(); + } + + PyThreadState *tcur = gilstate_get(); int has_gil; if (tcur == NULL) { /* Create a new Python thread state for this thread */ @@ -2898,8 +2792,7 @@ PyGILState_Ensure(void) void PyGILState_Release(PyGILState_STATE oldstate) { - _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = gilstate_tss_get(runtime); + PyThreadState *tstate = gilstate_get(); if (tstate == NULL) { Py_FatalError("auto-releasing thread-state, " "but no thread-state for this thread"); @@ -3007,7 +2900,7 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature static PyObject ** push_chunk(PyThreadState *tstate, int size) { - int allocate_size = DATA_STACK_CHUNK_SIZE; + int allocate_size = _PY_DATA_STACK_CHUNK_SIZE; while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 4ee287af72f..8f1c78bf831 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -114,7 +114,7 @@ _PyRun_InteractiveLoopObject(FILE *fp, PyObject *filename, PyCompilerFlags *flag } PyObject *v; - if (_PySys_GetOptionalAttr(&_Py_ID(ps1), &v) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps1), &v) < 0) { PyErr_Print(); return -1; } @@ -128,7 +128,7 @@ _PyRun_InteractiveLoopObject(FILE *fp, PyObject *filename, PyCompilerFlags *flag } } Py_XDECREF(v); - if (_PySys_GetOptionalAttr(&_Py_ID(ps2), &v) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps2), &v) < 0) { PyErr_Print(); return -1; } @@ -206,7 +206,7 @@ pyrun_one_parse_ast(FILE *fp, PyObject *filename, PyObject *encoding_obj = NULL; const char *encoding = NULL; if (fp == stdin) { - if (_PySys_GetOptionalAttr(&_Py_ID(stdin), &attr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stdin), &attr) < 0) { PyErr_Clear(); } else if (attr != NULL && attr != Py_None) { @@ -226,7 +226,7 @@ pyrun_one_parse_ast(FILE *fp, PyObject *filename, // Get sys.ps1 (as UTF-8) PyObject *ps1_obj = NULL; const char *ps1 = ""; - if (_PySys_GetOptionalAttr(&_Py_ID(ps1), &attr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps1), &attr) < 0) { PyErr_Clear(); } else if (attr != NULL) { @@ -247,7 +247,7 @@ pyrun_one_parse_ast(FILE *fp, PyObject *filename, // Get sys.ps2 (as UTF-8) PyObject *ps2_obj = NULL; const char *ps2 = ""; - if (_PySys_GetOptionalAttr(&_Py_ID(ps2), &attr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(ps2), &attr) < 0) { PyErr_Clear(); } else if (attr != NULL) { @@ -658,7 +658,7 @@ _Py_HandleSystemExitAndKeyboardInterrupt(int *exitcode_p) } PyObject *sys_stderr; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &sys_stderr) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &sys_stderr) < 0) { PyErr_Clear(); } else if (sys_stderr != NULL && sys_stderr != Py_None) { @@ -722,7 +722,7 @@ _PyErr_PrintEx(PyThreadState *tstate, int set_sys_last_vars) _PyErr_Clear(tstate); } } - if (_PySys_GetOptionalAttr(&_Py_ID(excepthook), &hook) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(excepthook), &hook) < 0) { PyErr_Clear(); } if (_PySys_Audit(tstate, "sys.excepthook", "OOOO", hook ? 
hook : Py_None, @@ -1197,7 +1197,7 @@ void PyErr_Display(PyObject *unused, PyObject *value, PyObject *tb) { PyObject *file; - if (_PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { + if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { PyObject *exc = PyErr_GetRaisedException(); _PyObject_Dump(value); fprintf(stderr, "lost sys.stderr\n"); @@ -1321,7 +1321,7 @@ static void flush_io_stream(PyThreadState *tstate, PyObject *name) { PyObject *f; - if (_PySys_GetOptionalAttr(name, &f) < 0) { + if (PySys_GetOptionalAttr(name, &f) < 0) { PyErr_Clear(); } if (f != NULL) { @@ -1524,6 +1524,26 @@ Py_CompileStringExFlags(const char *str, const char *filename_str, int start, return co; } +int +_PyObject_SupportedAsScript(PyObject *cmd) +{ + if (PyUnicode_Check(cmd)) { + return 1; + } + else if (PyBytes_Check(cmd)) { + return 1; + } + else if (PyByteArray_Check(cmd)) { + return 1; + } + else if (PyObject_CheckBuffer(cmd)) { + return 1; + } + else { + return 0; + } +} + const char * _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy) { diff --git a/Python/qsbr.c b/Python/qsbr.c index bf34fb2523d..c992c285cb1 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -1,6 +1,6 @@ /* * Implementation of safe memory reclamation scheme using - * quiescent states. + * quiescent states. See InternalDocs/qsbr.md. * * This is derived from the "GUS" safe memory reclamation technique * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs @@ -41,10 +41,6 @@ // Starting size of the array of qsbr thread states #define MIN_ARRAY_SIZE 8 -// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing -// the write sequence. -#define QSBR_DEFERRED_LIMIT 10 - // Allocate a QSBR thread state from the freelist static struct _qsbr_thread_state * qsbr_allocate(struct _qsbr_shared *shared) @@ -117,13 +113,9 @@ _Py_qsbr_advance(struct _qsbr_shared *shared) } uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr) +_Py_qsbr_shared_next(struct _qsbr_shared *shared) { - if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) { - return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR; - } - qsbr->deferrals = 0; - return _Py_qsbr_advance(qsbr->shared); + return _Py_qsbr_shared_current(shared) + QSBR_INCR; } static uint64_t diff --git a/Python/remote_debug.h b/Python/remote_debug.h index edc77c30291..d1fcb478d2b 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -13,6 +13,16 @@ If you need to add a new function ensure that is declared 'static'. extern "C" { #endif +#ifdef __clang__ + #define UNUSED __attribute__((unused)) +#elif defined(__GNUC__) + #define UNUSED __attribute__((unused)) +#elif defined(_MSC_VER) + #define UNUSED __pragma(warning(suppress: 4505)) +#else + #define UNUSED +#endif + #if !defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # error "this header requires Py_BUILD_CORE or Py_BUILD_CORE_MODULE define" #endif @@ -35,7 +45,7 @@ extern "C" { # include <sys/mman.h> #endif -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX # include <libproc.h> # include <mach-o/fat.h> # include <mach-o/loader.h> @@ -73,27 +83,75 @@ extern "C" { # define HAVE_PROCESS_VM_READV 0 #endif +#define _set_debug_exception_cause(exception, format, ...) 
\ + do { \ + if (!PyErr_ExceptionMatches(PyExc_PermissionError)) { \ + PyThreadState *tstate = _PyThreadState_GET(); \ + if (!_PyErr_Occurred(tstate)) { \ + _PyErr_Format(tstate, exception, format, ##__VA_ARGS__); \ + } else { \ + _PyErr_FormatFromCause(exception, format, ##__VA_ARGS__); \ + } \ + } \ + } while (0) + +static inline size_t +get_page_size(void) { + size_t page_size = 0; + if (page_size == 0) { +#ifdef MS_WINDOWS + SYSTEM_INFO si; + GetSystemInfo(&si); + page_size = si.dwPageSize; +#else + page_size = (size_t)getpagesize(); +#endif + } + return page_size; +} + + // Define a platform-independent process handle structure typedef struct { pid_t pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX + mach_port_t task; +#elif defined(MS_WINDOWS) HANDLE hProcess; +#elif defined(__linux__) + int memfd; #endif + Py_ssize_t page_size; } proc_handle_t; + +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX +static mach_port_t pid_to_task(pid_t pid); +#endif + // Initialize the process handle static int _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX + handle->task = pid_to_task(handle->pid); + if (handle->task == 0) { + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize macOS process handle"); + return -1; + } +#elif defined(MS_WINDOWS) handle->hProcess = OpenProcess( PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, FALSE, pid); if (handle->hProcess == NULL) { PyErr_SetFromWindowsErr(0); + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize Windows process handle"); return -1; } +#elif defined(__linux__) + handle->memfd = -1; #endif + handle->page_size = get_page_size(); return 0; } @@ -105,11 +163,16 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { CloseHandle(handle->hProcess); handle->hProcess = NULL; } +#elif defined(__linux__) + if (handle->memfd != -1) { + close(handle->memfd); + handle->memfd = -1; + } #endif handle->pid = 0; } -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX static uintptr_t return_section_address64( @@ -148,8 +211,10 @@ return_section_address64( &object_name ); if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_region failed while parsing 64-bit Mach-O binary " + "at base address 0x%lx (kern_return_t: %d)", + base, ret); return 0; } } @@ -169,9 +234,6 @@ return_section_address64( cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); } - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -212,8 +274,10 @@ return_section_address32( &object_name ); if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_region failed while parsing 32-bit Mach-O binary " + "at base address 0x%lx (kern_return_t: %d)", + base, ret); return 0; } } @@ -233,9 +297,6 @@ return_section_address32( cmd = (struct segment_command*)((void*)cmd + cmd->cmdsize); } - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -253,8 +314,20 @@ return_section_address_fat( int 
is_abi64; size_t cpu_size = sizeof(cpu), abi64_size = sizeof(is_abi64); - sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0); - sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0); + if (sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to determine CPU type via sysctlbyname " + "for fat binary analysis at 0x%lx: %s", + base, strerror(errno)); + return 0; + } + if (sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to determine CPU ABI capability via sysctlbyname " + "for fat binary analysis at 0x%lx: %s", + base, strerror(errno)); + return 0; + } cpu |= is_abi64 * CPU_ARCH_ABI64; @@ -285,13 +358,18 @@ return_section_address_fat( return return_section_address64(section, proc_ref, base, (void*)hdr); default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic in fat binary.\n"); + PyErr_Format(PyExc_RuntimeError, + "Unknown Mach-O magic number 0x%x in fat binary architecture %u at base 0x%lx", + hdr->magic, i, base); return 0; } } } - PyErr_SetString(PyExc_RuntimeError, "No matching architecture found in fat binary.\n"); + PyErr_Format(PyExc_RuntimeError, + "No matching architecture found for CPU type 0x%x " + "in fat binary at base 0x%lx (%u architectures examined)", + cpu, base, nfat_arch); return 0; } @@ -300,20 +378,26 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ { int fd = open(path, O_RDONLY); if (fd == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot open binary file '%s' for section '%s' search: %s", + path, secname, strerror(errno)); return 0; } struct stat fs; if (fstat(fd, &fs) == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot get file size for binary '%s' during section '%s' search: %s", + path, secname, strerror(errno)); close(fd); return 0; } void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { - PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot memory map binary file '%s' (size: %lld bytes) for section '%s' search: %s", + path, (long long)fs.st_size, secname, strerror(errno)); close(fd); return 0; } @@ -335,13 +419,22 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ result = return_section_address_fat(secname, proc_ref, base, map); break; default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); + PyErr_Format(PyExc_RuntimeError, + "Unrecognized Mach-O magic number 0x%x in binary file '%s' for section '%s' search", + magic, path, secname); break; } - munmap(map, fs.st_size); + if (munmap(map, fs.st_size) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to unmap binary file '%s' (size: %lld bytes): %s", + path, (long long)fs.st_size, strerror(errno)); + result = 0; + } if (close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close binary file '%s': %s", + path, strerror(errno)); result = 0; } return result; @@ -356,7 +449,10 @@ pid_to_task(pid_t pid) result = task_for_pid(mach_task_self(), pid, &task); if (result != KERN_SUCCESS) { - PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); + PyErr_Format(PyExc_PermissionError, + "Cannot get task port for PID %d (kern_return_t: %d). 
" + "This typically requires running as root or having the 'com.apple.system-task-ports' entitlement.", + pid, result); return 0; } return task; @@ -373,13 +469,15 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s mach_port_t proc_ref = pid_to_task(handle->pid); if (proc_ref == 0) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); + PyErr_Format(PyExc_PermissionError, + "Cannot get task port for PID %d during section search", + handle->pid); } return 0; } - int match_found = 0; char map_filename[MAXPATHLEN + 1]; + while (mach_vm_region( proc_ref, &address, @@ -389,6 +487,7 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s &count, &object_name) == KERN_SUCCESS) { + if ((region_info.protection & VM_PROT_READ) == 0 || (region_info.protection & VM_PROT_EXECUTE) == 0) { address += size; @@ -409,21 +508,21 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s filename = map_filename; // No path, use the whole string } - if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { - match_found = 1; - return search_section_in_file( + if (strncmp(filename, substr, strlen(substr)) == 0) { + uintptr_t result = search_section_in_file( secname, map_filename, address, size, proc_ref); + if (result != 0) { + return result; + } } address += size; } - PyErr_SetString(PyExc_RuntimeError, - "mach_vm_region failed to find the section"); return 0; } -#endif // (__APPLE__ && TARGET_OS_OSX) +#endif // (__APPLE__ && defined(TARGET_OS_OSX) && TARGET_OS_OSX) #if defined(__linux__) && HAVE_PROCESS_VM_READV static uintptr_t @@ -442,24 +541,38 @@ search_elf_file_for_section( int fd = open(elf_file, O_RDONLY); if (fd < 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot open ELF file '%s' for section '%s' search: %s", + elf_file, secname, strerror(errno)); goto exit; } struct stat file_stats; if (fstat(fd, &file_stats) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot get file size for ELF file '%s' during section '%s' search: %s", + elf_file, secname, strerror(errno)); goto exit; } file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (file_memory == MAP_FAILED) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot memory map ELF file '%s' (size: %lld bytes) for section '%s' search: %s", + elf_file, (long long)file_stats.st_size, secname, strerror(errno)); goto exit; } Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; + // Validate ELF header + if (elf_header->e_shstrndx >= elf_header->e_shnum) { + PyErr_Format(PyExc_RuntimeError, + "Invalid ELF file '%s': string table index %u >= section count %u", + elf_file, elf_header->e_shstrndx, elf_header->e_shnum); + goto exit; + } + Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff); Elf_Shdr* shstrtab_section = §ion_header_table[elf_header->e_shstrndx]; @@ -476,6 +589,10 @@ search_elf_file_for_section( } } + if (section == NULL) { + goto exit; + } + Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff); // Find the first PT_LOAD segment Elf_Phdr* first_load_segment = NULL; @@ -486,18 +603,25 @@ search_elf_file_for_section( } } - if (section != NULL && first_load_segment != NULL) { - uintptr_t elf_load_addr = first_load_segment->p_vaddr - - (first_load_segment->p_vaddr % first_load_segment->p_align); - result = start_address + (uintptr_t)section->sh_addr - 
elf_load_addr; + if (first_load_segment == NULL) { + PyErr_Format(PyExc_RuntimeError, + "No PT_LOAD segment found in ELF file '%s' (%u program headers examined)", + elf_file, elf_header->e_phnum); + goto exit; } + uintptr_t elf_load_addr = first_load_segment->p_vaddr + - (first_load_segment->p_vaddr % first_load_segment->p_align); + result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + exit: if (file_memory != NULL) { munmap(file_memory, file_stats.st_size); } if (fd >= 0 && close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close ELF file '%s': %s", + elf_file, strerror(errno)); result = 0; } return result; @@ -511,7 +635,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c FILE* maps_file = fopen(maps_file_path, "r"); if (maps_file == NULL) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot open process memory map file '%s' for PID %d section search: %s", + maps_file_path, handle->pid, strerror(errno)); return 0; } @@ -520,11 +646,14 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c char *line = PyMem_Malloc(linesz); if (!line) { fclose(maps_file); - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot allocate memory for reading process map file '%s'", + maps_file_path); return 0; } uintptr_t retval = 0; + while (fgets(line + linelen, linesz - linelen, maps_file) != NULL) { linelen = strlen(line); if (line[linelen - 1] != '\n') { @@ -535,7 +664,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c if (!biggerline) { PyMem_Free(line); fclose(maps_file); - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot reallocate memory while reading process map file '%s' (attempted size: %zu)", + maps_file_path, linesz); return 0; } line = biggerline; @@ -575,7 +706,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c PyMem_Free(line); if (fclose(maps_file) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close process map file '%s': %s", + maps_file_path, strerror(errno)); retval = 0; } @@ -591,11 +724,20 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot open PE file for section '%s' analysis (error %lu)", + secname, error); return NULL; } + HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, 0); if (!hMap) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot create file mapping for PE file section '%s' analysis (error %lu)", + secname, error); CloseHandle(hFile); return NULL; } @@ -603,6 +745,10 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* BYTE* mapView = (BYTE*)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); if (!mapView) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot map view of PE file for section '%s' analysis (error %lu)", + secname, error); CloseHandle(hMap); CloseHandle(hFile); return NULL; @@ -610,7 +756,9 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* IMAGE_DOS_HEADER* pDOSHeader = (IMAGE_DOS_HEADER*)mapView; if 
(pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid DOS signature."); + PyErr_Format(PyExc_RuntimeError, + "Invalid DOS signature (0x%x) in PE file for section '%s' analysis (expected 0x%x)", + pDOSHeader->e_magic, secname, IMAGE_DOS_SIGNATURE); UnmapViewOfFile(mapView); CloseHandle(hMap); CloseHandle(hFile); @@ -619,7 +767,9 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* IMAGE_NT_HEADERS* pNTHeaders = (IMAGE_NT_HEADERS*)(mapView + pDOSHeader->e_lfanew); if (pNTHeaders->Signature != IMAGE_NT_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid NT signature."); + PyErr_Format(PyExc_RuntimeError, + "Invalid NT signature (0x%lx) in PE file for section '%s' analysis (expected 0x%lx)", + pNTHeaders->Signature, secname, IMAGE_NT_SIGNATURE); UnmapViewOfFile(mapView); CloseHandle(hMap); CloseHandle(hFile); @@ -653,7 +803,12 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const } while (hProcSnap == INVALID_HANDLE_VALUE && GetLastError() == ERROR_BAD_LENGTH); if (hProcSnap == INVALID_HANDLE_VALUE) { - PyErr_SetString(PyExc_PermissionError, "Unable to create module snapshot. Check permissions or PID."); + PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_PermissionError, + "Unable to create module snapshot for PID %d section '%s' " + "search (error %lu). Check permissions or PID validity", + handle->pid, secname, error); return 0; } @@ -672,6 +827,7 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const } CloseHandle(hProcSnap); + return (uintptr_t)runtime_addr; } @@ -689,7 +845,9 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d on Windows platform", + handle->pid); _PyErr_ChainExceptions1(exc); } #elif defined(__linux__) @@ -698,16 +856,28 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d on Linux platform", + handle->pid); _PyErr_ChainExceptions1(exc); } -#elif defined(__APPLE__) && TARGET_OS_OSX +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "PyRuntime", "libpython"); - if (address == 0) { - // TODO: Differentiate between not found and error + const char* candidates[] = {"libpython", "python", "Python", NULL}; + for (const char** candidate = candidates; *candidate; candidate++) { PyErr_Clear(); - address = search_map_for_section(handle, "PyRuntime", "python"); + address = search_map_for_section(handle, "PyRuntime", *candidate); + if (address != 0) { + break; + } + } + if (address == 0) { + PyObject *exc = PyErr_GetRaisedException(); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d " + "on macOS platform (tried both libpython and python)", + handle->pid); + _PyErr_ChainExceptions1(exc); } #else Py_UNREACHABLE(); @@ -716,6 
+886,61 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) return address; } +#if defined(__linux__) && HAVE_PROCESS_VM_READV + +static int +open_proc_mem_fd(proc_handle_t *handle) +{ + char mem_file_path[64]; + sprintf(mem_file_path, "/proc/%d/mem", handle->pid); + + handle->memfd = open(mem_file_path, O_RDWR); + if (handle->memfd == -1) { + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "failed to open file %s: %s", mem_file_path, strerror(errno)); + return -1; + } + return 0; +} + +// Why is pwritev not guarded? Except on Android API level 23 (no longer +// supported), HAVE_PROCESS_VM_READV is sufficient. +static int +read_remote_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t read_bytes = 0; + + do { + local[0].iov_base = (char*)dst + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + read_bytes = preadv(handle->memfd, local, 1, offset); + if (read_bytes < 0) { + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "preadv failed for PID %d at address 0x%lx " + "(size %zu, partial read %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); + return -1; + } + + result += read_bytes; + } while ((size_t)read_bytes != local[0].iov_len); + return 0; +} + +#endif // __linux__ + // Platform-independent memory read function static int _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) @@ -726,12 +951,20 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address do { if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + _set_debug_exception_cause(PyExc_OSError, + "ReadProcessMemory failed for PID %d at address 0x%lx " + "(size %zu, partial read %zu bytes): Windows error %lu", + handle->pid, remote_address + result, len - result, result, error); return -1; } result += read_bytes; } while (result < len); return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return read_remote_memory_fallback(handle, remote_address, len, dst); + } struct iovec local[1]; struct iovec remote[1]; Py_ssize_t result = 0; @@ -745,17 +978,24 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); if (read_bytes < 0) { + if (errno == ENOSYS) { + return read_remote_memory_fallback(handle, remote_address, len, dst); + } PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "process_vm_readv failed for PID %d at address 0x%lx " + "(size %zu, partial read %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); return -1; } result += read_bytes; } while ((size_t)read_bytes != local[0].iov_len); return 0; -#elif defined(__APPLE__) && TARGET_OS_OSX +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(handle->pid), + handle->task, (mach_vm_address_t)remote_address, len, (mach_vm_address_t)dst, @@ -764,13 +1004,22 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t 
remote_address if (kr != KERN_SUCCESS) { switch (kr) { case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); + PyErr_Format(PyExc_PermissionError, + "Memory protection failure reading from PID %d at address " + "0x%lx (size %zu): insufficient permissions", + handle->pid, remote_address, len); break; case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); + PyErr_Format(PyExc_ValueError, + "Invalid argument to mach_vm_read_overwrite for PID %d at " + "address 0x%lx (size %zu)", + handle->pid, remote_address, len); break; default: - PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_read_overwrite failed for PID %d at address 0x%lx " + "(size %zu): kern_return_t %d", + handle->pid, remote_address, len, kr); } return -1; } @@ -780,6 +1029,15 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #endif } +UNUSED static int +_Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, + uintptr_t addr, + size_t size, + void *out) +{ + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); +} + static int _Py_RemoteDebug_ReadDebugOffsets( proc_handle_t *handle, @@ -789,13 +1047,16 @@ _Py_RemoteDebug_ReadDebugOffsets( *runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); if (!*runtime_start_address) { if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get PyRuntime address"); + PyErr_Format(PyExc_RuntimeError, + "Failed to locate PyRuntime address for PID %d", + handle->pid); } + _set_debug_exception_cause(PyExc_RuntimeError, "PyRuntime address lookup failed during debug offsets initialization"); return -1; } size_t size = sizeof(struct _Py_DebugOffsets); if (0 != _Py_RemoteDebug_ReadRemoteMemory(handle, *runtime_start_address, size, debug_offsets)) { + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read debug offsets structure from remote process"); return -1; } return 0; diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index dd55b7812d4..7aee87ef05a 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -24,6 +24,39 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds return _Py_RemoteDebug_ReadRemoteMemory(handle, remote_address, len, dst); } +// Why is pwritev not guarded? Except on Android API level 23 (no longer +// supported), HAVE_PROCESS_VM_READV is sufficient. 
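+// The fallback writes through /proc/<pid>/mem using the memfd opened by
+// open_proc_mem_fd(); it is taken when process_vm_writev() fails with
+// ENOSYS and, on subsequent calls, whenever the memfd is already open.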
+#if defined(__linux__) && HAVE_PROCESS_VM_READV +static int +write_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t written = 0; + + do { + local[0].iov_base = (char*)src + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + written = pwritev(handle->memfd, local, 1, offset); + if (written < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += written; + } while ((size_t)written != local[0].iov_len); + return 0; +} +#endif // __linux__ + static int write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { @@ -39,6 +72,9 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const } while (result < len); return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return write_memory_fallback(handle, remote_address, len, src); + } struct iovec local[1]; struct iovec remote[1]; Py_ssize_t result = 0; @@ -52,6 +88,9 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const written = process_vm_writev(handle->pid, local, 1, remote, 1, 0); if (written < 0) { + if (errno == ENOSYS) { + return write_memory_fallback(handle, remote_address, len, src); + } PyErr_SetFromErrno(PyExc_OSError); return -1; } diff --git a/Python/specialize.c b/Python/specialize.c index bbe725c8ec8..fe8d04cf344 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -118,6 +118,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); err += add_stat_dict(stats, STORE_SUBSCR, "store_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); + err += add_stat_dict(stats, JUMP_BACKWARD, "jump_backward"); err += add_stat_dict(stats, CALL, "call"); err += add_stat_dict(stats, CALL_KW, "call_kw"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); @@ -2158,7 +2159,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (nargs == 2) { /* isinstance(o1, o2) */ PyInterpreterState *interp = _PyInterpreterState_GET(); - if (callable == interp->callable_cache.isinstance) { + if (callable == interp->callable_cache.isinstance && instr->op.arg == 2) { specialize(instr, CALL_ISINSTANCE); return 0; } @@ -2904,53 +2905,57 @@ int #endif // Py_STATS Py_NO_INLINE void -_Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) +_Py_Specialize_ForIter(_PyStackRef iter, _PyStackRef null_or_index, _Py_CODEUNIT *instr, int oparg) { assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[FOR_ITER] == INLINE_CACHE_ENTRIES_FOR_ITER); PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); PyTypeObject *tp = Py_TYPE(iter_o); + + if (PyStackRef_IsNull(null_or_index)) { #ifdef Py_GIL_DISABLED - // Only specialize for uniquely referenced iterators, so that we know - // they're only referenced by this one thread. This is more limiting - // than we need (even `it = iter(mylist); for item in it:` won't get - // specialized) but we don't have a way to check whether we're the only - // _thread_ who has access to the object. 
- if (!_PyObject_IsUniquelyReferenced(iter_o)) - goto failure; -#endif - if (tp == &PyListIter_Type) { -#ifdef Py_GIL_DISABLED - _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) && - !_PyObject_GC_IS_SHARED(it->it_seq)) { - // Maybe this should just set GC_IS_SHARED in a critical - // section, instead of leaving it to the first iteration? + // Only specialize for uniquely referenced iterators, so that we know + // they're only referenced by this one thread. This is more limiting + // than we need (even `it = iter(mylist); for item in it:` won't get + // specialized) but we don't have a way to check whether we're the only + // _thread_ who has access to the object. + if (!_PyObject_IsUniquelyReferenced(iter_o)) { goto failure; } #endif - specialize(instr, FOR_ITER_LIST); - return; - } - else if (tp == &PyTupleIter_Type) { - specialize(instr, FOR_ITER_TUPLE); - return; - } - else if (tp == &PyRangeIter_Type) { - specialize(instr, FOR_ITER_RANGE); - return; + if (tp == &PyRangeIter_Type) { + specialize(instr, FOR_ITER_RANGE); + return; + } + else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { + // Generators are very much not thread-safe, so don't worry about + // the specialization not being thread-safe. + assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || + instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR + ); + /* Don't specialize if PEP 523 is active */ + if (_PyInterpreterState_GET()->eval_frame) { + goto failure; + } + specialize(instr, FOR_ITER_GEN); + return; + } } - else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { - // Generators are very much not thread-safe, so don't worry about - // the specialization not being thread-safe. 
- assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || - instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR - ); - /* Don't specialize if PEP 523 is active */ - if (_PyInterpreterState_GET()->eval_frame) - goto failure; - specialize(instr, FOR_ITER_GEN); - return; + else { + if (tp == &PyList_Type) { +#ifdef Py_GIL_DISABLED + // Only specialize for lists owned by this thread or shared + if (!_Py_IsOwnedByCurrentThread(iter_o) && !_PyObject_GC_IS_SHARED(iter_o)) { + goto failure; + } +#endif + specialize(instr, FOR_ITER_LIST); + return; + } + else if (tp == &PyTuple_Type) { + specialize(instr, FOR_ITER_TUPLE); + return; + } } failure: SPECIALIZATION_FAIL(FOR_ITER, diff --git a/Python/stackrefs.c b/Python/stackrefs.c index 979a6b1c628..ecc0012ef17 100644 --- a/Python/stackrefs.c +++ b/Python/stackrefs.c @@ -1,4 +1,3 @@ - #include "Python.h" #include "pycore_object.h" @@ -34,12 +33,14 @@ make_table_entry(PyObject *obj, const char *filename, int linenumber) result->filename = filename; result->linenumber = linenumber; result->filename_borrow = NULL; + result->linenumber_borrow = 0; return result; } PyObject * _Py_stackref_get_object(_PyStackRef ref) { + assert(!PyStackRef_IsError(ref)); if (ref.index == 0) { return NULL; } @@ -64,6 +65,7 @@ PyStackRef_Is(_PyStackRef a, _PyStackRef b) PyObject * _Py_stackref_close(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); PyInterpreterState *interp = PyInterpreterState_Get(); if (ref.index >= interp->next_stackref) { _Py_FatalErrorFormat(__func__, "Invalid StackRef with ID %" PRIu64 " at %s:%d\n", (void *)ref.index, filename, linenumber); @@ -128,6 +130,7 @@ _Py_stackref_create(PyObject *obj, const char *filename, int linenumber) void _Py_stackref_record_borrow(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); if (ref.index < INITIAL_STACKREF_INDEX) { return; } @@ -152,6 +155,7 @@ _Py_stackref_record_borrow(_PyStackRef ref, const char *filename, int linenumber void _Py_stackref_associate(PyInterpreterState *interp, PyObject *obj, _PyStackRef ref) { + assert(!PyStackRef_IsError(ref)); assert(ref.index < INITIAL_STACKREF_INDEX); TableEntry *entry = make_table_entry(obj, "builtin-object", 0); if (entry == NULL) { @@ -216,4 +220,12 @@ PyStackRef_IsNullOrInt(_PyStackRef ref) return PyStackRef_IsNull(ref) || PyStackRef_IsTaggedInt(ref); } +_PyStackRef +PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref) +{ + assert(ref.index <= INT_MAX - 2); // No overflow + return (_PyStackRef){ .index = ref.index + 2 }; +} + + #endif diff --git a/Python/symtable.c b/Python/symtable.c index f633e281019..a3d0fff80d2 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -380,8 +380,8 @@ static void dump_symtable(PySTEntryObject* ste) } #endif -#define DUPLICATE_ARGUMENT \ -"duplicate argument '%U' in function definition" +#define DUPLICATE_PARAMETER \ +"duplicate parameter '%U' in function definition" static struct symtable * symtable_new(void) @@ -1494,7 +1494,7 @@ symtable_add_def_helper(struct symtable *st, PyObject *name, int flag, struct _s } if ((flag & DEF_PARAM) && (val & DEF_PARAM)) { /* Is it better to use 'mangled' or 'name' here? 
*/ - PyErr_Format(PyExc_SyntaxError, DUPLICATE_ARGUMENT, name); + PyErr_Format(PyExc_SyntaxError, DUPLICATE_PARAMETER, name); SET_ERROR_LOCATION(st->st_filename, loc); goto error; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 00dce4527fb..ae6cf306735 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -76,12 +76,12 @@ module sys PyObject * -_PySys_GetRequiredAttr(PyObject *name) +PySys_GetAttr(PyObject *name) { if (!PyUnicode_Check(name)) { PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - Py_TYPE(name)->tp_name); + "attribute name must be string, not '%T'", + name); return NULL; } PyThreadState *tstate = _PyThreadState_GET(); @@ -98,7 +98,7 @@ _PySys_GetRequiredAttr(PyObject *name) } PyObject * -_PySys_GetRequiredAttrString(const char *name) +PySys_GetAttrString(const char *name) { PyThreadState *tstate = _PyThreadState_GET(); PyObject *sysdict = tstate->interp->sysdict; @@ -114,12 +114,12 @@ _PySys_GetRequiredAttrString(const char *name) } int -_PySys_GetOptionalAttr(PyObject *name, PyObject **value) +PySys_GetOptionalAttr(PyObject *name, PyObject **value) { if (!PyUnicode_Check(name)) { PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - Py_TYPE(name)->tp_name); + "attribute name must be string, not '%T'", + name); *value = NULL; return -1; } @@ -133,7 +133,7 @@ _PySys_GetOptionalAttr(PyObject *name, PyObject **value) } int -_PySys_GetOptionalAttrString(const char *name, PyObject **value) +PySys_GetOptionalAttrString(const char *name, PyObject **value) { PyThreadState *tstate = _PyThreadState_GET(); PyObject *sysdict = tstate->interp->sysdict; @@ -773,7 +773,7 @@ sys_displayhook(PyObject *module, PyObject *o) } if (PyObject_SetAttr(builtins, _Py_LATIN1_CHR('_'), Py_None) != 0) return NULL; - outf = _PySys_GetRequiredAttr(&_Py_ID(stdout)); + outf = PySys_GetAttr(&_Py_ID(stdout)); if (outf == NULL) { return NULL; } @@ -1643,6 +1643,7 @@ static PyObject * _sys_getwindowsversion_from_kernel32(void) { #ifndef MS_WINDOWS_DESKTOP + PyErr_SetString(PyExc_OSError, "cannot read version info on this platform"); return NULL; #else HANDLE hKernel32; @@ -1670,6 +1671,9 @@ _sys_getwindowsversion_from_kernel32(void) !GetFileVersionInfoW(kernel32_path, 0, verblock_size, verblock) || !VerQueryValueW(verblock, L"", (LPVOID)&ffi, &ffi_len)) { PyErr_SetFromWindowsErr(0); + if (verblock) { + PyMem_RawFree(verblock); + } return NULL; } @@ -2448,26 +2452,63 @@ sys_is_remote_debug_enabled_impl(PyObject *module) #endif } +/*[clinic input] +sys.remote_exec + + pid: int + script: object + +Executes a file containing Python code in a given remote Python process. + +This function returns immediately, and the code will be executed by the +target process's main thread at the next available opportunity, similarly +to how signals are handled. There is no interface to determine when the +code has been executed. The caller is responsible for making sure that +the file still exists whenever the remote process tries to read it and that +it hasn't been overwritten. + +The remote process must be running a CPython interpreter of the same major +and minor version as the local process. If either the local or remote +interpreter is pre-release (alpha, beta, or release candidate) then the +local and remote interpreters must be the same exact version. + +Args: + pid (int): The process ID of the target Python process. + script (str|bytes): The path to a file containing + the Python code to be executed. 
@@ -2448,26 +2452,63 @@ sys_is_remote_debug_enabled_impl(PyObject *module)
 #endif
 }
 
+/*[clinic input]
+sys.remote_exec
+
+    pid: int
+    script: object
+
+Executes a file containing Python code in a given remote Python process.
+
+This function returns immediately, and the code will be executed by the
+target process's main thread at the next available opportunity, similarly
+to how signals are handled. There is no interface to determine when the
+code has been executed. The caller is responsible for making sure that
+the file still exists whenever the remote process tries to read it and that
+it hasn't been overwritten.
+
+The remote process must be running a CPython interpreter of the same major
+and minor version as the local process. If either the local or remote
+interpreter is pre-release (alpha, beta, or release candidate) then the
+local and remote interpreters must be the same exact version.
+
+Args:
+    pid (int): The process ID of the target Python process.
+    script (str|bytes): The path to a file containing
+        the Python code to be executed.
+[clinic start generated code]*/
+
 static PyObject *
-sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script)
+sys_remote_exec_impl(PyObject *module, int pid, PyObject *script)
+/*[clinic end generated code: output=7d94c56afe4a52c0 input=39908ca2c5fe1eb0]*/
 {
-    const char *debugger_script_path = PyUnicode_AsUTF8(script);
-    if (debugger_script_path == NULL) {
+    PyObject *path;
+    const char *debugger_script_path;
+
+    if (PyUnicode_FSConverter(script, &path) == 0) {
+        return NULL;
+    }
+
+    if (PySys_Audit("sys.remote_exec", "iO", pid, script) < 0) {
         return NULL;
     }
+    debugger_script_path = PyBytes_AS_STRING(path);
 
 #ifdef MS_WINDOWS
+    PyObject *unicode_path;
+    if (PyUnicode_FSDecoder(path, &unicode_path) < 0) {
+        goto error;
+    }
     // Use UTF-16 (wide char) version of the path for permission checks
-    wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(script, NULL);
+    wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(unicode_path, NULL);
+    Py_DECREF(unicode_path);
     if (debugger_script_path_w == NULL) {
-        return NULL;
+        goto error;
     }
-
-    // Check file attributes using wide character version (W) instead of ANSI (A)
     DWORD attr = GetFileAttributesW(debugger_script_path_w);
-    PyMem_Free(debugger_script_path_w);
     if (attr == INVALID_FILE_ATTRIBUTES) {
         DWORD err = GetLastError();
+        PyMem_Free(debugger_script_path_w);
         if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) {
             PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist");
         }
@@ -2475,11 +2516,12 @@ sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script)
             PyErr_SetString(PyExc_PermissionError, "Script file cannot be read");
         }
         else {
-            PyErr_SetFromWindowsErr(0);
+            PyErr_SetFromWindowsErr(err);
         }
-        return NULL;
+        goto error;
     }
-#else
+    PyMem_Free(debugger_script_path_w);
+#else // MS_WINDOWS
     if (access(debugger_script_path, F_OK | R_OK) != 0) {
         switch (errno) {
             case ENOENT:
@@ -2491,54 +2533,19 @@ sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script)
             default:
                 PyErr_SetFromErrno(PyExc_OSError);
         }
-        return NULL;
+        goto error;
     }
-#endif
-
+#endif // MS_WINDOWS
     if (_PySysRemoteDebug_SendExec(pid, 0, debugger_script_path) < 0) {
-        return NULL;
+        goto error;
     }
+    Py_DECREF(path);
     Py_RETURN_NONE;
-}
-
-/*[clinic input]
-sys.remote_exec
-
-    pid: int
-    script: object
-
-Executes a file containing Python code in a given remote Python process.
-
-This function returns immediately, and the code will be executed by the
-target process's main thread at the next available opportunity, similarly
-to how signals are handled. There is no interface to determine when the
-code has been executed. The caller is responsible for making sure that
-the file still exists whenever the remote process tries to read it and that
-it hasn't been overwritten.
-
-The remote process must be running a CPython interpreter of the same major
-and minor version as the local process. If either the local or remote
-interpreter is pre-release (alpha, beta, or release candidate) then the
-local and remote interpreters must be the same exact version.
-
-Args:
-    pid (int): The process ID of the target Python process.
-    script (str|bytes): The path to a file containing
-        the Python code to be executed.
-[clinic start generated code]*/
-
-static PyObject *
-sys_remote_exec_impl(PyObject *module, int pid, PyObject *script)
-/*[clinic end generated code: output=7d94c56afe4a52c0 input=39908ca2c5fe1eb0]*/
-{
-    PyObject *ret = NULL;
-    PyObject *path;
-    if (PyUnicode_FSDecoder(script, &path)) {
-        ret = sys_remote_exec_unicode_path(module, pid, path);
-        Py_DECREF(path);
-    }
-    return ret;
+
+error:
+    Py_DECREF(path);
+    return NULL;
 }
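/*
 * A standalone sketch of the POSIX-side precheck performed by
 * sys_remote_exec_impl() above: verify that the script both exists (F_OK)
 * and is readable (R_OK) before asking the target process to run it, and
 * map errno onto distinct error classes. The mapping comments mirror the
 * Windows branch shown in the diff; precheck_script() and the enum are
 * illustrative, not part of CPython.
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

typedef enum { CHECK_OK, CHECK_MISSING, CHECK_UNREADABLE, CHECK_OTHER } check_t;

static check_t
precheck_script(const char *path)
{
    if (access(path, F_OK | R_OK) == 0) {
        return CHECK_OK;
    }
    switch (errno) {
        case ENOENT:
            return CHECK_MISSING;      /* maps to FileNotFoundError */
        case EACCES:
            return CHECK_UNREADABLE;   /* maps to PermissionError */
        default:
            return CHECK_OTHER;        /* generic OSError from errno */
    }
}

int main(int argc, char *argv[])
{
    const char *p = (argc > 1) ? argv[1] : "example_script.py";
    printf("%s -> %d\n", p, (int)precheck_script(p));
    return 0;
}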
@@ -3003,7 +3010,7 @@ static PyObject *
 get_warnoptions(PyThreadState *tstate)
 {
     PyObject *warnoptions;
-    if (_PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) {
+    if (PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) {
         return NULL;
     }
     if (warnoptions == NULL || !PyList_Check(warnoptions)) {
@@ -3040,7 +3047,7 @@ PySys_ResetWarnOptions(void)
     }
 
     PyObject *warnoptions;
-    if (_PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) {
+    if (PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) {
         PyErr_Clear();
         return;
     }
@@ -3104,7 +3111,7 @@ PyAPI_FUNC(int)
 PySys_HasWarnOptions(void)
 {
     PyObject *warnoptions;
-    if (_PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) {
+    if (PySys_GetOptionalAttr(&_Py_ID(warnoptions), &warnoptions) < 0) {
         PyErr_Clear();
         return 0;
     }
@@ -3118,7 +3125,7 @@ static PyObject *
 get_xoptions(PyThreadState *tstate)
 {
     PyObject *xoptions;
-    if (_PySys_GetOptionalAttr(&_Py_ID(_xoptions), &xoptions) < 0) {
+    if (PySys_GetOptionalAttr(&_Py_ID(_xoptions), &xoptions) < 0) {
         return NULL;
     }
     if (xoptions == NULL || !PyDict_Check(xoptions)) {
@@ -3371,7 +3378,7 @@ sys_set_flag(PyObject *flags, Py_ssize_t pos, PyObject *value)
 int
 _PySys_SetFlagObj(Py_ssize_t pos, PyObject *value)
 {
-    PyObject *flags = _PySys_GetRequiredAttrString("flags");
+    PyObject *flags = PySys_GetAttrString("flags");
     if (flags == NULL) {
         return -1;
     }
@@ -3600,6 +3607,18 @@ make_impl_info(PyObject *version_info)
         goto error;
 #endif
 
+    // PEP-734
+#if defined(__wasi__) || defined(__EMSCRIPTEN__)
+    // It is not enabled on WASM builds just yet
+    value = Py_False;
+#else
+    value = Py_True;
+#endif
+    res = PyDict_SetItemString(impl_info, "supports_isolated_interpreters", value);
+    if (res < 0) {
+        goto error;
+    }
+
     /* dict ready */
 
     ns = _PyNamespace_New(impl_info);
@@ -3933,7 +3952,7 @@ _PySys_UpdateConfig(PyThreadState *tstate)
 #undef COPY_WSTR
 
     // sys.flags
-    PyObject *flags = _PySys_GetRequiredAttrString("flags");
+    PyObject *flags = PySys_GetAttrString("flags");
     if (flags == NULL) {
         return -1;
     }
@@ -4249,7 +4268,7 @@ PySys_SetArgvEx(int argc, wchar_t **argv, int updatepath)
     }
 
     PyObject *sys_path;
-    if (_PySys_GetOptionalAttr(&_Py_ID(path), &sys_path) < 0) {
+    if (PySys_GetOptionalAttr(&_Py_ID(path), &sys_path) < 0) {
         Py_FatalError("can't get sys.path");
     }
     else if (sys_path != NULL) {
@@ -4345,7 +4364,7 @@ sys_write(PyObject *key, FILE *fp, const char *format, va_list va)
 
     PyObject *exc = _PyErr_GetRaisedException(tstate);
     written = PyOS_vsnprintf(buffer, sizeof(buffer), format, va);
-    file = _PySys_GetRequiredAttr(key);
+    file = PySys_GetAttr(key);
     if (sys_pyfile_write(buffer, file) != 0) {
         _PyErr_Clear(tstate);
         fputs(buffer, fp);
@@ -4389,7 +4408,7 @@ sys_format(PyObject *key, FILE *fp, const char *format, va_list va)
 
     PyObject *exc = _PyErr_GetRaisedException(tstate);
     message = PyUnicode_FromFormatV(format, va);
     if (message != NULL) {
-        file = _PySys_GetRequiredAttr(key);
+        file = PySys_GetAttr(key);
         if (sys_pyfile_write_unicode(message, file) != 0) {
             _PyErr_Clear(tstate);
             utf8 = PyUnicode_AsUTF8(message);
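/*
 * A hedged sketch of querying the new PEP 734 capability flag from C,
 * using the PySys_GetAttrString() spelling introduced in this commit.
 * It assumes sys.implementation carries the supports_isolated_interpreters
 * attribute set by make_impl_info() above; the helper name is hypothetical.
 */
#include <Python.h>

static int
query_isolated_interpreter_support(void)
{
    PyObject *impl = PySys_GetAttrString("implementation");
    if (impl == NULL) {
        return -1;                      /* exception set */
    }
    PyObject *flag = PyObject_GetAttrString(impl,
                                            "supports_isolated_interpreters");
    Py_DECREF(impl);
    if (flag == NULL) {
        return -1;
    }
    int result = PyObject_IsTrue(flag); /* 1, 0, or -1 on error */
    Py_DECREF(flag);
    return result;
}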
diff --git a/Python/thread.c b/Python/thread.c
index 4ff5f11a348..18c4af7f634 100644
--- a/Python/thread.c
+++ b/Python/thread.c
@@ -39,7 +39,8 @@
 
 const long long PY_TIMEOUT_MAX = PY_TIMEOUT_MAX_VALUE;
 
-static void PyThread__init_thread(void); /* Forward */
+/* Forward declaration */
+static void PyThread__init_thread(void);
 
 #define initialized _PyRuntime.threads.initialized
 
@@ -71,6 +72,79 @@ PyThread_init_thread(void)
 #endif
 
 
+/*
+ * Lock support.
+ */
+
+PyThread_type_lock
+PyThread_allocate_lock(void)
+{
+    if (!initialized) {
+        PyThread_init_thread();
+    }
+
+    PyMutex *lock = (PyMutex *)PyMem_RawMalloc(sizeof(PyMutex));
+    if (lock) {
+        *lock = (PyMutex){0};
+    }
+
+    return (PyThread_type_lock)lock;
+}
+
+void
+PyThread_free_lock(PyThread_type_lock lock)
+{
+    PyMem_RawFree(lock);
+}
+
+PyLockStatus
+PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds,
+                            int intr_flag)
+{
+    PyTime_t timeout;  // relative timeout
+    if (microseconds >= 0) {
+        // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout
+        // overflow to the caller, so clamp the timeout to
+        // [PyTime_MIN, PyTime_MAX].
+        //
+        // PyTime_MAX nanoseconds is around 292.3 years.
+        //
+        // _thread.Lock.acquire() and _thread.RLock.acquire() raise an
+        // OverflowError if microseconds is greater than PY_TIMEOUT_MAX.
+        timeout = _PyTime_FromMicrosecondsClamp(microseconds);
+    }
+    else {
+        timeout = -1;
+    }
+
+    _PyLockFlags flags = _Py_LOCK_DONT_DETACH;
+    if (intr_flag) {
+        flags |= _PY_FAIL_IF_INTERRUPTED;
+    }
+
+    return _PyMutex_LockTimed((PyMutex *)lock, timeout, flags);
+}
+
+void
+PyThread_release_lock(PyThread_type_lock lock)
+{
+    PyMutex_Unlock((PyMutex *)lock);
+}
+
+int
+_PyThread_at_fork_reinit(PyThread_type_lock *lock)
+{
+    _PyMutex_at_fork_reinit((PyMutex *)lock);
+    return 0;
+}
+
+int
+PyThread_acquire_lock(PyThread_type_lock lock, int waitflag)
+{
+    return PyThread_acquire_lock_timed(lock, waitflag ? -1 : 0, /*intr_flag=*/0);
+}
+
+
 /* return the current thread stack size */
 size_t
 PyThread_get_stacksize(void)
@@ -261,11 +335,7 @@ PyThread_GetInfo(void)
 #ifdef HAVE_PTHREAD_STUBS
     value = Py_NewRef(Py_None);
 #elif defined(_POSIX_THREADS)
-#ifdef USE_SEMAPHORES
-    value = PyUnicode_FromString("semaphore");
-#else
-    value = PyUnicode_FromString("mutex+cond");
-#endif
+    value = PyUnicode_FromString("pymutex");
     if (value == NULL) {
         Py_DECREF(threadinfo);
         return NULL;
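/*
 * A usage sketch for the public PyThread lock API, which thread.c above
 * now implements directly on a one-byte PyMutex instead of per-platform
 * semaphores or mutex+condvar pairs. Timeouts are given in microseconds;
 * a negative value blocks indefinitely and zero makes a single attempt.
 * demo_lock() is illustrative, not a CPython function.
 */
#include <Python.h>

static int
demo_lock(void)
{
    PyThread_type_lock lock = PyThread_allocate_lock();
    if (lock == NULL) {
        return -1;
    }

    /* Non-blocking attempt: returns 1 if acquired, 0 otherwise. */
    if (PyThread_acquire_lock(lock, /*waitflag=*/0) == 1) {
        PyThread_release_lock(lock);
    }

    /* Timed attempt: wait up to 50 ms. */
    PyLockStatus st = PyThread_acquire_lock_timed(lock, 50 * 1000,
                                                  /*intr_flag=*/0);
    if (st == PY_LOCK_ACQUIRED) {
        PyThread_release_lock(lock);
    }

    PyThread_free_lock(lock);
    return 0;
}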
diff --git a/Python/thread_nt.h b/Python/thread_nt.h
index e078b98be3c..9a29d14ef67 100644
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@@ -300,98 +300,6 @@ PyThread_hang_thread(void)
     }
 }
 
-/*
- * Lock support. It has to be implemented as semaphores.
- * I [Dag] tried to implement it with mutex but I could find a way to
- * tell whether a thread already own the lock or not.
- */
-PyThread_type_lock
-PyThread_allocate_lock(void)
-{
-    PNRMUTEX mutex;
-
-    if (!initialized)
-        PyThread_init_thread();
-
-    mutex = AllocNonRecursiveMutex() ;
-
-    PyThread_type_lock aLock = (PyThread_type_lock) mutex;
-    assert(aLock);
-
-    return aLock;
-}
-
-void
-PyThread_free_lock(PyThread_type_lock aLock)
-{
-    FreeNonRecursiveMutex(aLock) ;
-}
-
-// WaitForSingleObject() accepts timeout in milliseconds in the range
-// [0; 0xFFFFFFFE] (DWORD type). INFINITE value (0xFFFFFFFF) means no
-// timeout. 0xFFFFFFFE milliseconds is around 49.7 days.
-const DWORD TIMEOUT_MS_MAX = 0xFFFFFFFE;
-
-/*
- * Return 1 on success if the lock was acquired
- *
- * and 0 if the lock was not acquired. This means a 0 is returned
- * if the lock has already been acquired by this thread!
- */
-PyLockStatus
-PyThread_acquire_lock_timed(PyThread_type_lock aLock,
-                            PY_TIMEOUT_T microseconds, int intr_flag)
-{
-    assert(aLock);
-
-    /* Fow now, intr_flag does nothing on Windows, and lock acquires are
-     * uninterruptible.  */
-    PyLockStatus success;
-    PY_TIMEOUT_T milliseconds;
-
-    if (microseconds >= 0) {
-        milliseconds = microseconds / 1000;
-        // Round milliseconds away from zero
-        if (microseconds % 1000 > 0) {
-            milliseconds++;
-        }
-        if (milliseconds > (PY_TIMEOUT_T)TIMEOUT_MS_MAX) {
-            // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout
-            // overflow to the caller, so clamp the timeout to
-            // [0, TIMEOUT_MS_MAX] milliseconds.
-            //
-            // _thread.Lock.acquire() and _thread.RLock.acquire() raise an
-            // OverflowError if microseconds is greater than PY_TIMEOUT_MAX.
-            milliseconds = TIMEOUT_MS_MAX;
-        }
-        assert(milliseconds != INFINITE);
-    }
-    else {
-        milliseconds = INFINITE;
-    }
-
-    if (EnterNonRecursiveMutex((PNRMUTEX)aLock,
-                               (DWORD)milliseconds) == WAIT_OBJECT_0) {
-        success = PY_LOCK_ACQUIRED;
-    }
-    else {
-        success = PY_LOCK_FAILURE;
-    }
-
-    return success;
-}
-int
-PyThread_acquire_lock(PyThread_type_lock aLock, int waitflag)
-{
-    return PyThread_acquire_lock_timed(aLock, waitflag ? -1 : 0, 0);
-}
-
-void
-PyThread_release_lock(PyThread_type_lock aLock)
-{
-    assert(aLock);
-    (void)LeaveNonRecursiveMutex((PNRMUTEX) aLock);
-}
 
 /* minimum/maximum thread stack sizes supported */
 #define THREAD_MIN_STACKSIZE    0x8000          /* 32 KiB */
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index da405824244..13992f95723 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -99,16 +99,6 @@
 #undef HAVE_SEM_CLOCKWAIT
 #endif
 
-/* Whether or not to use semaphores directly rather than emulating them with
- * mutexes and condition variables:
- */
-#if (defined(_POSIX_SEMAPHORES) && !defined(HAVE_BROKEN_POSIX_SEMAPHORES) && \
-     (defined(HAVE_SEM_TIMEDWAIT) || defined(HAVE_SEM_CLOCKWAIT)))
-# define USE_SEMAPHORES
-#else
-# undef USE_SEMAPHORES
-#endif
-
 
 /* On platforms that don't use standard POSIX threads pthread_sigmask()
  * isn't present. DEC threads uses sigprocmask() instead as do most
@@ -442,388 +432,6 @@ PyThread_hang_thread(void)
     }
 }
 
-#ifdef USE_SEMAPHORES
-
-/*
- * Lock support.
- */
-
-PyThread_type_lock
-PyThread_allocate_lock(void)
-{
-    sem_t *lock;
-    int status, error = 0;
-
-    if (!initialized)
-        PyThread_init_thread();
-
-    lock = (sem_t *)PyMem_RawMalloc(sizeof(sem_t));
-
-    if (lock) {
-        status = sem_init(lock,0,1);
-        CHECK_STATUS("sem_init");
-
-        if (error) {
-            PyMem_RawFree((void *)lock);
-            lock = NULL;
-        }
-    }
-
-    return (PyThread_type_lock)lock;
-}
-
-void
-PyThread_free_lock(PyThread_type_lock lock)
-{
-    sem_t *thelock = (sem_t *)lock;
-    int status, error = 0;
-
-    (void) error; /* silence unused-but-set-variable warning */
-
-    if (!thelock)
-        return;
-
-    status = sem_destroy(thelock);
-    CHECK_STATUS("sem_destroy");
-
-    PyMem_RawFree((void *)thelock);
-}
-
-/*
- * As of February 2002, Cygwin thread implementations mistakenly report error
- * codes in the return value of the sem_ calls (like the pthread_ functions).
- * Correct implementations return -1 and put the code in errno. This supports
- * either.
- */
-static int
-fix_status(int status)
-{
-    return (status == -1) ? errno : status;
-}
-
-PyLockStatus
-PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds,
-                            int intr_flag)
-{
-    PyLockStatus success;
-    sem_t *thelock = (sem_t *)lock;
-    int status, error = 0;
-
-    (void) error; /* silence unused-but-set-variable warning */
-
-    PyTime_t timeout;  // relative timeout
-    if (microseconds >= 0) {
-        // bpo-41710: PyThread_acquire_lock_timed() cannot report timeout
-        // overflow to the caller, so clamp the timeout to
-        // [PyTime_MIN, PyTime_MAX].
-        //
-        // PyTime_MAX nanoseconds is around 292.3 years.
-        //
-        // _thread.Lock.acquire() and _thread.RLock.acquire() raise an
-        // OverflowError if microseconds is greater than PY_TIMEOUT_MAX.
-        timeout = _PyTime_FromMicrosecondsClamp(microseconds);
-    }
-    else {
-        timeout = -1;
-    }
-
-#ifdef HAVE_SEM_CLOCKWAIT
-    struct timespec abs_timeout;
-    // Local scope for deadline
-    {
-        PyTime_t now;
-        // silently ignore error: cannot report error to the caller
-        (void)PyTime_MonotonicRaw(&now);
-        PyTime_t deadline = _PyTime_Add(now, timeout);
-        _PyTime_AsTimespec_clamp(deadline, &abs_timeout);
-    }
-#else
-    PyTime_t deadline = 0;
-    if (timeout > 0 && !intr_flag) {
-        deadline = _PyDeadline_Init(timeout);
-    }
-#endif
-
-    while (1) {
-        if (timeout > 0) {
-#ifdef HAVE_SEM_CLOCKWAIT
-            status = fix_status(sem_clockwait(thelock, CLOCK_MONOTONIC,
-                                              &abs_timeout));
-#else
-            PyTime_t now;
-            // silently ignore error: cannot report error to the caller
-            (void)PyTime_TimeRaw(&now);
-            PyTime_t abs_time = _PyTime_Add(now, timeout);
-
-            struct timespec ts;
-            _PyTime_AsTimespec_clamp(abs_time, &ts);
-            status = fix_status(sem_timedwait(thelock, &ts));
-#endif
-        }
-        else if (timeout == 0) {
-            status = fix_status(sem_trywait(thelock));
-        }
-        else {
-            status = fix_status(sem_wait(thelock));
-        }
-
-        /* Retry if interrupted by a signal, unless the caller wants to be
-           notified. */
-        if (intr_flag || status != EINTR) {
-            break;
-        }
-
-        // sem_clockwait() uses an absolute timeout, there is no need
-        // to recompute the relative timeout.
-#ifndef HAVE_SEM_CLOCKWAIT
-        if (timeout > 0) {
-            /* wait interrupted by a signal (EINTR): recompute the timeout */
-            timeout = _PyDeadline_Get(deadline);
-            if (timeout < 0) {
-                status = ETIMEDOUT;
-                break;
-            }
-        }
-#endif
-    }
-
-    /* Don't check the status if we're stopping because of an interrupt. */
-    if (!(intr_flag && status == EINTR)) {
-        if (timeout > 0) {
-            if (status != ETIMEDOUT) {
-#ifdef HAVE_SEM_CLOCKWAIT
-                CHECK_STATUS("sem_clockwait");
-#else
-                CHECK_STATUS("sem_timedwait");
-#endif
-            }
-        }
-        else if (timeout == 0) {
-            if (status != EAGAIN) {
-                CHECK_STATUS("sem_trywait");
-            }
-        }
-        else {
-            CHECK_STATUS("sem_wait");
-        }
-    }
-
-    if (status == 0) {
-        success = PY_LOCK_ACQUIRED;
-    } else if (intr_flag && status == EINTR) {
-        success = PY_LOCK_INTR;
-    } else {
-        success = PY_LOCK_FAILURE;
-    }
-
-    return success;
-}
-
-void
-PyThread_release_lock(PyThread_type_lock lock)
-{
-    sem_t *thelock = (sem_t *)lock;
-    int status, error = 0;
-
-    (void) error; /* silence unused-but-set-variable warning */
-
-    status = sem_post(thelock);
-    CHECK_STATUS("sem_post");
-}
-
-#else /* USE_SEMAPHORES */
-
-/*
- * Lock support.
- */
-PyThread_type_lock
-PyThread_allocate_lock(void)
-{
-    pthread_lock *lock;
-    int status, error = 0;
-
-    if (!initialized)
-        PyThread_init_thread();
-
-    lock = (pthread_lock *) PyMem_RawCalloc(1, sizeof(pthread_lock));
-    if (lock) {
-        lock->locked = 0;
-
-        status = pthread_mutex_init(&lock->mut, NULL);
-        CHECK_STATUS_PTHREAD("pthread_mutex_init");
-        /* Mark the pthread mutex underlying a Python mutex as
-           pure happens-before.  We can't simply mark the
-           Python-level mutex as a mutex because it can be
-           acquired and released in different threads, which
-           will cause errors. */
-        _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(&lock->mut);
-
-        status = _PyThread_cond_init(&lock->lock_released);
-        CHECK_STATUS_PTHREAD("pthread_cond_init");
-
-        if (error) {
-            PyMem_RawFree((void *)lock);
-            lock = 0;
-        }
-    }
-
-    return (PyThread_type_lock) lock;
-}
-
-void
-PyThread_free_lock(PyThread_type_lock lock)
-{
-    pthread_lock *thelock = (pthread_lock *)lock;
-    int status, error = 0;
-
-    (void) error; /* silence unused-but-set-variable warning */
-
-    /* some pthread-like implementations tie the mutex to the cond
-     * and must have the cond destroyed first.
-     */
-    status = pthread_cond_destroy( &thelock->lock_released );
-    CHECK_STATUS_PTHREAD("pthread_cond_destroy");
-
-    status = pthread_mutex_destroy( &thelock->mut );
-    CHECK_STATUS_PTHREAD("pthread_mutex_destroy");
-
-    PyMem_RawFree((void *)thelock);
-}
-
-PyLockStatus
-PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds,
-                            int intr_flag)
-{
-    PyLockStatus success = PY_LOCK_FAILURE;
-    pthread_lock *thelock = (pthread_lock *)lock;
-    int status, error = 0;
-
-    if (microseconds == 0) {
-        status = pthread_mutex_trylock( &thelock->mut );
-        if (status != EBUSY) {
-            CHECK_STATUS_PTHREAD("pthread_mutex_trylock[1]");
-        }
-    }
-    else {
-        status = pthread_mutex_lock( &thelock->mut );
-        CHECK_STATUS_PTHREAD("pthread_mutex_lock[1]");
-    }
-    if (status != 0) {
-        goto done;
-    }
-
-    if (thelock->locked == 0) {
-        success = PY_LOCK_ACQUIRED;
-        goto unlock;
-    }
-    if (microseconds == 0) {
-        goto unlock;
-    }
-
-    struct timespec abs_timeout;
-    if (microseconds > 0) {
-        _PyThread_cond_after(microseconds, &abs_timeout);
-    }
-    // Continue trying until we get the lock
-
-    // mut must be locked by me -- part of the condition protocol
-    while (1) {
-        if (microseconds > 0) {
-            status = pthread_cond_timedwait(&thelock->lock_released,
-                                            &thelock->mut, &abs_timeout);
-            if (status == 1) {
-                break;
-            }
-            if (status == ETIMEDOUT) {
-                break;
-            }
-            CHECK_STATUS_PTHREAD("pthread_cond_timedwait");
-        }
-        else {
-            status = pthread_cond_wait(
-                &thelock->lock_released,
-                &thelock->mut);
-            CHECK_STATUS_PTHREAD("pthread_cond_wait");
-        }
-
-        if (intr_flag && status == 0 && thelock->locked) {
-            // We were woken up, but didn't get the lock. We probably received
-            // a signal. Return PY_LOCK_INTR to allow the caller to handle
-            // it and retry.
-            success = PY_LOCK_INTR;
-            break;
-        }
-
-        if (status == 0 && !thelock->locked) {
-            success = PY_LOCK_ACQUIRED;
-            break;
-        }
-
-        // Wait got interrupted by a signal: retry
-    }
-
-unlock:
-    if (success == PY_LOCK_ACQUIRED) {
-        thelock->locked = 1;
-    }
-    status = pthread_mutex_unlock( &thelock->mut );
-    CHECK_STATUS_PTHREAD("pthread_mutex_unlock[1]");
-
-done:
-    if (error) {
-        success = PY_LOCK_FAILURE;
-    }
-    return success;
-}
-
-void
-PyThread_release_lock(PyThread_type_lock lock)
-{
-    pthread_lock *thelock = (pthread_lock *)lock;
-    int status, error = 0;
-
-    (void) error; /* silence unused-but-set-variable warning */
-
-    status = pthread_mutex_lock( &thelock->mut );
-    CHECK_STATUS_PTHREAD("pthread_mutex_lock[3]");
-
-    thelock->locked = 0;
-
-    /* wake up someone (anyone, if any) waiting on the lock */
-    status = pthread_cond_signal( &thelock->lock_released );
-    CHECK_STATUS_PTHREAD("pthread_cond_signal");
-
-    status = pthread_mutex_unlock( &thelock->mut );
-    CHECK_STATUS_PTHREAD("pthread_mutex_unlock[3]");
-}
-
-#endif /* USE_SEMAPHORES */
-
-int
-_PyThread_at_fork_reinit(PyThread_type_lock *lock)
-{
-    PyThread_type_lock new_lock = PyThread_allocate_lock();
-    if (new_lock == NULL) {
-        return -1;
-    }
-
-    /* bpo-6721, bpo-40089: The old lock can be in an inconsistent state.
-       fork() can be called in the middle of an operation on the lock done by
-       another thread. So don't call PyThread_free_lock(*lock).
-
-       Leak memory on purpose. Don't release the memory either since the
-       address of a mutex is relevant. Putting two mutexes at the same address
-       can lead to problems. */
-
-    *lock = new_lock;
-    return 0;
-}
-
-int
-PyThread_acquire_lock(PyThread_type_lock lock, int waitflag)
-{
-    return PyThread_acquire_lock_timed(lock, waitflag ? -1 : 0, /*intr_flag=*/0);
-}
 
 /* set the thread stack size.
  * Return 0 if size is valid, -1 if size is invalid,
diff --git a/Python/traceback.c b/Python/traceback.c
index c06cb1a5908..4f674eaf557 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -9,7 +9,6 @@
 #include "pycore_interpframe.h"   // _PyFrame_GetCode()
 #include "pycore_pyerrors.h"      // _PyErr_GetRaisedException()
 #include "pycore_pystate.h"       // _PyThreadState_GET()
-#include "pycore_sysmodule.h"     // _PySys_GetOptionalAttr()
 #include "pycore_traceback.h"     // EXCEPTION_TB_HEADER
 
 #include "frameobject.h"          // PyFrame_New()
@@ -399,7 +398,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
     taillen = strlen(tail);
 
     PyThreadState *tstate = _PyThreadState_GET();
-    if (_PySys_GetOptionalAttr(&_Py_ID(path), &syspath) < 0) {
+    if (PySys_GetOptionalAttr(&_Py_ID(path), &syspath) < 0) {
         PyErr_Clear();
         goto error;
     }
@@ -777,7 +776,7 @@ _PyTraceBack_Print(PyObject *v, const char *header, PyObject *f)
         PyErr_BadInternalCall();
         return -1;
     }
-    if (_PySys_GetOptionalAttrString("tracebacklimit", &limitv) < 0) {
+    if (PySys_GetOptionalAttrString("tracebacklimit", &limitv) < 0) {
         return -1;
     }
     else if (limitv != NULL && PyLong_Check(limitv)) {