diff options
author | mpage <mpage@meta.com> | 2024-12-03 11:20:20 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-03 11:20:20 -0800 |
commit | dabcecfd6dadb9430733105ba36925b290343d31 (patch) | |
tree | 56bd2af5b31dbc182c6cd7c81a33745112871391 /Python/generated_cases.c.h | |
parent | fc5a0dc22483a35068888e828c65796d7a792c14 (diff) | |
download | cpython-dabcecfd6dadb9430733105ba36925b290343d31.tar.gz cpython-dabcecfd6dadb9430733105ba36925b290343d31.zip |
gh-115999: Enable specialization of `CALL` instructions in free-threaded builds (#127123)
The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below.
A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe:
Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref.
Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__.
Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes.
Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure.
A few other miscellaneous changes were also needed:
Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it.
Add missing co_tlbc for _Py_InitCleanup.
Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below.
Diffstat (limited to 'Python/generated_cases.c.h')
-rw-r--r-- | Python/generated_cases.c.h | 21 |
1 files changed, 11 insertions, 10 deletions
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ef191f6f697..33f32aba1e5 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -880,7 +880,7 @@ callable = &stack_pointer[-2 - oparg]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -890,7 +890,7 @@ } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _MAYBE_EXPAND_METHOD @@ -1048,10 +1048,10 @@ DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL); DEOPT_IF(!PyType_Check(callable_o), CALL); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version, CALL); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL); STAT_INC(CALL, hit); @@ -1073,20 +1073,21 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); goto error; } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, @@ -1100,8 +1101,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2383,8 +2382,10 @@ DEOPT_IF(callable_o != interp->callable_cache.list_append, CALL); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o), CALL); + DEOPT_IF(!LOCK_OBJECT(self_o), CALL); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) goto pop_3_error; |