diff options
author | Brandt Bucher <brandtbucher@microsoft.com> | 2024-08-12 12:39:31 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-12 12:39:31 -0700 |
commit | 9621a7d0170bf1ec48bcfc35825007cdf75265ea (patch) | |
tree | 354b055c106043806da2b10b9cf144aa98ded0de /Python/optimizer.c | |
parent | 503af8fe9a93ea6bc5bdfc76eb56b106a47c7292 (diff) | |
download | cpython-9621a7d0170bf1ec48bcfc35825007cdf75265ea.tar.gz cpython-9621a7d0170bf1ec48bcfc35825007cdf75265ea.zip |
GH-118093: Handle some polymorphism before requiring progress in tier two (GH-122843)
Diffstat (limited to 'Python/optimizer.c')
-rw-r--r-- | Python/optimizer.c | 85 |
1 files changed, 53 insertions, 32 deletions
diff --git a/Python/optimizer.c b/Python/optimizer.c index e9cbfc54971..1758329c832 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -111,7 +111,8 @@ never_optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, _PyExecutorObject **exec, - int Py_UNUSED(stack_entries)) + int Py_UNUSED(stack_entries), + bool Py_UNUSED(progress_needed)) { // This may be called if the optimizer is reset return 0; @@ -176,32 +177,44 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer) int _PyOptimizer_Optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *start, - _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr) + _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr, int chain_depth) { + // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* + // make progress in order to avoid infinite loops or excessively-long + // side-exit chains. We can only insert the executor into the bytecode if + // this is true, since a deopt won't infinitely re-enter the executor: + chain_depth %= MAX_CHAIN_DEPTH; + bool progress_needed = chain_depth == 0; PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!has_space_for_executor(code, start)) { + if (progress_needed && !has_space_for_executor(code, start)) { return 0; } _PyOptimizerObject *opt = interp->optimizer; - int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame))); + int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { return err; } assert(*executor_ptr != NULL); - int index = get_index_for_executor(code, start); - if (index < 0) { - /* Out of memory. Don't raise and assume that the - * error will show up elsewhere. - * - * If an optimizer has already produced an executor, - * it might get confused by the executor disappearing, - * but there is not much we can do about that here. */ - Py_DECREF(*executor_ptr); - return 0; + if (progress_needed) { + int index = get_index_for_executor(code, start); + if (index < 0) { + /* Out of memory. Don't raise and assume that the + * error will show up elsewhere. + * + * If an optimizer has already produced an executor, + * it might get confused by the executor disappearing, + * but there is not much we can do about that here. */ + Py_DECREF(*executor_ptr); + return 0; + } + insert_executor(code, start, index, *executor_ptr); } - insert_executor(code, start, index, *executor_ptr); + else { + (*executor_ptr)->vm_data.code = NULL; + } + (*executor_ptr)->vm_data.chain_depth = chain_depth; assert((*executor_ptr)->vm_data.valid); return 1; } @@ -530,9 +543,9 @@ translate_bytecode_to_trace( _Py_CODEUNIT *instr, _PyUOpInstruction *trace, int buffer_size, - _PyBloomFilter *dependencies) + _PyBloomFilter *dependencies, bool progress_needed) { - bool progress_needed = true; + bool first = true; PyCodeObject *code = _PyFrame_GetCode(frame); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; assert(PyFunction_Check(func)); @@ -576,7 +589,7 @@ translate_bytecode_to_trace( uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; - if (!progress_needed && instr == initial_instr) { + if (!first && instr == initial_instr) { // We have looped around to the start: RESERVE(1); ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); @@ -585,14 +598,6 @@ translate_bytecode_to_trace( DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg); - if (opcode == ENTER_EXECUTOR) { - assert(oparg < 256); - _PyExecutorObject *executor = code->co_executors->executors[oparg]; - opcode = executor->vm_data.opcode; - DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); - oparg = executor->vm_data.oparg; - } - if (opcode == EXTENDED_ARG) { instr++; opcode = instr->op.code; @@ -602,13 +607,27 @@ translate_bytecode_to_trace( goto done; } } + if (opcode == ENTER_EXECUTOR) { + // We have a couple of options here. We *could* peek "underneath" + // this executor and continue tracing, which could give us a longer, + // more optimizeable trace (at the expense of lots of duplicated + // tier two code). Instead, we choose to just end here and stitch to + // the other trace, which allows a side-exit traces to rejoin the + // "main" trace periodically (and also helps protect us against + // pathological behavior where the amount of tier two code explodes + // for a medium-length, branchy code path). This seems to work + // better in practice, but in the future we could be smarter about + // what we do here: + goto done; + } assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP"); ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); /* Special case the first instruction, * so that we can guarantee forward progress */ - if (progress_needed) { + if (first && progress_needed) { + assert(first); if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) { opcode = _PyOpcode_Deopt[opcode]; } @@ -903,7 +922,7 @@ translate_bytecode_to_trace( } top: // Jump here after _PUSH_FRAME or likely branches. - progress_needed = false; + first = false; } // End for (;;) done: @@ -912,7 +931,7 @@ done: } assert(code == initial_code); // Skip short traces where we can't even translate a single instruction: - if (progress_needed) { + if (first) { OPT_STAT_INC(trace_too_short); DPRINTF(2, "No trace for %s (%s:%d) at byte offset %d (no progress)\n", @@ -1225,13 +1244,14 @@ uop_optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, - int curr_stackentries) + int curr_stackentries, + bool progress_needed) { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; OPT_STAT_INC(attempts); - int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies); + int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies, progress_needed); if (length <= 0) { // Error or nothing translated return length; @@ -1328,7 +1348,8 @@ counter_optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, - int Py_UNUSED(curr_stackentries) + int Py_UNUSED(curr_stackentries), + bool Py_UNUSED(progress_needed) ) { PyCodeObject *code = _PyFrame_GetCode(frame); |