author     Brandt Bucher <brandtbucher@microsoft.com>  2024-08-12 12:39:31 -0700
committer  GitHub <noreply@github.com>                 2024-08-12 12:39:31 -0700
commit     9621a7d0170bf1ec48bcfc35825007cdf75265ea
tree       354b055c106043806da2b10b9cf144aa98ded0de /Python/optimizer.c
parent     503af8fe9a93ea6bc5bdfc76eb56b106a47c7292
GH-118093: Handle some polymorphism before requiring progress in tier two (GH-122843)
Diffstat (limited to 'Python/optimizer.c')
-rw-r--r--  Python/optimizer.c | 85
1 file changed, 53 insertions(+), 32 deletions(-)
diff --git a/Python/optimizer.c b/Python/optimizer.c
index e9cbfc54971..1758329c832 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -111,7 +111,8 @@ never_optimize(
_PyInterpreterFrame *frame,
_Py_CODEUNIT *instr,
_PyExecutorObject **exec,
- int Py_UNUSED(stack_entries))
+ int Py_UNUSED(stack_entries),
+ bool Py_UNUSED(progress_needed))
{
// This may be called if the optimizer is reset
return 0;
@@ -176,32 +177,44 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer)
int
_PyOptimizer_Optimize(
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
- _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr)
+ _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr, int chain_depth)
{
+ // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
+ // make progress in order to avoid infinite loops or excessively long
+ // side-exit chains. We can only insert the executor into the bytecode if
+ // this is true, since a deopt won't infinitely re-enter the executor:
+ chain_depth %= MAX_CHAIN_DEPTH;
+ bool progress_needed = chain_depth == 0;
PyCodeObject *code = _PyFrame_GetCode(frame);
assert(PyCode_Check(code));
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (!has_space_for_executor(code, start)) {
+ if (progress_needed && !has_space_for_executor(code, start)) {
return 0;
}
_PyOptimizerObject *opt = interp->optimizer;
- int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+ int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed);
if (err <= 0) {
return err;
}
assert(*executor_ptr != NULL);
- int index = get_index_for_executor(code, start);
- if (index < 0) {
- /* Out of memory. Don't raise and assume that the
- * error will show up elsewhere.
- *
- * If an optimizer has already produced an executor,
- * it might get confused by the executor disappearing,
- * but there is not much we can do about that here. */
- Py_DECREF(*executor_ptr);
- return 0;
+ if (progress_needed) {
+ int index = get_index_for_executor(code, start);
+ if (index < 0) {
+ /* Out of memory. Don't raise and assume that the
+ * error will show up elsewhere.
+ *
+ * If an optimizer has already produced an executor,
+ * it might get confused by the executor disappearing,
+ * but there is not much we can do about that here. */
+ Py_DECREF(*executor_ptr);
+ return 0;
+ }
+ insert_executor(code, start, index, *executor_ptr);
}
- insert_executor(code, start, index, *executor_ptr);
+ else {
+ (*executor_ptr)->vm_data.code = NULL;
+ }
+ (*executor_ptr)->vm_data.chain_depth = chain_depth;
assert((*executor_ptr)->vm_data.valid);
return 1;
}
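
The chain-depth arithmetic added above is compact enough to misread, so here is a minimal standalone sketch of it (not CPython code; MAX_CHAIN_DEPTH is given an illustrative value of 4 here, which may not match the real constant):

#include <stdbool.h>
#include <stdio.h>

#define MAX_CHAIN_DEPTH 4  /* illustrative value only */

int main(void)
{
    /* Simulate executors chained off one another via side exits. The
     * wraparound makes every MAX_CHAIN_DEPTH'th executor look like a
     * depth-0 one, so it must make progress -- and, per the hunk above,
     * only those executors are inserted into the bytecode. */
    for (int raw_depth = 0; raw_depth < 10; raw_depth++) {
        int chain_depth = raw_depth % MAX_CHAIN_DEPTH;
        bool progress_needed = chain_depth == 0;
        printf("raw depth %d -> stored depth %d, progress %s\n",
               raw_depth, chain_depth,
               progress_needed ? "required" : "not required");
    }
    return 0;
}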
@@ -530,9 +543,9 @@ translate_bytecode_to_trace(
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int buffer_size,
- _PyBloomFilter *dependencies)
+ _PyBloomFilter *dependencies, bool progress_needed)
{
- bool progress_needed = true;
+ bool first = true;
PyCodeObject *code = _PyFrame_GetCode(frame);
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
assert(PyFunction_Check(func));
@@ -576,7 +589,7 @@ translate_bytecode_to_trace(
uint32_t opcode = instr->op.code;
uint32_t oparg = instr->op.arg;
- if (!progress_needed && instr == initial_instr) {
+ if (!first && instr == initial_instr) {
// We have looped around to the start:
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
@@ -585,14 +598,6 @@ translate_bytecode_to_trace(
DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
- if (opcode == ENTER_EXECUTOR) {
- assert(oparg < 256);
- _PyExecutorObject *executor = code->co_executors->executors[oparg];
- opcode = executor->vm_data.opcode;
- DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
- oparg = executor->vm_data.oparg;
- }
-
if (opcode == EXTENDED_ARG) {
instr++;
opcode = instr->op.code;
@@ -602,13 +607,27 @@ translate_bytecode_to_trace(
goto done;
}
}
+ if (opcode == ENTER_EXECUTOR) {
+ // We have a couple of options here. We *could* peek "underneath"
+ // this executor and continue tracing, which could give us a longer,
+ // more optimizable trace (at the expense of lots of duplicated
+ // tier two code). Instead, we choose to just end here and stitch to
+ // the other trace, which allows side-exit traces to rejoin the
+ // "main" trace periodically (and also helps protect us against
+ // pathological behavior where the amount of tier two code explodes
+ // for a medium-length, branchy code path). This seems to work
+ // better in practice, but in the future we could be smarter about
+ // what we do here:
+ goto done;
+ }
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
/* Special case the first instruction,
* so that we can guarantee forward progress */
- if (progress_needed) {
+ if (first && progress_needed) {
+ assert(first);
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
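
The key behavioral change in this hunk: when tracing reaches bytecode that already belongs to another executor, the translator now ends the trace and stitches to that executor instead of peeking underneath it. The standalone toy below models just that "stop and stitch" decision (the opcode values, the Instr struct, and the translate() helper are all made up for illustration; only the control flow echoes the real translator):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins for the real opcodes; the values are arbitrary. */
#define NOP            0
#define EXTENDED_ARG   1
#define ENTER_EXECUTOR 2

typedef struct { uint8_t code; uint8_t arg; } Instr;

/* Unwrap EXTENDED_ARG first, then stop at ENTER_EXECUTOR, mirroring the
 * reordered checks above. Returns how many instructions were traced. */
static int translate(const Instr *instr, int n)
{
    int traced = 0;
    for (int i = 0; i < n; i++) {
        uint32_t opcode = instr[i].code;
        uint32_t oparg = instr[i].arg;
        if (opcode == EXTENDED_ARG) {
            i++;
            opcode = instr[i].code;
            oparg = (oparg << 8) | instr[i].arg;
        }
        if (opcode == ENTER_EXECUTOR) {
            printf("instruction %d is owned by an executor: stitch\n", i);
            break;
        }
        printf("traced opcode %u with oparg %u\n",
               (unsigned)opcode, (unsigned)oparg);
        traced++;
    }
    return traced;
}

int main(void)
{
    Instr stream[] = {
        {NOP, 0}, {EXTENDED_ARG, 1}, {NOP, 2}, {ENTER_EXECUTOR, 0}, {NOP, 0},
    };
    printf("translated %d instructions\n",
           translate(stream, (int)(sizeof(stream) / sizeof(stream[0]))));
    return 0;
}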
@@ -903,7 +922,7 @@ translate_bytecode_to_trace(
}
top:
// Jump here after _PUSH_FRAME or likely branches.
- progress_needed = false;
+ first = false;
} // End for (;;)
done:
@@ -912,7 +931,7 @@ done:
}
assert(code == initial_code);
// Skip short traces where we can't even translate a single instruction:
- if (progress_needed) {
+ if (first) {
OPT_STAT_INC(trace_too_short);
DPRINTF(2,
"No trace for %s (%s:%d) at byte offset %d (no progress)\n",
@@ -1225,13 +1244,14 @@ uop_optimize(
_PyInterpreterFrame *frame,
_Py_CODEUNIT *instr,
_PyExecutorObject **exec_ptr,
- int curr_stackentries)
+ int curr_stackentries,
+ bool progress_needed)
{
_PyBloomFilter dependencies;
_Py_BloomFilter_Init(&dependencies);
_PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
OPT_STAT_INC(attempts);
- int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
+ int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies, progress_needed);
if (length <= 0) {
// Error or nothing translated
return length;
@@ -1328,7 +1348,8 @@ counter_optimize(
_PyInterpreterFrame *frame,
_Py_CODEUNIT *instr,
_PyExecutorObject **exec_ptr,
- int Py_UNUSED(curr_stackentries)
+ int Py_UNUSED(curr_stackentries),
+ bool Py_UNUSED(progress_needed)
)
{
PyCodeObject *code = _PyFrame_GetCode(frame);