diff options
-rw-r--r-- | Include/internal/pycore_uop_ids.h | 75 | ||||
-rw-r--r-- | Include/internal/pycore_uop_metadata.h | 4 | ||||
-rw-r--r-- | Lib/test/test_capi/test_opt.py | 19 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst | 1 | ||||
-rw-r--r-- | Python/bytecodes.c | 5 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 18 | ||||
-rw-r--r-- | Python/optimizer_analysis.c | 74 | ||||
-rw-r--r-- | Python/optimizer_bytecodes.c | 12 | ||||
-rw-r--r-- | Python/optimizer_cases.c.h | 18 | ||||
-rw-r--r-- | Python/optimizer_symbols.c | 40 |
10 files changed, 179 insertions, 87 deletions
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 6014e1bf3c0..4c270211d4c 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -270,67 +270,68 @@ extern "C" { #define _POP_TOP POP_TOP #define _POP_TOP_LOAD_CONST_INLINE 486 #define _POP_TOP_LOAD_CONST_INLINE_BORROW 487 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 488 +#define _POP_TWO 488 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 489 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 489 +#define _PUSH_FRAME 490 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 490 -#define _PY_FRAME_GENERAL 491 -#define _PY_FRAME_KW 492 -#define _QUICKEN_RESUME 493 -#define _REPLACE_WITH_TRUE 494 +#define _PUSH_NULL_CONDITIONAL 491 +#define _PY_FRAME_GENERAL 492 +#define _PY_FRAME_KW 493 +#define _QUICKEN_RESUME 494 +#define _REPLACE_WITH_TRUE 495 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 495 -#define _SEND 496 -#define _SEND_GEN_FRAME 497 +#define _SAVE_RETURN_OFFSET 496 +#define _SEND 497 +#define _SEND_GEN_FRAME 498 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 498 -#define _STORE_ATTR 499 -#define _STORE_ATTR_INSTANCE_VALUE 500 -#define _STORE_ATTR_SLOT 501 -#define _STORE_ATTR_WITH_HINT 502 +#define _START_EXECUTOR 499 +#define _STORE_ATTR 500 +#define _STORE_ATTR_INSTANCE_VALUE 501 +#define _STORE_ATTR_SLOT 502 +#define _STORE_ATTR_WITH_HINT 503 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 503 -#define _STORE_FAST_0 504 -#define _STORE_FAST_1 505 -#define _STORE_FAST_2 506 -#define _STORE_FAST_3 507 -#define _STORE_FAST_4 508 -#define _STORE_FAST_5 509 -#define _STORE_FAST_6 510 -#define _STORE_FAST_7 511 +#define _STORE_FAST 504 +#define _STORE_FAST_0 505 +#define _STORE_FAST_1 506 +#define _STORE_FAST_2 507 +#define _STORE_FAST_3 508 +#define _STORE_FAST_4 509 +#define _STORE_FAST_5 510 +#define _STORE_FAST_6 511 +#define _STORE_FAST_7 512 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 512 -#define _STORE_SUBSCR 513 -#define _STORE_SUBSCR_DICT 514 -#define _STORE_SUBSCR_LIST_INT 515 +#define _STORE_SLICE 513 +#define _STORE_SUBSCR 514 +#define _STORE_SUBSCR_DICT 515 +#define _STORE_SUBSCR_LIST_INT 516 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 516 -#define _TO_BOOL 517 +#define _TIER2_RESUME_CHECK 517 +#define _TO_BOOL 518 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 518 +#define _TO_BOOL_LIST 519 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 519 +#define _TO_BOOL_STR 520 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 520 -#define _UNPACK_SEQUENCE_LIST 521 -#define _UNPACK_SEQUENCE_TUPLE 522 -#define _UNPACK_SEQUENCE_TWO_TUPLE 523 +#define _UNPACK_SEQUENCE 521 +#define _UNPACK_SEQUENCE_LIST 522 +#define _UNPACK_SEQUENCE_TUPLE 523 +#define _UNPACK_SEQUENCE_TWO_TUPLE 524 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 523 +#define MAX_UOP_ID 524 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 002591318b3..5f8c4f3210a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -63,6 +63,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, [_END_SEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -557,6 +558,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_FRAME] = "_PUSH_FRAME", @@ -708,6 +710,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TWO: + return 2; case _PUSH_NULL: return 0; case _END_FOR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 50c4f19a1ab..98b434313e4 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2137,6 +2137,25 @@ class TestUopsOptimization(unittest.TestCase): self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertIn("_GUARD_IS_TRUE_POP", uops) + def test_set_type_version_sets_type(self): + class C: + A = 1 + + def testfunc(n): + x = 0 + c = C() + for _ in range(n): + x += c.A # Guarded. + x += type(c).A # Unguarded! + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 2 * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_GUARD_TYPE_VERSION", uops) + self.assertNotIn("_CHECK_ATTR_CLASS", uops) + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst new file mode 100644 index 00000000000..c490ecf1560 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-20-13-58-18.gh-issue-131798.hG8xBw.rst @@ -0,0 +1 @@ +Improve the JIT's ability to narrow unknown classes to constant values. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 65f411fa105..a2367026cde 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,11 @@ dummy_func( PyStackRef_CLOSE(value); } + tier2 op(_POP_TWO, (nos, tos --)) { + PyStackRef_CLOSE(tos); + PyStackRef_CLOSE(nos); + } + pure inst(PUSH_NULL, (-- res)) { res = PyStackRef_NULL; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cc521bd7923..1c8239f38ee 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,24 @@ break; } + case _POP_TWO: { + _PyStackRef tos; + _PyStackRef nos; + tos = stack_pointer[-1]; + nos = stack_pointer[-2]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(tos); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(nos); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _PUSH_NULL: { _PyStackRef res; res = PyStackRef_NULL; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b0bd1e9518..53ab289b75c 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -523,6 +523,25 @@ error: } +const uint16_t op_without_push[MAX_UOP_ID + 1] = { + [_COPY] = _NOP, + [_LOAD_CONST_INLINE] = _NOP, + [_LOAD_CONST_INLINE_BORROW] = _NOP, + [_LOAD_FAST] = _NOP, + [_LOAD_FAST_BORROW] = _NOP, + [_LOAD_SMALL_INT] = _NOP, + [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, +}; + +const uint16_t op_without_pop[MAX_UOP_ID + 1] = { + [_POP_TOP] = _NOP, + [_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, +}; + static int remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) @@ -551,50 +570,23 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _NOP; } break; - case _POP_TOP: - case _POP_TOP_LOAD_CONST_INLINE: - case _POP_TOP_LOAD_CONST_INLINE_BORROW: - case _POP_TWO_LOAD_CONST_INLINE_BORROW: - optimize_pop_top_again: + default: { - _PyUOpInstruction *last = &buffer[pc-1]; - while (last->opcode == _NOP) { - last--; - } - switch (last->opcode) { - case _POP_TWO_LOAD_CONST_INLINE_BORROW: - last->opcode = _POP_TOP; + // Cancel out pushes and pops, repeatedly. So: + // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP + // ...becomes: + // _NOP + _POP_TOP + _NOP + while (op_without_pop[opcode]) { + _PyUOpInstruction *last = &buffer[pc - 1]; + while (last->opcode == _NOP) { + last--; + } + if (!op_without_push[last->opcode]) { break; - case _POP_TOP_LOAD_CONST_INLINE: - case _POP_TOP_LOAD_CONST_INLINE_BORROW: - last->opcode = _NOP; - goto optimize_pop_top_again; - case _COPY: - case _LOAD_CONST_INLINE: - case _LOAD_CONST_INLINE_BORROW: - case _LOAD_FAST: - case _LOAD_FAST_BORROW: - case _LOAD_SMALL_INT: - last->opcode = _NOP; - if (opcode == _POP_TOP) { - opcode = buffer[pc].opcode = _NOP; - } - else if (opcode == _POP_TOP_LOAD_CONST_INLINE) { - opcode = buffer[pc].opcode = _LOAD_CONST_INLINE; - } - else if (opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) { - opcode = buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW; - } - else { - assert(opcode == _POP_TWO_LOAD_CONST_INLINE_BORROW); - opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; - goto optimize_pop_top_again; - } + } + last->opcode = op_without_push[last->opcode]; + opcode = buffer[pc].opcode = op_without_pop[opcode]; } - _Py_FALLTHROUGH; - } - default: - { /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ bool needs_ip = opcode == _PUSH_FRAME; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 639b4b7af16..f12cd7b968c 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -118,6 +118,18 @@ dummy_func(void) { sym_set_type(left, &PyLong_Type); } + op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) { + PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); + if (type) { + if (type == sym_get_const(ctx, owner)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + sym_set_const(owner, type); + } + } + } + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { assert(type_version); if (sym_matches_type_version(owner, type_version)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0a539b2829e..602f5e2cfaf 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -102,6 +102,12 @@ break; } + case _POP_TWO: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _PUSH_NULL: { JitOptSymbol *res; res = sym_new_null(ctx); @@ -1259,6 +1265,18 @@ } case _CHECK_ATTR_CLASS: { + JitOptSymbol *owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand0; + PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version); + if (type) { + if (type == sym_get_const(ctx, owner)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + sym_set_const(owner, type); + } + } break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e8a4f87031b..2e619fa6f99 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -200,6 +200,10 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ) bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version) { + PyTypeObject *type = _PyType_LookupByVersion(version); + if (type) { + _Py_uop_sym_set_type(ctx, sym, type); + } JitSymType tag = sym->tag; switch(tag) { case JIT_SYM_NULL_TAG: @@ -215,18 +219,24 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int return true; } case JIT_SYM_KNOWN_VALUE_TAG: - Py_CLEAR(sym->value.value); - sym_set_bottom(ctx, sym); - return false; + if (Py_TYPE(sym->value.value)->tp_version_tag != version) { + Py_CLEAR(sym->value.value); + sym_set_bottom(ctx, sym); + return false; + }; + return true; case JIT_SYM_TUPLE_TAG: - sym_set_bottom(ctx, sym); - return false; + if (PyTuple_Type.tp_version_tag != version) { + sym_set_bottom(ctx, sym); + return false; + }; + return true; case JIT_SYM_TYPE_VERSION_TAG: - if (sym->version.version == version) { - return true; + if (sym->version.version != version) { + sym_set_bottom(ctx, sym); + return false; } - sym_set_bottom(ctx, sym); - return false; + return true; case JIT_SYM_BOTTOM_TAG: return false; case JIT_SYM_NON_NULL_TAG: @@ -266,6 +276,18 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val } return; case JIT_SYM_TUPLE_TAG: + if (PyTuple_CheckExact(const_val)) { + Py_ssize_t len = _Py_uop_sym_tuple_length(sym); + if (len == PyTuple_GET_SIZE(const_val)) { + for (Py_ssize_t i = 0; i < len; i++) { + JitOptSymbol *sym_item = _Py_uop_sym_tuple_getitem(ctx, sym, i); + PyObject *item = PyTuple_GET_ITEM(const_val, i); + _Py_uop_sym_set_const(ctx, sym_item, item); + } + make_const(sym, const_val); + return; + } + } sym_set_bottom(ctx, sym); return; case JIT_SYM_TYPE_VERSION_TAG: |