6 files changed, 251 insertions, 23 deletions
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index fca9b29f9eb..6466d2615cd 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -180,7 +180,7 @@ class Uop:
     properties: Properties
     _size: int = -1
     implicitly_created: bool = False
-    replicated = 0
+    replicated = range(0)
     replicates: "Uop | None" = None
     # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
     instruction_size: int | None = None
@@ -596,6 +596,7 @@ NON_ESCAPING_FUNCTIONS = (
     "PyStackRef_IsNull",
     "PyStackRef_MakeHeapSafe",
     "PyStackRef_None",
+    "PyStackRef_RefcountOnObject",
     "PyStackRef_TYPE",
     "PyStackRef_True",
     "PyTuple_GET_ITEM",
@@ -635,6 +636,10 @@ NON_ESCAPING_FUNCTIONS = (
     "_PyLong_IsNegative",
     "_PyLong_IsNonNegativeCompact",
     "_PyLong_IsZero",
+    "_PyLong_BothAreCompact",
+    "_PyCompactLong_Add",
+    "_PyCompactLong_Multiply",
+    "_PyCompactLong_Subtract",
     "_PyManagedDictPointer_IsValues",
     "_PyObject_GC_IS_SHARED",
     "_PyObject_GC_IS_TRACKED",
@@ -683,6 +688,7 @@ NON_ESCAPING_FUNCTIONS = (
     "PyStackRef_IsValid",
     "PyStackRef_Wrap",
     "PyStackRef_Unwrap",
+    "_PyLong_CheckExactAndCompact",
 )
 
 
@@ -737,7 +743,7 @@ def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingC
                     continue
                 #if not tkn.text.startswith(("Py", "_Py", "monitor")):
                 #    continue
-                if tkn.text.startswith(("sym_", "optimize_")):
+                if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")):
                     # Optimize functions
                     continue
                 if tkn.text.endswith("Check"):
@@ -864,6 +870,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
         needs_prev=variable_used(op, "prev_instr"),
     )
 
+def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
+    """Return `items` with its single oparg-sized array replaced by `<name>_<i>` scalars."""
+    # Leave the list untouched unless exactly one item's size mentions oparg.
+    sized_by_oparg = [pos for pos, item in enumerate(items) if "oparg" in item.size]
+    if len(sized_by_oparg) != 1:
+        return items
+    index = sized_by_oparg[0]
+    array = items[index]
+    try:
+        count = int(eval(array.size.replace("oparg", str(oparg))))
+    except ValueError:
+        return items
+    scalars = [
+        StackItem(f"{array.name}_{i}", "", array.peek, array.used)
+        for i in range(count)
+    ]
+    return items[:index] + scalars + items[index + 1:]
+
+def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
+    # Updates `stack` in place (and returns it): both sides go through expand() for this oparg.
+    stack.inputs, stack.outputs = expand(stack.inputs, oparg), expand(stack.outputs, oparg)
+    return stack
 
 def make_uop(
     name: str,
@@ -883,20 +911,26 @@ def make_uop(
     )
     for anno in op.annotations:
         if anno.startswith("replicate"):
-            result.replicated = int(anno[10:-1])
+            text = anno[10:-1]
+            start, stop = text.split(":")
+            result.replicated = range(int(start), int(stop))
             break
     else:
         return result
-    for oparg in range(result.replicated):
+    for oparg in result.replicated:
         name_x = name + "_" + str(oparg)
         properties = compute_properties(op)
         properties.oparg = False
-        properties.const_oparg = oparg
+        stack = analyze_stack(op)
+        if not variable_used(op, "oparg"):
+            stack = scalarize_stack(stack, oparg)
+        else:
+            properties.const_oparg = oparg
         rep = Uop(
             name=name_x,
             context=op.context,
             annotations=op.annotations,
-            stack=analyze_stack(op),
+            stack=stack,
             caches=analyze_caches(inputs),
             local_stores=find_variable_stores(op),
             body=op.block,
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 10567204dcc..0bcdc5395dc 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None:
             assert name2 in analysis.instructions, f"{name2} doesn't match any instr"
             instr1 = analysis.instructions[name1]
             instr2 = analysis.instructions[name2]
-            assert (
-                len(instr1.parts) == 1
-            ), f"{name1} is not a good superinstruction part"
-            assert (
-                len(instr2.parts) == 1
-            ), f"{name2} is not a good superinstruction part"
-            expansions.append((instr1.parts[0].name, "OPARG_TOP", 0))
-            expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0))
+            for part in instr1.parts:
+                expansions.append((part.name, "OPARG_TOP", 0))
+            for part in instr2.parts:
+                expansions.append((part.name, "OPARG_BOTTOM", 0))
         elif not is_viable_expansion(inst):
             continue
         else:
diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py
index 75805dbd7f3..4556b6d5a74 100644
--- a/Tools/cases_generator/optimizer_generator.py
+++ b/Tools/cases_generator/optimizer_generator.py
@@ -12,6 +12,8 @@ from analyzer import (
     analyze_files,
     StackItem,
     analysis_error,
+    CodeSection,
+    Label,
 )
 from generators_common import (
     DEFAULT_INPUT,
@@ -19,6 +21,7 @@ from generators_common import (
     write_header,
     Emitter,
     TokenIterator,
+    always_true,
 )
 from cwriter import CWriter
 from typing import TextIO
@@ -72,9 +75,12 @@ def validate_uop(override: Uop, uop: Uop) -> None:
 
 def type_name(var: StackItem) -> str:
     if var.is_array():
-        return "JitOptSymbol **"
-    return "JitOptSymbol *"
+        return "JitOptRef *"
+    return "JitOptRef "
 
+def stackref_type_name(var: StackItem) -> str:  # C type prefix for a non-array stack item
+    assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef."
+    return "_PyStackRef "  # trailing space: the variable name is appended directly after it
 
 def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
     variables = {"unused"}
@@ -135,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None:
 
 class OptimizerEmitter(Emitter):
 
+    def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack):
+        super().__init__(out, labels)
+        self.original_uop, self.stack = original_uop, stack
+        # Route the REPLACE_OPCODE_IF_EVALUATES_PURE macro to this emitter's handler.
+        self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure
+
     def emit_save(self, storage: Storage) -> None:
         storage.flush(self.out)
 
@@ -145,6 +157,185 @@ class OptimizerEmitter(Emitter):
         self.out.emit(goto)
         self.out.emit(label)
 
+    def replace_opcode_if_evaluates_pure(
+        self, tkn: Token, tkn_iter: TokenIterator, uop: CodeSection,
+        storage: Storage, inst: Instruction | None,
+    ) -> bool:
+        """Emit the constant-evaluation branch for REPLACE_OPCODE_IF_EVALUATES_PURE.
+
+        Raises analysis_error if no input is named, if a name is not one of the
+        uop's stack inputs, or if an array input or output is involved.
+        Always returns True.
+        """
+        assert isinstance(uop, Uop)
+        # Gather the identifier arguments, consuming tokens through the ';'.
+        named_tokens = []
+        for arg_tkn in tkn_iter:
+            if arg_tkn.kind == "SEMI":
+                break
+            if arg_tkn.kind == "IDENTIFIER":
+                named_tokens.append(arg_tkn)
+        if not named_tokens:
+            raise analysis_error(
+                "To evaluate an operation as pure, it must have at least 1 input",
+                tkn
+            )
+        # Every named identifier has to be one of the uop's stack inputs.
+        known_inputs = {inp.name for inp in uop.stack.inputs}
+        for arg_tkn in named_tokens:
+            if arg_tkn.text in known_inputs:
+                continue
+            raise analysis_error(f"{arg_tkn.text} referenced in "
+                                 f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not "
+                                 f"exist in the base uop's input stack effects",
+                                 arg_tkn)
+        requested = {arg_tkn.text for arg_tkn in named_tokens}
+        used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in requested]
+        assert len(used_stack_inputs) > 0
+        *leading_inputs, final_input = used_stack_inputs
+        emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy())
+        emitter.emit("if (\n")
+        for inp in leading_inputs:
+            emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n")
+        emitter.emit(f"sym_is_safe_const(ctx, {final_input.name})\n")
+        emitter.emit(') {\n')
+        # Keep the symbolic values reachable as <name>_sym before they are shadowed.
+        for inp in used_stack_inputs:
+            if inp.used:
+                emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n")
+        # Redeclare each used input under its own name as a constant stackref.
+        for inp in used_stack_inputs:
+            if inp.is_array():
+                raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn)
+            if inp.used:
+                emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n")
+        # The original uop's outputs are declared as <name>_stackref variables.
+        for outp in self.original_uop.stack.outputs:
+            if outp.is_array():
+                raise analysis_error(
+                    "Array output StackRefs not supported for evaluating pure ops.",
+                    self.original_uop.body.open
+                )
+            emitter.emit(f"_PyStackRef {outp.name}_stackref;\n")
+        storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False)
+        # No reference management of outputs needed.
+        for var in storage.outputs:
+            var.in_local = True
+        emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n")
+        emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False)
+        self.out.start_line()
+        emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n")
+        # Convert the stackref results back into symbols: a peeked output is
+        # only borrowed, any other output gives up (steals) its reference.
+        for outp in self.original_uop.stack.outputs:
+            if outp.peek:
+                emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n")
+            else:
+                emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n")
+        storage.flush(self.out)
+        emitter.emit("break;\n")
+        emitter.emit("}\n")
+        return True
+
+class OptimizerConstantEmitter(OptimizerEmitter):
+    """Emitter variant that rewrites the original uop's output names to `<name>_stackref`."""
+
+    def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack):
+        super().__init__(out, labels, original_uop, stack)
+        # Work on a private copy of the replacer table, then point every
+        # output name of the original uop at the stackref override.
+        self._replacers = dict(self._replacers)
+        for outp in self.original_uop.stack.outputs:
+            self._replacers[outp.name] = self.emit_stackref_override
+
+    def emit_to_with_replacement(
+        self, out: CWriter, tkn_iter: TokenIterator, end: str,
+        uop: CodeSection, storage: Storage, inst: Instruction | None,
+    ) -> Token:
+        """Copy tokens to `out` up to an `end` token outside parentheses and return it.
+
+        A token whose text has a registered replacer is passed to that
+        replacer instead of being copied. Raises analysis_error when the
+        tokens run out before `end` is seen.
+        """
+        depth = 0
+        for tkn in tkn_iter:
+            kind = tkn.kind
+            if depth == 0 and kind == end:
+                return tkn
+            if kind == "LPAREN":
+                depth += 1
+            elif kind == "RPAREN":
+                depth -= 1
+            replacer = self._replacers.get(tkn.text)
+            if replacer is None:
+                out.emit(tkn)
+            else:
+                replacer(tkn, tkn_iter, uop, storage, inst)
+        raise analysis_error(f"Expecting {end}. Reached end of file", tkn)
+
+    def emit_stackref_override(
+        self, tkn: Token, tkn_iter: TokenIterator, uop: CodeSection,
+        storage: Storage, inst: Instruction | None,
+    ) -> bool:
+        """Emit `tkn` immediately followed by "_stackref "; always returns True."""
+        writer = self.out
+        writer.emit(tkn)
+        writer.emit("_stackref ")
+        return True
+
+    def deopt_if(
+        self, tkn: Token, tkn_iter: TokenIterator, uop: CodeSection,
+        storage: Storage, inst: Instruction | None,
+    ) -> bool:
+        """Emit the parenthesised condition as an `if` that sets ctx->done and breaks.
+
+        Returns False when always_true() holds for the condition's first token.
+        """
+        self.out.start_line()
+        self.out.emit("if (")
+        opening = next(tkn_iter)
+        assert opening.kind == "LPAREN"
+        condition_start = tkn_iter.peek()
+        self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst)
+        self.emit(") {\n")
+        next(tkn_iter)  # Skip the semicolon that ends the macro call
+        # We guarantee this will deopt in real-world code
+        # via constants analysis. So just bail.
+        for line in ("ctx->done = true;\n", "break;\n", "}\n"):
+            self.emit(line)
+        return not always_true(condition_start)
+
+    exit_if = deopt_if
+
+    def error_if(
+        self, tkn: Token, tkn_iter: TokenIterator, uop: CodeSection,
+        storage: Storage, inst: Instruction | None,
+    ) -> bool:
+        """Emit `goto error;`, guarded by the condition unless it is always true.
+
+        Returns True only when a guard was emitted.
+        """
+        opening = next(tkn_iter)
+        assert opening.kind == "LPAREN"
+        conditional = not always_true(tkn_iter.peek())
+        if conditional:
+            self.out.emit_at("if ", tkn)
+            self.emit(opening)
+            self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst)
+            self.out.emit(") {\n")
+        else:
+            next(tkn_iter)  # The always-true condition token
+            next(tkn_iter)  # RPAREN
+            self.out.start_line()
+        next(tkn_iter)  # Skip the semicolon that ends the macro call
+        storage.clear_inputs("at ERROR_IF")
+        self.out.emit("goto error;\n")
+        if conditional:
+            self.out.emit("}\n")
+        return conditional
+
+
 def write_uop(
     override: Uop | None,
     uop: Uop,
@@ -175,13 +366,14 @@ def write_uop(
                         cast = f"uint{cache.size*16}_t"
                     out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n")
         if override:
-            emitter = OptimizerEmitter(out, {})
+            emitter = OptimizerEmitter(out, {}, uop, stack.copy())
             # No reference management of inputs needed.
             for var in storage.inputs:  # type: ignore[possibly-undefined]
                 var.in_local = False
             _, storage = emitter.emit_tokens(override, storage, None, False)
             out.start_line()
             storage.flush(out)
+            out.start_line()
         else:
             emit_default(out, uop, stack)
             out.start_line()
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index a6dac481875..c7fe0d162ac 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -379,9 +379,13 @@ class Parser(PLexer):
         while anno := self.expect(lx.ANNOTATION):
             if anno.text == "replicate":
                 self.require(lx.LPAREN)
-                times = self.require(lx.NUMBER)
+                stop = self.require(lx.NUMBER)
+                start_text = "0"
+                if self.expect(lx.COLON):
+                    start_text = stop.text
+                    stop = self.require(lx.NUMBER)
                 self.require(lx.RPAREN)
-                annotations.append(f"replicate({times.text})")
+                annotations.append(f"replicate({start_text}:{stop.text})")
             else:
                 annotations.append(anno.text)
         tkn = self.expect(lx.INST)
diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py
index 276f306dfff..fc3bc47286f 100644
--- a/Tools/cases_generator/tier2_generator.py
+++ b/Tools/cases_generator/tier2_generator.py
@@ -91,7 +91,7 @@ class Tier2Emitter(Emitter):
         self.emit("}\n")
         return not always_true(first_tkn)
 
-    def exit_if(  # type: ignore[override]
+    def exit_if(
         self,
         tkn: Token,
         tkn_iter: TokenIterator,
diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py
index 6f995e5c46b..1cc23837a72 100644
--- a/Tools/cases_generator/uop_metadata_generator.py
+++ b/Tools/cases_generator/uop_metadata_generator.py
@@ -24,7 +24,8 @@ DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_uop_metadata.h"
 
 def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
     out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
-    out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
+    out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n")
+    out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n")
     out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
     out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
     out.emit("#ifdef NEED_OPCODE_METADATA\n")
@@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
             out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
 
     out.emit("};\n\n")
-    out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
+    out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
     for uop in analysis.uops.values():
         if uop.replicated:
-            out.emit(f"[{uop.name}] = {uop.replicated},\n")
+            assert(uop.replicated.step == 1)
+            out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n")
 
     out.emit("};\n\n")
     out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")