Diffstat (limited to 'Tools')
25 files changed, 1220 insertions, 213 deletions
diff --git a/Tools/build/generate-build-details.py b/Tools/build/generate-build-details.py index 87e262065ec..8cd23e2f54f 100644 --- a/Tools/build/generate-build-details.py +++ b/Tools/build/generate-build-details.py @@ -75,7 +75,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: PY3LIBRARY = sysconfig.get_config_var('PY3LIBRARY') LIBPYTHON = sysconfig.get_config_var('LIBPYTHON') LIBPC = sysconfig.get_config_var('LIBPC') - INCLUDEDIR = sysconfig.get_config_var('INCLUDEDIR') + INCLUDEPY = sysconfig.get_config_var('INCLUDEPY') if os.name == 'posix': # On POSIX, LIBRARY is always the static library, while LDLIBRARY is the @@ -123,7 +123,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: if has_static_library: data['libpython']['static'] = os.path.join(LIBDIR, LIBRARY) - data['c_api']['headers'] = INCLUDEDIR + data['c_api']['headers'] = INCLUDEPY if LIBPC: data['c_api']['pkgconfig_path'] = LIBPC diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 5845f2d85c7..968397728b2 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -4,6 +4,7 @@ import glob import hashlib import json import os +import random import re import subprocess import sys @@ -164,16 +165,18 @@ def get_externals() -> list[str]: def download_with_retries(download_location: str, - max_retries: int = 5, - base_delay: float = 2.0) -> typing.Any: + max_retries: int = 7, + base_delay: float = 2.25, + max_jitter: float = 1.0) -> typing.Any: """Download a file with exponential backoff retry.""" - for attempt in range(max_retries): + for attempt in range(max_retries + 1): try: resp = urllib.request.urlopen(download_location) - except urllib.error.URLError as ex: + except (urllib.error.URLError, ConnectionError) as ex: if attempt == max_retries: - raise ex - time.sleep(base_delay**attempt) + msg = f"Download from {download_location} failed." 
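The retry schedule introduced here is worth seeing in isolation: each failed attempt sleeps base_delay**attempt plus a uniform jitter term, so concurrent clients retrying the same URL spread out rather than stampeding. A minimal standalone sketch of that schedule (the generator name and printed values are illustrative, not part of the patch):

    import random

    def backoff_delays(max_retries=7, base_delay=2.25, max_jitter=1.0):
        # One delay per failed attempt: exponential growth plus jitter,
        # mirroring the time.sleep() call in download_with_retries.
        for attempt in range(max_retries):
            yield base_delay**attempt + random.uniform(0, max_jitter)

    print([round(d, 1) for d in backoff_delays()])  # e.g. [1.6, 2.9, 5.8, 11.7, ...]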
+ raise OSError(msg) from ex + time.sleep(base_delay**attempt + random.uniform(0, max_jitter)) else: return resp diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 96d64a27e0a..6466d2615cd 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -135,15 +135,13 @@ class Flush: @dataclass class StackItem: name: str - type: str | None size: str peek: bool = False used: bool = False def __str__(self) -> str: size = f"[{self.size}]" if self.size else "" - type = "" if self.type is None else f"{self.type} " - return f"{type}{self.name}{size} {self.peek}" + return f"{self.name}{size} {self.peek}" def is_array(self) -> bool: return self.size != "" @@ -182,7 +180,7 @@ class Uop: properties: Properties _size: int = -1 implicitly_created: bool = False - replicated = 0 + replicated = range(0) replicates: "Uop | None" = None # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro instruction_size: int | None = None @@ -345,7 +343,7 @@ def override_error( def convert_stack_item( item: parser.StackEffect, replace_op_arg_1: str | None ) -> StackItem: - return StackItem(item.name, item.type, item.size) + return StackItem(item.name, item.size) def check_unused(stack: list[StackItem], input_names: dict[str, lexer.Token]) -> None: "Unused items cannot be on the stack above used, non-peek items" @@ -598,6 +596,7 @@ NON_ESCAPING_FUNCTIONS = ( "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", @@ -637,6 +636,10 @@ NON_ESCAPING_FUNCTIONS = ( "_PyLong_IsNegative", "_PyLong_IsNonNegativeCompact", "_PyLong_IsZero", + "_PyLong_BothAreCompact", + "_PyCompactLong_Add", + "_PyCompactLong_Multiply", + "_PyCompactLong_Subtract", "_PyManagedDictPointer_IsValues", "_PyObject_GC_IS_SHARED", "_PyObject_GC_IS_TRACKED", @@ -679,8 +682,16 @@ NON_ESCAPING_FUNCTIONS = ( "PyStackRef_IsTaggedInt", "PyStackRef_TagInt", "PyStackRef_UntagInt", + "PyStackRef_IncrementTaggedIntNoOverflow", + "PyStackRef_IsNullOrInt", + "PyStackRef_IsError", + "PyStackRef_IsValid", + "PyStackRef_Wrap", + "PyStackRef_Unwrap", + "_PyLong_CheckExactAndCompact", ) + def check_escaping_calls(instr: parser.CodeDef, escapes: dict[SimpleStmt, EscapingCall]) -> None: error: lexer.Token | None = None calls = {e.call for e in escapes.values()} @@ -732,7 +743,7 @@ def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingC continue #if not tkn.text.startswith(("Py", "_Py", "monitor")): # continue - if tkn.text.startswith(("sym_", "optimize_")): + if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): # Optimize functions continue if tkn.text.endswith("Check"): @@ -806,7 +817,7 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: if len(stack_inputs) == 0: return False return all( - (s.name == other.name and s.type == other.type and s.size == other.size) + (s.name == other.name and s.size == other.size) for s, other in zip(stack_inputs, instr.outputs) ) @@ -859,6 +870,28 @@ def compute_properties(op: parser.CodeDef) -> Properties: needs_prev=variable_used(op, "prev_instr"), ) +def expand(items: list[StackItem], oparg: int) -> list[StackItem]: + # Only replace array item with scalar if no more than one item is an array + index = -1 + for i, item in enumerate(items): + if "oparg" in item.size: + if index >= 0: + return items + index = i + if index < 0: + return items + try: + count = int(eval(items[index].size.replace("oparg", 
str(oparg)))) + except ValueError: + return items + return items[:index] + [ + StackItem(items[index].name + f"_{i}", "", items[index].peek, items[index].used) for i in range(count) + ] + items[index+1:] + +def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect: + stack.inputs = expand(stack.inputs, oparg) + stack.outputs = expand(stack.outputs, oparg) + return stack def make_uop( name: str, @@ -878,20 +911,26 @@ def make_uop( ) for anno in op.annotations: if anno.startswith("replicate"): - result.replicated = int(anno[10:-1]) + text = anno[10:-1] + start, stop = text.split(":") + result.replicated = range(int(start), int(stop)) break else: return result - for oparg in range(result.replicated): + for oparg in result.replicated: name_x = name + "_" + str(oparg) properties = compute_properties(op) properties.oparg = False - properties.const_oparg = oparg + stack = analyze_stack(op) + if not variable_used(op, "oparg"): + stack = scalarize_stack(stack, oparg) + else: + properties.const_oparg = oparg rep = Uop( name=name_x, context=op.context, annotations=op.annotations, - stack=analyze_stack(op), + stack=stack, caches=analyze_caches(inputs), local_stores=find_variable_stores(op), body=op.block, diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 9e60d219a71..4c210fbf8d2 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -56,9 +56,7 @@ def root_relative_path(filename: str) -> str: def type_and_null(var: StackItem) -> tuple[str, str]: - if var.type: - return var.type, "NULL" - elif var.is_array(): + if var.is_array(): return "_PyStackRef *", "NULL" else: return "_PyStackRef", "PyStackRef_NULL" @@ -108,8 +106,9 @@ class Emitter: out: CWriter labels: dict[str, Label] _replacers: dict[str, ReplacementFunctionType] + cannot_escape: bool - def __init__(self, out: CWriter, labels: dict[str, Label]): + def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = False): self._replacers = { "EXIT_IF": self.exit_if, "DEOPT_IF": self.deopt_if, @@ -129,6 +128,7 @@ class Emitter: } self.out = out self.labels = labels + self.cannot_escape = cannot_escape def dispatch( self, @@ -140,6 +140,7 @@ class Emitter: ) -> bool: if storage.spilled: raise analysis_error("stack_pointer needs reloading before dispatch", tkn) + storage.stack.flush(self.out) self.emit(tkn) return False @@ -239,7 +240,8 @@ class Emitter: next(tkn_iter) self._print_storage("DECREF_INPUTS", storage) try: - storage.close_inputs(self.out) + if not self.cannot_escape: + storage.close_inputs(self.out) except StackError as ex: raise analysis_error(ex.args[0], tkn) except Exception as ex: @@ -477,7 +479,7 @@ class Emitter: reachable = True tkn = stmt.contents[-1] try: - if stmt in uop.properties.escaping_calls: + if stmt in uop.properties.escaping_calls and not self.cannot_escape: escape = uop.properties.escaping_calls[stmt] if escape.kills is not None: self.stackref_kill(escape.kills, storage, True) @@ -514,7 +516,7 @@ class Emitter: self.out.emit(tkn) else: self.out.emit(tkn) - if stmt in uop.properties.escaping_calls: + if stmt in uop.properties.escaping_calls and not self.cannot_escape: self.emit_reload(storage) return reachable, None, storage except StackError as ex: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 10567204dcc..0bcdc5395dc 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ 
b/Tools/cases_generator/opcode_metadata_generator.py @@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index fda022a44e5..81ae534bdda 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -12,6 +12,8 @@ from analyzer import ( analyze_files, StackItem, analysis_error, + CodeSection, + Label, ) from generators_common import ( DEFAULT_INPUT, @@ -19,6 +21,7 @@ from generators_common import ( write_header, Emitter, TokenIterator, + always_true, ) from cwriter import CWriter from typing import TextIO @@ -72,11 +75,12 @@ def validate_uop(override: Uop, uop: Uop) -> None: def type_name(var: StackItem) -> str: if var.is_array(): - return "JitOptSymbol **" - if var.type: - return var.type - return "JitOptSymbol *" + return "JitOptRef *" + return "JitOptRef " +def stackref_type_name(var: StackItem) -> str: + assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef." + return "_PyStackRef " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: variables = {"unused"} @@ -137,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None: class OptimizerEmitter(Emitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels) + self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure + self.original_uop = original_uop + self.stack = stack + def emit_save(self, storage: Storage) -> None: storage.flush(self.out) @@ -147,6 +157,186 @@ class OptimizerEmitter(Emitter): self.out.emit(goto) self.out.emit(label) + def replace_opcode_if_evaluates_pure( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + assert isinstance(uop, Uop) + input_identifiers = [] + for token in tkn_iter: + if token.kind == "IDENTIFIER": + input_identifiers.append(token) + if token.kind == "SEMI": + break + + if len(input_identifiers) == 0: + raise analysis_error( + "To evaluate an operation as pure, it must have at least 1 input", + tkn + ) + # Check that the input identifiers belong to the uop's + # input stack effect + uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs} + for input_tkn in input_identifiers: + if input_tkn.text not in uop_stack_effect_input_identifers: + raise analysis_error(f"{input_tkn.text} referenced in " + f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not " + f"exist in the base uop's input stack effects", + input_tkn) + input_identifiers_as_str = {tkn.text for tkn in input_identifiers} + used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str] + assert 
len(used_stack_inputs) > 0 + emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy()) + emitter.emit("if (\n") + for inp in used_stack_inputs[:-1]: + emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n") + emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n") + emitter.emit(') {\n') + # Declare variables, before they are shadowed. + for inp in used_stack_inputs: + if inp.used: + emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n") + # Shadow the symbolic variables with stackrefs. + for inp in used_stack_inputs: + if inp.is_array(): + raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn) + if inp.used: + emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n") + # Rename all output variables to stackref variant. + for outp in self.original_uop.stack.outputs: + if outp.is_array(): + raise analysis_error( + "Array output StackRefs not supported for evaluating pure ops.", + self.original_uop.body.open + ) + emitter.emit(f"_PyStackRef {outp.name}_stackref;\n") + + + storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False) + # No reference management of outputs needed. + for var in storage.outputs: + var.in_local = True + emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n") + emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False) + self.out.start_line() + emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n") + # Finally, assign back the output stackrefs to symbolics. + for outp in self.original_uop.stack.outputs: + # All new stackrefs are created from new references. + # That's how the stackref contract works. + if not outp.peek: + emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") + else: + emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") + storage.flush(self.out) + emitter.emit("break;\n") + emitter.emit("}\n") + return True + +class OptimizerConstantEmitter(OptimizerEmitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels, original_uop, stack) + # Replace all outputs to point to their stackref versions. + overrides = { + outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs + } + self._replacers = {**self._replacers, **overrides} + self.cannot_escape = True + + def emit_to_with_replacement( + self, + out: CWriter, + tkn_iter: TokenIterator, + end: str, + uop: CodeSection, + storage: Storage, + inst: Instruction | None + ) -> Token: + parens = 0 + for tkn in tkn_iter: + if tkn.kind == end and parens == 0: + return tkn + if tkn.kind == "LPAREN": + parens += 1 + if tkn.kind == "RPAREN": + parens -= 1 + if tkn.text in self._replacers: + self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst) + else: + out.emit(tkn) + raise analysis_error(f"Expecting {end}. 
Reached end of file", tkn) + + def emit_stackref_override( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.emit(tkn) + self.out.emit("_stackref ") + return True + + def deopt_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.start_line() + self.out.emit("if (") + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.emit(") {\n") + next(tkn_iter) # Semi colon + # We guarantee this will deopt in real-world code + # via constants analysis. So just bail. + self.emit("ctx->done = true;\n") + self.emit("break;\n") + self.emit("}\n") + return not always_true(first_tkn) + + exit_if = deopt_if + + def error_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + next(tkn_iter) # RPAREN + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.out.emit(") {\n") + next(tkn_iter) # Semi colon + storage.clear_inputs("at ERROR_IF") + + self.out.emit("goto error;\n") + if not unconditional: + self.out.emit("}\n") + return not unconditional + + def write_uop( override: Uop | None, uop: Uop, @@ -177,13 +367,14 @@ def write_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n") if override: - emitter = OptimizerEmitter(out, {}) + emitter = OptimizerEmitter(out, {}, uop, stack.copy()) # No reference management of inputs needed. 
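The emit_to_with_replacement helper above is, at its core, a parenthesis-balanced scan over a token stream that dispatches registered replacers along the way. Stripped of the emitter machinery, the matching logic reduces to the following sketch (Token here is a stand-in for the generator's lexer token, not the real class):

    from dataclasses import dataclass

    @dataclass
    class Token:          # stand-in for the cases generator's lexer token
        kind: str
        text: str

    def scan_until(tokens, end_kind):
        # Consume tokens until one of kind `end_kind` appears at
        # parenthesis depth zero; nested (...) groups are skipped whole.
        depth = 0
        for tok in tokens:
            if tok.kind == end_kind and depth == 0:
                return tok
            if tok.kind == "LPAREN":
                depth += 1
            elif tok.kind == "RPAREN":
                depth -= 1
        raise ValueError(f"expected {end_kind} before end of input")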
for var in storage.inputs: # type: ignore[possibly-undefined] var.in_local = False _, storage = emitter.emit_tokens(override, storage, None, False) out.start_line() storage.flush(out) + out.start_line() else: emit_default(out, uop, stack) out.start_line() @@ -230,7 +421,7 @@ def generate_abstract_interpreter( declare_variables(override, out, skip_inputs=False) else: declare_variables(uop, out, skip_inputs=True) - stack = Stack(extract_bits=False, cast_type="JitOptSymbol *") + stack = Stack() write_uop(override, uop, out, stack, debug, skip_inputs=(override is None)) out.start_line() out.emit("break;\n") diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 9c9b0053a59..c7fe0d162ac 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -247,12 +247,11 @@ class SimpleStmt(Stmt): @dataclass class StackEffect(Node): name: str = field(compare=False) # __eq__ only uses type, cond, size - type: str = "" # Optional `:type` size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond def __repr__(self) -> str: - items = [self.name, self.type, self.size] + items = [self.name, self.size] while items and items[-1] == "": del items[-1] return f"StackEffect({', '.join(repr(item) for item in items)})" @@ -380,9 +379,13 @@ class Parser(PLexer): while anno := self.expect(lx.ANNOTATION): if anno.text == "replicate": self.require(lx.LPAREN) - times = self.require(lx.NUMBER) + stop = self.require(lx.NUMBER) + start_text = "0" + if self.expect(lx.COLON): + start_text = stop.text + stop = self.require(lx.NUMBER) self.require(lx.RPAREN) - annotations.append(f"replicate({times.text})") + annotations.append(f"replicate({start_text}:{stop.text})") else: annotations.append(anno.text) tkn = self.expect(lx.INST) @@ -463,20 +466,13 @@ class Parser(PLexer): # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): - type_text = "" - if self.expect(lx.COLON): - type_text = self.require(lx.IDENTIFIER).text.strip() - if self.expect(lx.TIMES): - type_text += " *" size_text = "" if self.expect(lx.LBRACKET): - if type_text: - raise self.make_syntax_error("Unexpected [") if not (size := self.expression()): raise self.make_syntax_error("Expected expression") self.require(lx.RBRACKET) size_text = size.text.strip() - return StackEffect(tkn.text, type_text, size_text) + return StackEffect(tkn.text, size_text) return None @contextual diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 6b681775f48..3a0e7e5d0d5 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -168,7 +168,7 @@ class Local: @staticmethod def register(name: str) -> "Local": - item = StackItem(name, None, "", False, True) + item = StackItem(name, "", False, True) return Local(item, None, True) def kill(self) -> None: @@ -216,13 +216,11 @@ def array_or_scalar(var: StackItem | Local) -> str: return "array" if var.is_array() else "scalar" class Stack: - def __init__(self, extract_bits: bool=True, cast_type: str = "uintptr_t") -> None: + def __init__(self) -> None: self.base_offset = PointerOffset.zero() self.physical_sp = PointerOffset.zero() self.logical_sp = PointerOffset.zero() self.variables: list[Local] = [] - self.extract_bits = extract_bits - self.cast_type = cast_type def drop(self, var: StackItem, check_liveness: bool) -> None: self.logical_sp = self.logical_sp.pop(var) @@ -268,10 +266,8 @@ class Stack: self.base_offset = 
self.logical_sp if var.name in UNUSED or not var.used: return Local.unused(var, self.base_offset) - cast = f"({var.type})" if (not indirect and var.type) else "" - bits = ".bits" if cast and self.extract_bits else "" c_offset = (self.base_offset - self.physical_sp).to_c() - assign = f"{var.name} = {cast}{indirect}stack_pointer[{c_offset}]{bits};\n" + assign = f"{var.name} = {indirect}stack_pointer[{c_offset}];\n" out.emit(assign) self._print(out) return Local.from_memory(var, self.base_offset) @@ -292,12 +288,8 @@ class Stack: out: CWriter, var: StackItem, stack_offset: PointerOffset, - cast_type: str, - extract_bits: bool, ) -> None: - cast = f"({cast_type})" if var.type else "" - bits = ".bits" if cast and extract_bits else "" - out.emit(f"stack_pointer[{stack_offset.to_c()}]{bits} = {cast}{var.name};\n") + out.emit(f"stack_pointer[{stack_offset.to_c()}] = {var.name};\n") def _save_physical_sp(self, out: CWriter) -> None: if self.physical_sp != self.logical_sp: @@ -320,7 +312,7 @@ class Stack: self._print(out) var.memory_offset = var_offset stack_offset = var_offset - self.physical_sp - Stack._do_emit(out, var.item, stack_offset, self.cast_type, self.extract_bits) + Stack._do_emit(out, var.item, stack_offset) self._print(out) var_offset = var_offset.push(var.item) @@ -350,7 +342,7 @@ class Stack: out.emit(self.as_comment() + "\n") def copy(self) -> "Stack": - other = Stack(self.extract_bits, self.cast_type) + other = Stack() other.base_offset = self.base_offset other.physical_sp = self.physical_sp other.logical_sp = self.logical_sp @@ -496,7 +488,7 @@ class Storage: f"Expected '{undefined}' to be defined before '{out.name}'" else: undefined = out.name - while len(self.outputs) > self.peeks and not self.needs_defining(self.outputs[0]): + while len(self.outputs) > self.peeks and not self.needs_defining(self.outputs[self.peeks]): out = self.outputs.pop(self.peeks) self.stack.push(out) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 276f306dfff..fc3bc47286f 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -91,7 +91,7 @@ class Tier2Emitter(Emitter): self.emit("}\n") return not always_true(first_tkn) - def exit_if( # type: ignore[override] + def exit_if( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 6f995e5c46b..1cc23837a72 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -24,7 +24,8 @@ DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_uop_metadata.h" def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n") - out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n") + out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n") + out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n") out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n") out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n") out.emit("#ifdef NEED_OPCODE_METADATA\n") @@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n") out.emit("};\n\n") - out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n") + out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = 
{\n") for uop in analysis.uops.values(): if uop.replicated: - out.emit(f"[{uop.name}] = {uop.replicated},\n") + assert(uop.replicated.step == 1) + out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n") out.emit("};\n\n") out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n") diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 926bc66b944..1a59e25189d 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -27,6 +27,7 @@ import queue import sys import threading import time +from operator import methodcaller # The iterations in individual benchmarks are scaled by this factor. WORK_SCALE = 100 @@ -188,6 +189,18 @@ def thread_local_read(): _ = tmp.x _ = tmp.x +class MyClass: + __slots__ = () + + def func(self): + pass + +@register_benchmark +def method_caller(): + mc = methodcaller("func") + obj = MyClass() + for i in range(1000 * WORK_SCALE): + mc(obj) def bench_one_thread(func): t0 = time.perf_counter_ns() diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index b407a8a643b..02af1caff7d 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -140,6 +140,9 @@ if __name__ == '__main__': data = locale.locale_alias.copy() data.update(parse_glibc_supported(args.glibc_supported)) data.update(parse(args.locale_alias)) + # Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist + # on all platforms. + data['c.utf8'] = 'C.UTF-8' while True: # Repeat optimization while the size is decreased. n = len(data) diff --git a/Tools/inspection/benchmark_external_inspection.py b/Tools/inspection/benchmark_external_inspection.py new file mode 100644 index 00000000000..0ac7ac4d385 --- /dev/null +++ b/Tools/inspection/benchmark_external_inspection.py @@ -0,0 +1,473 @@ +import _remote_debugging +import time +import subprocess +import sys +import contextlib +import tempfile +import os +import argparse +from _colorize import get_colors, can_colorize + +CODE = '''\ +import time +import os +import sys +import math + +def slow_fibonacci(n): + """Intentionally slow recursive fibonacci - should show up prominently in profiler""" + if n <= 1: + return n + return slow_fibonacci(n-1) + slow_fibonacci(n-2) + +def medium_computation(): + """Medium complexity function""" + result = 0 + for i in range(1000): + result += math.sqrt(i) * math.sin(i) + return result + +def fast_loop(): + """Fast simple loop""" + total = 0 + for i in range(100): + total += i + return total + +def string_operations(): + """String manipulation that should be visible in profiler""" + text = "hello world " * 100 + words = text.split() + return " ".join(reversed(words)) + +def nested_calls(): + """Nested function calls to test call stack depth""" + def level1(): + def level2(): + def level3(): + return medium_computation() + return level3() + return level2() + return level1() + +def main_loop(): + """Main computation loop with different execution paths""" + iteration = 0 + + while True: + iteration += 1 + + # Different execution paths with different frequencies + if iteration % 50 == 0: + # Expensive operation - should show high per-call time + result = slow_fibonacci(20) + + elif iteration % 10 == 0: + # Medium operation + result = nested_calls() + + elif iteration % 5 == 0: + # String operations + result = string_operations() + + else: + # Fast operation - most common + result = fast_loop() + + # Small delay to make sampling more interesting + time.sleep(0.001) 
+ +if __name__ == "__main__": + main_loop() +''' + +DEEP_STATIC_CODE = """\ +import time +def factorial(n): + if n <= 1: + time.sleep(10000) + return 1 + return n * factorial(n-1) + +factorial(900) +""" + +CODE_WITH_TONS_OF_THREADS = '''\ +import time +import threading +import random +import math + +def cpu_intensive_work(): + """Do some CPU intensive calculations""" + result = 0 + for _ in range(10000): + result += math.sin(random.random()) * math.cos(random.random()) + return result + +def io_intensive_work(): + """Simulate IO intensive work with sleeps""" + time.sleep(0.1) + +def mixed_workload(): + """Mix of CPU and IO work""" + while True: + if random.random() < 0.3: + cpu_intensive_work() + else: + io_intensive_work() + +def create_threads(n): + """Create n threads doing mixed workloads""" + threads = [] + for _ in range(n): + t = threading.Thread(target=mixed_workload, daemon=True) + t.start() + threads.append(t) + return threads + +# Start with 5 threads +active_threads = create_threads(5) +thread_count = 5 + +# Main thread manages threads and does work +while True: + # Randomly add or remove threads + if random.random() < 0.1: # 10% chance each iteration + if random.random() < 0.5 and thread_count < 100: + # Add 1-5 new threads + new_count = random.randint(1, 5) + new_threads = create_threads(new_count) + active_threads.extend(new_threads) + thread_count += new_count + elif thread_count > 10: + # Remove 1-3 threads + remove_count = random.randint(1, 5) + # The threads will terminate naturally since they're daemons + active_threads = active_threads[remove_count:] + thread_count -= remove_count + + cpu_intensive_work() + time.sleep(0.05) +''' + +CODE_EXAMPLES = { + "basic": { + "code": CODE, + "description": "Mixed workload with fibonacci, computations, and string operations", + }, + "deep_static": { + "code": DEEP_STATIC_CODE, + "description": "Deep recursive call stack with 900+ frames (factorial)", + }, + "threads": { + "code": CODE_WITH_TONS_OF_THREADS, + "description": "Tons of threads doing mixed CPU/IO work", + }, +} + + +def benchmark(unwinder, duration_seconds=10): + """Benchmark mode - measure raw sampling speed for specified duration""" + sample_count = 0 + fail_count = 0 + total_work_time = 0.0 + start_time = time.perf_counter() + end_time = start_time + duration_seconds + total_attempts = 0 + + colors = get_colors(can_colorize()) + + print( + f"{colors.BOLD_BLUE}Benchmarking sampling speed for {duration_seconds} seconds...{colors.RESET}" + ) + + try: + while time.perf_counter() < end_time: + total_attempts += 1 + work_start = time.perf_counter() + try: + stack_trace = unwinder.get_stack_trace() + if stack_trace: + sample_count += 1 + except (OSError, RuntimeError, UnicodeDecodeError) as e: + fail_count += 1 + + work_end = time.perf_counter() + total_work_time += work_end - work_start + + if total_attempts % 10000 == 0: + avg_work_time_us = (total_work_time / total_attempts) * 1e6 + work_rate = ( + total_attempts / total_work_time if total_work_time > 0 else 0 + ) + success_rate = (sample_count / total_attempts) * 100 + + # Color code the success rate + if success_rate >= 95: + success_color = colors.GREEN + elif success_rate >= 80: + success_color = colors.YELLOW + else: + success_color = colors.RED + + print( + f"{colors.CYAN}Attempts:{colors.RESET} {total_attempts} | " + f"{colors.CYAN}Success:{colors.RESET} {success_color}{success_rate:.1f}%{colors.RESET} | " + f"{colors.CYAN}Rate:{colors.RESET} {colors.MAGENTA}{work_rate:.1f}Hz{colors.RESET} | " + 
f"{colors.CYAN}Avg:{colors.RESET} {colors.YELLOW}{avg_work_time_us:.2f}µs{colors.RESET}" + ) + except KeyboardInterrupt: + print(f"\n{colors.YELLOW}Benchmark interrupted by user{colors.RESET}") + + actual_end_time = time.perf_counter() + wall_time = actual_end_time - start_time + + # Return final statistics + return { + "wall_time": wall_time, + "total_attempts": total_attempts, + "sample_count": sample_count, + "fail_count": fail_count, + "success_rate": ( + (sample_count / total_attempts) * 100 if total_attempts > 0 else 0 + ), + "total_work_time": total_work_time, + "avg_work_time_us": ( + (total_work_time / total_attempts) * 1e6 if total_attempts > 0 else 0 + ), + "work_rate_hz": total_attempts / total_work_time if total_work_time > 0 else 0, + "samples_per_sec": sample_count / wall_time if wall_time > 0 else 0, + } + + +def print_benchmark_results(results): + """Print comprehensive benchmark results""" + colors = get_colors(can_colorize()) + + print(f"\n{colors.BOLD_GREEN}{'='*60}{colors.RESET}") + print(f"{colors.BOLD_GREEN}get_stack_trace() Benchmark Results{colors.RESET}") + print(f"{colors.BOLD_GREEN}{'='*60}{colors.RESET}") + + # Basic statistics + print(f"\n{colors.BOLD_CYAN}Basic Statistics:{colors.RESET}") + print( + f" {colors.CYAN}Wall time:{colors.RESET} {colors.YELLOW}{results['wall_time']:.3f}{colors.RESET} seconds" + ) + print( + f" {colors.CYAN}Total attempts:{colors.RESET} {colors.MAGENTA}{results['total_attempts']:,}{colors.RESET}" + ) + print( + f" {colors.CYAN}Successful samples:{colors.RESET} {colors.GREEN}{results['sample_count']:,}{colors.RESET}" + ) + print( + f" {colors.CYAN}Failed samples:{colors.RESET} {colors.RED}{results['fail_count']:,}{colors.RESET}" + ) + + # Color code the success rate + success_rate = results["success_rate"] + if success_rate >= 95: + success_color = colors.BOLD_GREEN + elif success_rate >= 80: + success_color = colors.BOLD_YELLOW + else: + success_color = colors.BOLD_RED + + print( + f" {colors.CYAN}Success rate:{colors.RESET} {success_color}{success_rate:.2f}%{colors.RESET}" + ) + + # Performance metrics + print(f"\n{colors.BOLD_CYAN}Performance Metrics:{colors.RESET}") + print( + f" {colors.CYAN}Average call time:{colors.RESET} {colors.YELLOW}{results['avg_work_time_us']:.2f}{colors.RESET} µs" + ) + print( + f" {colors.CYAN}Work rate:{colors.RESET} {colors.MAGENTA}{results['work_rate_hz']:.1f}{colors.RESET} calls/sec" + ) + print( + f" {colors.CYAN}Sample rate:{colors.RESET} {colors.MAGENTA}{results['samples_per_sec']:.1f}{colors.RESET} samples/sec" + ) + print( + f" {colors.CYAN}Total work time:{colors.RESET} {colors.YELLOW}{results['total_work_time']:.3f}{colors.RESET} seconds" + ) + + # Color code work efficiency + efficiency = (results["total_work_time"] / results["wall_time"]) * 100 + if efficiency >= 80: + efficiency_color = colors.GREEN + elif efficiency >= 50: + efficiency_color = colors.YELLOW + else: + efficiency_color = colors.RED + + print( + f" {colors.CYAN}Work efficiency:{colors.RESET} {efficiency_color}{efficiency:.1f}%{colors.RESET}" + ) + + +def parse_arguments(): + """Parse command line arguments""" + # Build the code examples description + examples_desc = "\n".join( + [f" {name}: {info['description']}" for name, info in CODE_EXAMPLES.items()] + ) + + parser = argparse.ArgumentParser( + description="Benchmark get_stack_trace() performance", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=f""" +Examples: + %(prog)s # Run basic benchmark for 10 seconds (default) + %(prog)s --duration 30 # Run basic 
benchmark for 30 seconds + %(prog)s -d 60 # Run basic benchmark for 60 seconds + %(prog)s --code deep_static # Run deep static call stack benchmark + %(prog)s --code deep_static -d 30 # Run deep static benchmark for 30 seconds + +Available code examples: +{examples_desc} + """, + color=True, + ) + + parser.add_argument( + "--duration", + "-d", + type=int, + default=10, + help="Benchmark duration in seconds (default: 10)", + ) + + parser.add_argument( + "--code", + "-c", + choices=list(CODE_EXAMPLES.keys()), + default="basic", + help="Code example to benchmark (default: basic)", + ) + + parser.add_argument( + "--threads", + choices=["all", "main", "only_active"], + default="all", + help="Which threads to include in the benchmark (default: all)", + ) + + return parser.parse_args() + + +def create_target_process(temp_file, code_example="basic"): + """Create and start the target process for benchmarking""" + example_info = CODE_EXAMPLES.get(code_example, {"code": CODE}) + selected_code = example_info["code"] + temp_file.write(selected_code) + temp_file.flush() + + process = subprocess.Popen( + [sys.executable, temp_file.name], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + + # Give it time to start + time.sleep(1.0) + + # Check if it's still running + if process.poll() is not None: + stdout, stderr = process.communicate() + raise RuntimeError( + f"Target process exited unexpectedly:\nSTDOUT: {stdout.decode()}\nSTDERR: {stderr.decode()}" + ) + + return process, temp_file.name + + +def cleanup_process(process, temp_file_path): + """Clean up the target process and temporary file""" + with contextlib.suppress(Exception): + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=5.0) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +def main(): + """Main benchmark function""" + colors = get_colors(can_colorize()) + args = parse_arguments() + + print(f"{colors.BOLD_MAGENTA}External Inspection Benchmark Tool{colors.RESET}") + print(f"{colors.BOLD_MAGENTA}{'=' * 34}{colors.RESET}") + + example_info = CODE_EXAMPLES.get(args.code, {"description": "Unknown"}) + print( + f"\n{colors.CYAN}Code Example:{colors.RESET} {colors.GREEN}{args.code}{colors.RESET}" + ) + print(f"{colors.CYAN}Description:{colors.RESET} {example_info['description']}") + print( + f"{colors.CYAN}Benchmark Duration:{colors.RESET} {colors.YELLOW}{args.duration}{colors.RESET} seconds" + ) + + process = None + temp_file_path = None + + try: + # Create target process + print(f"\n{colors.BLUE}Creating and starting target process...{colors.RESET}") + with tempfile.NamedTemporaryFile(mode="w", suffix=".py") as temp_file: + process, temp_file_path = create_target_process(temp_file, args.code) + print( + f"{colors.GREEN}Target process started with PID: {colors.BOLD_WHITE}{process.pid}{colors.RESET}" + ) + + # Run benchmark with specified duration + with process: + # Create unwinder and run benchmark + print(f"{colors.BLUE}Initializing unwinder...{colors.RESET}") + try: + kwargs = {} + if args.threads == "all": + kwargs["all_threads"] = True + elif args.threads == "main": + kwargs["all_threads"] = False + elif args.threads == "only_active": + kwargs["only_active_thread"] = True + unwinder = _remote_debugging.RemoteUnwinder( + process.pid, **kwargs + ) + results = benchmark(unwinder, duration_seconds=args.duration) + finally: + cleanup_process(process, temp_file_path) + + # Print results + print_benchmark_results(results) + + except PermissionError as e: + print( + f"{colors.BOLD_RED}Error: 
Insufficient permissions to read stack trace: {e}{colors.RESET}" + ) + print( + f"{colors.YELLOW}Try running with appropriate privileges (e.g., sudo){colors.RESET}" + ) + return 1 + except Exception as e: + print(f"{colors.BOLD_RED}Error during benchmarking: {e}{colors.RESET}") + if process: + with contextlib.suppress(Exception): + stdout, stderr = process.communicate(timeout=1) + if stdout: + print( + f"{colors.CYAN}Process STDOUT:{colors.RESET} {stdout.decode()}" + ) + if stderr: + print( + f"{colors.RED}Process STDERR:{colors.RESET} {stderr.decode()}" + ) + raise + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py new file mode 100644 index 00000000000..1077e4106fd --- /dev/null +++ b/Tools/jit/_optimizers.py @@ -0,0 +1,319 @@ +"""Low-level optimization of textual assembly.""" + +import dataclasses +import pathlib +import re +import typing + +# Same as saying "not string.startswith('')": +_RE_NEVER_MATCH = re.compile(r"(?!)") +# Dictionary mapping branch instructions to their inverted branch instructions. +# If a branch cannot be inverted, the value is None: +_X86_BRANCHES = { + # https://www.felixcloutier.com/x86/jcc + "ja": "jna", + "jae": "jnae", + "jb": "jnb", + "jbe": "jnbe", + "jc": "jnc", + "jcxz": None, + "je": "jne", + "jecxz": None, + "jg": "jng", + "jge": "jnge", + "jl": "jnl", + "jle": "jnle", + "jo": "jno", + "jp": "jnp", + "jpe": "jpo", + "jrcxz": None, + "js": "jns", + "jz": "jnz", + # https://www.felixcloutier.com/x86/loop:loopcc + "loop": None, + "loope": None, + "loopne": None, + "loopnz": None, + "loopz": None, +} +# Update with all of the inverted branches, too: +_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} + + +@dataclasses.dataclass +class _Block: + label: str | None = None + # Non-instruction lines like labels, directives, and comments: + noninstructions: list[str] = dataclasses.field(default_factory=list) + # Instruction lines: + instructions: list[str] = dataclasses.field(default_factory=list) + # If this block ends in a jump, where to? 
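Before the remaining _Block fields, note how the branch table above is made symmetric: the final dict-merge adds every inverted mnemonic as a key mapping back to the original, so a lookup works in either direction. A tiny demonstration with a trimmed table:

    branches = {"je": "jne", "jl": "jnl", "jcxz": None}
    branches |= {v: k for k, v in branches.items() if v}
    assert branches["jne"] == "je"   # reverse direction now present
    assert branches["jcxz"] is None  # un-invertible branches stay None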
+ target: typing.Self | None = None + # The next block in the linked list: + link: typing.Self | None = None + # Whether control flow can fall through to the linked block above: + fallthrough: bool = True + # Whether this block can eventually reach the next uop (_JIT_CONTINUE): + hot: bool = False + + def resolve(self) -> typing.Self: + """Find the first non-empty block reachable from this one.""" + block = self + while block.link and not block.instructions: + block = block.link + return block + + +@dataclasses.dataclass +class Optimizer: + """Several passes of analysis and optimization for textual assembly.""" + + path: pathlib.Path + _: dataclasses.KW_ONLY + # prefix used to mangle symbols on some platforms: + prefix: str = "" + # The first block in the linked list: + _root: _Block = dataclasses.field(init=False, default_factory=_Block) + _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) + # No groups: + _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile( + r"\s*(?:\.|#|//|$)" + ) + # One group (label): + _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( + r'\s*(?P<label>[\w."$?@]+):' + ) + # Override everything that follows in subclasses: + _alignment: typing.ClassVar[int] = 1 + _branches: typing.ClassVar[dict[str, str | None]] = {} + # Two groups (instruction and target): + _re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + # One group (target): + _re_jump: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + # No groups: + _re_return: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + + def __post_init__(self) -> None: + # Split the code into a linked list of basic blocks. A basic block is an + # optional label, followed by zero or more non-instruction lines, + # followed by zero or more instruction lines (only the last of which may + # be a branch, jump, or return): + text = self._preprocess(self.path.read_text()) + block = self._root + for line in text.splitlines(): + # See if we need to start a new block: + if match := self._re_label.match(line): + # Label. New block: + block.link = block = self._lookup_label(match["label"]) + block.noninstructions.append(line) + continue + if self._re_noninstructions.match(line): + if block.instructions: + # Non-instruction lines. New block: + block.link = block = _Block() + block.noninstructions.append(line) + continue + if block.target or not block.fallthrough: + # Current block ends with a branch, jump, or return. 
New block: + block.link = block = _Block() + block.instructions.append(line) + if match := self._re_branch.match(line): + # A block ending in a branch has a target and fallthrough: + block.target = self._lookup_label(match["target"]) + assert block.fallthrough + elif match := self._re_jump.match(line): + # A block ending in a jump has a target and no fallthrough: + block.target = self._lookup_label(match["target"]) + block.fallthrough = False + elif self._re_return.match(line): + # A block ending in a return has no target and fallthrough: + assert not block.target + block.fallthrough = False + + def _preprocess(self, text: str) -> str: + # Override this method to do preprocessing of the textual assembly: + return text + + @classmethod + def _invert_branch(cls, line: str, target: str) -> str | None: + match = cls._re_branch.match(line) + assert match + inverted = cls._branches.get(match["instruction"]) + if not inverted: + return None + (a, b), (c, d) = match.span("instruction"), match.span("target") + # Before: + # je FOO + # After: + # jne BAR + return "".join([line[:a], inverted, line[b:c], target, line[d:]]) + + @classmethod + def _update_jump(cls, line: str, target: str) -> str: + match = cls._re_jump.match(line) + assert match + a, b = match.span("target") + # Before: + # jmp FOO + # After: + # jmp BAR + return "".join([line[:a], target, line[b:]]) + + def _lookup_label(self, label: str) -> _Block: + if label not in self._labels: + self._labels[label] = _Block(label) + return self._labels[label] + + def _blocks(self) -> typing.Generator[_Block, None, None]: + block: _Block | None = self._root + while block: + yield block + block = block.link + + def _body(self) -> str: + lines = [] + hot = True + for block in self._blocks(): + if hot != block.hot: + hot = block.hot + # Make it easy to tell at a glance where cold code is: + lines.append(f"# JIT: {'HOT' if hot else 'COLD'} ".ljust(80, "#")) + lines.extend(block.noninstructions) + lines.extend(block.instructions) + return "\n".join(lines) + + def _predecessors(self, block: _Block) -> typing.Generator[_Block, None, None]: + # This is inefficient, but it's never wrong: + for pre in self._blocks(): + if pre.target is block or pre.fallthrough and pre.link is block: + yield pre + + def _insert_continue_label(self) -> None: + # Find the block with the last instruction: + for end in reversed(list(self._blocks())): + if end.instructions: + break + # Before: + # jmp FOO + # After: + # jmp FOO + # .balign 8 + # _JIT_CONTINUE: + # This lets the assembler encode _JIT_CONTINUE jumps at build time! + align = _Block() + align.noninstructions.append(f"\t.balign\t{self._alignment}") + continuation = self._lookup_label(f"{self.prefix}_JIT_CONTINUE") + assert continuation.label + continuation.noninstructions.append(f"{continuation.label}:") + end.link, align.link, continuation.link = align, continuation, end.link + + def _mark_hot_blocks(self) -> None: + # Start with the last block, and perform a DFS to find all blocks that + # can eventually reach it: + todo = list(self._blocks())[-1:] + while todo: + block = todo.pop() + block.hot = True + todo.extend(pre for pre in self._predecessors(block) if not pre.hot) + + def _invert_hot_branches(self) -> None: + for branch in self._blocks(): + link = branch.link + if link is None: + continue + jump = link.resolve() + # Before: + # je HOT + # jmp COLD + # After: + # jne COLD + # jmp HOT + if ( + # block ends with a branch to hot code... 
+ branch.target + and branch.fallthrough + and branch.target.hot + # ...followed by a jump to cold code with no other predecessors: + and jump.target + and not jump.fallthrough + and not jump.target.hot + and len(jump.instructions) == 1 + and list(self._predecessors(jump)) == [branch] + ): + assert jump.target.label + assert branch.target.label + inverted = self._invert_branch( + branch.instructions[-1], jump.target.label + ) + # Check to see if the branch can even be inverted: + if inverted is None: + continue + branch.instructions[-1] = inverted + jump.instructions[-1] = self._update_jump( + jump.instructions[-1], branch.target.label + ) + branch.target, jump.target = jump.target, branch.target + jump.hot = True + + def _remove_redundant_jumps(self) -> None: + # Zero-length jumps can be introduced by _insert_continue_label and + # _invert_hot_branches: + for block in self._blocks(): + # Before: + # jmp FOO + # FOO: + # After: + # FOO: + if ( + block.target + and block.link + and block.target.resolve() is block.link.resolve() + ): + block.target = None + block.fallthrough = True + block.instructions.pop() + + def run(self) -> None: + """Run this optimizer.""" + self._insert_continue_label() + self._mark_hot_blocks() + self._invert_hot_branches() + self._remove_redundant_jumps() + self.path.write_text(self._body()) + + +class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods + """aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu""" + + # TODO: @diegorusso + _alignment = 8 + # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch- + _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)") + + +class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods + """i686-pc-windows-msvc/x86_64-apple-darwin/x86_64-unknown-linux-gnu""" + + _branches = _X86_BRANCHES + _re_branch = re.compile( + rf"\s*(?P<instruction>{'|'.join(_X86_BRANCHES)})\s+(?P<target>[\w.]+)" + ) + # https://www.felixcloutier.com/x86/jmp + _re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)") + # https://www.felixcloutier.com/x86/ret + _re_return = re.compile(r"\s*ret\b") + + +class OptimizerX8664Windows(OptimizerX86): # pylint: disable = too-few-public-methods + """x86_64-pc-windows-msvc""" + + def _preprocess(self, text: str) -> str: + text = super()._preprocess(text) + # Before: + # rex64 jmpq *__imp__JIT_CONTINUE(%rip) + # After: + # jmp _JIT_CONTINUE + far_indirect_jump = ( + rf"rex64\s+jmpq\s+\*__imp_(?P<target>{self.prefix}_JIT_\w+)\(%rip\)" + ) + return re.sub(far_indirect_jump, r"jmp\t\g<target>", text) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 03b0ba647b0..1d82f5366f6 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -17,8 +17,6 @@ class HoleValue(enum.Enum): # The base address of the machine code for the current uop (exposed as _JIT_ENTRY): CODE = enum.auto() - # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE): - CONTINUE = enum.auto() # The base address of the read-only data for this uop: DATA = enum.auto() # The address of the current executor (exposed as _JIT_EXECUTOR): @@ -97,7 +95,6 @@ _PATCH_FUNCS = { # Translate HoleValues to C expressions: _HOLE_EXPRS = { HoleValue.CODE: "(uintptr_t)code", - HoleValue.CONTINUE: "(uintptr_t)code + sizeof(code_body)", HoleValue.DATA: "(uintptr_t)data", HoleValue.EXECUTOR: "(uintptr_t)executor", # These should all have been turned into DATA values by process_relocations: @@ -209,64 +206,6 @@ class Stencil: 
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def add_nops(self, nop: bytes, alignment: int) -> None: - """Add NOPs until there is alignment. Fail if it is not possible.""" - offset = len(self.body) - nop_size = len(nop) - - # Calculate the gap to the next multiple of alignment. - gap = -offset % alignment - if gap: - if gap % nop_size == 0: - count = gap // nop_size - self.body.extend(nop * count) - else: - raise ValueError( - f"Cannot add nops of size '{nop_size}' to a body with " - f"offset '{offset}' to align with '{alignment}'" - ) - - def remove_jump(self) -> None: - """Remove a zero-length continuation jump, if it exists.""" - hole = max(self.holes, key=lambda hole: hole.offset) - match hole: - case Hole( - offset=offset, - kind="IMAGE_REL_AMD64_REL32", - value=HoleValue.GOT, - symbol="_JIT_CONTINUE", - addend=-4, - ) as hole: - # jmp qword ptr [rip] - jump = b"\x48\xff\x25\x00\x00\x00\x00" - offset -= 3 - case Hole( - offset=offset, - kind="IMAGE_REL_I386_REL32" | "R_X86_64_PLT32" | "X86_64_RELOC_BRANCH", - value=HoleValue.CONTINUE, - symbol=None, - addend=addend, - ) as hole if ( - _signed(addend) == -4 - ): - # jmp 5 - jump = b"\xe9\x00\x00\x00\x00" - offset -= 1 - case Hole( - offset=offset, - kind="R_AARCH64_JUMP26", - value=HoleValue.CONTINUE, - symbol=None, - addend=0, - ) as hole: - # b #4 - jump = b"\x00\x00\x00\x14" - case _: - return - if self.body[offset:] == jump: - self.body = self.body[:offset] - self.holes.remove(hole) - @dataclasses.dataclass class StencilGroup: @@ -284,9 +223,7 @@ class StencilGroup: _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False) _trampolines: set[int] = dataclasses.field(default_factory=set, init=False) - def process_relocations( - self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b"" - ) -> None: + def process_relocations(self, known_symbols: dict[str, int]) -> None: """Fix up all GOT and internal relocations for this stencil group.""" for hole in self.code.holes.copy(): if ( @@ -306,8 +243,6 @@ class StencilGroup: self._trampolines.add(ordinal) hole.addend = ordinal hole.symbol = None - self.code.remove_jump() - self.code.add_nops(nop=nop, alignment=alignment) self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index d0a1c081ffe..ed10329d25d 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -10,8 +10,10 @@ import re import sys import tempfile import typing +import shlex import _llvm +import _optimizers import _schema import _stencils import _writer @@ -40,12 +42,13 @@ class _Target(typing.Generic[_S, _R]): triple: str condition: str _: dataclasses.KW_ONLY - alignment: int = 1 args: typing.Sequence[str] = () + optimizer: type[_optimizers.Optimizer] = _optimizers.Optimizer prefix: str = "" stable: bool = False debug: bool = False verbose: bool = False + cflags: str = "" known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() @@ -62,6 +65,7 @@ class _Target(typing.Generic[_S, _R]): hasher = hashlib.sha256() hasher.update(self.triple.encode()) hasher.update(self.debug.to_bytes()) + hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) @@ -118,8 +122,9 @@ class _Target(typing.Generic[_S, _R]): async 
def _compile( self, opname: str, c: pathlib.Path, tempdir: pathlib.Path ) -> _stencils.StencilGroup: + s = tempdir / f"{opname}.s" o = tempdir / f"{opname}.o" - args = [ + args_s = [ f"--target={self.triple}", "-DPy_BUILD_CORE_MODULE", "-D_DEBUG" if self.debug else "-DNDEBUG", @@ -133,7 +138,7 @@ class _Target(typing.Generic[_S, _R]): f"-I{CPYTHON / 'Python'}", f"-I{CPYTHON / 'Tools' / 'jit'}", "-O3", - "-c", + "-S", # Shorten full absolute file paths in the generated code (like the # __FILE__ macro and assert failure messages) for reproducibility: f"-ffile-prefix-map={CPYTHON}=.", @@ -152,11 +157,16 @@ class _Target(typing.Generic[_S, _R]): "-fno-stack-protector", "-std=c11", "-o", - f"{o}", + f"{s}", f"{c}", *self.args, + # Allow user-provided CFLAGS to override any defaults + *shlex.split(self.cflags), ] - await _llvm.run("clang", args, echo=self.verbose) + await _llvm.run("clang", args_s, echo=self.verbose) + self.optimizer(s, prefix=self.prefix).run() + args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] + await _llvm.run("clang", args_o, echo=self.verbose) return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: @@ -185,11 +195,7 @@ class _Target(typing.Generic[_S, _R]): tasks.append(group.create_task(coro, name=opname)) stencil_groups = {task.get_name(): task.result() for task in tasks} for stencil_group in stencil_groups.values(): - stencil_group.process_relocations( - known_symbols=self.known_symbols, - alignment=self.alignment, - nop=self._get_nop(), - ) + stencil_group.process_relocations(self.known_symbols) return stencil_groups def build( @@ -519,42 +525,43 @@ class _MachO( def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" + optimizer: type[_optimizers.Optimizer] target: _COFF | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): condition = "defined(__aarch64__) && defined(__APPLE__)" - target = _MachO(host, condition, alignment=8, prefix="_") + optimizer = _optimizers.OptimizerAArch64 + target = _MachO(host, condition, optimizer=optimizer, prefix="_") elif re.fullmatch(r"aarch64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll", "-fplt"] condition = "defined(_M_ARM64)" - target = _COFF(host, condition, alignment=8, args=args) + optimizer = _optimizers.OptimizerAArch64 + target = _COFF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): - args = [ - "-fpic", - # On aarch64 Linux, intrinsics were being emitted and this flag - # was required to disable them. - "-mno-outline-atomics", - ] + # -mno-outline-atomics: Keep intrinsics from being emitted. + args = ["-fpic", "-mno-outline-atomics"] condition = "defined(__aarch64__) && defined(__linux__)" - target = _ELF(host, condition, alignment=8, args=args) + optimizer = _optimizers.OptimizerAArch64 + target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): - args = [ - "-DPy_NO_ENABLE_SHARED", - # __attribute__((preserve_none)) is not supported - "-Wno-ignored-attributes", - ] + # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. 
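One detail of the _compile change above that is easy to miss: user-supplied --cflags arrive as a single string and are tokenized with shlex.split before being appended, so quoted arguments survive intact. For example (flag values illustrative):

    import shlex
    print(shlex.split('-O2 -DFOO="a b"'))  # ['-O2', '-DFOO=a b']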
+ args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] + optimizer = _optimizers.OptimizerX86 condition = "defined(_M_IX86)" - target = _COFF(host, condition, args=args, prefix="_") + target = _COFF(host, condition, args=args, optimizer=optimizer, prefix="_") elif re.fullmatch(r"x86_64-apple-darwin.*", host): condition = "defined(__x86_64__) && defined(__APPLE__)" - target = _MachO(host, condition, prefix="_") + optimizer = _optimizers.OptimizerX86 + target = _MachO(host, condition, optimizer=optimizer, prefix="_") elif re.fullmatch(r"x86_64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] condition = "defined(_M_X64)" - target = _COFF(host, condition, args=args) + optimizer = _optimizers.OptimizerX8664Windows + target = _COFF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] condition = "defined(__x86_64__) && defined(__linux__)" - target = _ELF(host, condition, args=args) + optimizer = _optimizers.OptimizerX86 + target = _ELF(host, condition, args=args, optimizer=optimizer) else: raise ValueError(host) return target diff --git a/Tools/jit/build.py b/Tools/jit/build.py index 1afd0c76bad..a0733005929 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -39,11 +39,15 @@ if __name__ == "__main__": parser.add_argument( "-v", "--verbose", action="store_true", help="echo commands as they are run" ) + parser.add_argument( + "--cflags", help="additional flags to pass to the compiler", default="" + ) args = parser.parse_args() for target in args.target: target.debug = args.debug target.force = args.force target.verbose = args.verbose + target.cflags = args.cflags target.pyconfig_dir = args.pyconfig_dir target.build( comment=comment, diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs index 86d9a8b83f6..0707e77b5e9 100644 --- a/Tools/msi/freethreaded/freethreaded_files.wxs +++ b/Tools/msi/freethreaded/freethreaded_files.wxs @@ -103,7 +103,7 @@ </ComponentGroup> </Fragment> - <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?> + <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?> <Fragment> <DirectoryRef Id="Lib_venv_scripts_nt__freethreaded" /> diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs index 8439518bcbd..4d44299f783 100644 --- a/Tools/msi/lib/lib_files.wxs +++ b/Tools/msi/lib/lib_files.wxs @@ -1,6 +1,6 @@ <?xml version="1.0" encoding="UTF-8"?> <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi"> - <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_zstd ?> + <?define 
diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs
index 86d9a8b83f6..0707e77b5e9 100644
--- a/Tools/msi/freethreaded/freethreaded_files.wxs
+++ b/Tools/msi/freethreaded/freethreaded_files.wxs
@@ -103,7 +103,7 @@
         </ComponentGroup>
     </Fragment>
 
-    <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?>
+    <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?>
 
     <Fragment>
         <DirectoryRef Id="Lib_venv_scripts_nt__freethreaded" />
diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs
index 8439518bcbd..4d44299f783 100644
--- a/Tools/msi/lib/lib_files.wxs
+++ b/Tools/msi/lib/lib_files.wxs
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
-    <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_zstd ?>
+    <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd ?>
 
     <Fragment>
         <DirectoryRef Id="Lib_venv_scripts_nt" />
diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py
index 0dcf6ef844a..afd010a5254 100755
--- a/Tools/patchcheck/patchcheck.py
+++ b/Tools/patchcheck/patchcheck.py
@@ -53,19 +53,43 @@ def get_git_branch():
 
 
 def get_git_upstream_remote():
-    """Get the remote name to use for upstream branches
+    """
+    Get the remote name to use for upstream branches
 
-    Uses "upstream" if it exists, "origin" otherwise
+    Check for presence of "https://github.com/python/cpython" remote URL.
+    If only one is found, return that remote name. If multiple are found,
+    check for and return "upstream", "origin", or "python", in that
+    order. Raise an error if no valid matches are found.
     """
-    cmd = "git remote get-url upstream".split()
-    try:
-        subprocess.check_output(cmd,
-                                stderr=subprocess.DEVNULL,
-                                cwd=SRCDIR,
-                                encoding='UTF-8')
-    except subprocess.CalledProcessError:
-        return "origin"
-    return "upstream"
+    cmd = "git remote -v".split()
+    output = subprocess.check_output(
+        cmd,
+        stderr=subprocess.DEVNULL,
+        cwd=SRCDIR,
+        encoding="UTF-8"
+    )
+    # Filter to desired remotes, accounting for potential uppercasing
+    filtered_remotes = {
+        remote.split("\t")[0].lower() for remote in output.split('\n')
+        if "python/cpython" in remote.lower() and remote.endswith("(fetch)")
+    }
+    if len(filtered_remotes) == 1:
+        [remote] = filtered_remotes
+        return remote
+    for remote_name in ["upstream", "origin", "python"]:
+        if remote_name in filtered_remotes:
+            return remote_name
+    remotes_found = "\n".join(
+        {remote for remote in output.split('\n') if remote.endswith("(fetch)")}
+    )
+    raise ValueError(
+        f"Patchcheck was unable to find an unambiguous upstream remote, "
+        f"with URL matching 'https://github.com/python/cpython'. "
+        f"For help creating an upstream remote, see Dev Guide: "
+        f"https://devguide.python.org/getting-started/"
+        f"git-boot-camp/#cloning-a-forked-cpython-repository "
+        f"\nRemotes found: \n{remotes_found}"
+    )
 
 
 def get_git_remote_default_branch(remote_name):
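The rewritten get_git_upstream_remote parses `git remote -v` output, where each line pairs a remote name with a URL and a (fetch)/(push) marker. A quick illustration of the filtering step on made-up output (the remote names and fork URL are hypothetical):

sample = (
    "origin\thttps://github.com/octocat/cpython (fetch)\n"
    "origin\thttps://github.com/octocat/cpython (push)\n"
    "upstream\thttps://github.com/python/cpython (fetch)\n"
    "upstream\thttps://github.com/python/cpython (push)\n"
)
filtered = {
    remote.split("\t")[0].lower() for remote in sample.split("\n")
    if "python/cpython" in remote.lower() and remote.endswith("(fetch)")
}
print(filtered)  # {'upstream'}: the only fetch remote pointing at python/cpython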
" + f"For help creating an upstream remote, see Dev Guide: " + f"https://devguide.python.org/getting-started/" + f"git-boot-camp/#cloning-a-forked-cpython-repository " + f"\nRemotes found: \n{remotes_found}" + ) def get_git_remote_default_branch(remote_name): diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 2be85a163b4..04f66eec1a0 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -44,7 +44,7 @@ EXTENSION_PREFIX = """\ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif """ @@ -214,33 +214,47 @@ class CCallMakerVisitor(GrammarVisitor): call.assigned_variable_type = node.type return call + def assert_no_undefined_behavior( + self, call: FunctionCall, wrapper: str, expected_rtype: str | None, + ) -> None: + if call.return_type != expected_rtype: + raise RuntimeError( + f"{call.function} return type is incompatible with {wrapper}: " + f"expect: {expected_rtype}, actual: {call.return_type}" + ) + def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall: call = self.generate_call(node.node) - if call.nodetype == NodeTypes.NAME_TOKEN: - return FunctionCall( - function=f"_PyPegen_lookahead_with_name", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + comment = None + if call.nodetype is NodeTypes.NAME_TOKEN: + function = "_PyPegen_lookahead_for_expr" + self.assert_no_undefined_behavior(call, function, "expr_ty") + elif call.nodetype is NodeTypes.STRING_TOKEN: + # _PyPegen_string_token() returns 'void *' instead of 'Token *'; + # in addition, the overall function call would return 'expr_ty'. + assert call.function == "_PyPegen_string_token" + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype == NodeTypes.SOFT_KEYWORD: - return FunctionCall( - function=f"_PyPegen_lookahead_with_string", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead_with_string" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: - return FunctionCall( - function=f"_PyPegen_lookahead_with_int", - arguments=[positive, call.function, *call.arguments], - return_type="int", - comment=f"token={node.node}", - ) + function = "_PyPegen_lookahead_with_int" + self.assert_no_undefined_behavior(call, function, "Token *") + comment = f"token={node.node}" + elif call.return_type == "expr_ty": + function = "_PyPegen_lookahead_for_expr" + elif call.return_type == "stmt_ty": + function = "_PyPegen_lookahead_for_stmt" else: - return FunctionCall( - function=f"_PyPegen_lookahead", - arguments=[positive, f"(void *(*)(Parser *)) {call.function}", *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, None) + return FunctionCall( + function=function, + arguments=[positive, call.function, *call.arguments], + return_type="int", + comment=comment, + ) def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall: return self.lookahead_call_helper(node, 1) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 5bf180bb30a..0beaab2d3e7 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,7 +1,7 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ 
in CI
-mypy==1.15
+mypy==1.16.1
 
 # needed for peg_generator:
-types-psutil==6.0.0.20240901
-types-setuptools==74.0.0.20240831
+types-psutil==7.0.0.20250601
+types-setuptools==80.9.0.20250529
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 68cfad3f92c..905af9dcfd8 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -492,7 +492,7 @@ class Stats:
             ): (trace_too_long, attempts),
             Doc(
                 "Trace too short",
-                "A potential trace is abandoned because it it too short.",
+                "A potential trace is abandoned because it is too short.",
             ): (trace_too_short, attempts),
             Doc(
                 "Inner loop found", "A trace is truncated because it has an inner loop"
diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt
index 3230f969436..93421b623b9 100644
--- a/Tools/tsan/suppressions_free_threading.txt
+++ b/Tools/tsan/suppressions_free_threading.txt
@@ -12,15 +12,12 @@
 
 # These warnings trigger directly in a CPython function.
 
-race_top:assign_version_tag
-race_top:_Py_slot_tp_getattr_hook
 race_top:dump_traceback
 race_top:fatal_error
 race_top:_PyFrame_GetCode
 race_top:_PyFrame_Initialize
 race_top:_PyObject_TryGetInstanceAttribute
 race_top:PyUnstable_InterpreterFrame_GetLine
-race_top:type_modified_unlocked
 race_top:write_thread_id
 
 # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading)
@@ -29,9 +26,6 @@
 race_top:rangeiter_next
 
 # gh-129748: test.test_free_threading.test_slots.TestSlots.test_object
 race_top:mi_block_set_nextx
 
-# gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict)
-race_top:update_one_slot
-
 # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40
 thread:pthread_create
 
@@ -49,7 +43,6 @@
 race:list_inplace_repeat_lock_held
 race:PyObject_Realloc
 
 # gh-133467. Some of these could be hard to trigger.
-race_top:update_one_slot
 race_top:_Py_slot_tp_getattr_hook
 race_top:slot_tp_descr_get
 race_top:type_set_name
diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py
index 849bd5de44e..c0d58aeaadd 100644
--- a/Tools/wasm/emscripten/__main__.py
+++ b/Tools/wasm/emscripten/__main__.py
@@ -167,11 +167,12 @@ def make_build_python(context, working_dir):
 @subdir(HOST_BUILD_DIR, clean_ok=True)
 def make_emscripten_libffi(context, working_dir):
     shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True)
-    with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file:
+    with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete_on_close=False) as tmp_file:
         with urlopen(
             "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz"
         ) as response:
             shutil.copyfileobj(response, tmp_file)
+        tmp_file.close()
         shutil.unpack_archive(tmp_file.name, working_dir)
         call(
             [EMSCRIPTEN_DIR / "make_libffi.sh"],
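The make_emscripten_libffi fix works because NamedTemporaryFile(delete_on_close=False), added in Python 3.12, defers deletion until the context manager exits rather than deleting the file when it is closed. That lets the code close the handle (required on Windows, where an open temporary file cannot be reopened by name) and still have shutil.unpack_archive read it. The pattern in isolation (the URL and destination are placeholders):

import shutil
import tempfile
from urllib.request import urlopen

url = "https://example.com/archive.tar.gz"  # placeholder
with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete_on_close=False) as tmp:
    with urlopen(url) as response:
        shutil.copyfileobj(response, tmp)
    tmp.close()  # release the handle; the file stays on disk
    shutil.unpack_archive(tmp.name, "dest")  # safe to reopen by name now
# the file is deleted when the with block exits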