Diffstat (limited to 'Tools')
56 files changed, 1972 insertions, 1701 deletions
diff --git a/Tools/build/.ruff.toml b/Tools/build/.ruff.toml index e4f024333ad..dcbf2936290 100644 --- a/Tools/build/.ruff.toml +++ b/Tools/build/.ruff.toml @@ -1,6 +1,12 @@ -target-version = "py310" -fix = true -line-length = 79 +extend = "../../.ruff.toml" # Inherit the project-wide settings + +[per-file-target-version] +"deepfreeze.py" = "py311" # requires `code.co_exceptiontable` +"stable_abi.py" = "py311" # requires 'tomllib' + +[format] +preview = true +docstring-code-format = true [lint] select = [ @@ -26,10 +32,6 @@ ignore = [ "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` ] -[per-file-target-version] -"deepfreeze.py" = "py310" -"stable_abi.py" = "py311" # requires 'tomllib' - [lint.per-file-ignores] "{check_extension_modules,freeze_modules}.py" = [ "UP031", # Use format specifiers instead of percent format diff --git a/Tools/build/check_warnings.py b/Tools/build/check_warnings.py index 7a8721087b6..3f49d8e7f2e 100644 --- a/Tools/build/check_warnings.py +++ b/Tools/build/check_warnings.py @@ -83,17 +83,13 @@ def extract_warnings_from_compiler_output( for i, line in enumerate(compiler_output.splitlines(), start=1): if match := compiled_regex.match(line): try: - compiler_warnings.append( - { - "file": match.group("file").removeprefix(path_prefix), - "line": match.group("line"), - "column": match.group("column"), - "message": match.group("message"), - "option": match.group("option") - .lstrip("[") - .rstrip("]"), - } - ) + compiler_warnings.append({ + "file": match.group("file").removeprefix(path_prefix), + "line": match.group("line"), + "column": match.group("column"), + "message": match.group("message"), + "option": match.group("option").lstrip("[").rstrip("]"), + }) except AttributeError: print( f"Error parsing compiler output. 
" @@ -151,7 +147,6 @@ def get_unexpected_warnings( """ unexpected_warnings = {} for file in files_with_warnings.keys(): - rule = is_file_ignored(file, ignore_rules) if rule: @@ -201,13 +196,11 @@ def get_unexpected_improvements( if rule.file_path not in files_with_warnings.keys(): unexpected_improvements.append((rule.file_path, rule.count, 0)) elif len(files_with_warnings[rule.file_path]) < rule.count: - unexpected_improvements.append( - ( - rule.file_path, - rule.count, - len(files_with_warnings[rule.file_path]), - ) - ) + unexpected_improvements.append(( + rule.file_path, + rule.count, + len(files_with_warnings[rule.file_path]), + )) if unexpected_improvements: print("Unexpected improvements:") diff --git a/Tools/build/compute-changes.py b/Tools/build/compute-changes.py index b3be7df2dba..b5993d29b92 100644 --- a/Tools/build/compute-changes.py +++ b/Tools/build/compute-changes.py @@ -56,12 +56,10 @@ class Outputs: def compute_changes() -> None: - target_branch, head_branch = git_branches() - if target_branch and head_branch: + target_branch, head_ref = git_refs() + if os.environ.get("GITHUB_EVENT_NAME", "") == "pull_request": # Getting changed files only makes sense on a pull request - files = get_changed_files( - f"origin/{target_branch}", f"origin/{head_branch}" - ) + files = get_changed_files(target_branch, head_ref) outputs = process_changed_files(files) else: # Otherwise, just run the tests @@ -89,15 +87,15 @@ def compute_changes() -> None: write_github_output(outputs) -def git_branches() -> tuple[str, str]: - target_branch = os.environ.get("GITHUB_BASE_REF", "") - target_branch = target_branch.removeprefix("refs/heads/") - print(f"target branch: {target_branch!r}") +def git_refs() -> tuple[str, str]: + target_ref = os.environ.get("CCF_TARGET_REF", "") + target_ref = target_ref.removeprefix("refs/heads/") + print(f"target ref: {target_ref!r}") - head_branch = os.environ.get("GITHUB_HEAD_REF", "") - head_branch = head_branch.removeprefix("refs/heads/") - print(f"head branch: {head_branch!r}") - return target_branch, head_branch + head_ref = os.environ.get("CCF_HEAD_REF", "") + head_ref = head_ref.removeprefix("refs/heads/") + print(f"head ref: {head_ref!r}") + return f"origin/{target_ref}", head_ref def get_changed_files( diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index 23f58447937..2b9f03aebb6 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -2,9 +2,12 @@ The script may be executed by _bootstrap_python interpreter. Shared library extension modules are not available in that case. -On Windows, and in cross-compilation cases, it is executed -by Python 3.10, and 3.11 features are not available. +Requires 3.11+ to be executed, +because relies on `code.co_qualname` and `code.co_exceptiontable`. 
""" + +from __future__ import annotations + import argparse import builtins import collections @@ -13,10 +16,14 @@ import os import re import time import types -from typing import TextIO import umarshal +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, TextIO + ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) verbose = False @@ -45,8 +52,8 @@ CO_FAST_FREE = 0x80 next_code_version = 1 -def get_localsplus(code: types.CodeType): - a = collections.defaultdict(int) +def get_localsplus(code: types.CodeType) -> tuple[tuple[str, ...], bytes]: + a: collections.defaultdict[str, int] = collections.defaultdict(int) for name in code.co_varnames: a[name] |= CO_FAST_LOCAL for name in code.co_cellvars: @@ -136,7 +143,7 @@ class Printer: return identifiers, strings @contextlib.contextmanager - def indent(self) -> None: + def indent(self) -> Iterator[None]: save_level = self.level try: self.level += 1 @@ -148,7 +155,7 @@ class Printer: self.file.writelines((" "*self.level, arg, "\n")) @contextlib.contextmanager - def block(self, prefix: str, suffix: str = "") -> None: + def block(self, prefix: str, suffix: str = "") -> Iterator[None]: self.write(prefix + " {") with self.indent(): yield @@ -250,9 +257,17 @@ class Printer: co_names = self.generate(name + "_names", code.co_names) co_filename = self.generate(name + "_filename", code.co_filename) co_name = self.generate(name + "_name", code.co_name) - co_qualname = self.generate(name + "_qualname", code.co_qualname) co_linetable = self.generate(name + "_linetable", code.co_linetable) - co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) + # We use 3.10 for type checking, but this module requires 3.11 + # TODO: bump python version for this script. 
+ co_qualname = self.generate( + name + "_qualname", + code.co_qualname, # type: ignore[attr-defined] + ) + co_exceptiontable = self.generate( + name + "_exceptiontable", + code.co_exceptiontable, # type: ignore[attr-defined] + ) # These fields are not directly accessible localsplusnames, localspluskinds = get_localsplus(code) co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) @@ -379,13 +394,13 @@ class Printer: self.write(f".cval = {{ {z.real}, {z.imag} }},") return f"&{name}.ob_base" - def generate_frozenset(self, name: str, fs: frozenset[object]) -> str: + def generate_frozenset(self, name: str, fs: frozenset[Any]) -> str: try: - fs = sorted(fs) + fs_sorted = sorted(fs) except TypeError: # frozen set with incompatible types, fallback to repr() - fs = sorted(fs, key=repr) - ret = self.generate_tuple(name, tuple(fs)) + fs_sorted = sorted(fs, key=repr) + ret = self.generate_tuple(name, tuple(fs_sorted)) self.write("// TODO: The above tuple should be a frozenset") return ret @@ -402,7 +417,7 @@ class Printer: # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") return self.cache[key] self.misses += 1 - if isinstance(obj, (types.CodeType, umarshal.Code)) : + if isinstance(obj, types.CodeType) : val = self.generate_code(name, obj) elif isinstance(obj, tuple): val = self.generate_tuple(name, obj) @@ -458,7 +473,7 @@ def decode_frozen_data(source: str) -> types.CodeType: if re.match(FROZEN_DATA_LINE, line): values.extend([int(x) for x in line.split(",") if x.strip()]) data = bytes(values) - return umarshal.loads(data) + return umarshal.loads(data) # type: ignore[no-any-return] def generate(args: list[str], output: TextIO) -> None: @@ -494,12 +509,12 @@ group.add_argument('args', nargs="*", default=(), help="Input file and module name (required) in file:modname format") @contextlib.contextmanager -def report_time(label: str): - t0 = time.time() +def report_time(label: str) -> Iterator[None]: + t0 = time.perf_counter() try: yield finally: - t1 = time.time() + t1 = time.perf_counter() if verbose: print(f"{label}: {t1-t0:.3f} sec") diff --git a/Tools/build/generate-build-details.py b/Tools/build/generate-build-details.py index 0da6c2948d6..8cd23e2f54f 100644 --- a/Tools/build/generate-build-details.py +++ b/Tools/build/generate-build-details.py @@ -3,6 +3,8 @@ # Script initially imported from: # https://github.com/FFY00/python-instrospection/blob/main/python_introspection/scripts/generate-build-details.py +from __future__ import annotations + import argparse import collections import importlib.machinery @@ -11,19 +13,23 @@ import os import sys import sysconfig +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Any + -def version_info_to_dict(obj): # (object) -> dict[str, Any] +def version_info_to_dict(obj: sys._version_info) -> dict[str, Any]: field_names = ('major', 'minor', 'micro', 'releaselevel', 'serial') return {field: getattr(obj, field) for field in field_names} -def get_dict_key(container, key): # (dict[str, Any], str) -> dict[str, Any] +def get_dict_key(container: dict[str, Any], key: str) -> dict[str, Any]: for part in key.split('.'): container = container[part] return container -def generate_data(schema_version): +def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: """Generate the build-details.json data (PEP 739). :param schema_version: The schema version of the data we want to generate. 
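Editor's note on the INCLUDEDIR -> INCLUDEPY swap in the generate-build-details.py hunks that follow: INCLUDEDIR is the bare include prefix (e.g. /usr/include), while INCLUDEPY is the versioned directory that actually contains Python.h, which is what the PEP 739 `c_api.headers` key should point at. The difference can be inspected on any local build (paths vary by platform):

    import sysconfig

    # INCLUDEDIR: generic prefix, e.g. /usr/include
    # INCLUDEPY:  Python-specific directory, e.g. /usr/include/python3.14,
    #             the one that actually holds Python.h
    for var in ("INCLUDEDIR", "INCLUDEPY"):
        print(var, "=", sysconfig.get_config_var(var))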
@@ -32,7 +38,9 @@ def generate_data(schema_version): if schema_version != '1.0': raise ValueError(f'Unsupported schema_version: {schema_version}') - data = collections.defaultdict(lambda: collections.defaultdict(dict)) + data: collections.defaultdict[str, Any] = collections.defaultdict( + lambda: collections.defaultdict(dict), + ) data['schema_version'] = schema_version @@ -67,7 +75,7 @@ def generate_data(schema_version): PY3LIBRARY = sysconfig.get_config_var('PY3LIBRARY') LIBPYTHON = sysconfig.get_config_var('LIBPYTHON') LIBPC = sysconfig.get_config_var('LIBPC') - INCLUDEDIR = sysconfig.get_config_var('INCLUDEDIR') + INCLUDEPY = sysconfig.get_config_var('INCLUDEPY') if os.name == 'posix': # On POSIX, LIBRARY is always the static library, while LDLIBRARY is the @@ -115,14 +123,14 @@ def generate_data(schema_version): if has_static_library: data['libpython']['static'] = os.path.join(LIBDIR, LIBRARY) - data['c_api']['include'] = INCLUDEDIR + data['c_api']['headers'] = INCLUDEPY if LIBPC: data['c_api']['pkgconfig_path'] = LIBPC return data -def make_paths_relative(data, config_path=None): # (dict[str, Any], str | None) -> None +def make_paths_relative(data: dict[str, Any], config_path: str | None = None) -> None: # Make base_prefix relative to the config_path directory if config_path: data['base_prefix'] = os.path.relpath(data['base_prefix'], os.path.dirname(config_path)) @@ -152,7 +160,7 @@ def make_paths_relative(data, config_path=None): # (dict[str, Any], str | None) container[child] = new_path -def main(): # () -> None +def main() -> None: parser = argparse.ArgumentParser(exit_on_error=False) parser.add_argument('location') parser.add_argument( diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index db01426e972..968397728b2 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -4,10 +4,13 @@ import glob import hashlib import json import os +import random import re import subprocess import sys +import time import typing +import urllib.error import urllib.request from pathlib import Path, PurePosixPath, PureWindowsPath @@ -161,6 +164,23 @@ def get_externals() -> list[str]: return externals +def download_with_retries(download_location: str, + max_retries: int = 7, + base_delay: float = 2.25, + max_jitter: float = 1.0) -> typing.Any: + """Download a file with exponential backoff retry.""" + for attempt in range(max_retries + 1): + try: + resp = urllib.request.urlopen(download_location) + except (urllib.error.URLError, ConnectionError) as ex: + if attempt == max_retries: + msg = f"Download from {download_location} failed." + raise OSError(msg) from ex + time.sleep(base_delay**attempt + random.uniform(0, max_jitter)) + else: + return resp + + def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: """Make a bunch of assertions about the SBOM package data to ensure it's consistent.""" @@ -175,7 +195,7 @@ def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: # and that the download URL is valid. 
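Editor's note: the retry loop added to generate_sbom.py above sleeps for base_delay**attempt plus jitter between attempts, so the worst-case wait grows from about 1 s to about 130 s over the seven retries. A quick way to print the schedule, using the defaults from the new function's signature:

    import random

    base_delay, max_jitter, max_retries = 2.25, 1.0, 7

    for attempt in range(max_retries):
        # Mirrors download_with_retries: it sleeps only when a retry remains;
        # the final failing attempt raises instead of sleeping.
        delay = base_delay**attempt + random.uniform(0, max_jitter)
        print(f"after failure {attempt + 1}: sleep ~{delay:.1f}s")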
if "checksums" not in package or "CI" in os.environ: download_location = package["downloadLocation"] - resp = urllib.request.urlopen(download_location) + resp = download_with_retries(download_location) error_if(resp.status != 200, f"Couldn't access URL: {download_location}'") package["checksums"] = [{ diff --git a/Tools/build/generate_stdlib_module_names.py b/Tools/build/generate_stdlib_module_names.py index 9873890837f..88414cdbb37 100644 --- a/Tools/build/generate_stdlib_module_names.py +++ b/Tools/build/generate_stdlib_module_names.py @@ -34,7 +34,6 @@ IGNORE = { '_testlimitedcapi', '_testmultiphase', '_testsinglephase', - '_testexternalinspection', '_xxtestfuzz', 'idlelib.idle_test', 'test', diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py index 7316333dcd4..9ee5ec86e75 100755 --- a/Tools/build/generate_token.py +++ b/Tools/build/generate_token.py @@ -87,7 +87,8 @@ extern "C" { (x) == INDENT || \\ (x) == DEDENT) #define ISSTRINGLIT(x) ((x) == STRING || \\ - (x) == FSTRING_MIDDLE) + (x) == FSTRING_MIDDLE || \\ + (x) == TSTRING_MIDDLE) // Export these 4 symbols for 'test_peg_generator' @@ -277,13 +278,13 @@ EXACT_TOKEN_TYPES = { %s } -def ISTERMINAL(x): +def ISTERMINAL(x: int) -> bool: return x < NT_OFFSET -def ISNONTERMINAL(x): +def ISNONTERMINAL(x: int) -> bool: return x >= NT_OFFSET -def ISEOF(x): +def ISEOF(x: int) -> bool: return x == ENDMARKER ''' diff --git a/Tools/build/mypy.ini b/Tools/build/mypy.ini index 06224163884..123dc895f90 100644 --- a/Tools/build/mypy.ini +++ b/Tools/build/mypy.ini @@ -1,7 +1,16 @@ [mypy] + +# Please, when adding new files here, also add them to: +# .github/workflows/mypy.yml files = Tools/build/compute-changes.py, - Tools/build/generate_sbom.py + Tools/build/deepfreeze.py, + Tools/build/generate-build-details.py, + Tools/build/generate_sbom.py, + Tools/build/verify_ensurepip_wheels.py, + Tools/build/update_file.py, + Tools/build/umarshal.py + pretty = True # Make sure Python can still be built @@ -10,6 +19,8 @@ python_version = 3.10 # ...And be strict: strict = True +strict_bytes = True +local_partial_types = True extra_checks = True enable_error_code = ignore-without-code,redundant-expr,truthy-bool,possibly-undefined warn_unreachable = True diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index 679fa7caf9f..865cffc2440 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -145,12 +145,12 @@ class Reader: def r_float_bin(self) -> float: buf = self.r_string(8) import struct # Lazy import to avoid breaking UNIX build - return struct.unpack("d", buf)[0] + return struct.unpack("d", buf)[0] # type: ignore[no-any-return] def r_float_str(self) -> float: n = self.r_byte() buf = self.r_string(n) - return ast.literal_eval(buf.decode("ascii")) + return ast.literal_eval(buf.decode("ascii")) # type: ignore[no-any-return] def r_ref_reserve(self, flag: int) -> int: if flag: @@ -306,7 +306,7 @@ def loads(data: bytes) -> Any: return r.r_object() -def main(): +def main() -> None: # Test import marshal import pprint @@ -314,8 +314,9 @@ def main(): data = marshal.dumps(sample) retval = loads(data) assert retval == sample, retval - sample = main.__code__ - data = marshal.dumps(sample) + + sample2 = main.__code__ + data = marshal.dumps(sample2) retval = loads(data) assert isinstance(retval, Code), retval pprint.pprint(retval.__dict__) diff --git a/Tools/build/update_file.py b/Tools/build/update_file.py index b4182c1d0cb..b4a5fb6e778 100644 --- a/Tools/build/update_file.py +++ b/Tools/build/update_file.py @@ -6,14 +6,27 
@@ This avoids wholesale rebuilds when a code (re)generation phase does not actually change the in-tree generated code. """ +from __future__ import annotations + import contextlib import os import os.path import sys +TYPE_CHECKING = False +if TYPE_CHECKING: + import typing + from collections.abc import Iterator + from io import TextIOWrapper + + _Outcome: typing.TypeAlias = typing.Literal['created', 'updated', 'same'] + @contextlib.contextmanager -def updating_file_with_tmpfile(filename, tmpfile=None): +def updating_file_with_tmpfile( + filename: str, + tmpfile: str | None = None, +) -> Iterator[tuple[TextIOWrapper, TextIOWrapper]]: """A context manager for updating a file via a temp file. The context manager provides two open files: the source file open @@ -46,13 +59,18 @@ def updating_file_with_tmpfile(filename, tmpfile=None): update_file_with_tmpfile(filename, tmpfile) -def update_file_with_tmpfile(filename, tmpfile, *, create=False): +def update_file_with_tmpfile( + filename: str, + tmpfile: str, + *, + create: bool = False, +) -> _Outcome: try: targetfile = open(filename, 'rb') except FileNotFoundError: if not create: raise # re-raise - outcome = 'created' + outcome: _Outcome = 'created' os.replace(tmpfile, filename) else: with targetfile: diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py index a37da2f7075..46c42916d93 100755 --- a/Tools/build/verify_ensurepip_wheels.py +++ b/Tools/build/verify_ensurepip_wheels.py @@ -20,13 +20,13 @@ ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding=" GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" -def print_notice(file_path: str, message: str) -> None: +def print_notice(file_path: str | Path, message: str) -> None: if GITHUB_ACTIONS: message = f"::notice file={file_path}::{message}" print(message, end="\n\n") -def print_error(file_path: str, message: str) -> None: +def print_error(file_path: str | Path, message: str) -> None: if GITHUB_ACTIONS: message = f"::error file={file_path}::{message}" print(message, end="\n\n") @@ -67,6 +67,7 @@ def verify_wheel(package_name: str) -> bool: return False release_files = json.loads(raw_text)["releases"][package_version] + expected_digest = "" for release_info in release_files: if package_path.name != release_info["filename"]: continue @@ -95,6 +96,7 @@ def verify_wheel(package_name: str) -> bool: return True + if __name__ == "__main__": exit_status = int(not verify_wheel("pip")) raise SystemExit(exit_status) diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO index d509489176b..2077534ccf4 100644 --- a/Tools/c-analyzer/TODO +++ b/Tools/c-analyzer/TODO @@ -794,6 +794,7 @@ Objects/genobject.c:_PyAsyncGenASend_Type PyTypeObject _P Objects/genobject.c:_PyAsyncGenAThrow_Type PyTypeObject _PyAsyncGenAThrow_Type Objects/genobject.c:_PyAsyncGenWrappedValue_Type PyTypeObject _PyAsyncGenWrappedValue_Type Objects/genobject.c:_PyCoroWrapper_Type PyTypeObject _PyCoroWrapper_Type +Objects/interpolationobject.c:_PyInterpolation_Type PyTypeObject _PyInterpolation_Type Objects/interpreteridobject.c:_PyInterpreterID_Type PyTypeObject _PyInterpreterID_Type Objects/iterobject.c:PyCallIter_Type PyTypeObject PyCallIter_Type Objects/iterobject.c:PySeqIter_Type PyTypeObject PySeqIter_Type @@ -827,6 +828,8 @@ Objects/sliceobject.c:PyEllipsis_Type PyTypeObject Py Objects/sliceobject.c:PySlice_Type PyTypeObject PySlice_Type Objects/stringlib/unicode_format.h:PyFieldNameIter_Type static PyTypeObject PyFieldNameIter_Type 
Objects/stringlib/unicode_format.h:PyFormatterIter_Type static PyTypeObject PyFormatterIter_Type +Objects/templateobject.c:_PyTemplateIter_Type PyTypeObject _PyTemplateIter_Type +Objects/templateobject.c:_PyTemplate_Type PyTypeObject _PyTemplate_Type Objects/tupleobject.c:PyTupleIter_Type PyTypeObject PyTupleIter_Type Objects/tupleobject.c:PyTuple_Type PyTypeObject PyTuple_Type Objects/typeobject.c:PyBaseObject_Type PyTypeObject PyBaseObject_Type diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 54954cfb5f8..3c3cb2f9c86 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -55,6 +55,7 @@ Objects/genobject.c - _PyAsyncGenASend_Type - Objects/genobject.c - _PyAsyncGenAThrow_Type - Objects/genobject.c - _PyAsyncGenWrappedValue_Type - Objects/genobject.c - _PyCoroWrapper_Type - +Objects/interpolationobject.c - _PyInterpolation_Type - Objects/iterobject.c - PyCallIter_Type - Objects/iterobject.c - PySeqIter_Type - Objects/iterobject.c - _PyAnextAwaitable_Type - @@ -86,6 +87,8 @@ Objects/setobject.c - PySetIter_Type - Objects/setobject.c - PySet_Type - Objects/sliceobject.c - PyEllipsis_Type - Objects/sliceobject.c - PySlice_Type - +Objects/templateobject.c - _PyTemplateIter_Type - +Objects/templateobject.c - _PyTemplate_Type - Objects/tupleobject.c - PyTupleIter_Type - Objects/tupleobject.c - PyTuple_Type - Objects/typeobject.c - _PyBufferWrapper_Type - diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index a33619b1b34..15b18f5286b 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -191,6 +191,7 @@ Python/pyfpe.c - PyFPE_counter - Python/import.c - pkgcontext - Python/pystate.c - _Py_tss_tstate - +Python/pystate.c - _Py_tss_gilstate - ##----------------------- ## should be const @@ -349,7 +350,6 @@ Objects/unicodeobject.c unicode_translate_call_errorhandler argparse - Parser/parser.c - reserved_keywords - Parser/parser.c - soft_keywords - Parser/lexer/lexer.c - type_comment_prefix - -Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - @@ -748,6 +748,7 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - +Modules/_zstd/_zstdmodule.c - _zstdmodule - Modules/clinic/md5module.c.h _md5_md5 _keywords - Modules/clinic/grpmodule.c.h grp_getgrgid _keywords - Modules/clinic/grpmodule.c.h grp_getgrnam _keywords - diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index dddbf2cf872..6466d2615cd 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -135,15 +135,13 @@ class Flush: @dataclass class StackItem: name: str - type: str | None size: str peek: bool = False used: bool = False def __str__(self) -> str: size = f"[{self.size}]" if self.size else "" - type = "" if self.type is None else f"{self.type} " - return f"{type}{self.name}{size} {self.peek}" + return f"{self.name}{size} {self.peek}" def is_array(self) -> bool: return self.size != "" @@ -182,7 +180,7 @@ class Uop: properties: Properties _size: int = -1 implicitly_created: bool = False - replicated = 0 + replicated = range(0) replicates: "Uop | None" = None # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro instruction_size: int | None = None @@ -345,7 +343,7 @@ 
def override_error( def convert_stack_item( item: parser.StackEffect, replace_op_arg_1: str | None ) -> StackItem: - return StackItem(item.name, item.type, item.size) + return StackItem(item.name, item.size) def check_unused(stack: list[StackItem], input_names: dict[str, lexer.Token]) -> None: "Unused items cannot be on the stack above used, non-peek items" @@ -543,7 +541,6 @@ def tier_variable(node: parser.CodeDef) -> int | None: def has_error_with_pop(op: parser.CodeDef) -> bool: return ( variable_used(op, "ERROR_IF") - or variable_used(op, "pop_1_error") or variable_used(op, "exception_unwind") ) @@ -551,7 +548,6 @@ def has_error_with_pop(op: parser.CodeDef) -> bool: def has_error_without_pop(op: parser.CodeDef) -> bool: return ( variable_used(op, "ERROR_NO_POP") - or variable_used(op, "pop_1_error") or variable_used(op, "exception_unwind") ) @@ -587,7 +583,7 @@ NON_ESCAPING_FUNCTIONS = ( "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", "PyStackRef_False", - "PyStackRef_FromPyObjectImmortal", + "PyStackRef_FromPyObjectBorrow", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", "PyStackRef_IsExactly", @@ -600,6 +596,7 @@ NON_ESCAPING_FUNCTIONS = ( "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", @@ -639,6 +636,10 @@ NON_ESCAPING_FUNCTIONS = ( "_PyLong_IsNegative", "_PyLong_IsNonNegativeCompact", "_PyLong_IsZero", + "_PyLong_BothAreCompact", + "_PyCompactLong_Add", + "_PyCompactLong_Multiply", + "_PyCompactLong_Subtract", "_PyManagedDictPointer_IsValues", "_PyObject_GC_IS_SHARED", "_PyObject_GC_IS_TRACKED", @@ -681,8 +682,16 @@ NON_ESCAPING_FUNCTIONS = ( "PyStackRef_IsTaggedInt", "PyStackRef_TagInt", "PyStackRef_UntagInt", + "PyStackRef_IncrementTaggedIntNoOverflow", + "PyStackRef_IsNullOrInt", + "PyStackRef_IsError", + "PyStackRef_IsValid", + "PyStackRef_Wrap", + "PyStackRef_Unwrap", + "_PyLong_CheckExactAndCompact", ) + def check_escaping_calls(instr: parser.CodeDef, escapes: dict[SimpleStmt, EscapingCall]) -> None: error: lexer.Token | None = None calls = {e.call for e in escapes.values()} @@ -734,7 +743,7 @@ def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingC continue #if not tkn.text.startswith(("Py", "_Py", "monitor")): # continue - if tkn.text.startswith(("sym_", "optimize_")): + if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): # Optimize functions continue if tkn.text.endswith("Check"): @@ -808,7 +817,7 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: if len(stack_inputs) == 0: return False return all( - (s.name == other.name and s.type == other.type and s.size == other.size) + (s.name == other.name and s.size == other.size) for s, other in zip(stack_inputs, instr.outputs) ) @@ -834,7 +843,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) - escapes = bool(escaping_calls) + escapes = bool(escaping_calls) or variable_used(op, "DECREF_INPUTS") pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations return Properties( @@ -861,6 +870,28 @@ def compute_properties(op: parser.CodeDef) -> Properties: needs_prev=variable_used(op, "prev_instr"), ) +def expand(items: list[StackItem], oparg: int) -> list[StackItem]: + # Only replace array item with scalar if no more than one item is an array + index = -1 + for i, 
item in enumerate(items): + if "oparg" in item.size: + if index >= 0: + return items + index = i + if index < 0: + return items + try: + count = int(eval(items[index].size.replace("oparg", str(oparg)))) + except ValueError: + return items + return items[:index] + [ + StackItem(items[index].name + f"_{i}", "", items[index].peek, items[index].used) for i in range(count) + ] + items[index+1:] + +def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect: + stack.inputs = expand(stack.inputs, oparg) + stack.outputs = expand(stack.outputs, oparg) + return stack def make_uop( name: str, @@ -880,20 +911,26 @@ def make_uop( ) for anno in op.annotations: if anno.startswith("replicate"): - result.replicated = int(anno[10:-1]) + text = anno[10:-1] + start, stop = text.split(":") + result.replicated = range(int(start), int(stop)) break else: return result - for oparg in range(result.replicated): + for oparg in result.replicated: name_x = name + "_" + str(oparg) properties = compute_properties(op) properties.oparg = False - properties.const_oparg = oparg + stack = analyze_stack(op) + if not variable_used(op, "oparg"): + stack = scalarize_stack(stack, oparg) + else: + properties.const_oparg = oparg rep = Uop( name=name_x, context=op.context, annotations=op.annotations, - stack=analyze_stack(op), + stack=stack, caches=analyze_caches(inputs), local_stores=find_variable_stores(op), body=op.block, diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 9ba0767cba3..47de205c0e9 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -56,9 +56,7 @@ def root_relative_path(filename: str) -> str: def type_and_null(var: StackItem) -> tuple[str, str]: - if var.type: - return var.type, "NULL" - elif var.is_array(): + if var.is_array(): return "_PyStackRef *", "NULL" else: return "_PyStackRef", "PyStackRef_NULL" @@ -140,6 +138,7 @@ class Emitter: ) -> bool: if storage.spilled: raise analysis_error("stack_pointer needs reloading before dispatch", tkn) + storage.stack.flush(self.out) self.emit(tkn) return False @@ -170,12 +169,12 @@ class Emitter: exit_if = deopt_if - def goto_error(self, offset: int, label: str, storage: Storage) -> str: + def goto_error(self, offset: int, storage: Storage) -> str: if offset > 0: - return f"JUMP_TO_LABEL(pop_{offset}_{label});" + return f"JUMP_TO_LABEL(pop_{offset}_error);" if offset < 0: storage.copy().flush(self.out) - return f"JUMP_TO_LABEL({label});" + return f"JUMP_TO_LABEL(error);" def error_if( self, @@ -191,17 +190,13 @@ class Emitter: unconditional = always_true(first_tkn) if unconditional: next(tkn_iter) - comma = next(tkn_iter) - if comma.kind != "COMMA": - raise analysis_error(f"Expected comma, got '{comma.text}'", comma) + next(tkn_iter) # RPAREN self.out.start_line() else: self.out.emit_at("if ", tkn) self.emit(lparen) - emit_to(self.out, tkn_iter, "COMMA") + emit_to(self.out, tkn_iter, "RPAREN") self.out.emit(") {\n") - label = next(tkn_iter).text - next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon storage.clear_inputs("at ERROR_IF") @@ -210,7 +205,7 @@ class Emitter: offset = int(c_offset) except ValueError: offset = -1 - self.out.emit(self.goto_error(offset, label, storage)) + self.out.emit(self.goto_error(offset, storage)) self.out.emit("\n") if not unconditional: self.out.emit("}\n") @@ -227,7 +222,7 @@ class Emitter: next(tkn_iter) # LPAREN next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon - self.out.emit_at(self.goto_error(0, "error", storage), tkn) 
+ self.out.emit_at(self.goto_error(0, storage), tkn) return False def decref_inputs( @@ -492,6 +487,11 @@ class Emitter: label_tkn = next(tkn_iter) self.goto_label(tkn, label_tkn, storage) reachable = False + elif tkn.kind == "RETURN": + self.emit(tkn) + semicolon = emit_to(self.out, tkn_iter, "SEMI") + self.emit(semicolon) + reachable = False elif tkn.kind == "IDENTIFIER": if tkn.text in self._replacers: if not self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst): diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 7901f3d92e0..72020133738 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -81,7 +81,7 @@ and a piece of C code describing its semantics: (definition | family | pseudo)+ definition: - "inst" "(" NAME ["," stack_effect] ")" "{" C-code "}" + "inst" "(" NAME "," stack_effect ")" "{" C-code "}" | "op" "(" NAME "," stack_effect ")" "{" C-code "}" | @@ -184,7 +184,7 @@ part of the DSL. Those include: * `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met. -* `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true. +* `ERROR_IF(cond)`. Jump to error handler if `cond` is true. * `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects. * `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects. * `INSTRUCTION_SIZE`. Replaced with the size of the instruction which is equal @@ -209,7 +209,7 @@ These requirements result in the following constraints on the use of 2. Before the first `ERROR_IF`, all input values must be `DECREF`ed, and no objects may be allocated or `INCREF`ed, with the exception of attempting to create an object and checking for success using - `ERROR_IF(result == NULL, label)`. (TODO: Unclear what to do with + `ERROR_IF(result == NULL)`. (TODO: Unclear what to do with intermediate results.) 3. No `DEOPT_IF` may follow an `ERROR_IF` in the same block. @@ -221,14 +221,14 @@ two idioms are valid: - Use `goto error`. - Use a block containing the appropriate `DECREF` calls ending in - `ERROR_IF(true, error)`. + `ERROR_IF(true)`. An example of the latter would be: ```cc res = PyObject_Add(left, right); if (res == NULL) { DECREF_INPUTS(); - ERROR_IF(true, error); + ERROR_IF(true); } ``` @@ -346,7 +346,7 @@ For explanations see "Generating the interpreter" below. 
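Editor's note: the interpreter_definition.md hunk above documents a generator change visible earlier in this diff (generators_common.py): ERROR_IF no longer takes a label, and the jump target is derived from the stack offset alone. A condensed model of that target selection, simplified from the Emitter.goto_error method in the diff (the real method also flushes the cached stack for negative offsets):

    def goto_error(offset: int) -> str:
        # Positive offsets jump to a handler that pops that many
        # stack items first; everything else uses the plain handler.
        if offset > 0:
            return f"JUMP_TO_LABEL(pop_{offset}_error);"
        return "JUMP_TO_LABEL(error);"

    assert goto_error(2) == "JUMP_TO_LABEL(pop_2_error);"
    assert goto_error(0) == "JUMP_TO_LABEL(error);"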
```C inst ( BUILD_TUPLE, (items[oparg] -- tuple) ) { tuple = _PyTuple_FromArraySteal(items, oparg); - ERROR_IF(tuple == NULL, error); + ERROR_IF(tuple == NULL); } ``` ```C diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 620e4b6f1f4..0bcdc5395dc 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -157,6 +157,13 @@ def generate_deopt_table(analysis: Analysis, out: CWriter) -> None: if inst.family is not None: deopt = inst.family.name deopts.append((inst.name, deopt)) + defined = set(analysis.opmap.values()) + for i in range(256): + if i not in defined: + deopts.append((f'{i}', f'{i}')) + + assert len(deopts) == 256 + assert len(set(x[0] for x in deopts)) == 256 for name, deopt in sorted(deopts): out.emit(f"[{name}] = {deopt},\n") out.emit("};\n\n") @@ -235,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 7a32275347e..4556b6d5a74 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -12,6 +12,8 @@ from analyzer import ( analyze_files, StackItem, analysis_error, + CodeSection, + Label, ) from generators_common import ( DEFAULT_INPUT, @@ -19,6 +21,7 @@ from generators_common import ( write_header, Emitter, TokenIterator, + always_true, ) from cwriter import CWriter from typing import TextIO @@ -30,17 +33,54 @@ DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_p def validate_uop(override: Uop, uop: Uop) -> None: - # To do - pass + """ + Check that the overridden uop (defined in 'optimizer_bytecodes.c') + has the same stack effects as the original uop (defined in 'bytecodes.c'). + + Ensure that: + - The number of inputs and outputs is the same. + - The names of the inputs and outputs are the same + (except for 'unused' which is ignored). + - The sizes of the inputs and outputs are the same. 
+ """ + for stack_effect in ('inputs', 'outputs'): + orig_effects = getattr(uop.stack, stack_effect) + new_effects = getattr(override.stack, stack_effect) + + if len(orig_effects) != len(new_effects): + msg = ( + f"{uop.name}: Must have the same number of {stack_effect} " + "in bytecodes.c and optimizer_bytecodes.c " + f"({len(orig_effects)} != {len(new_effects)})" + ) + raise analysis_error(msg, override.body.open) + + for orig, new in zip(orig_effects, new_effects, strict=True): + if orig.name != new.name and orig.name != "unused" and new.name != "unused": + msg = ( + f"{uop.name}: {stack_effect.capitalize()} must have " + "equal names in bytecodes.c and optimizer_bytecodes.c " + f"({orig.name} != {new.name})" + ) + raise analysis_error(msg, override.body.open) + + if orig.size != new.size: + msg = ( + f"{uop.name}: {stack_effect.capitalize()} must have " + "equal sizes in bytecodes.c and optimizer_bytecodes.c " + f"({orig.size!r} != {new.size!r})" + ) + raise analysis_error(msg, override.body.open) def type_name(var: StackItem) -> str: if var.is_array(): - return f"JitOptSymbol **" - if var.type: - return var.type - return f"JitOptSymbol *" + return "JitOptRef *" + return "JitOptRef " +def stackref_type_name(var: StackItem) -> str: + assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef." + return "_PyStackRef " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: variables = {"unused"} @@ -101,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None: class OptimizerEmitter(Emitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels) + self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure + self.original_uop = original_uop + self.stack = stack + def emit_save(self, storage: Storage) -> None: storage.flush(self.out) @@ -111,6 +157,185 @@ class OptimizerEmitter(Emitter): self.out.emit(goto) self.out.emit(label) + def replace_opcode_if_evaluates_pure( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + assert isinstance(uop, Uop) + input_identifiers = [] + for token in tkn_iter: + if token.kind == "IDENTIFIER": + input_identifiers.append(token) + if token.kind == "SEMI": + break + + if len(input_identifiers) == 0: + raise analysis_error( + "To evaluate an operation as pure, it must have at least 1 input", + tkn + ) + # Check that the input identifiers belong to the uop's + # input stack effect + uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs} + for input_tkn in input_identifiers: + if input_tkn.text not in uop_stack_effect_input_identifers: + raise analysis_error(f"{input_tkn.text} referenced in " + f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not " + f"exist in the base uop's input stack effects", + input_tkn) + input_identifiers_as_str = {tkn.text for tkn in input_identifiers} + used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str] + assert len(used_stack_inputs) > 0 + emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy()) + emitter.emit("if (\n") + for inp in used_stack_inputs[:-1]: + emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n") + emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n") + emitter.emit(') {\n') + # Declare variables, before they are shadowed. 
+ for inp in used_stack_inputs: + if inp.used: + emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n") + # Shadow the symbolic variables with stackrefs. + for inp in used_stack_inputs: + if inp.is_array(): + raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn) + if inp.used: + emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n") + # Rename all output variables to stackref variant. + for outp in self.original_uop.stack.outputs: + if outp.is_array(): + raise analysis_error( + "Array output StackRefs not supported for evaluating pure ops.", + self.original_uop.body.open + ) + emitter.emit(f"_PyStackRef {outp.name}_stackref;\n") + + + storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False) + # No reference management of outputs needed. + for var in storage.outputs: + var.in_local = True + emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n") + emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False) + self.out.start_line() + emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n") + # Finally, assign back the output stackrefs to symbolics. + for outp in self.original_uop.stack.outputs: + # All new stackrefs are created from new references. + # That's how the stackref contract works. + if not outp.peek: + emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") + else: + emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") + storage.flush(self.out) + emitter.emit("break;\n") + emitter.emit("}\n") + return True + +class OptimizerConstantEmitter(OptimizerEmitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels, original_uop, stack) + # Replace all outputs to point to their stackref versions. + overrides = { + outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs + } + self._replacers = {**self._replacers, **overrides} + + def emit_to_with_replacement( + self, + out: CWriter, + tkn_iter: TokenIterator, + end: str, + uop: CodeSection, + storage: Storage, + inst: Instruction | None + ) -> Token: + parens = 0 + for tkn in tkn_iter: + if tkn.kind == end and parens == 0: + return tkn + if tkn.kind == "LPAREN": + parens += 1 + if tkn.kind == "RPAREN": + parens -= 1 + if tkn.text in self._replacers: + self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst) + else: + out.emit(tkn) + raise analysis_error(f"Expecting {end}. Reached end of file", tkn) + + def emit_stackref_override( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.emit(tkn) + self.out.emit("_stackref ") + return True + + def deopt_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.start_line() + self.out.emit("if (") + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.emit(") {\n") + next(tkn_iter) # Semi colon + # We guarantee this will deopt in real-world code + # via constants analysis. So just bail. 
+ self.emit("ctx->done = true;\n") + self.emit("break;\n") + self.emit("}\n") + return not always_true(first_tkn) + + exit_if = deopt_if + + def error_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + next(tkn_iter) # RPAREN + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.out.emit(") {\n") + next(tkn_iter) # Semi colon + storage.clear_inputs("at ERROR_IF") + + self.out.emit("goto error;\n") + if not unconditional: + self.out.emit("}\n") + return not unconditional + + def write_uop( override: Uop | None, uop: Uop, @@ -141,13 +366,14 @@ def write_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n") if override: - emitter = OptimizerEmitter(out, {}) + emitter = OptimizerEmitter(out, {}, uop, stack.copy()) # No reference management of inputs needed. for var in storage.inputs: # type: ignore[possibly-undefined] var.in_local = False _, storage = emitter.emit_tokens(override, storage, None, False) out.start_line() storage.flush(out) + out.start_line() else: emit_default(out, uop, stack) out.start_line() @@ -194,7 +420,7 @@ def generate_abstract_interpreter( declare_variables(override, out, skip_inputs=False) else: declare_variables(uop, out, skip_inputs=True) - stack = Stack(extract_bits=False, cast_type="JitOptSymbol *") + stack = Stack() write_uop(override, uop, out, stack, debug, skip_inputs=(override is None)) out.start_line() out.emit("break;\n") diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 9c9b0053a59..c7fe0d162ac 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -247,12 +247,11 @@ class SimpleStmt(Stmt): @dataclass class StackEffect(Node): name: str = field(compare=False) # __eq__ only uses type, cond, size - type: str = "" # Optional `:type` size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond def __repr__(self) -> str: - items = [self.name, self.type, self.size] + items = [self.name, self.size] while items and items[-1] == "": del items[-1] return f"StackEffect({', '.join(repr(item) for item in items)})" @@ -380,9 +379,13 @@ class Parser(PLexer): while anno := self.expect(lx.ANNOTATION): if anno.text == "replicate": self.require(lx.LPAREN) - times = self.require(lx.NUMBER) + stop = self.require(lx.NUMBER) + start_text = "0" + if self.expect(lx.COLON): + start_text = stop.text + stop = self.require(lx.NUMBER) self.require(lx.RPAREN) - annotations.append(f"replicate({times.text})") + annotations.append(f"replicate({start_text}:{stop.text})") else: annotations.append(anno.text) tkn = self.expect(lx.INST) @@ -463,20 +466,13 @@ class Parser(PLexer): # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): - type_text = "" - if self.expect(lx.COLON): - type_text = self.require(lx.IDENTIFIER).text.strip() - if self.expect(lx.TIMES): - type_text += " *" size_text = "" if self.expect(lx.LBRACKET): - if type_text: - raise self.make_syntax_error("Unexpected [") if not (size := self.expression()): raise self.make_syntax_error("Expected expression") self.require(lx.RBRACKET) size_text = 
size.text.strip() - return StackEffect(tkn.text, type_text, size_text) + return StackEffect(tkn.text, size_text) return None @contextual diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 6b681775f48..3a0e7e5d0d5 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -168,7 +168,7 @@ class Local: @staticmethod def register(name: str) -> "Local": - item = StackItem(name, None, "", False, True) + item = StackItem(name, "", False, True) return Local(item, None, True) def kill(self) -> None: @@ -216,13 +216,11 @@ def array_or_scalar(var: StackItem | Local) -> str: return "array" if var.is_array() else "scalar" class Stack: - def __init__(self, extract_bits: bool=True, cast_type: str = "uintptr_t") -> None: + def __init__(self) -> None: self.base_offset = PointerOffset.zero() self.physical_sp = PointerOffset.zero() self.logical_sp = PointerOffset.zero() self.variables: list[Local] = [] - self.extract_bits = extract_bits - self.cast_type = cast_type def drop(self, var: StackItem, check_liveness: bool) -> None: self.logical_sp = self.logical_sp.pop(var) @@ -268,10 +266,8 @@ class Stack: self.base_offset = self.logical_sp if var.name in UNUSED or not var.used: return Local.unused(var, self.base_offset) - cast = f"({var.type})" if (not indirect and var.type) else "" - bits = ".bits" if cast and self.extract_bits else "" c_offset = (self.base_offset - self.physical_sp).to_c() - assign = f"{var.name} = {cast}{indirect}stack_pointer[{c_offset}]{bits};\n" + assign = f"{var.name} = {indirect}stack_pointer[{c_offset}];\n" out.emit(assign) self._print(out) return Local.from_memory(var, self.base_offset) @@ -292,12 +288,8 @@ class Stack: out: CWriter, var: StackItem, stack_offset: PointerOffset, - cast_type: str, - extract_bits: bool, ) -> None: - cast = f"({cast_type})" if var.type else "" - bits = ".bits" if cast and extract_bits else "" - out.emit(f"stack_pointer[{stack_offset.to_c()}]{bits} = {cast}{var.name};\n") + out.emit(f"stack_pointer[{stack_offset.to_c()}] = {var.name};\n") def _save_physical_sp(self, out: CWriter) -> None: if self.physical_sp != self.logical_sp: @@ -320,7 +312,7 @@ class Stack: self._print(out) var.memory_offset = var_offset stack_offset = var_offset - self.physical_sp - Stack._do_emit(out, var.item, stack_offset, self.cast_type, self.extract_bits) + Stack._do_emit(out, var.item, stack_offset) self._print(out) var_offset = var_offset.push(var.item) @@ -350,7 +342,7 @@ class Stack: out.emit(self.as_comment() + "\n") def copy(self) -> "Stack": - other = Stack(self.extract_bits, self.cast_type) + other = Stack() other.base_offset = self.base_offset other.physical_sp = self.physical_sp other.logical_sp = self.logical_sp @@ -496,7 +488,7 @@ class Storage: f"Expected '{undefined}' to be defined before '{out.name}'" else: undefined = out.name - while len(self.outputs) > self.peeks and not self.needs_defining(self.outputs[0]): + while len(self.outputs) > self.peeks and not self.needs_defining(self.outputs[self.peeks]): out = self.outputs.pop(self.peeks) self.stack.push(out) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 0ac2a0497e5..fc3bc47286f 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -64,7 +64,7 @@ class Tier2Emitter(Emitter): super().__init__(out, labels) self._replacers["oparg"] = self.oparg - def goto_error(self, offset: int, label: str, storage: Storage) -> str: + def goto_error(self, offset: int, 
storage: Storage) -> str: # To do: Add jump targets for popping values. if offset != 0: storage.copy().flush(self.out) @@ -91,7 +91,7 @@ class Tier2Emitter(Emitter): self.emit("}\n") return not always_true(first_tkn) - def exit_if( # type: ignore[override] + def exit_if( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 6f995e5c46b..1cc23837a72 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -24,7 +24,8 @@ DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_uop_metadata.h" def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n") - out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n") + out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n") + out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n") out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n") out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n") out.emit("#ifdef NEED_OPCODE_METADATA\n") @@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n") out.emit("};\n\n") - out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n") + out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n") for uop in analysis.uops.values(): if uop.replicated: - out.emit(f"[{uop.name}] = {uop.replicated},\n") + assert(uop.replicated.step == 1) + out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n") out.emit("};\n\n") out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n") diff --git a/Tools/clinic/.ruff.toml b/Tools/clinic/.ruff.toml index c019572d0cb..5033887df0c 100644 --- a/Tools/clinic/.ruff.toml +++ b/Tools/clinic/.ruff.toml @@ -1,5 +1,4 @@ -target-version = "py310" -fix = true +extend = "../../.ruff.toml" # Inherit the project-wide settings [lint] select = [ diff --git a/Tools/clinic/libclinic/converters.py b/Tools/clinic/libclinic/converters.py index 633fb5f56a6..39d0ac557a6 100644 --- a/Tools/clinic/libclinic/converters.py +++ b/Tools/clinic/libclinic/converters.py @@ -17,6 +17,54 @@ from libclinic.converter import ( TypeSet = set[bltns.type[object]] +class BaseUnsignedIntConverter(CConverter): + + def use_converter(self) -> None: + if self.converter: + self.add_include('pycore_long.h', + f'{self.converter}()') + + def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: + if not limited_capi: + return super().parse_arg(argname, displayname, limited_capi=limited_capi) + return self.format_code(""" + {{{{ + Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_REJECT_NEGATIVE | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) {{{{ + goto exit; + }}}} + if ((size_t)_bytes > sizeof({type})) {{{{ + PyErr_SetString(PyExc_OverflowError, + "Python int too large for C {type}"); + goto exit; + }}}} + }}}} + """, + argname=argname, + type=self.type) + + +class uint8_converter(BaseUnsignedIntConverter): + type = "uint8_t" + converter = '_PyLong_UInt8_Converter' + +class uint16_converter(BaseUnsignedIntConverter): + type = "uint16_t" + converter = '_PyLong_UInt16_Converter' + +class 
uint32_converter(BaseUnsignedIntConverter): + type = "uint32_t" + converter = '_PyLong_UInt32_Converter' + +class uint64_converter(BaseUnsignedIntConverter): + type = "uint64_t" + converter = '_PyLong_UInt64_Converter' + + class bool_converter(CConverter): type = 'int' default_type = bool @@ -211,29 +259,7 @@ class short_converter(CConverter): return super().parse_arg(argname, displayname, limited_capi=limited_capi) -def format_inline_unsigned_int_converter(self: CConverter, argname: str) -> str: - return self.format_code(""" - {{{{ - Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}), - Py_ASNATIVEBYTES_NATIVE_ENDIAN | - Py_ASNATIVEBYTES_ALLOW_INDEX | - Py_ASNATIVEBYTES_REJECT_NEGATIVE | - Py_ASNATIVEBYTES_UNSIGNED_BUFFER); - if (_bytes < 0) {{{{ - goto exit; - }}}} - if ((size_t)_bytes > sizeof({type})) {{{{ - PyErr_SetString(PyExc_OverflowError, - "Python int too large for C {type}"); - goto exit; - }}}} - }}}} - """, - argname=argname, - type=self.type) - - -class unsigned_short_converter(CConverter): +class unsigned_short_converter(BaseUnsignedIntConverter): type = 'unsigned short' default_type = int c_ignored_default = "0" @@ -244,11 +270,6 @@ class unsigned_short_converter(CConverter): else: self.converter = '_PyLong_UnsignedShort_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedShort_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedShort_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'H': return self.format_code(""" @@ -258,9 +279,7 @@ class unsigned_short_converter(CConverter): }}}} """, argname=argname) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) @add_legacy_c_converter('C', accept={str}) @@ -311,7 +330,7 @@ class int_converter(CConverter): return super().parse_arg(argname, displayname, limited_capi=limited_capi) -class unsigned_int_converter(CConverter): +class unsigned_int_converter(BaseUnsignedIntConverter): type = 'unsigned int' default_type = int c_ignored_default = "0" @@ -322,11 +341,6 @@ class unsigned_int_converter(CConverter): else: self.converter = '_PyLong_UnsignedInt_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedInt_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedInt_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'I': return self.format_code(""" @@ -336,9 +350,7 @@ class unsigned_int_converter(CConverter): }}}} """, argname=argname) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class long_converter(CConverter): @@ -359,7 +371,7 @@ class long_converter(CConverter): return super().parse_arg(argname, displayname, limited_capi=limited_capi) -class unsigned_long_converter(CConverter): +class unsigned_long_converter(BaseUnsignedIntConverter): type = 'unsigned long' default_type = int c_ignored_default = "0" @@ -370,11 +382,6 @@ class unsigned_long_converter(CConverter): else: self.converter = '_PyLong_UnsignedLong_Converter' - def use_converter(self) -> None: - if self.converter == 
'_PyLong_UnsignedLong_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedLong_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'k': return self.format_code(""" @@ -387,9 +394,7 @@ class unsigned_long_converter(CConverter): argname=argname, bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi), ) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class long_long_converter(CConverter): @@ -410,7 +415,7 @@ class long_long_converter(CConverter): return super().parse_arg(argname, displayname, limited_capi=limited_capi) -class unsigned_long_long_converter(CConverter): +class unsigned_long_long_converter(BaseUnsignedIntConverter): type = 'unsigned long long' default_type = int c_ignored_default = "0" @@ -421,11 +426,6 @@ class unsigned_long_long_converter(CConverter): else: self.converter = '_PyLong_UnsignedLongLong_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedLongLong_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedLongLong_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'K': return self.format_code(""" @@ -438,9 +438,7 @@ class unsigned_long_long_converter(CConverter): argname=argname, bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi), ) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class Py_ssize_t_converter(CConverter): @@ -557,15 +555,11 @@ class slice_index_converter(CConverter): argname=argname) -class size_t_converter(CConverter): +class size_t_converter(BaseUnsignedIntConverter): type = 'size_t' converter = '_PyLong_Size_t_Converter' c_ignored_default = "0" - def use_converter(self) -> None: - self.add_include('pycore_long.h', - '_PyLong_Size_t_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'n': return self.format_code(""" @@ -575,9 +569,7 @@ class size_t_converter(CConverter): }}}} """, argname=argname) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class fildes_converter(CConverter): diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 926bc66b944..1a59e25189d 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -27,6 +27,7 @@ import queue import sys import threading import time +from operator import methodcaller # The iterations in individual benchmarks are scaled by this factor. 
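The `BaseUnsignedIntConverter` refactor above funnels every unsigned converter through one limited-C-API parsing path built on `PyLong_AsNativeBytes()`. As a rough Python analogue of what the generated C does (the helper name and the 2-byte width are illustrative, not part of the change):

```python
def parse_unsigned(value, c_type="uint16_t", size=2):
    """Rough Python analogue of the generated C parsing path (illustrative)."""
    index = value.__index__()      # Py_ASNATIVEBYTES_ALLOW_INDEX
    if index < 0:                  # Py_ASNATIVEBYTES_REJECT_NEGATIVE
        raise ValueError("value must be non-negative")
    try:
        # PyLong_AsNativeBytes() reports how many bytes are needed; needing
        # more than sizeof(type) means overflow (Py_ASNATIVEBYTES_UNSIGNED_BUFFER).
        index.to_bytes(size, "little")
    except OverflowError:
        raise OverflowError(f"Python int too large for C {c_type}") from None
    return index

print(parse_unsigned(65_535))    # fits in uint16_t
# parse_unsigned(65_536)         # would raise OverflowError
```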
WORK_SCALE = 100 @@ -188,6 +189,18 @@ def thread_local_read(): _ = tmp.x _ = tmp.x +class MyClass: + __slots__ = () + + def func(self): + pass + +@register_benchmark +def method_caller(): + mc = methodcaller("func") + obj = MyClass() + for i in range(1000 * WORK_SCALE): + mc(obj) def bench_one_thread(func): t0 = time.perf_counter_ns() diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index b407a8a643b..02af1caff7d 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -140,6 +140,9 @@ if __name__ == '__main__': data = locale.locale_alias.copy() data.update(parse_glibc_supported(args.glibc_supported)) data.update(parse(args.locale_alias)) + # Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist + # on all platforms. + data['c.utf8'] = 'C.UTF-8' while True: # Repeat optimization while the size is decreased. n = len(data) diff --git a/Tools/inspection/benchmark_external_inspection.py b/Tools/inspection/benchmark_external_inspection.py new file mode 100644 index 00000000000..0ac7ac4d385 --- /dev/null +++ b/Tools/inspection/benchmark_external_inspection.py @@ -0,0 +1,473 @@ +import _remote_debugging +import time +import subprocess +import sys +import contextlib +import tempfile +import os +import argparse +from _colorize import get_colors, can_colorize + +CODE = '''\ +import time +import os +import sys +import math + +def slow_fibonacci(n): + """Intentionally slow recursive fibonacci - should show up prominently in profiler""" + if n <= 1: + return n + return slow_fibonacci(n-1) + slow_fibonacci(n-2) + +def medium_computation(): + """Medium complexity function""" + result = 0 + for i in range(1000): + result += math.sqrt(i) * math.sin(i) + return result + +def fast_loop(): + """Fast simple loop""" + total = 0 + for i in range(100): + total += i + return total + +def string_operations(): + """String manipulation that should be visible in profiler""" + text = "hello world " * 100 + words = text.split() + return " ".join(reversed(words)) + +def nested_calls(): + """Nested function calls to test call stack depth""" + def level1(): + def level2(): + def level3(): + return medium_computation() + return level3() + return level2() + return level1() + +def main_loop(): + """Main computation loop with different execution paths""" + iteration = 0 + + while True: + iteration += 1 + + # Different execution paths with different frequencies + if iteration % 50 == 0: + # Expensive operation - should show high per-call time + result = slow_fibonacci(20) + + elif iteration % 10 == 0: + # Medium operation + result = nested_calls() + + elif iteration % 5 == 0: + # String operations + result = string_operations() + + else: + # Fast operation - most common + result = fast_loop() + + # Small delay to make sampling more interesting + time.sleep(0.001) + +if __name__ == "__main__": + main_loop() +''' + +DEEP_STATIC_CODE = """\ +import time +def factorial(n): + if n <= 1: + time.sleep(10000) + return 1 + return n * factorial(n-1) + +factorial(900) +""" + +CODE_WITH_TONS_OF_THREADS = '''\ +import time +import threading +import random +import math + +def cpu_intensive_work(): + """Do some CPU intensive calculations""" + result = 0 + for _ in range(10000): + result += math.sin(random.random()) * math.cos(random.random()) + return result + +def io_intensive_work(): + """Simulate IO intensive work with sleeps""" + time.sleep(0.1) + +def mixed_workload(): + """Mix of CPU and IO work""" + while True: + if random.random() < 0.3: + cpu_intensive_work() + else: + 
io_intensive_work()
+
+def create_threads(n):
+    """Create n threads doing mixed workloads"""
+    threads = []
+    for _ in range(n):
+        t = threading.Thread(target=mixed_workload, daemon=True)
+        t.start()
+        threads.append(t)
+    return threads
+
+# Start with 5 threads
+active_threads = create_threads(5)
+thread_count = 5
+
+# Main thread manages threads and does work
+while True:
+    # Randomly add or remove threads
+    if random.random() < 0.1:  # 10% chance each iteration
+        if random.random() < 0.5 and thread_count < 100:
+            # Add 1-5 new threads
+            new_count = random.randint(1, 5)
+            new_threads = create_threads(new_count)
+            active_threads.extend(new_threads)
+            thread_count += new_count
+        elif thread_count > 10:
+            # Remove 1-3 threads
+            remove_count = random.randint(1, 5)
+            # The threads will terminate naturally since they're daemons
+            active_threads = active_threads[remove_count:]
+            thread_count -= remove_count
+
+    cpu_intensive_work()
+    time.sleep(0.05)
+'''
+
+CODE_EXAMPLES = {
+    "basic": {
+        "code": CODE,
+        "description": "Mixed workload with fibonacci, computations, and string operations",
+    },
+    "deep_static": {
+        "code": DEEP_STATIC_CODE,
+        "description": "Deep recursive call stack with 900+ frames (factorial)",
+    },
+    "threads": {
+        "code": CODE_WITH_TONS_OF_THREADS,
+        "description": "Tons of threads doing mixed CPU/IO work",
+    },
+}
+
+
+def benchmark(unwinder, duration_seconds=10):
+    """Benchmark mode - measure raw sampling speed for specified duration"""
+    sample_count = 0
+    fail_count = 0
+    total_work_time = 0.0
+    start_time = time.perf_counter()
+    end_time = start_time + duration_seconds
+    total_attempts = 0
+
+    colors = get_colors(can_colorize())
+
+    print(
+        f"{colors.BOLD_BLUE}Benchmarking sampling speed for {duration_seconds} seconds...{colors.RESET}"
+    )
+
+    try:
+        while time.perf_counter() < end_time:
+            total_attempts += 1
+            work_start = time.perf_counter()
+            try:
+                stack_trace = unwinder.get_stack_trace()
+                if stack_trace:
+                    sample_count += 1
+            except (OSError, RuntimeError, UnicodeDecodeError) as e:
+                fail_count += 1
+
+            work_end = time.perf_counter()
+            total_work_time += work_end - work_start
+
+            if total_attempts % 10000 == 0:
+                avg_work_time_us = (total_work_time / total_attempts) * 1e6
+                work_rate = (
+                    total_attempts / total_work_time if total_work_time > 0 else 0
+                )
+                success_rate = (sample_count / total_attempts) * 100
+
+                # Color code the success rate
+                if success_rate >= 95:
+                    success_color = colors.GREEN
+                elif success_rate >= 80:
+                    success_color = colors.YELLOW
+                else:
+                    success_color = colors.RED
+
+                print(
+                    f"{colors.CYAN}Attempts:{colors.RESET} {total_attempts} | "
+                    f"{colors.CYAN}Success:{colors.RESET} {success_color}{success_rate:.1f}%{colors.RESET} | "
+                    f"{colors.CYAN}Rate:{colors.RESET} {colors.MAGENTA}{work_rate:.1f}Hz{colors.RESET} | "
+                    f"{colors.CYAN}Avg:{colors.RESET} {colors.YELLOW}{avg_work_time_us:.2f}µs{colors.RESET}"
+                )
+    except KeyboardInterrupt:
+        print(f"\n{colors.YELLOW}Benchmark interrupted by user{colors.RESET}")
+
+    actual_end_time = time.perf_counter()
+    wall_time = actual_end_time - start_time
+
+    # Return final statistics
+    return {
+        "wall_time": wall_time,
+        "total_attempts": total_attempts,
+        "sample_count": sample_count,
+        "fail_count": fail_count,
+        "success_rate": (
+            (sample_count / total_attempts) * 100 if total_attempts > 0 else 0
+        ),
+        "total_work_time": total_work_time,
+        "avg_work_time_us": (
+            (total_work_time / total_attempts) * 1e6 if total_attempts > 0 else 0
+        ),
+        "work_rate_hz": total_attempts / total_work_time if total_work_time > 0 else 0,
+        "samples_per_sec": sample_count / wall_time if wall_time > 0 else 0,
+    }
+
+
+def print_benchmark_results(results):
+    """Print comprehensive benchmark results"""
+    colors = get_colors(can_colorize())
+
+    print(f"\n{colors.BOLD_GREEN}{'='*60}{colors.RESET}")
+    print(f"{colors.BOLD_GREEN}get_stack_trace() Benchmark Results{colors.RESET}")
+    print(f"{colors.BOLD_GREEN}{'='*60}{colors.RESET}")
+
+    # Basic statistics
+    print(f"\n{colors.BOLD_CYAN}Basic Statistics:{colors.RESET}")
+    print(
+        f"  {colors.CYAN}Wall time:{colors.RESET} {colors.YELLOW}{results['wall_time']:.3f}{colors.RESET} seconds"
+    )
+    print(
+        f"  {colors.CYAN}Total attempts:{colors.RESET} {colors.MAGENTA}{results['total_attempts']:,}{colors.RESET}"
+    )
+    print(
+        f"  {colors.CYAN}Successful samples:{colors.RESET} {colors.GREEN}{results['sample_count']:,}{colors.RESET}"
+    )
+    print(
+        f"  {colors.CYAN}Failed samples:{colors.RESET} {colors.RED}{results['fail_count']:,}{colors.RESET}"
+    )
+
+    # Color code the success rate
+    success_rate = results["success_rate"]
+    if success_rate >= 95:
+        success_color = colors.BOLD_GREEN
+    elif success_rate >= 80:
+        success_color = colors.BOLD_YELLOW
+    else:
+        success_color = colors.BOLD_RED
+
+    print(
+        f"  {colors.CYAN}Success rate:{colors.RESET} {success_color}{success_rate:.2f}%{colors.RESET}"
+    )
+
+    # Performance metrics
+    print(f"\n{colors.BOLD_CYAN}Performance Metrics:{colors.RESET}")
+    print(
+        f"  {colors.CYAN}Average call time:{colors.RESET} {colors.YELLOW}{results['avg_work_time_us']:.2f}{colors.RESET} µs"
+    )
+    print(
+        f"  {colors.CYAN}Work rate:{colors.RESET} {colors.MAGENTA}{results['work_rate_hz']:.1f}{colors.RESET} calls/sec"
+    )
+    print(
+        f"  {colors.CYAN}Sample rate:{colors.RESET} {colors.MAGENTA}{results['samples_per_sec']:.1f}{colors.RESET} samples/sec"
+    )
+    print(
+        f"  {colors.CYAN}Total work time:{colors.RESET} {colors.YELLOW}{results['total_work_time']:.3f}{colors.RESET} seconds"
+    )
+
+    # Color code work efficiency
+    efficiency = (results["total_work_time"] / results["wall_time"]) * 100
+    if efficiency >= 80:
+        efficiency_color = colors.GREEN
+    elif efficiency >= 50:
+        efficiency_color = colors.YELLOW
+    else:
+        efficiency_color = colors.RED
+
+    print(
+        f"  {colors.CYAN}Work efficiency:{colors.RESET} {efficiency_color}{efficiency:.1f}%{colors.RESET}"
+    )
+
+
+def parse_arguments():
+    """Parse command line arguments"""
+    # Build the code examples description
+    examples_desc = "\n".join(
+        [f"  {name}: {info['description']}" for name, info in CODE_EXAMPLES.items()]
+    )
+
+    parser = argparse.ArgumentParser(
+        description="Benchmark get_stack_trace() performance",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f"""
+Examples:
+  %(prog)s                          # Run basic benchmark for 10 seconds (default)
+  %(prog)s --duration 30            # Run basic benchmark for 30 seconds
+  %(prog)s -d 60                    # Run basic benchmark for 60 seconds
+  %(prog)s --code deep_static       # Run deep static call stack benchmark
+  %(prog)s --code deep_static -d 30 # Run deep static benchmark for 30 seconds
+
+Available code examples:
+{examples_desc}
+        """,
+        color=True,
+    )
+
+    parser.add_argument(
+        "--duration",
+        "-d",
+        type=int,
+        default=10,
+        help="Benchmark duration in seconds (default: 10)",
+    )
+
+    parser.add_argument(
+        "--code",
+        "-c",
+        choices=list(CODE_EXAMPLES.keys()),
+        default="basic",
+        help="Code example to benchmark (default: basic)",
+    )
+
+    parser.add_argument(
+        "--threads",
+        choices=["all", "main", "only_active"],
+        default="all",
+        help="Which threads to include in the 
benchmark (default: all)", + ) + + return parser.parse_args() + + +def create_target_process(temp_file, code_example="basic"): + """Create and start the target process for benchmarking""" + example_info = CODE_EXAMPLES.get(code_example, {"code": CODE}) + selected_code = example_info["code"] + temp_file.write(selected_code) + temp_file.flush() + + process = subprocess.Popen( + [sys.executable, temp_file.name], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + + # Give it time to start + time.sleep(1.0) + + # Check if it's still running + if process.poll() is not None: + stdout, stderr = process.communicate() + raise RuntimeError( + f"Target process exited unexpectedly:\nSTDOUT: {stdout.decode()}\nSTDERR: {stderr.decode()}" + ) + + return process, temp_file.name + + +def cleanup_process(process, temp_file_path): + """Clean up the target process and temporary file""" + with contextlib.suppress(Exception): + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=5.0) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +def main(): + """Main benchmark function""" + colors = get_colors(can_colorize()) + args = parse_arguments() + + print(f"{colors.BOLD_MAGENTA}External Inspection Benchmark Tool{colors.RESET}") + print(f"{colors.BOLD_MAGENTA}{'=' * 34}{colors.RESET}") + + example_info = CODE_EXAMPLES.get(args.code, {"description": "Unknown"}) + print( + f"\n{colors.CYAN}Code Example:{colors.RESET} {colors.GREEN}{args.code}{colors.RESET}" + ) + print(f"{colors.CYAN}Description:{colors.RESET} {example_info['description']}") + print( + f"{colors.CYAN}Benchmark Duration:{colors.RESET} {colors.YELLOW}{args.duration}{colors.RESET} seconds" + ) + + process = None + temp_file_path = None + + try: + # Create target process + print(f"\n{colors.BLUE}Creating and starting target process...{colors.RESET}") + with tempfile.NamedTemporaryFile(mode="w", suffix=".py") as temp_file: + process, temp_file_path = create_target_process(temp_file, args.code) + print( + f"{colors.GREEN}Target process started with PID: {colors.BOLD_WHITE}{process.pid}{colors.RESET}" + ) + + # Run benchmark with specified duration + with process: + # Create unwinder and run benchmark + print(f"{colors.BLUE}Initializing unwinder...{colors.RESET}") + try: + kwargs = {} + if args.threads == "all": + kwargs["all_threads"] = True + elif args.threads == "main": + kwargs["all_threads"] = False + elif args.threads == "only_active": + kwargs["only_active_thread"] = True + unwinder = _remote_debugging.RemoteUnwinder( + process.pid, **kwargs + ) + results = benchmark(unwinder, duration_seconds=args.duration) + finally: + cleanup_process(process, temp_file_path) + + # Print results + print_benchmark_results(results) + + except PermissionError as e: + print( + f"{colors.BOLD_RED}Error: Insufficient permissions to read stack trace: {e}{colors.RESET}" + ) + print( + f"{colors.YELLOW}Try running with appropriate privileges (e.g., sudo){colors.RESET}" + ) + return 1 + except Exception as e: + print(f"{colors.BOLD_RED}Error during benchmarking: {e}{colors.RESET}") + if process: + with contextlib.suppress(Exception): + stdout, stderr = process.communicate(timeout=1) + if stdout: + print( + f"{colors.CYAN}Process STDOUT:{colors.RESET} {stdout.decode()}" + ) + if stderr: + print( + f"{colors.RED}Process STDERR:{colors.RESET} {stderr.decode()}" + ) + raise + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Tools/jit/README.md b/Tools/jit/README.md index 4107265754f..8e817574b4d 100644 
--- a/Tools/jit/README.md +++ b/Tools/jit/README.md @@ -41,7 +41,9 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri ### Windows -Install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** +LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`. + +Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** Alternatively, you can use [chocolatey](https://chocolatey.org): @@ -52,13 +54,13 @@ choco install llvm --version=19.1.0 ## Building -For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.bat`. +For `PCbuild`-based builds, pass the `--experimental-jit` option to `build.bat`. -For all other builds, pass the new `--enable-experimental-jit` option to `configure`. +For all other builds, pass the `--enable-experimental-jit` option to `configure`. Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform. -The JIT can also be enabled or disabled using the `PYTHON_JIT` environment variable, even on builds where it is enabled or disabled by default. More details about configuring CPython with the JIT and optional values for `--enable-experimental-jit` can be found [here](https://docs.python.org/dev/whatsnew/3.13.html#experimental-jit-compiler). +The JIT can also be enabled or disabled using the `PYTHON_JIT` environment variable, even on builds where it is enabled or disabled by default. More details about configuring CPython with the JIT and optional values for `--enable-experimental-jit` can be found [here](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit). 
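As a quick way to see the `PYTHON_JIT` toggle described above in action, here is a minimal sketch; the `./python` path is an assumption standing in for your own JIT-enabled build:

```python
import os
import subprocess

# Run the same interpreter twice, once with the JIT forced off and once on;
# PYTHON_JIT overrides whatever the build default is. "./python" is an
# illustrative path -- point it at a JIT-enabled build.
for jit in ("0", "1"):
    env = os.environ | {"PYTHON_JIT": jit}
    subprocess.run(
        ["./python", "-c", "import sys; print(sys.version)"],
        env=env,
        check=True,
    )
```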
[^pep-744]: [PEP 744](https://peps.python.org/pep-0744/) diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py index 925b56ac669..f09a8404871 100644 --- a/Tools/jit/_llvm.py +++ b/Tools/jit/_llvm.py @@ -8,8 +8,11 @@ import shlex import subprocess import typing +import _targets + _LLVM_VERSION = 19 _LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\S*\s+") +_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0" _P = typing.ParamSpec("_P") _R = typing.TypeVar("_R") @@ -74,6 +77,11 @@ async def _find_tool(tool: str, *, echo: bool = False) -> str | None: path = f"{tool}-{_LLVM_VERSION}" if await _check_tool_version(path, echo=echo): return path + # PCbuild externals: + externals = os.environ.get("EXTERNALS_DIR", _targets.EXTERNALS) + path = os.path.join(externals, _EXTERNALS_LLVM_TAG, "bin", tool) + if await _check_tool_version(path, echo=echo): + return path # Homebrew-installed executables: prefix = await _get_brew_llvm_prefix(echo=echo) if prefix is not None: diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py new file mode 100644 index 00000000000..1077e4106fd --- /dev/null +++ b/Tools/jit/_optimizers.py @@ -0,0 +1,319 @@ +"""Low-level optimization of textual assembly.""" + +import dataclasses +import pathlib +import re +import typing + +# Same as saying "not string.startswith('')": +_RE_NEVER_MATCH = re.compile(r"(?!)") +# Dictionary mapping branch instructions to their inverted branch instructions. +# If a branch cannot be inverted, the value is None: +_X86_BRANCHES = { + # https://www.felixcloutier.com/x86/jcc + "ja": "jna", + "jae": "jnae", + "jb": "jnb", + "jbe": "jnbe", + "jc": "jnc", + "jcxz": None, + "je": "jne", + "jecxz": None, + "jg": "jng", + "jge": "jnge", + "jl": "jnl", + "jle": "jnle", + "jo": "jno", + "jp": "jnp", + "jpe": "jpo", + "jrcxz": None, + "js": "jns", + "jz": "jnz", + # https://www.felixcloutier.com/x86/loop:loopcc + "loop": None, + "loope": None, + "loopne": None, + "loopnz": None, + "loopz": None, +} +# Update with all of the inverted branches, too: +_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} + + +@dataclasses.dataclass +class _Block: + label: str | None = None + # Non-instruction lines like labels, directives, and comments: + noninstructions: list[str] = dataclasses.field(default_factory=list) + # Instruction lines: + instructions: list[str] = dataclasses.field(default_factory=list) + # If this block ends in a jump, where to? 
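The `_X86_BRANCHES` table above, once merged with its inverted pairs, is what later makes hot-branch inversion possible. A toy Python rendering of the swap it enables (the `invert` helper is hypothetical, not part of the module):

```python
# Subset of the inversion table above, plus the reversed pairs, built the
# same way the real module builds them:
branches = {"je": "jne", "jl": "jnl"}
branches |= {v: k for k, v in branches.items()}

def invert(branch_line, jump_line):
    """Swap 'je HOT; jmp COLD' into 'jne COLD; jmp HOT' (illustrative)."""
    b_op, b_target = branch_line.split()
    j_op, j_target = jump_line.split()
    inverted = branches.get(b_op)
    if inverted is None or j_op != "jmp":
        return branch_line, jump_line  # cannot invert this pair
    return f"{inverted} {j_target}", f"{j_op} {b_target}"

print(invert("je _JIT_CONTINUE", "jmp .Lcold"))
# ('jne .Lcold', 'jmp _JIT_CONTINUE')
```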
+ target: typing.Self | None = None + # The next block in the linked list: + link: typing.Self | None = None + # Whether control flow can fall through to the linked block above: + fallthrough: bool = True + # Whether this block can eventually reach the next uop (_JIT_CONTINUE): + hot: bool = False + + def resolve(self) -> typing.Self: + """Find the first non-empty block reachable from this one.""" + block = self + while block.link and not block.instructions: + block = block.link + return block + + +@dataclasses.dataclass +class Optimizer: + """Several passes of analysis and optimization for textual assembly.""" + + path: pathlib.Path + _: dataclasses.KW_ONLY + # prefix used to mangle symbols on some platforms: + prefix: str = "" + # The first block in the linked list: + _root: _Block = dataclasses.field(init=False, default_factory=_Block) + _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) + # No groups: + _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile( + r"\s*(?:\.|#|//|$)" + ) + # One group (label): + _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( + r'\s*(?P<label>[\w."$?@]+):' + ) + # Override everything that follows in subclasses: + _alignment: typing.ClassVar[int] = 1 + _branches: typing.ClassVar[dict[str, str | None]] = {} + # Two groups (instruction and target): + _re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + # One group (target): + _re_jump: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + # No groups: + _re_return: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + + def __post_init__(self) -> None: + # Split the code into a linked list of basic blocks. A basic block is an + # optional label, followed by zero or more non-instruction lines, + # followed by zero or more instruction lines (only the last of which may + # be a branch, jump, or return): + text = self._preprocess(self.path.read_text()) + block = self._root + for line in text.splitlines(): + # See if we need to start a new block: + if match := self._re_label.match(line): + # Label. New block: + block.link = block = self._lookup_label(match["label"]) + block.noninstructions.append(line) + continue + if self._re_noninstructions.match(line): + if block.instructions: + # Non-instruction lines. New block: + block.link = block = _Block() + block.noninstructions.append(line) + continue + if block.target or not block.fallthrough: + # Current block ends with a branch, jump, or return. 
New block: + block.link = block = _Block() + block.instructions.append(line) + if match := self._re_branch.match(line): + # A block ending in a branch has a target and fallthrough: + block.target = self._lookup_label(match["target"]) + assert block.fallthrough + elif match := self._re_jump.match(line): + # A block ending in a jump has a target and no fallthrough: + block.target = self._lookup_label(match["target"]) + block.fallthrough = False + elif self._re_return.match(line): + # A block ending in a return has no target and fallthrough: + assert not block.target + block.fallthrough = False + + def _preprocess(self, text: str) -> str: + # Override this method to do preprocessing of the textual assembly: + return text + + @classmethod + def _invert_branch(cls, line: str, target: str) -> str | None: + match = cls._re_branch.match(line) + assert match + inverted = cls._branches.get(match["instruction"]) + if not inverted: + return None + (a, b), (c, d) = match.span("instruction"), match.span("target") + # Before: + # je FOO + # After: + # jne BAR + return "".join([line[:a], inverted, line[b:c], target, line[d:]]) + + @classmethod + def _update_jump(cls, line: str, target: str) -> str: + match = cls._re_jump.match(line) + assert match + a, b = match.span("target") + # Before: + # jmp FOO + # After: + # jmp BAR + return "".join([line[:a], target, line[b:]]) + + def _lookup_label(self, label: str) -> _Block: + if label not in self._labels: + self._labels[label] = _Block(label) + return self._labels[label] + + def _blocks(self) -> typing.Generator[_Block, None, None]: + block: _Block | None = self._root + while block: + yield block + block = block.link + + def _body(self) -> str: + lines = [] + hot = True + for block in self._blocks(): + if hot != block.hot: + hot = block.hot + # Make it easy to tell at a glance where cold code is: + lines.append(f"# JIT: {'HOT' if hot else 'COLD'} ".ljust(80, "#")) + lines.extend(block.noninstructions) + lines.extend(block.instructions) + return "\n".join(lines) + + def _predecessors(self, block: _Block) -> typing.Generator[_Block, None, None]: + # This is inefficient, but it's never wrong: + for pre in self._blocks(): + if pre.target is block or pre.fallthrough and pre.link is block: + yield pre + + def _insert_continue_label(self) -> None: + # Find the block with the last instruction: + for end in reversed(list(self._blocks())): + if end.instructions: + break + # Before: + # jmp FOO + # After: + # jmp FOO + # .balign 8 + # _JIT_CONTINUE: + # This lets the assembler encode _JIT_CONTINUE jumps at build time! + align = _Block() + align.noninstructions.append(f"\t.balign\t{self._alignment}") + continuation = self._lookup_label(f"{self.prefix}_JIT_CONTINUE") + assert continuation.label + continuation.noninstructions.append(f"{continuation.label}:") + end.link, align.link, continuation.link = align, continuation, end.link + + def _mark_hot_blocks(self) -> None: + # Start with the last block, and perform a DFS to find all blocks that + # can eventually reach it: + todo = list(self._blocks())[-1:] + while todo: + block = todo.pop() + block.hot = True + todo.extend(pre for pre in self._predecessors(block) if not pre.hot) + + def _invert_hot_branches(self) -> None: + for branch in self._blocks(): + link = branch.link + if link is None: + continue + jump = link.resolve() + # Before: + # je HOT + # jmp COLD + # After: + # jne COLD + # jmp HOT + if ( + # block ends with a branch to hot code... 
+ branch.target + and branch.fallthrough + and branch.target.hot + # ...followed by a jump to cold code with no other predecessors: + and jump.target + and not jump.fallthrough + and not jump.target.hot + and len(jump.instructions) == 1 + and list(self._predecessors(jump)) == [branch] + ): + assert jump.target.label + assert branch.target.label + inverted = self._invert_branch( + branch.instructions[-1], jump.target.label + ) + # Check to see if the branch can even be inverted: + if inverted is None: + continue + branch.instructions[-1] = inverted + jump.instructions[-1] = self._update_jump( + jump.instructions[-1], branch.target.label + ) + branch.target, jump.target = jump.target, branch.target + jump.hot = True + + def _remove_redundant_jumps(self) -> None: + # Zero-length jumps can be introduced by _insert_continue_label and + # _invert_hot_branches: + for block in self._blocks(): + # Before: + # jmp FOO + # FOO: + # After: + # FOO: + if ( + block.target + and block.link + and block.target.resolve() is block.link.resolve() + ): + block.target = None + block.fallthrough = True + block.instructions.pop() + + def run(self) -> None: + """Run this optimizer.""" + self._insert_continue_label() + self._mark_hot_blocks() + self._invert_hot_branches() + self._remove_redundant_jumps() + self.path.write_text(self._body()) + + +class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods + """aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu""" + + # TODO: @diegorusso + _alignment = 8 + # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch- + _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)") + + +class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods + """i686-pc-windows-msvc/x86_64-apple-darwin/x86_64-unknown-linux-gnu""" + + _branches = _X86_BRANCHES + _re_branch = re.compile( + rf"\s*(?P<instruction>{'|'.join(_X86_BRANCHES)})\s+(?P<target>[\w.]+)" + ) + # https://www.felixcloutier.com/x86/jmp + _re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)") + # https://www.felixcloutier.com/x86/ret + _re_return = re.compile(r"\s*ret\b") + + +class OptimizerX8664Windows(OptimizerX86): # pylint: disable = too-few-public-methods + """x86_64-pc-windows-msvc""" + + def _preprocess(self, text: str) -> str: + text = super()._preprocess(text) + # Before: + # rex64 jmpq *__imp__JIT_CONTINUE(%rip) + # After: + # jmp _JIT_CONTINUE + far_indirect_jump = ( + rf"rex64\s+jmpq\s+\*__imp_(?P<target>{self.prefix}_JIT_\w+)\(%rip\)" + ) + return re.sub(far_indirect_jump, r"jmp\t\g<target>", text) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 03b0ba647b0..1d82f5366f6 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -17,8 +17,6 @@ class HoleValue(enum.Enum): # The base address of the machine code for the current uop (exposed as _JIT_ENTRY): CODE = enum.auto() - # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE): - CONTINUE = enum.auto() # The base address of the read-only data for this uop: DATA = enum.auto() # The address of the current executor (exposed as _JIT_EXECUTOR): @@ -97,7 +95,6 @@ _PATCH_FUNCS = { # Translate HoleValues to C expressions: _HOLE_EXPRS = { HoleValue.CODE: "(uintptr_t)code", - HoleValue.CONTINUE: "(uintptr_t)code + sizeof(code_body)", HoleValue.DATA: "(uintptr_t)data", HoleValue.EXECUTOR: "(uintptr_t)executor", # These should all have been turned into DATA values by process_relocations: @@ -209,64 +206,6 @@ class Stencil: 
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def add_nops(self, nop: bytes, alignment: int) -> None: - """Add NOPs until there is alignment. Fail if it is not possible.""" - offset = len(self.body) - nop_size = len(nop) - - # Calculate the gap to the next multiple of alignment. - gap = -offset % alignment - if gap: - if gap % nop_size == 0: - count = gap // nop_size - self.body.extend(nop * count) - else: - raise ValueError( - f"Cannot add nops of size '{nop_size}' to a body with " - f"offset '{offset}' to align with '{alignment}'" - ) - - def remove_jump(self) -> None: - """Remove a zero-length continuation jump, if it exists.""" - hole = max(self.holes, key=lambda hole: hole.offset) - match hole: - case Hole( - offset=offset, - kind="IMAGE_REL_AMD64_REL32", - value=HoleValue.GOT, - symbol="_JIT_CONTINUE", - addend=-4, - ) as hole: - # jmp qword ptr [rip] - jump = b"\x48\xff\x25\x00\x00\x00\x00" - offset -= 3 - case Hole( - offset=offset, - kind="IMAGE_REL_I386_REL32" | "R_X86_64_PLT32" | "X86_64_RELOC_BRANCH", - value=HoleValue.CONTINUE, - symbol=None, - addend=addend, - ) as hole if ( - _signed(addend) == -4 - ): - # jmp 5 - jump = b"\xe9\x00\x00\x00\x00" - offset -= 1 - case Hole( - offset=offset, - kind="R_AARCH64_JUMP26", - value=HoleValue.CONTINUE, - symbol=None, - addend=0, - ) as hole: - # b #4 - jump = b"\x00\x00\x00\x14" - case _: - return - if self.body[offset:] == jump: - self.body = self.body[:offset] - self.holes.remove(hole) - @dataclasses.dataclass class StencilGroup: @@ -284,9 +223,7 @@ class StencilGroup: _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False) _trampolines: set[int] = dataclasses.field(default_factory=set, init=False) - def process_relocations( - self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b"" - ) -> None: + def process_relocations(self, known_symbols: dict[str, int]) -> None: """Fix up all GOT and internal relocations for this stencil group.""" for hole in self.code.holes.copy(): if ( @@ -306,8 +243,6 @@ class StencilGroup: self._trampolines.add(ordinal) hole.addend = ordinal hole.symbol = None - self.code.remove_jump() - self.code.add_nops(nop=nop, alignment=alignment) self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index f7ea159884c..ed10329d25d 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -10,8 +10,10 @@ import re import sys import tempfile import typing +import shlex import _llvm +import _optimizers import _schema import _stencils import _writer @@ -23,9 +25,12 @@ TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve() TOOLS_JIT = TOOLS_JIT_BUILD.parent TOOLS = TOOLS_JIT.parent CPYTHON = TOOLS.parent +EXTERNALS = CPYTHON / "externals" PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h" TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c" +ASYNCIO_RUNNER = asyncio.Runner() + _S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection) _R = typing.TypeVar( "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation @@ -35,14 +40,17 @@ _R = typing.TypeVar( @dataclasses.dataclass class _Target(typing.Generic[_S, _R]): triple: str + condition: str _: dataclasses.KW_ONLY - alignment: int = 1 args: typing.Sequence[str] = () + optimizer: type[_optimizers.Optimizer] = _optimizers.Optimizer prefix: str = "" stable: bool = False debug: bool = False verbose: bool = False + cflags: str = "" 
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() def _get_nop(self) -> bytes: if re.fullmatch(r"aarch64-.*", self.triple): @@ -53,13 +61,14 @@ class _Target(typing.Generic[_S, _R]): raise ValueError(f"NOP not defined for {self.triple}") return nop - def _compute_digest(self, out: pathlib.Path) -> str: + def _compute_digest(self) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) hasher.update(self.debug.to_bytes()) + hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) - hasher.update((out / "pyconfig.h").read_bytes()) + hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): for filename in filenames: hasher.update(pathlib.Path(dirpath, filename).read_bytes()) @@ -113,22 +122,23 @@ class _Target(typing.Generic[_S, _R]): async def _compile( self, opname: str, c: pathlib.Path, tempdir: pathlib.Path ) -> _stencils.StencilGroup: + s = tempdir / f"{opname}.s" o = tempdir / f"{opname}.o" - args = [ + args_s = [ f"--target={self.triple}", "-DPy_BUILD_CORE_MODULE", "-D_DEBUG" if self.debug else "-DNDEBUG", f"-D_JIT_OPCODE={opname}", "-D_PyJIT_ACTIVE", "-D_Py_JIT", - "-I.", + f"-I{self.pyconfig_dir}", f"-I{CPYTHON / 'Include'}", f"-I{CPYTHON / 'Include' / 'internal'}", f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", f"-I{CPYTHON / 'Python'}", f"-I{CPYTHON / 'Tools' / 'jit'}", "-O3", - "-c", + "-S", # Shorten full absolute file paths in the generated code (like the # __FILE__ macro and assert failure messages) for reproducibility: f"-ffile-prefix-map={CPYTHON}=.", @@ -147,11 +157,16 @@ class _Target(typing.Generic[_S, _R]): "-fno-stack-protector", "-std=c11", "-o", - f"{o}", + f"{s}", f"{c}", *self.args, + # Allow user-provided CFLAGS to override any defaults + *shlex.split(self.cflags), ] - await _llvm.run("clang", args, echo=self.verbose) + await _llvm.run("clang", args_s, echo=self.verbose) + self.optimizer(s, prefix=self.prefix).run() + args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] + await _llvm.run("clang", args_o, echo=self.verbose) return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: @@ -180,32 +195,32 @@ class _Target(typing.Generic[_S, _R]): tasks.append(group.create_task(coro, name=opname)) stencil_groups = {task.get_name(): task.result() for task in tasks} for stencil_group in stencil_groups.values(): - stencil_group.process_relocations( - known_symbols=self.known_symbols, - alignment=self.alignment, - nop=self._get_nop(), - ) + stencil_group.process_relocations(self.known_symbols) return stencil_groups def build( - self, out: pathlib.Path, *, comment: str = "", force: bool = False + self, + *, + comment: str = "", + force: bool = False, + jit_stencils: pathlib.Path, ) -> None: """Build jit_stencils.h in the given directory.""" + jit_stencils.parent.mkdir(parents=True, exist_ok=True) if not self.stable: warning = f"JIT support for {self.triple} is still experimental!" 
request = "Please report any issues you encounter.".center(len(warning)) outline = "=" * len(warning) print("\n".join(["", outline, warning, request, outline, ""])) - digest = f"// {self._compute_digest(out)}\n" - jit_stencils = out / "jit_stencils.h" + digest = f"// {self._compute_digest()}\n" if ( not force and jit_stencils.exists() and jit_stencils.read_text().startswith(digest) ): return - stencil_groups = asyncio.run(self._build_stencils()) - jit_stencils_new = out / "jit_stencils.h.new" + stencil_groups = ASYNCIO_RUNNER.run(self._build_stencils()) + jit_stencils_new = jit_stencils.parent / "jit_stencils.h.new" try: with jit_stencils_new.open("w") as file: file.write(digest) @@ -510,35 +525,43 @@ class _MachO( def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" + optimizer: type[_optimizers.Optimizer] target: _COFF | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): - target = _MachO(host, alignment=8, prefix="_") + condition = "defined(__aarch64__) && defined(__APPLE__)" + optimizer = _optimizers.OptimizerAArch64 + target = _MachO(host, condition, optimizer=optimizer, prefix="_") elif re.fullmatch(r"aarch64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll", "-fplt"] - target = _COFF(host, alignment=8, args=args) + condition = "defined(_M_ARM64)" + optimizer = _optimizers.OptimizerAArch64 + target = _COFF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): - args = [ - "-fpic", - # On aarch64 Linux, intrinsics were being emitted and this flag - # was required to disable them. - "-mno-outline-atomics", - ] - target = _ELF(host, alignment=8, args=args) + # -mno-outline-atomics: Keep intrinsics from being emitted. + args = ["-fpic", "-mno-outline-atomics"] + condition = "defined(__aarch64__) && defined(__linux__)" + optimizer = _optimizers.OptimizerAArch64 + target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): - args = [ - "-DPy_NO_ENABLE_SHARED", - # __attribute__((preserve_none)) is not supported - "-Wno-ignored-attributes", - ] - target = _COFF(host, args=args, prefix="_") + # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. 
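The surrounding `get_target()` dispatch reduces to `re.fullmatch` over PEP 11 triples, pairing each with a C preprocessor condition and an optimizer class. A condensed, illustrative sketch (two entries only, not the real dispatch table):

```python
import re

# Each PEP 11 triple pattern maps to the #if condition emitted into
# jit_stencils.h and the optimizer class used for its assembly.
_TARGETS = [
    (r"aarch64-apple-darwin.*",
     "defined(__aarch64__) && defined(__APPLE__)", "OptimizerAArch64"),
    (r"x86_64-.*-linux-gnu",
     "defined(__x86_64__) && defined(__linux__)", "OptimizerX86"),
]

def describe(host):
    for pattern, condition, optimizer in _TARGETS:
        if re.fullmatch(pattern, host):
            return condition, optimizer
    raise ValueError(host)

print(describe("x86_64-unknown-linux-gnu"))
# ('defined(__x86_64__) && defined(__linux__)', 'OptimizerX86')
```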
+ args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] + optimizer = _optimizers.OptimizerX86 + condition = "defined(_M_IX86)" + target = _COFF(host, condition, args=args, optimizer=optimizer, prefix="_") elif re.fullmatch(r"x86_64-apple-darwin.*", host): - target = _MachO(host, prefix="_") + condition = "defined(__x86_64__) && defined(__APPLE__)" + optimizer = _optimizers.OptimizerX86 + target = _MachO(host, condition, optimizer=optimizer, prefix="_") elif re.fullmatch(r"x86_64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] - target = _COFF(host, args=args) + condition = "defined(_M_X64)" + optimizer = _optimizers.OptimizerX8664Windows + target = _COFF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] - target = _ELF(host, args=args) + condition = "defined(__x86_64__) && defined(__linux__)" + optimizer = _optimizers.OptimizerX86 + target = _ELF(host, condition, args=args, optimizer=optimizer) else: raise ValueError(host) return target diff --git a/Tools/jit/build.py b/Tools/jit/build.py index a8cb0f67c36..a0733005929 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -11,7 +11,10 @@ if __name__ == "__main__": comment = f"$ {shlex.join([pathlib.Path(sys.executable).name] + sys.argv)}" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "target", type=_targets.get_target, help="a PEP 11 target triple to compile for" + "target", + nargs="+", + type=_targets.get_target, + help="a PEP 11 target triple to compile for", ) parser.add_argument( "-d", "--debug", action="store_true", help="compile for a debug build of Python" @@ -20,9 +23,48 @@ if __name__ == "__main__": "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" ) parser.add_argument( + "-o", + "--output-dir", + help="where to output generated files", + required=True, + type=lambda p: pathlib.Path(p).resolve(), + ) + parser.add_argument( + "-p", + "--pyconfig-dir", + help="where to find pyconfig.h", + required=True, + type=lambda p: pathlib.Path(p).resolve(), + ) + parser.add_argument( "-v", "--verbose", action="store_true", help="echo commands as they are run" ) + parser.add_argument( + "--cflags", help="additional flags to pass to the compiler", default="" + ) args = parser.parse_args() - args.target.debug = args.debug - args.target.verbose = args.verbose - args.target.build(pathlib.Path.cwd(), comment=comment, force=args.force) + for target in args.target: + target.debug = args.debug + target.force = args.force + target.verbose = args.verbose + target.cflags = args.cflags + target.pyconfig_dir = args.pyconfig_dir + target.build( + comment=comment, + force=args.force, + jit_stencils=args.output_dir / f"jit_stencils-{target.triple}.h", + ) + jit_stencils_h = args.output_dir / "jit_stencils.h" + lines = [f"// {comment}\n"] + guard = "#if" + for target in args.target: + lines.append(f"{guard} {target.condition}\n") + lines.append(f'#include "jit_stencils-{target.triple}.h"\n') + guard = "#elif" + lines.append("#else\n") + lines.append('#error "unexpected target"\n') + lines.append("#endif\n") + body = "".join(lines) + # Don't touch the file if it hasn't changed (so we don't trigger a rebuild): + if not jit_stencils_h.is_file() or jit_stencils_h.read_text() != body: + jit_stencils_h.write_text(body) diff --git a/Tools/jit/template.c b/Tools/jit/template.c index bc18e702eea..5ee26f93f1e 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ 
-13,6 +13,7 @@ #include "pycore_function.h" #include "pycore_genobject.h" #include "pycore_interpframe.h" +#include "pycore_interpolation.h" #include "pycore_intrinsics.h" #include "pycore_jit.h" #include "pycore_list.h" @@ -25,6 +26,7 @@ #include "pycore_setobject.h" #include "pycore_sliceobject.h" #include "pycore_stackref.h" +#include "pycore_template.h" #include "pycore_tuple.h" #include "pycore_unicodeobject.h" @@ -48,13 +50,16 @@ #define GOTO_TIER_TWO(EXECUTOR) \ do { \ OPT_STAT_INC(traces_executed); \ - jit_func_preserve_none jitted = (EXECUTOR)->jit_side_entry; \ + _PyExecutorObject *_executor = (EXECUTOR); \ + tstate->current_executor = (PyObject *)_executor; \ + jit_func_preserve_none jitted = _executor->jit_side_entry; \ __attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \ } while (0) #undef GOTO_TIER_ONE #define GOTO_TIER_ONE(TARGET) \ do { \ + tstate->current_executor = NULL; \ _PyFrame_SetStackPointer(frame, stack_pointer); \ return TARGET; \ } while (0) diff --git a/Tools/msi/dev/dev_files.wxs b/Tools/msi/dev/dev_files.wxs index 4357dc86d9d..21f9c848cc6 100644 --- a/Tools/msi/dev/dev_files.wxs +++ b/Tools/msi/dev/dev_files.wxs @@ -3,7 +3,7 @@ <Fragment> <ComponentGroup Id="dev_pyconfig"> <Component Id="include_pyconfig.h" Directory="include" Guid="*"> - <File Id="include_pyconfig.h" Name="pyconfig.h" Source="pyconfig.h" KeyPath="yes" /> + <File Id="include_pyconfig.h" Name="pyconfig.h" Source="!(bindpath.src)PC\pyconfig.h" KeyPath="yes" /> </Component> </ComponentGroup> </Fragment> diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs index b3ce28e7aed..0707e77b5e9 100644 --- a/Tools/msi/freethreaded/freethreaded_files.wxs +++ b/Tools/msi/freethreaded/freethreaded_files.wxs @@ -103,7 +103,7 @@ </ComponentGroup> </Fragment> - <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?> + <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?> <Fragment> <DirectoryRef Id="Lib_venv_scripts_nt__freethreaded" /> diff --git a/Tools/msi/lib/lib.wixproj b/Tools/msi/lib/lib.wixproj index 02078e503d7..3ea46dd40ea 100644 --- a/Tools/msi/lib/lib.wixproj +++ b/Tools/msi/lib/lib.wixproj @@ -15,12 +15,11 @@ <EmbeddedResource Include="*.wxl" /> </ItemGroup> <ItemGroup> - <ExcludeFolders Include="Lib\test;Lib\tests;Lib\tkinter;Lib\idlelib;Lib\turtledemo" /> + <ExcludeFolders Include="Lib\site-packages;Lib\test;Lib\tests;Lib\tkinter;Lib\idlelib;Lib\turtledemo" /> <InstallFiles Include="$(PySourcePath)Lib\**\*" Exclude="$(PySourcePath)Lib\**\*.pyc; $(PySourcePath)Lib\**\*.pyo; $(PySourcePath)Lib\turtle.py; - $(PySourcePath)Lib\site-packages\README; @(ExcludeFolders->'$(PySourcePath)%(Identity)\*'); @(ExcludeFolders->'$(PySourcePath)%(Identity)\**\*')"> <SourceBase>$(PySourcePath)Lib</SourceBase> diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs index b8e16b5fe23..4d44299f783 100644 --- 
a/Tools/msi/lib/lib_files.wxs
+++ b/Tools/msi/lib/lib_files.wxs
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
-    <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo ?>
+    <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd ?>
 
     <Fragment>
         <DirectoryRef Id="Lib_venv_scripts_nt" />
diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py
index 0dcf6ef844a..afd010a5254 100755
--- a/Tools/patchcheck/patchcheck.py
+++ b/Tools/patchcheck/patchcheck.py
@@ -53,19 +53,43 @@ def get_git_branch():
 
 
 def get_git_upstream_remote():
-    """Get the remote name to use for upstream branches
+    """
+    Get the remote name to use for upstream branches
 
-    Uses "upstream" if it exists, "origin" otherwise
+    Check for the presence of the "https://github.com/python/cpython" remote URL.
+    If only one is found, return that remote name. If multiple are found,
+    check for and return "upstream", "origin", or "python", in that
+    order. Raise an error if no valid matches are found.
     """
-    cmd = "git remote get-url upstream".split()
-    try:
-        subprocess.check_output(cmd,
-                                stderr=subprocess.DEVNULL,
-                                cwd=SRCDIR,
-                                encoding='UTF-8')
-    except subprocess.CalledProcessError:
-        return "origin"
-    return "upstream"
+    cmd = "git remote -v".split()
+    output = subprocess.check_output(
+        cmd,
+        stderr=subprocess.DEVNULL,
+        cwd=SRCDIR,
+        encoding="UTF-8"
+    )
+    # Filter to desired remotes, accounting for potential uppercasing
+    filtered_remotes = {
+        remote.split("\t")[0].lower() for remote in output.split('\n')
+        if "python/cpython" in remote.lower() and remote.endswith("(fetch)")
+    }
+    if len(filtered_remotes) == 1:
+        [remote] = filtered_remotes
+        return remote
+    for remote_name in ["upstream", "origin", "python"]:
+        if remote_name in filtered_remotes:
+            return remote_name
+    remotes_found = "\n".join(
+        {remote for remote in output.split('\n') if remote.endswith("(fetch)")}
+    )
+    raise ValueError(
+        f"Patchcheck was unable to find an unambiguous upstream remote, "
+        f"with URL matching 'https://github.com/python/cpython'. 
" + f"For help creating an upstream remote, see Dev Guide: " + f"https://devguide.python.org/getting-started/" + f"git-boot-camp/#cloning-a-forked-cpython-repository " + f"\nRemotes found: \n{remotes_found}" + ) def get_git_remote_default_branch(remote_name): diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index 41338c29bdd..be289c352de 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -108,6 +108,8 @@ def compile_c_extension( extra_compile_args.append("-DPy_BUILD_CORE_MODULE") # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c extra_compile_args.append("-D_Py_TEST_PEGEN") + if sys.platform == "win32" and sysconfig.get_config_var("Py_GIL_DISABLED"): + extra_compile_args.append("-DPy_GIL_DISABLED") extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") if keep_asserts: extra_compile_args.append("-UNDEBUG") diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 2be85a163b4..04f66eec1a0 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -44,7 +44,7 @@ EXTENSION_PREFIX = """\ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif """ @@ -214,33 +214,47 @@ class CCallMakerVisitor(GrammarVisitor): call.assigned_variable_type = node.type return call + def assert_no_undefined_behavior( + self, call: FunctionCall, wrapper: str, expected_rtype: str | None, + ) -> None: + if call.return_type != expected_rtype: + raise RuntimeError( + f"{call.function} return type is incompatible with {wrapper}: " + f"expect: {expected_rtype}, actual: {call.return_type}" + ) + def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall: call = self.generate_call(node.node) - if call.nodetype == NodeTypes.NAME_TOKEN: - return FunctionCall( - function=f"_PyPegen_lookahead_with_name", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + comment = None + if call.nodetype is NodeTypes.NAME_TOKEN: + function = "_PyPegen_lookahead_for_expr" + self.assert_no_undefined_behavior(call, function, "expr_ty") + elif call.nodetype is NodeTypes.STRING_TOKEN: + # _PyPegen_string_token() returns 'void *' instead of 'Token *'; + # in addition, the overall function call would return 'expr_ty'. 
+ assert call.function == "_PyPegen_string_token" + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype == NodeTypes.SOFT_KEYWORD: - return FunctionCall( - function=f"_PyPegen_lookahead_with_string", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead_with_string" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: - return FunctionCall( - function=f"_PyPegen_lookahead_with_int", - arguments=[positive, call.function, *call.arguments], - return_type="int", - comment=f"token={node.node}", - ) + function = "_PyPegen_lookahead_with_int" + self.assert_no_undefined_behavior(call, function, "Token *") + comment = f"token={node.node}" + elif call.return_type == "expr_ty": + function = "_PyPegen_lookahead_for_expr" + elif call.return_type == "stmt_ty": + function = "_PyPegen_lookahead_for_stmt" else: - return FunctionCall( - function=f"_PyPegen_lookahead", - arguments=[positive, f"(void *(*)(Parser *)) {call.function}", *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, None) + return FunctionCall( + function=function, + arguments=[positive, call.function, *call.arguments], + return_type="int", + comment=comment, + ) def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall: return self.lookahead_call_helper(node, 1) diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py index 6ce0649aefe..52ae743c26b 100644 --- a/Tools/peg_generator/pegen/parser_generator.py +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -81,6 +81,11 @@ class RuleCheckingVisitor(GrammarVisitor): self.tokens.add("FSTRING_START") self.tokens.add("FSTRING_END") self.tokens.add("FSTRING_MIDDLE") + # If python < 3.14 add the virtual tstring tokens + if sys.version_info < (3, 14, 0, 'beta', 1): + self.tokens.add("TSTRING_START") + self.tokens.add("TSTRING_END") + self.tokens.add("TSTRING_MIDDLE") def visit_NameLeaf(self, node: NameLeaf) -> None: if node.value not in self.rules and node.value not in self.tokens: diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index e5badaccadd..0beaab2d3e7 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,7 +1,7 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI -mypy==1.13 +mypy==1.16.1 # needed for peg_generator: -types-psutil==6.0.0.20240901 -types-setuptools==74.0.0.20240831 +types-psutil==7.0.0.20250601 +types-setuptools==80.9.0.20250529 diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 68cfad3f92c..905af9dcfd8 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -492,7 +492,7 @@ class Stats: ): (trace_too_long, attempts), Doc( "Trace too short", - "A potential trace is abandoned because it it too short.", + "A potential trace is abandoned because it is too short.", ): (trace_too_short, attempts), Doc( "Inner loop found", "A trace is truncated because it has an inner loop" diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 21224e490b8..93421b623b9 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -12,15 +12,12 @@ # These warnings 
trigger directly in a CPython function. -race_top:assign_version_tag -race_top:_Py_slot_tp_getattr_hook race_top:dump_traceback race_top:fatal_error race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:_PyObject_TryGetInstanceAttribute race_top:PyUnstable_InterpreterFrame_GetLine -race_top:type_modified_unlocked race_top:write_thread_id # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) @@ -29,9 +26,6 @@ race_top:rangeiter_next # gh-129748: test.test_free_threading.test_slots.TestSlots.test_object race_top:mi_block_set_nextx -# gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict) -race_top:update_one_slot - # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create @@ -46,4 +40,11 @@ race:list_inplace_repeat_lock_held # PyObject_Realloc internally does memcpy which isn't atomic so can race # with non-locking reads. See #132070 -race:PyObject_Realloc
\ No newline at end of file +race:PyObject_Realloc + +# gh-133467. Some of these could be hard to trigger. +race_top:_Py_slot_tp_getattr_hook +race_top:slot_tp_descr_get +race_top:type_set_name +race_top:set_tp_bases +race_top:type_set_bases_unlocked diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index 889ae8fc869..d4cca68c3e3 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -43,7 +43,7 @@ VERSION = "3.3" # When changing UCD version please update # * Doc/library/stdtypes.rst, and # * Doc/library/unicodedata.rst -# * Doc/reference/lexical_analysis.rst (two occurrences) +# * Doc/reference/lexical_analysis.rst (three occurrences) UNIDATA_VERSION = "16.0.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" diff --git a/Tools/wasm/.editorconfig b/Tools/wasm/emscripten/.editorconfig index 4de5fe5954d..4de5fe5954d 100644 --- a/Tools/wasm/.editorconfig +++ b/Tools/wasm/emscripten/.editorconfig diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 849bd5de44e..c0d58aeaadd 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -167,11 +167,12 @@ def make_build_python(context, working_dir): @subdir(HOST_BUILD_DIR, clean_ok=True) def make_emscripten_libffi(context, working_dir): shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True) - with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file: + with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete_on_close=False) as tmp_file: with urlopen( "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz" ) as response: shutil.copyfileobj(response, tmp_file) + tmp_file.close() shutil.unpack_archive(tmp_file.name, working_dir) call( [EMSCRIPTEN_DIR / "make_libffi.sh"], diff --git a/Tools/wasm/mypy.ini b/Tools/wasm/mypy.ini deleted file mode 100644 index 4de0a30c260..00000000000 --- a/Tools/wasm/mypy.ini +++ /dev/null @@ -1,11 +0,0 @@ -[mypy] -files = Tools/wasm/wasm_*.py -pretty = True -show_traceback = True - -# Make sure the wasm can be run using Python 3.8: -python_version = 3.8 - -# Be strict... -strict = True -enable_error_code = truthy-bool,ignore-without-code diff --git a/Tools/wasm/wasi-env b/Tools/wasm/wasi-env index 4c5078a1f67..08d4f499baa 100755 --- a/Tools/wasm/wasi-env +++ b/Tools/wasm/wasi-env @@ -1,7 +1,8 @@ #!/bin/sh set -e -# NOTE: to be removed once no longer used in https://github.com/python/buildmaster-config/blob/main/master/custom/factories.py . +# NOTE: to be removed once no longer used in https://github.com/python/buildmaster-config/blob/main/master/custom/factories.py ; +# expected in Python 3.18 as 3.13 is when `wasi.py` was introduced. 
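The make_emscripten_libffi() hunk above switches the tarball download to NamedTemporaryFile(delete_on_close=False) and closes the file before unpacking it. A minimal sketch of that pattern, assuming Python 3.12+ (where the delete_on_close keyword was added); the URL and destination directory are placeholders:

    import shutil
    import tempfile
    from urllib.request import urlopen

    ARCHIVE_URL = "https://example.invalid/source.tar.gz"  # placeholder

    # delete_on_close=False keeps the file on disk after close(); it is
    # removed only when the with-block exits.  Closing before unpacking
    # matters on Windows, where a file held open for writing cannot be
    # reopened by name.
    with tempfile.NamedTemporaryFile(suffix=".tar.gz",
                                     delete_on_close=False) as tmp:
        with urlopen(ARCHIVE_URL) as response:
            shutil.copyfileobj(response, tmp)
        tmp.close()  # flush and release the handle
        shutil.unpack_archive(tmp.name, "libffi-build")  # hypothetical dest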
 # function
 usage() {
diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py
index da847c4ff86..b49b27cbbbe 100644
--- a/Tools/wasm/wasi.py
+++ b/Tools/wasm/wasi.py
@@ -1,362 +1,10 @@
-#!/usr/bin/env python3
-
-import argparse
-import contextlib
-import functools
-import os
-try:
-    from os import process_cpu_count as cpu_count
-except ImportError:
-    from os import cpu_count
-import pathlib
-import shutil
-import subprocess
-import sys
-import sysconfig
-import tempfile
-
-
-CHECKOUT = pathlib.Path(__file__).parent.parent.parent
-
-CROSS_BUILD_DIR = CHECKOUT / "cross-build"
-BUILD_DIR = CROSS_BUILD_DIR / "build"
-
-LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local"
-LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/wasi.py\n".encode("utf-8")
-
-WASMTIME_VAR_NAME = "WASMTIME"
-WASMTIME_HOST_RUNNER_VAR = f"{{{WASMTIME_VAR_NAME}}}"
-
-
-def updated_env(updates={}):
-    """Create a new dict representing the environment to use.
-
-    The changes made to the execution environment are printed out.
-    """
-    env_defaults = {}
-    # https://reproducible-builds.org/docs/source-date-epoch/
-    git_epoch_cmd = ["git", "log", "-1", "--pretty=%ct"]
-    try:
-        epoch = subprocess.check_output(git_epoch_cmd, encoding="utf-8").strip()
-        env_defaults["SOURCE_DATE_EPOCH"] = epoch
-    except subprocess.CalledProcessError:
-        pass  # Might be building from a tarball.
-    # This layering lets SOURCE_DATE_EPOCH from os.environ takes precedence.
-    environment = env_defaults | os.environ | updates
-
-    env_diff = {}
-    for key, value in environment.items():
-        if os.environ.get(key) != value:
-            env_diff[key] = value
-
-    print("🌎 Environment changes:")
-    for key in sorted(env_diff.keys()):
-        print(f"    {key}={env_diff[key]}")
-
-    return environment
-
-
-def subdir(working_dir, *, clean_ok=False):
-    """Decorator to change to a working directory."""
-    def decorator(func):
-        @functools.wraps(func)
-        def wrapper(context):
-            nonlocal working_dir
-
-            if callable(working_dir):
-                working_dir = working_dir(context)
-            try:
-                tput_output = subprocess.check_output(["tput", "cols"],
-                                                      encoding="utf-8")
-            except subprocess.CalledProcessError:
-                terminal_width = 80
-            else:
-                terminal_width = int(tput_output.strip())
-            print("⎯" * terminal_width)
-            print("📁", working_dir)
-            if (clean_ok and getattr(context, "clean", False) and
-                    working_dir.exists()):
-                print(f"🚮 Deleting directory (--clean)...")
-                shutil.rmtree(working_dir)
-
-            working_dir.mkdir(parents=True, exist_ok=True)
-
-            with contextlib.chdir(working_dir):
-                return func(context, working_dir)
-
-        return wrapper
-
-    return decorator
-
-
-def call(command, *, quiet, **kwargs):
-    """Execute a command.
-
-    If 'quiet' is true, then redirect stdout and stderr to a temporary file.
- """ - print("โฏ", " ".join(map(str, command))) - if not quiet: - stdout = None - stderr = None - else: - stdout = tempfile.NamedTemporaryFile("w", encoding="utf-8", - delete=False, - prefix="cpython-wasi-", - suffix=".log") - stderr = subprocess.STDOUT - print(f"๐ Logging output to {stdout.name} (--quiet)...") - - subprocess.check_call(command, **kwargs, stdout=stdout, stderr=stderr) - - -def build_platform(): - """The name of the build/host platform.""" - # Can also be found via `config.guess`.` - return sysconfig.get_config_var("BUILD_GNU_TYPE") - - -def build_python_path(): - """The path to the build Python binary.""" - binary = BUILD_DIR / "python" - if not binary.is_file(): - binary = binary.with_suffix(".exe") - if not binary.is_file(): - raise FileNotFoundError("Unable to find `python(.exe)` in " - f"{BUILD_DIR}") - - return binary - - -@subdir(BUILD_DIR, clean_ok=True) -def configure_build_python(context, working_dir): - """Configure the build/host Python.""" - if LOCAL_SETUP.exists(): - print(f"๐ {LOCAL_SETUP} exists ...") - else: - print(f"๐ Touching {LOCAL_SETUP} ...") - LOCAL_SETUP.write_bytes(LOCAL_SETUP_MARKER) - - configure = [os.path.relpath(CHECKOUT / 'configure', working_dir)] - if context.args: - configure.extend(context.args) - - call(configure, quiet=context.quiet) - - -@subdir(BUILD_DIR) -def make_build_python(context, working_dir): - """Make/build the build Python.""" - call(["make", "--jobs", str(cpu_count()), "all"], - quiet=context.quiet) - - binary = build_python_path() - cmd = [binary, "-c", - "import sys; " - "print(f'{sys.version_info.major}.{sys.version_info.minor}')"] - version = subprocess.check_output(cmd, encoding="utf-8").strip() - - print(f"๐ {binary} {version}") - - -def find_wasi_sdk(): - """Find the path to wasi-sdk.""" - if wasi_sdk_path := os.environ.get("WASI_SDK_PATH"): - return pathlib.Path(wasi_sdk_path) - elif (default_path := pathlib.Path("/opt/wasi-sdk")).exists(): - return default_path - - -def wasi_sdk_env(context): - """Calculate environment variables for building with wasi-sdk.""" - wasi_sdk_path = context.wasi_sdk_path - sysroot = wasi_sdk_path / "share" / "wasi-sysroot" - env = {"CC": "clang", "CPP": "clang-cpp", "CXX": "clang++", - "AR": "llvm-ar", "RANLIB": "ranlib"} - - for env_var, binary_name in list(env.items()): - env[env_var] = os.fsdecode(wasi_sdk_path / "bin" / binary_name) - - if wasi_sdk_path != pathlib.Path("/opt/wasi-sdk"): - for compiler in ["CC", "CPP", "CXX"]: - env[compiler] += f" --sysroot={sysroot}" - - env["PKG_CONFIG_PATH"] = "" - env["PKG_CONFIG_LIBDIR"] = os.pathsep.join( - map(os.fsdecode, - [sysroot / "lib" / "pkgconfig", - sysroot / "share" / "pkgconfig"])) - env["PKG_CONFIG_SYSROOT_DIR"] = os.fsdecode(sysroot) - - env["WASI_SDK_PATH"] = os.fsdecode(wasi_sdk_path) - env["WASI_SYSROOT"] = os.fsdecode(sysroot) - - env["PATH"] = os.pathsep.join([os.fsdecode(wasi_sdk_path / "bin"), - os.environ["PATH"]]) - - return env - - -@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple, clean_ok=True) -def configure_wasi_python(context, working_dir): - """Configure the WASI/host build.""" - if not context.wasi_sdk_path or not context.wasi_sdk_path.exists(): - raise ValueError("WASI-SDK not found; " - "download from " - "https://github.com/WebAssembly/wasi-sdk and/or " - "specify via $WASI_SDK_PATH or --wasi-sdk") - - config_site = os.fsdecode(CHECKOUT / "Tools" / "wasm" / "config.site-wasm32-wasi") - - wasi_build_dir = working_dir.relative_to(CHECKOUT) - - python_build_dir = BUILD_DIR / "build" - lib_dirs = 
-    lib_dirs = list(python_build_dir.glob("lib.*"))
-    assert len(lib_dirs) == 1, f"Expected a single lib.* directory in {python_build_dir}"
-    lib_dir = os.fsdecode(lib_dirs[0])
-    pydebug = lib_dir.endswith("-pydebug")
-    python_version = lib_dir.removesuffix("-pydebug").rpartition("-")[-1]
-    sysconfig_data = f"{wasi_build_dir}/build/lib.wasi-wasm32-{python_version}"
-    if pydebug:
-        sysconfig_data += "-pydebug"
-
-    # Use PYTHONPATH to include sysconfig data which must be anchored to the
-    # WASI guest's `/` directory.
-    args = {"GUEST_DIR": "/",
-            "HOST_DIR": CHECKOUT,
-            "ENV_VAR_NAME": "PYTHONPATH",
-            "ENV_VAR_VALUE": f"/{sysconfig_data}",
-            "PYTHON_WASM": working_dir / "python.wasm"}
-    # Check dynamically for wasmtime in case it was specified manually via
-    # `--host-runner`.
-    if WASMTIME_HOST_RUNNER_VAR in context.host_runner:
-        if wasmtime := shutil.which("wasmtime"):
-            args[WASMTIME_VAR_NAME] = wasmtime
-        else:
-            raise FileNotFoundError("wasmtime not found; download from "
-                                    "https://github.com/bytecodealliance/wasmtime")
-    host_runner = context.host_runner.format_map(args)
-    env_additions = {"CONFIG_SITE": config_site, "HOSTRUNNER": host_runner}
-    build_python = os.fsdecode(build_python_path())
-    # The path to `configure` MUST be relative, else `python.wasm` is unable
-    # to find the stdlib due to Python not recognizing that it's being
-    # executed from within a checkout.
-    configure = [os.path.relpath(CHECKOUT / 'configure', working_dir),
-                 f"--host={context.host_triple}",
-                 f"--build={build_platform()}",
-                 f"--with-build-python={build_python}"]
-    if pydebug:
-        configure.append("--with-pydebug")
-    if context.args:
-        configure.extend(context.args)
-    call(configure,
-         env=updated_env(env_additions | wasi_sdk_env(context)),
-         quiet=context.quiet)
-
-    python_wasm = working_dir / "python.wasm"
-    exec_script = working_dir / "python.sh"
-    with exec_script.open("w", encoding="utf-8") as file:
-        file.write(f'#!/bin/sh\nexec {host_runner} {python_wasm} "$@"\n')
-    exec_script.chmod(0o755)
-    print(f"🏃‍♀️ Created {exec_script} ... ")
-    sys.stdout.flush()
-
-
-@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple)
-def make_wasi_python(context, working_dir):
-    """Run `make` for the WASI/host build."""
-    call(["make", "--jobs", str(cpu_count()), "all"],
-         env=updated_env(),
-         quiet=context.quiet)
-
-    exec_script = working_dir / "python.sh"
-    subprocess.check_call([exec_script, "--version"])
-
-
-def build_all(context):
-    """Build everything."""
-    steps = [configure_build_python, make_build_python, configure_wasi_python,
-             make_wasi_python]
-    for step in steps:
-        step(context)
-
-def clean_contents(context):
-    """Delete all files created by this script."""
-    if CROSS_BUILD_DIR.exists():
-        print(f"🧹 Deleting {CROSS_BUILD_DIR} ...")
-        shutil.rmtree(CROSS_BUILD_DIR)
-
-    if LOCAL_SETUP.exists():
-        with LOCAL_SETUP.open("rb") as file:
-            if file.read(len(LOCAL_SETUP_MARKER)) == LOCAL_SETUP_MARKER:
-                print(f"🧹 Deleting generated {LOCAL_SETUP} ...")
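clean_contents() above guards Setup.local with a first-line sentinel so it never touches a file the user wrote by hand; the same guard is carried over into the new wasi/__main__.py further down. A small sketch of the sentinel pattern; the unlink() call is added here for illustration and is not part of the code shown above:

    import pathlib

    MARKER = b"# Generated by Tools/wasm/wasi.py\n"
    local_setup = pathlib.Path("Modules") / "Setup.local"

    # Only delete the file if its first bytes match the sentinel written
    # when the stub was generated; a hand-edited Setup.local is left alone.
    if local_setup.exists():
        with local_setup.open("rb") as file:
            if file.read(len(MARKER)) == MARKER:
                local_setup.unlink()  # illustrative; the script above only reports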
- "--env {ENV_VAR_NAME}={ENV_VAR_VALUE}") - - parser = argparse.ArgumentParser() - subcommands = parser.add_subparsers(dest="subcommand") - build = subcommands.add_parser("build", help="Build everything") - configure_build = subcommands.add_parser("configure-build-python", - help="Run `configure` for the " - "build Python") - make_build = subcommands.add_parser("make-build-python", - help="Run `make` for the build Python") - configure_host = subcommands.add_parser("configure-host", - help="Run `configure` for the " - "host/WASI (pydebug builds " - "are inferred from the build " - "Python)") - make_host = subcommands.add_parser("make-host", - help="Run `make` for the host/WASI") - clean = subcommands.add_parser("clean", help="Delete files and directories " - "created by this script") - for subcommand in build, configure_build, make_build, configure_host, make_host: - subcommand.add_argument("--quiet", action="store_true", default=False, - dest="quiet", - help="Redirect output from subprocesses to a log file") - for subcommand in configure_build, configure_host: - subcommand.add_argument("--clean", action="store_true", default=False, - dest="clean", - help="Delete any relevant directories before building") - for subcommand in build, configure_build, configure_host: - subcommand.add_argument("args", nargs="*", - help="Extra arguments to pass to `configure`") - for subcommand in build, configure_host: - subcommand.add_argument("--wasi-sdk", type=pathlib.Path, - dest="wasi_sdk_path", - default=find_wasi_sdk(), - help="Path to wasi-sdk; defaults to " - "$WASI_SDK_PATH or /opt/wasi-sdk") - subcommand.add_argument("--host-runner", action="store", - default=default_host_runner, dest="host_runner", - help="Command template for running the WASI host " - "(default designed for wasmtime 14 or newer: " - f"`{default_host_runner}`)") - for subcommand in build, configure_host, make_host: - subcommand.add_argument("--host-triple", action="store", default="wasm32-wasip1", - help="The target triple for the WASI host build") - - context = parser.parse_args() - - dispatch = {"configure-build-python": configure_build_python, - "make-build-python": make_build_python, - "configure-host": configure_wasi_python, - "make-host": make_wasi_python, - "build": build_all, - "clean": clean_contents} - dispatch[context.subcommand](context) +if __name__ == "__main__": + import pathlib + import runpy + import sys + print("โ ๏ธ WARNING: This script is deprecated and slated for removal in Python 3.20; " + "execute the `wasi/` directory instead (i.e. 
+          "execute the `wasi/` directory instead (i.e. `python Tools/wasm/wasi`)\n",
+          file=sys.stderr)
 
-if __name__ == "__main__":
-    main()
+    runpy.run_path(pathlib.Path(__file__).parent / "wasi", run_name="__main__")
diff --git a/Tools/wasm/wasi/__main__.py b/Tools/wasm/wasi/__main__.py
new file mode 100644
index 00000000000..54ccc95157d
--- /dev/null
+++ b/Tools/wasm/wasi/__main__.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python3
+
+import argparse
+import contextlib
+import functools
+import os
+try:
+    from os import process_cpu_count as cpu_count
+except ImportError:
+    from os import cpu_count
+import pathlib
+import shutil
+import subprocess
+import sys
+import sysconfig
+import tempfile
+
+
+CHECKOUT = pathlib.Path(__file__).parent.parent.parent.parent
+assert (CHECKOUT / "configure").is_file(), "Please update the location of the file"
+
+CROSS_BUILD_DIR = CHECKOUT / "cross-build"
+BUILD_DIR = CROSS_BUILD_DIR / "build"
+
+LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local"
+LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/wasi.py\n".encode("utf-8")
+
+WASMTIME_VAR_NAME = "WASMTIME"
+WASMTIME_HOST_RUNNER_VAR = f"{{{WASMTIME_VAR_NAME}}}"
+
+
+def updated_env(updates={}):
+    """Create a new dict representing the environment to use.
+
+    The changes made to the execution environment are printed out.
+    """
+    env_defaults = {}
+    # https://reproducible-builds.org/docs/source-date-epoch/
+    git_epoch_cmd = ["git", "log", "-1", "--pretty=%ct"]
+    try:
+        epoch = subprocess.check_output(git_epoch_cmd, encoding="utf-8").strip()
+        env_defaults["SOURCE_DATE_EPOCH"] = epoch
+    except subprocess.CalledProcessError:
+        pass  # Might be building from a tarball.
+    # This layering lets SOURCE_DATE_EPOCH from os.environ take precedence.
+    environment = env_defaults | os.environ | updates
+
+    env_diff = {}
+    for key, value in environment.items():
+        if os.environ.get(key) != value:
+            env_diff[key] = value
+
+    print("🌎 Environment changes:")
+    for key in sorted(env_diff.keys()):
+        print(f"    {key}={env_diff[key]}")
+
+    return environment
+
+
+def subdir(working_dir, *, clean_ok=False):
+    """Decorator to change to a working directory."""
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(context):
+            nonlocal working_dir
+
+            if callable(working_dir):
+                working_dir = working_dir(context)
+            try:
+                tput_output = subprocess.check_output(["tput", "cols"],
+                                                      encoding="utf-8")
+            except subprocess.CalledProcessError:
+                terminal_width = 80
+            else:
+                terminal_width = int(tput_output.strip())
+            print("⎯" * terminal_width)
+            print("📁", working_dir)
+            if (clean_ok and getattr(context, "clean", False) and
+                    working_dir.exists()):
+                print(f"🚮 Deleting directory (--clean)...")
+                shutil.rmtree(working_dir)
+
+            working_dir.mkdir(parents=True, exist_ok=True)
+
+            with contextlib.chdir(working_dir):
+                return func(context, working_dir)
+
+        return wrapper
+
+    return decorator
+
+
+def call(command, *, quiet, **kwargs):
+    """Execute a command.
+
+    If 'quiet' is true, then redirect stdout and stderr to a temporary file.
+    """
+    print("❯", " ".join(map(str, command)))
+    if not quiet:
+        stdout = None
+        stderr = None
+    else:
+        stdout = tempfile.NamedTemporaryFile("w", encoding="utf-8",
+                                             delete=False,
+                                             prefix="cpython-wasi-",
+                                             suffix=".log")
+        stderr = subprocess.STDOUT
+        print(f"📝 Logging output to {stdout.name} (--quiet)...")
+
+    subprocess.check_call(command, **kwargs, stdout=stdout, stderr=stderr)
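updated_env() above merges env_defaults | os.environ | updates; with dict union the right-hand operand wins on key collisions, which is exactly what lets a value from the caller's environment (or an explicit update) override the git-derived default. A tiny demonstration with made-up values:

    env_defaults = {"SOURCE_DATE_EPOCH": "1700000000"}  # derived from git
    environ = {"SOURCE_DATE_EPOCH": "42", "PATH": "/usr/bin"}  # the user's env
    updates = {"CONFIG_SITE": "config.site-wasm32-wasi"}

    merged = env_defaults | environ | updates
    print(merged["SOURCE_DATE_EPOCH"])  # 42: the environment beats the default
    print(merged["CONFIG_SITE"])        # updates override everything else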
+
+
+def build_platform():
+    """The name of the build/host platform."""
+    # Can also be found via `config.guess`.
+    return sysconfig.get_config_var("BUILD_GNU_TYPE")
+
+
+def build_python_path():
+    """The path to the build Python binary."""
+    binary = BUILD_DIR / "python"
+    if not binary.is_file():
+        binary = binary.with_suffix(".exe")
+        if not binary.is_file():
+            raise FileNotFoundError("Unable to find `python(.exe)` in "
+                                    f"{BUILD_DIR}")
+
+    return binary
+
+
+def build_python_is_pydebug():
+    """Find out if the build Python is a pydebug build."""
+    test = "import sys, test.support; sys.exit(test.support.Py_DEBUG)"
+    result = subprocess.run([build_python_path(), "-c", test],
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    return bool(result.returncode)
+
+
+@subdir(BUILD_DIR, clean_ok=True)
+def configure_build_python(context, working_dir):
+    """Configure the build/host Python."""
+    if LOCAL_SETUP.exists():
+        print(f"👍 {LOCAL_SETUP} exists ...")
+    else:
+        print(f"📝 Touching {LOCAL_SETUP} ...")
+        LOCAL_SETUP.write_bytes(LOCAL_SETUP_MARKER)
+
+    configure = [os.path.relpath(CHECKOUT / 'configure', working_dir)]
+    if context.args:
+        configure.extend(context.args)
+
+    call(configure, quiet=context.quiet)
+
+
+@subdir(BUILD_DIR)
+def make_build_python(context, working_dir):
+    """Make/build the build Python."""
+    call(["make", "--jobs", str(cpu_count()), "all"],
+         quiet=context.quiet)
+
+    binary = build_python_path()
+    cmd = [binary, "-c",
+           "import sys; "
+           "print(f'{sys.version_info.major}.{sys.version_info.minor}')"]
+    version = subprocess.check_output(cmd, encoding="utf-8").strip()
+
+    print(f"🎉 {binary} {version}")
+
+
+def find_wasi_sdk():
+    """Find the path to wasi-sdk."""
+    if wasi_sdk_path := os.environ.get("WASI_SDK_PATH"):
+        return pathlib.Path(wasi_sdk_path)
+    elif (default_path := pathlib.Path("/opt/wasi-sdk")).exists():
+        return default_path
+
+
+def wasi_sdk_env(context):
+    """Calculate environment variables for building with wasi-sdk."""
+    wasi_sdk_path = context.wasi_sdk_path
+    sysroot = wasi_sdk_path / "share" / "wasi-sysroot"
+    env = {"CC": "clang", "CPP": "clang-cpp", "CXX": "clang++",
+           "AR": "llvm-ar", "RANLIB": "ranlib"}
+
+    for env_var, binary_name in list(env.items()):
+        env[env_var] = os.fsdecode(wasi_sdk_path / "bin" / binary_name)
+
+    if wasi_sdk_path != pathlib.Path("/opt/wasi-sdk"):
+        for compiler in ["CC", "CPP", "CXX"]:
+            env[compiler] += f" --sysroot={sysroot}"
+
+    env["PKG_CONFIG_PATH"] = ""
+    env["PKG_CONFIG_LIBDIR"] = os.pathsep.join(
+        map(os.fsdecode,
+            [sysroot / "lib" / "pkgconfig",
+             sysroot / "share" / "pkgconfig"]))
+    env["PKG_CONFIG_SYSROOT_DIR"] = os.fsdecode(sysroot)
+
+    env["WASI_SDK_PATH"] = os.fsdecode(wasi_sdk_path)
+    env["WASI_SYSROOT"] = os.fsdecode(sysroot)
+
+    env["PATH"] = os.pathsep.join([os.fsdecode(wasi_sdk_path / "bin"),
+                                   os.environ["PATH"]])
+
+    return env
+
+
+@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple, clean_ok=True)
+def configure_wasi_python(context, working_dir):
+    """Configure the WASI/host build."""
+    if not context.wasi_sdk_path or not context.wasi_sdk_path.exists():
+        raise ValueError("WASI-SDK not found; "
+                         "download from "
+                         "https://github.com/WebAssembly/wasi-sdk and/or "
+                         "specify via $WASI_SDK_PATH or --wasi-sdk")
+
+    config_site = os.fsdecode(CHECKOUT / "Tools" / "wasm" / "wasi" / "config.site-wasm32-wasi")
+
+    wasi_build_dir = working_dir.relative_to(CHECKOUT)
+
+    python_build_dir = BUILD_DIR / "build"
+    lib_dirs = list(python_build_dir.glob("lib.*"))
+    assert len(lib_dirs) == 1, f"Expected a single lib.* directory in {python_build_dir}"
+    lib_dir = os.fsdecode(lib_dirs[0])
+    python_version = lib_dir.rpartition("-")[-1]
+    sysconfig_data_dir = f"{wasi_build_dir}/build/lib.wasi-wasm32-{python_version}"
+
+    # Use PYTHONPATH to include sysconfig data which must be anchored to the
+    # WASI guest's `/` directory.
+    args = {"GUEST_DIR": "/",
+            "HOST_DIR": CHECKOUT,
+            "ENV_VAR_NAME": "PYTHONPATH",
+            "ENV_VAR_VALUE": f"/{sysconfig_data_dir}",
+            "PYTHON_WASM": working_dir / "python.wasm"}
+    # Check dynamically for wasmtime in case it was specified manually via
+    # `--host-runner`.
+    if WASMTIME_HOST_RUNNER_VAR in context.host_runner:
+        if wasmtime := shutil.which("wasmtime"):
+            args[WASMTIME_VAR_NAME] = wasmtime
+        else:
+            raise FileNotFoundError("wasmtime not found; download from "
+                                    "https://github.com/bytecodealliance/wasmtime")
+    host_runner = context.host_runner.format_map(args)
+    env_additions = {"CONFIG_SITE": config_site, "HOSTRUNNER": host_runner}
+    build_python = os.fsdecode(build_python_path())
+    # The path to `configure` MUST be relative, else `python.wasm` is unable
+    # to find the stdlib due to Python not recognizing that it's being
+    # executed from within a checkout.
+    configure = [os.path.relpath(CHECKOUT / 'configure', working_dir),
+                 f"--host={context.host_triple}",
+                 f"--build={build_platform()}",
+                 f"--with-build-python={build_python}"]
+    if build_python_is_pydebug():
+        configure.append("--with-pydebug")
+    if context.args:
+        configure.extend(context.args)
+    call(configure,
+         env=updated_env(env_additions | wasi_sdk_env(context)),
+         quiet=context.quiet)
+
+    python_wasm = working_dir / "python.wasm"
+    exec_script = working_dir / "python.sh"
+    with exec_script.open("w", encoding="utf-8") as file:
+        file.write(f'#!/bin/sh\nexec {host_runner} {python_wasm} "$@"\n')
+    exec_script.chmod(0o755)
+    print(f"🏃‍♀️ Created {exec_script} (--host-runner)... ")
+    sys.stdout.flush()
+
+
+@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple)
+def make_wasi_python(context, working_dir):
+    """Run `make` for the WASI/host build."""
+    call(["make", "--jobs", str(cpu_count()), "all"],
+         env=updated_env(),
+         quiet=context.quiet)
+
+    exec_script = working_dir / "python.sh"
+    call([exec_script, "--version"], quiet=False)
+    print(
+        f"🏁 Use `{exec_script.relative_to(context.init_dir)}` "
+        "to run CPython w/ the WASI host specified by --host-runner"
+    )
+
+
+def build_all(context):
+    """Build everything."""
+    steps = [configure_build_python, make_build_python, configure_wasi_python,
+             make_wasi_python]
+    for step in steps:
+        step(context)
+
+def clean_contents(context):
+    """Delete all files created by this script."""
+    if CROSS_BUILD_DIR.exists():
+        print(f"🧹 Deleting {CROSS_BUILD_DIR} ...")
+        shutil.rmtree(CROSS_BUILD_DIR)
+
+    if LOCAL_SETUP.exists():
+        with LOCAL_SETUP.open("rb") as file:
+            if file.read(len(LOCAL_SETUP_MARKER)) == LOCAL_SETUP_MARKER:
+                print(f"🧹 Deleting generated {LOCAL_SETUP} ...")
+
+
+def main():
+    default_host_runner = (f"{WASMTIME_HOST_RUNNER_VAR} run "
+                           # Make sure the stack size will work for a pydebug
+                           # build.
+                           # Use 16 MiB stack.
+                           "--wasm max-wasm-stack=16777216 "
+                           # Enable thread support; causes use of preview1.
+                           #"--wasm threads=y --wasi threads=y "
+                           # Map the checkout to / to load the stdlib from /Lib.
+                           "--dir {HOST_DIR}::{GUEST_DIR} "
+                           # Set PYTHONPATH to the sysconfig data.
+ "--env {ENV_VAR_NAME}={ENV_VAR_VALUE}") + + parser = argparse.ArgumentParser() + subcommands = parser.add_subparsers(dest="subcommand") + build = subcommands.add_parser("build", help="Build everything") + configure_build = subcommands.add_parser("configure-build-python", + help="Run `configure` for the " + "build Python") + make_build = subcommands.add_parser("make-build-python", + help="Run `make` for the build Python") + configure_host = subcommands.add_parser("configure-host", + help="Run `configure` for the " + "host/WASI (pydebug builds " + "are inferred from the build " + "Python)") + make_host = subcommands.add_parser("make-host", + help="Run `make` for the host/WASI") + clean = subcommands.add_parser("clean", help="Delete files and directories " + "created by this script") + for subcommand in build, configure_build, make_build, configure_host, make_host: + subcommand.add_argument("--quiet", action="store_true", default=False, + dest="quiet", + help="Redirect output from subprocesses to a log file") + for subcommand in configure_build, configure_host: + subcommand.add_argument("--clean", action="store_true", default=False, + dest="clean", + help="Delete any relevant directories before building") + for subcommand in build, configure_build, configure_host: + subcommand.add_argument("args", nargs="*", + help="Extra arguments to pass to `configure`") + for subcommand in build, configure_host: + subcommand.add_argument("--wasi-sdk", type=pathlib.Path, + dest="wasi_sdk_path", + default=find_wasi_sdk(), + help="Path to wasi-sdk; defaults to " + "$WASI_SDK_PATH or /opt/wasi-sdk") + subcommand.add_argument("--host-runner", action="store", + default=default_host_runner, dest="host_runner", + help="Command template for running the WASI host " + "(default designed for wasmtime 14 or newer: " + f"`{default_host_runner}`)") + for subcommand in build, configure_host, make_host: + subcommand.add_argument("--host-triple", action="store", default="wasm32-wasip1", + help="The target triple for the WASI host build") + + context = parser.parse_args() + context.init_dir = pathlib.Path().absolute() + + dispatch = {"configure-build-python": configure_build_python, + "make-build-python": make_build_python, + "configure-host": configure_wasi_python, + "make-host": make_wasi_python, + "build": build_all, + "clean": clean_contents} + dispatch[context.subcommand](context) + + +if __name__ == "__main__": + main() diff --git a/Tools/wasm/config.site-wasm32-wasi b/Tools/wasm/wasi/config.site-wasm32-wasi index c5d8b3e205d..c5d8b3e205d 100644 --- a/Tools/wasm/config.site-wasm32-wasi +++ b/Tools/wasm/wasi/config.site-wasm32-wasi diff --git a/Tools/wasm/wasm_build.py b/Tools/wasm/wasm_build.py deleted file mode 100755 index bcb80212362..00000000000 --- a/Tools/wasm/wasm_build.py +++ /dev/null @@ -1,932 +0,0 @@ -#!/usr/bin/env python3 -"""Build script for Python on WebAssembly platforms. - - $ ./Tools/wasm/wasm_builder.py emscripten-browser build repl - $ ./Tools/wasm/wasm_builder.py emscripten-node-dl build test - $ ./Tools/wasm/wasm_builder.py wasi build test - -Primary build targets are "emscripten-node-dl" (NodeJS, dynamic linking), -"emscripten-browser", and "wasi". - -Emscripten builds require a recent Emscripten SDK. The tools looks for an -activated EMSDK environment (". /path/to/emsdk_env.sh"). System packages -(Debian, Homebrew) are not supported. - -WASI builds require WASI SDK and wasmtime. The tool looks for 'WASI_SDK_PATH' -and falls back to /opt/wasi-sdk. 
- -The 'build' Python interpreter must be rebuilt every time Python's byte code -changes. - - ./Tools/wasm/wasm_builder.py --clean build build - -""" -import argparse -import enum -import dataclasses -import logging -import os -import pathlib -import re -import shlex -import shutil -import socket -import subprocess -import sys -import sysconfig -import tempfile -import time -import warnings -import webbrowser - -# for Python 3.8 -from typing import ( - cast, - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Tuple, - Union, -) - -logger = logging.getLogger("wasm_build") - -SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute() -WASMTOOLS = SRCDIR / "Tools" / "wasm" -BUILDDIR = SRCDIR / "builddir" -CONFIGURE = SRCDIR / "configure" -SETUP_LOCAL = SRCDIR / "Modules" / "Setup.local" - -HAS_CCACHE = shutil.which("ccache") is not None - -# path to WASI-SDK root -WASI_SDK_PATH = pathlib.Path(os.environ.get("WASI_SDK_PATH", "/opt/wasi-sdk")) - -# path to Emscripten SDK config file. -# auto-detect's EMSDK in /opt/emsdk without ". emsdk_env.sh". -EM_CONFIG = pathlib.Path(os.environ.setdefault("EM_CONFIG", "/opt/emsdk/.emscripten")) -EMSDK_MIN_VERSION = (3, 1, 19) -EMSDK_BROKEN_VERSION = { - (3, 1, 14): "https://github.com/emscripten-core/emscripten/issues/17338", - (3, 1, 16): "https://github.com/emscripten-core/emscripten/issues/17393", - (3, 1, 20): "https://github.com/emscripten-core/emscripten/issues/17720", -} -_MISSING = pathlib.Path("MISSING") - -WASM_WEBSERVER = WASMTOOLS / "wasm_webserver.py" - -CLEAN_SRCDIR = f""" -Builds require a clean source directory. Please use a clean checkout or -run "make clean -C '{SRCDIR}'". -""" - -INSTALL_NATIVE = """ -Builds require a C compiler (gcc, clang), make, pkg-config, and development -headers for dependencies like zlib. - -Debian/Ubuntu: sudo apt install build-essential git curl pkg-config zlib1g-dev -Fedora/CentOS: sudo dnf install gcc make git-core curl pkgconfig zlib-devel -""" - -INSTALL_EMSDK = """ -wasm32-emscripten builds need Emscripten SDK. Please follow instructions at -https://emscripten.org/docs/getting_started/downloads.html how to install -Emscripten and how to activate the SDK with "emsdk_env.sh". - - git clone https://github.com/emscripten-core/emsdk.git /path/to/emsdk - cd /path/to/emsdk - ./emsdk install latest - ./emsdk activate latest - source /path/to/emsdk_env.sh -""" - -INSTALL_WASI_SDK = """ -wasm32-wasi builds need WASI SDK. Please fetch the latest SDK from -https://github.com/WebAssembly/wasi-sdk/releases and install it to -"/opt/wasi-sdk". Alternatively you can install the SDK in a different location -and point the environment variable WASI_SDK_PATH to the root directory -of the SDK. The SDK is available for Linux x86_64, macOS x86_64, and MinGW. -""" - -INSTALL_WASMTIME = """ -wasm32-wasi tests require wasmtime on PATH. Please follow instructions at -https://wasmtime.dev/ to install wasmtime. -""" - - -def parse_emconfig( - emconfig: pathlib.Path = EM_CONFIG, -) -> Tuple[pathlib.Path, pathlib.Path]: - """Parse EM_CONFIG file and lookup EMSCRIPTEN_ROOT and NODE_JS. - - The ".emscripten" config file is a Python snippet that uses "EM_CONFIG" - environment variable. EMSCRIPTEN_ROOT is the "upstream/emscripten" - subdirectory with tools like "emconfigure". 
- """ - if not emconfig.exists(): - return _MISSING, _MISSING - with open(emconfig, encoding="utf-8") as f: - code = f.read() - # EM_CONFIG file is a Python snippet - local: Dict[str, Any] = {} - exec(code, globals(), local) - emscripten_root = pathlib.Path(local["EMSCRIPTEN_ROOT"]) - node_js = pathlib.Path(local["NODE_JS"]) - return emscripten_root, node_js - - -EMSCRIPTEN_ROOT, NODE_JS = parse_emconfig() - - -def read_python_version(configure: pathlib.Path = CONFIGURE) -> str: - """Read PACKAGE_VERSION from configure script - - configure and configure.ac are the canonical source for major and - minor version number. - """ - version_re = re.compile(r"^PACKAGE_VERSION='(\d\.\d+)'") - with configure.open(encoding="utf-8") as f: - for line in f: - mo = version_re.match(line) - if mo: - return mo.group(1) - raise ValueError(f"PACKAGE_VERSION not found in {configure}") - - -PYTHON_VERSION = read_python_version() - - -class ConditionError(ValueError): - def __init__(self, info: str, text: str) -> None: - self.info = info - self.text = text - - def __str__(self) -> str: - return f"{type(self).__name__}: '{self.info}'\n{self.text}" - - -class MissingDependency(ConditionError): - pass - - -class DirtySourceDirectory(ConditionError): - pass - - -@dataclasses.dataclass -class Platform: - """Platform-specific settings - - - CONFIG_SITE override - - configure wrapper (e.g. emconfigure) - - make wrapper (e.g. emmake) - - additional environment variables - - check function to verify SDK - """ - - name: str - pythonexe: str - config_site: Optional[pathlib.PurePath] - configure_wrapper: Optional[pathlib.Path] - make_wrapper: Optional[pathlib.PurePath] - environ: Dict[str, Any] - check: Callable[[], None] - # Used for build_emports(). - ports: Optional[pathlib.PurePath] - cc: Optional[pathlib.PurePath] - - def getenv(self, profile: "BuildProfile") -> Dict[str, Any]: - return self.environ.copy() - - -def _check_clean_src() -> None: - candidates = [ - SRCDIR / "Programs" / "python.o", - SRCDIR / "Python" / "frozen_modules" / "importlib._bootstrap.h", - ] - for candidate in candidates: - if candidate.exists(): - raise DirtySourceDirectory(os.fspath(candidate), CLEAN_SRCDIR) - - -def _check_native() -> None: - if not any(shutil.which(cc) for cc in ["cc", "gcc", "clang"]): - raise MissingDependency("cc", INSTALL_NATIVE) - if not shutil.which("make"): - raise MissingDependency("make", INSTALL_NATIVE) - if sys.platform == "linux": - # skip pkg-config check on macOS - if not shutil.which("pkg-config"): - raise MissingDependency("pkg-config", INSTALL_NATIVE) - # zlib is needed to create zip files - for devel in ["zlib"]: - try: - subprocess.check_call(["pkg-config", "--exists", devel]) - except subprocess.CalledProcessError: - raise MissingDependency(devel, INSTALL_NATIVE) from None - _check_clean_src() - - -NATIVE = Platform( - "native", - # macOS has python.exe - pythonexe=sysconfig.get_config_var("BUILDPYTHON") or "python", - config_site=None, - configure_wrapper=None, - ports=None, - cc=None, - make_wrapper=None, - environ={}, - check=_check_native, -) - - -def _check_emscripten() -> None: - if EMSCRIPTEN_ROOT is _MISSING: - raise MissingDependency("Emscripten SDK EM_CONFIG", INSTALL_EMSDK) - # sanity check - emconfigure = EMSCRIPTEN.configure_wrapper - if emconfigure is not None and not emconfigure.exists(): - raise MissingDependency(os.fspath(emconfigure), INSTALL_EMSDK) - # version check - version_txt = EMSCRIPTEN_ROOT / "emscripten-version.txt" - if not version_txt.exists(): - raise 
MissingDependency(os.fspath(version_txt), INSTALL_EMSDK) - with open(version_txt) as f: - version = f.read().strip().strip('"') - if version.endswith("-git"): - # git / upstream / tot-upstream installation - version = version[:-4] - version_tuple = cast( - Tuple[int, int, int], - tuple(int(v) for v in version.split(".")) - ) - if version_tuple < EMSDK_MIN_VERSION: - raise ConditionError( - os.fspath(version_txt), - f"Emscripten SDK {version} in '{EMSCRIPTEN_ROOT}' is older than " - "minimum required version " - f"{'.'.join(str(v) for v in EMSDK_MIN_VERSION)}.", - ) - broken = EMSDK_BROKEN_VERSION.get(version_tuple) - if broken is not None: - raise ConditionError( - os.fspath(version_txt), - ( - f"Emscripten SDK {version} in '{EMSCRIPTEN_ROOT}' has known " - f"bugs, see {broken}." - ), - ) - if os.environ.get("PKG_CONFIG_PATH"): - warnings.warn( - "PKG_CONFIG_PATH is set and not empty. emconfigure overrides " - "this environment variable. Use EM_PKG_CONFIG_PATH instead." - ) - _check_clean_src() - - -EMSCRIPTEN = Platform( - "emscripten", - pythonexe="python.js", - config_site=WASMTOOLS / "config.site-wasm32-emscripten", - configure_wrapper=EMSCRIPTEN_ROOT / "emconfigure", - ports=EMSCRIPTEN_ROOT / "embuilder", - cc=EMSCRIPTEN_ROOT / "emcc", - make_wrapper=EMSCRIPTEN_ROOT / "emmake", - environ={ - # workaround for https://github.com/emscripten-core/emscripten/issues/17635 - "TZ": "UTC", - "EM_COMPILER_WRAPPER": "ccache" if HAS_CCACHE else None, - "PATH": [EMSCRIPTEN_ROOT, os.environ["PATH"]], - }, - check=_check_emscripten, -) - - -def _check_wasi() -> None: - wasm_ld = WASI_SDK_PATH / "bin" / "wasm-ld" - if not wasm_ld.exists(): - raise MissingDependency(os.fspath(wasm_ld), INSTALL_WASI_SDK) - wasmtime = shutil.which("wasmtime") - if wasmtime is None: - raise MissingDependency("wasmtime", INSTALL_WASMTIME) - _check_clean_src() - - -WASI = Platform( - "wasi", - pythonexe="python.wasm", - config_site=WASMTOOLS / "config.site-wasm32-wasi", - configure_wrapper=WASMTOOLS / "wasi-env", - ports=None, - cc=WASI_SDK_PATH / "bin" / "clang", - make_wrapper=None, - environ={ - "WASI_SDK_PATH": WASI_SDK_PATH, - # workaround for https://github.com/python/cpython/issues/95952 - "HOSTRUNNER": ( - "wasmtime run " - "--wasm max-wasm-stack=16777216 " - "--wasi preview2 " - "--dir {srcdir}::/ " - "--env PYTHONPATH=/{relbuilddir}/build/lib.wasi-wasm32-{version}:/Lib" - ), - "PATH": [WASI_SDK_PATH / "bin", os.environ["PATH"]], - }, - check=_check_wasi, -) - - -class Host(enum.Enum): - """Target host triplet""" - - wasm32_emscripten = "wasm32-unknown-emscripten" - wasm64_emscripten = "wasm64-unknown-emscripten" - wasm32_wasi = "wasm32-unknown-wasi" - wasm64_wasi = "wasm64-unknown-wasi" - # current platform - build = sysconfig.get_config_var("BUILD_GNU_TYPE") - - @property - def platform(self) -> Platform: - if self.is_emscripten: - return EMSCRIPTEN - elif self.is_wasi: - return WASI - else: - return NATIVE - - @property - def is_emscripten(self) -> bool: - cls = type(self) - return self in {cls.wasm32_emscripten, cls.wasm64_emscripten} - - @property - def is_wasi(self) -> bool: - cls = type(self) - return self in {cls.wasm32_wasi, cls.wasm64_wasi} - - def get_extra_paths(self) -> Iterable[pathlib.PurePath]: - """Host-specific os.environ["PATH"] entries. - - Emscripten's Node version 14.x works well for wasm32-emscripten. - wasm64-emscripten requires more recent v8 version, e.g. node 16.x. - Attempt to use system's node command. 
- """ - cls = type(self) - if self == cls.wasm32_emscripten: - return [NODE_JS.parent] - elif self == cls.wasm64_emscripten: - # TODO: look for recent node - return [] - else: - return [] - - @property - def emport_args(self) -> List[str]: - """Host-specific port args (Emscripten).""" - cls = type(self) - if self is cls.wasm64_emscripten: - return ["-sMEMORY64=1"] - elif self is cls.wasm32_emscripten: - return ["-sMEMORY64=0"] - else: - return [] - - @property - def embuilder_args(self) -> List[str]: - """Host-specific embuilder args (Emscripten).""" - cls = type(self) - if self is cls.wasm64_emscripten: - return ["--wasm64"] - else: - return [] - - -class EmscriptenTarget(enum.Enum): - """Emscripten-specific targets (--with-emscripten-target)""" - - browser = "browser" - browser_debug = "browser-debug" - node = "node" - node_debug = "node-debug" - - @property - def is_browser(self) -> bool: - cls = type(self) - return self in {cls.browser, cls.browser_debug} - - @property - def emport_args(self) -> List[str]: - """Target-specific port args.""" - cls = type(self) - if self in {cls.browser_debug, cls.node_debug}: - # some libs come in debug and non-debug builds - return ["-O0"] - else: - return ["-O2"] - - -class SupportLevel(enum.Enum): - supported = "tier 3, supported" - working = "working, unsupported" - experimental = "experimental, may be broken" - broken = "broken / unavailable" - - def __bool__(self) -> bool: - cls = type(self) - return self in {cls.supported, cls.working} - - -@dataclasses.dataclass -class BuildProfile: - name: str - support_level: SupportLevel - host: Host - target: Union[EmscriptenTarget, None] = None - dynamic_linking: Union[bool, None] = None - pthreads: Union[bool, None] = None - default_testopts: str = "-j2" - - @property - def is_browser(self) -> bool: - """Is this a browser build?""" - return self.target is not None and self.target.is_browser - - @property - def builddir(self) -> pathlib.Path: - """Path to build directory""" - return BUILDDIR / self.name - - @property - def python_cmd(self) -> pathlib.Path: - """Path to python executable""" - return self.builddir / self.host.platform.pythonexe - - @property - def makefile(self) -> pathlib.Path: - """Path to Makefile""" - return self.builddir / "Makefile" - - @property - def configure_cmd(self) -> List[str]: - """Generate configure command""" - # use relative path, so WASI tests can find lib prefix. - # pathlib.Path.relative_to() does not work here. 
- configure = os.path.relpath(CONFIGURE, self.builddir) - cmd = [configure, "-C"] - platform = self.host.platform - if platform.configure_wrapper: - cmd.insert(0, os.fspath(platform.configure_wrapper)) - - cmd.append(f"--host={self.host.value}") - cmd.append(f"--build={Host.build.value}") - - if self.target is not None: - assert self.host.is_emscripten - cmd.append(f"--with-emscripten-target={self.target.value}") - - if self.dynamic_linking is not None: - assert self.host.is_emscripten - opt = "enable" if self.dynamic_linking else "disable" - cmd.append(f"--{opt}-wasm-dynamic-linking") - - if self.pthreads is not None: - opt = "enable" if self.pthreads else "disable" - cmd.append(f"--{opt}-wasm-pthreads") - - if self.host != Host.build: - cmd.append(f"--with-build-python={BUILD.python_cmd}") - - if platform.config_site is not None: - cmd.append(f"CONFIG_SITE={platform.config_site}") - - return cmd - - @property - def make_cmd(self) -> List[str]: - """Generate make command""" - cmd = ["make"] - platform = self.host.platform - if platform.make_wrapper: - cmd.insert(0, os.fspath(platform.make_wrapper)) - return cmd - - def getenv(self) -> Dict[str, Any]: - """Generate environ dict for platform""" - env = os.environ.copy() - if hasattr(os, 'process_cpu_count'): - cpu_count = os.process_cpu_count() - else: - cpu_count = os.cpu_count() - env.setdefault("MAKEFLAGS", f"-j{cpu_count}") - platenv = self.host.platform.getenv(self) - for key, value in platenv.items(): - if value is None: - env.pop(key, None) - elif key == "PATH": - # list of path items, prefix with extra paths - new_path: List[pathlib.PurePath] = [] - new_path.extend(self.host.get_extra_paths()) - new_path.extend(value) - env[key] = os.pathsep.join(os.fspath(p) for p in new_path) - elif isinstance(value, str): - env[key] = value.format( - relbuilddir=self.builddir.relative_to(SRCDIR), - srcdir=SRCDIR, - version=PYTHON_VERSION, - ) - else: - env[key] = value - return env - - def _run_cmd( - self, - cmd: Iterable[str], - args: Iterable[str] = (), - cwd: Optional[pathlib.Path] = None, - ) -> int: - cmd = list(cmd) - cmd.extend(args) - if cwd is None: - cwd = self.builddir - logger.info('Running "%s" in "%s"', shlex.join(cmd), cwd) - return subprocess.check_call( - cmd, - cwd=os.fspath(cwd), - env=self.getenv(), - ) - - def _check_execute(self) -> None: - if self.is_browser: - raise ValueError(f"Cannot execute on {self.target}") - - def run_build(self, *args: str) -> None: - """Run configure (if necessary) and make""" - if not self.makefile.exists(): - logger.info("Makefile not found, running configure") - self.run_configure(*args) - self.run_make("all", *args) - - def run_configure(self, *args: str) -> int: - """Run configure script to generate Makefile""" - os.makedirs(self.builddir, exist_ok=True) - return self._run_cmd(self.configure_cmd, args) - - def run_make(self, *args: str) -> int: - """Run make (defaults to build all)""" - return self._run_cmd(self.make_cmd, args) - - def run_pythoninfo(self, *args: str) -> int: - """Run 'make pythoninfo'""" - self._check_execute() - return self.run_make("pythoninfo", *args) - - def run_test(self, target: str, testopts: Optional[str] = None) -> int: - """Run buildbottests""" - self._check_execute() - if testopts is None: - testopts = self.default_testopts - return self.run_make(target, f"TESTOPTS={testopts}") - - def run_py(self, *args: str) -> int: - """Run Python with hostrunner""" - self._check_execute() - return self.run_make( - "--eval", f"run: all; $(HOSTRUNNER) ./$(PYTHON) 
{shlex.join(args)}", "run" - ) - - def run_browser(self, bind: str = "127.0.0.1", port: int = 8000) -> None: - """Run WASM webserver and open build in browser""" - relbuilddir = self.builddir.relative_to(SRCDIR) - url = f"http://{bind}:{port}/{relbuilddir}/python.html" - args = [ - sys.executable, - os.fspath(WASM_WEBSERVER), - "--bind", - bind, - "--port", - str(port), - ] - srv = subprocess.Popen(args, cwd=SRCDIR) - # wait for server - end = time.monotonic() + 3.0 - while time.monotonic() < end and srv.returncode is None: - try: - with socket.create_connection((bind, port), timeout=0.1) as _: - pass - except OSError: - time.sleep(0.01) - else: - break - - webbrowser.open(url) - - try: - srv.wait() - except KeyboardInterrupt: - pass - - def clean(self, all: bool = False) -> None: - """Clean build directory""" - if all: - if self.builddir.exists(): - shutil.rmtree(self.builddir) - elif self.makefile.exists(): - self.run_make("clean") - - def build_emports(self, force: bool = False) -> None: - """Pre-build emscripten ports.""" - platform = self.host.platform - if platform.ports is None or platform.cc is None: - raise ValueError("Need ports and CC command") - - embuilder_cmd = [os.fspath(platform.ports)] - embuilder_cmd.extend(self.host.embuilder_args) - if force: - embuilder_cmd.append("--force") - - ports_cmd = [os.fspath(platform.cc)] - ports_cmd.extend(self.host.emport_args) - if self.target: - ports_cmd.extend(self.target.emport_args) - - if self.dynamic_linking: - # Trigger PIC build. - ports_cmd.append("-sMAIN_MODULE") - embuilder_cmd.append("--pic") - - if self.pthreads: - # Trigger multi-threaded build. - ports_cmd.append("-sUSE_PTHREADS") - - # Pre-build libbz2, libsqlite3, libz, and some system libs. - ports_cmd.extend(["-sUSE_ZLIB", "-sUSE_BZIP2", "-sUSE_SQLITE3"]) - # Multi-threaded sqlite3 has different suffix - embuilder_cmd.extend( - ["build", "bzip2", "sqlite3-mt" if self.pthreads else "sqlite3", "zlib"] - ) - - self._run_cmd(embuilder_cmd, cwd=SRCDIR) - - with tempfile.TemporaryDirectory(suffix="-py-emport") as tmpdir: - tmppath = pathlib.Path(tmpdir) - main_c = tmppath / "main.c" - main_js = tmppath / "main.js" - with main_c.open("w") as f: - f.write("int main(void) { return 0; }\n") - args = [ - os.fspath(main_c), - "-o", - os.fspath(main_js), - ] - self._run_cmd(ports_cmd, args, cwd=tmppath) - - -# native build (build Python) -BUILD = BuildProfile( - "build", - support_level=SupportLevel.working, - host=Host.build, -) - -_profiles = [ - BUILD, - # wasm32-emscripten - BuildProfile( - "emscripten-browser", - support_level=SupportLevel.supported, - host=Host.wasm32_emscripten, - target=EmscriptenTarget.browser, - dynamic_linking=True, - ), - BuildProfile( - "emscripten-browser-debug", - support_level=SupportLevel.working, - host=Host.wasm32_emscripten, - target=EmscriptenTarget.browser_debug, - dynamic_linking=True, - ), - BuildProfile( - "emscripten-node-dl", - support_level=SupportLevel.supported, - host=Host.wasm32_emscripten, - target=EmscriptenTarget.node, - dynamic_linking=True, - ), - BuildProfile( - "emscripten-node-dl-debug", - support_level=SupportLevel.working, - host=Host.wasm32_emscripten, - target=EmscriptenTarget.node_debug, - dynamic_linking=True, - ), - BuildProfile( - "emscripten-node-pthreads", - support_level=SupportLevel.supported, - host=Host.wasm32_emscripten, - target=EmscriptenTarget.node, - pthreads=True, - ), - BuildProfile( - "emscripten-node-pthreads-debug", - support_level=SupportLevel.working, - host=Host.wasm32_emscripten, - 
target=EmscriptenTarget.node_debug, - pthreads=True, - ), - # Emscripten build with both pthreads and dynamic linking is crashing. - BuildProfile( - "emscripten-node-dl-pthreads-debug", - support_level=SupportLevel.broken, - host=Host.wasm32_emscripten, - target=EmscriptenTarget.node_debug, - dynamic_linking=True, - pthreads=True, - ), - # wasm64-emscripten (requires Emscripten >= 3.1.21) - BuildProfile( - "wasm64-emscripten-node-debug", - support_level=SupportLevel.experimental, - host=Host.wasm64_emscripten, - target=EmscriptenTarget.node_debug, - # MEMORY64 is not compatible with dynamic linking - dynamic_linking=False, - pthreads=False, - ), - # wasm32-wasi - BuildProfile( - "wasi", - support_level=SupportLevel.supported, - host=Host.wasm32_wasi, - ), - # wasm32-wasi-threads - BuildProfile( - "wasi-threads", - support_level=SupportLevel.experimental, - host=Host.wasm32_wasi, - pthreads=True, - ), - # no SDK available yet - # BuildProfile( - # "wasm64-wasi", - # support_level=SupportLevel.broken, - # host=Host.wasm64_wasi, - # ), -] - -PROFILES = {p.name: p for p in _profiles} - -parser = argparse.ArgumentParser( - "wasm_build.py", - description=__doc__, - formatter_class=argparse.RawTextHelpFormatter, -) - -parser.add_argument( - "--clean", - "-c", - help="Clean build directories first", - action="store_true", -) - -parser.add_argument( - "--verbose", - "-v", - help="Verbose logging", - action="store_true", -) - -parser.add_argument( - "--silent", - help="Run configure and make in silent mode", - action="store_true", -) - -parser.add_argument( - "--testopts", - help=( - "Additional test options for 'test' and 'hostrunnertest', e.g. " - "--testopts='-v test_os'." - ), - default=None, -) - -# Don't list broken and experimental variants in help -platforms_choices = list(p.name for p in _profiles) + ["cleanall"] -platforms_help = list(p.name for p in _profiles if p.support_level) + ["cleanall"] -parser.add_argument( - "platform", - metavar="PLATFORM", - help=f"Build platform: {', '.join(platforms_help)}", - choices=platforms_choices, -) - -ops = dict( - build="auto build (build 'build' Python, emports, configure, compile)", - configure="run ./configure", - compile="run 'make all'", - pythoninfo="run 'make pythoninfo'", - test="run 'make buildbottest TESTOPTS=...' 
(supports parallel tests)",
-    hostrunnertest="run 'make hostrunnertest TESTOPTS=...'",
-    repl="start interactive REPL / webserver + browser session",
-    clean="run 'make clean'",
-    cleanall="remove all build directories",
-    emports="build Emscripten port with embuilder (only Emscripten)",
-)
-ops_help = "\n".join(f"{op:16s} {help}" for op, help in ops.items())
-parser.add_argument(
-    "ops",
-    metavar="OP",
-    help=f"operation (default: build)\n\n{ops_help}",
-    choices=tuple(ops),
-    default="build",
-    nargs="*",
-)
-
-
-def main() -> None:
-    args = parser.parse_args()
-    logging.basicConfig(
-        level=logging.INFO if args.verbose else logging.ERROR,
-        format="%(message)s",
-    )
-
-    if args.platform == "cleanall":
-        for builder in PROFILES.values():
-            builder.clean(all=True)
-        parser.exit(0)
-
-    # additional configure and make args
-    cm_args = ("--silent",) if args.silent else ()
-
-    # nargs=* with default quirk
-    if args.ops == "build":
-        args.ops = ["build"]
-
-    builder = PROFILES[args.platform]
-    try:
-        builder.host.platform.check()
-    except ConditionError as e:
-        parser.error(str(e))
-
-    if args.clean:
-        builder.clean(all=False)
-
-    # hack for WASI
-    if builder.host.is_wasi and not SETUP_LOCAL.exists():
-        SETUP_LOCAL.touch()
-
-    # auto-build
-    if "build" in args.ops:
-        # check and create build Python
-        if builder is not BUILD:
-            logger.info("Auto-building 'build' Python.")
-            try:
-                BUILD.host.platform.check()
-            except ConditionError as e:
-                parser.error(str(e))
-            if args.clean:
-                BUILD.clean(all=False)
-            BUILD.run_build(*cm_args)
-        # build Emscripten ports with embuilder
-        if builder.host.is_emscripten and "emports" not in args.ops:
-            builder.build_emports()
-
-    for op in args.ops:
-        logger.info("\n*** %s %s", args.platform, op)
-        if op == "build":
-            builder.run_build(*cm_args)
-        elif op == "configure":
-            builder.run_configure(*cm_args)
-        elif op == "compile":
-            builder.run_make("all", *cm_args)
-        elif op == "pythoninfo":
-            builder.run_pythoninfo(*cm_args)
-        elif op == "repl":
-            if builder.is_browser:
-                builder.run_browser()
-            else:
-                builder.run_py()
-        elif op == "test":
-            builder.run_test("buildbottest", testopts=args.testopts)
-        elif op == "hostrunnertest":
-            builder.run_test("hostrunnertest", testopts=args.testopts)
-        elif op == "clean":
-            builder.clean(all=False)
-        elif op == "cleanall":
-            builder.clean(all=True)
-        elif op == "emports":
-            builder.build_emports(force=args.clean)
-        else:
-            raise ValueError(op)
-
-    print(builder.builddir)
-    parser.exit(0)
-
-
-if __name__ == "__main__":
-    main()
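One technique from the deleted run_py() above deserves a note: instead of re-deriving HOSTRUNNER and the interpreter path in Python, it injects a throwaway rule via make --eval and lets make expand its own $(HOSTRUNNER) and $(PYTHON) variables. A stripped-down sketch; the directory and arguments are hypothetical:

    import shlex
    import subprocess

    def run_under_hostrunner(builddir, args):
        # --eval defines a one-off `run` target on the command line; make
        # expands $(HOSTRUNNER) and $(PYTHON) from the generated Makefile,
        # so the command stays correct for whatever host was configured.
        rule = f"run: all; $(HOSTRUNNER) ./$(PYTHON) {shlex.join(args)}"
        subprocess.check_call(["make", "--eval", rule, "run"], cwd=builddir)

    run_under_hostrunner("builddir/wasi", ["-m", "test", "test_os"])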