290 files changed, 8036 insertions, 3759 deletions
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 63a28490043..08d7a80d772 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -35,6 +35,7 @@ Objects/type*                 @markshannon
 Objects/codeobject.c          @markshannon
 Objects/frameobject.c         @markshannon
 Objects/call.c                @markshannon
+Objects/object.c              @ZeroIntensity
 Python/ceval*.c               @markshannon
 Python/ceval*.h               @markshannon
 Python/codegen.c              @markshannon @iritkatriel
@@ -44,8 +45,9 @@ Python/flowgraph.c            @markshannon @iritkatriel
 Python/instruction_sequence.c @iritkatriel
 Python/bytecodes.c            @markshannon
 Python/optimizer*.c           @markshannon
-Python/optimizer_analysis.c   @Fidget-Spinner
-Python/optimizer_bytecodes.c  @Fidget-Spinner
+Python/optimizer_analysis.c   @Fidget-Spinner @tomasr8
+Python/optimizer_bytecodes.c  @Fidget-Spinner @tomasr8
+Python/optimizer_symbols.c    @tomasr8
 Python/symtable.c             @JelleZijlstra @carljm
 Lib/_pyrepl/*                 @pablogsal @lysnikolaou @ambv
 Lib/test/test_patma.py        @brandtbucher
@@ -66,8 +68,8 @@ Doc/_static/**                @AA-Turner @hugovk
 Doc/tools/**                  @AA-Turner @hugovk
 
 # runtime state/lifecycle
-**/*pylifecycle*              @ericsnowcurrently
-**/*pystate*                  @ericsnowcurrently
+**/*pylifecycle*              @ericsnowcurrently @ZeroIntensity
+**/*pystate*                  @ericsnowcurrently @ZeroIntensity
 **/*preconfig*                @ericsnowcurrently
 **/*initconfig*               @ericsnowcurrently
 **/*pathconfig*               @ericsnowcurrently
@@ -187,13 +189,13 @@ Include/internal/pycore_time.h  @pganssle @abalkin
 /Tools/cases_generator/        @markshannon
 
 # AST
-Python/ast.c                  @isidentical @JelleZijlstra @eclips4
-Python/ast_preprocess.c       @isidentical @eclips4
-Parser/asdl.py                @isidentical @JelleZijlstra @eclips4
-Parser/asdl_c.py              @isidentical @JelleZijlstra @eclips4
-Lib/ast.py                    @isidentical @JelleZijlstra @eclips4
-Lib/_ast_unparse.py           @isidentical @JelleZijlstra @eclips4
-Lib/test/test_ast/            @eclips4
+Python/ast.c                  @isidentical @JelleZijlstra @eclips4 @tomasr8
+Python/ast_preprocess.c       @isidentical @eclips4 @tomasr8
+Parser/asdl.py                @isidentical @JelleZijlstra @eclips4 @tomasr8
+Parser/asdl_c.py              @isidentical @JelleZijlstra @eclips4 @tomasr8
+Lib/ast.py                    @isidentical @JelleZijlstra @eclips4 @tomasr8
+Lib/_ast_unparse.py           @isidentical @JelleZijlstra @eclips4 @tomasr8
+Lib/test/test_ast/            @eclips4 @tomasr8
 
 # Mock
 /Lib/unittest/mock.py         @cjw296
@@ -340,3 +342,6 @@ Modules/_xxtestfuzz/          @ammaraskar
 Python/remote_debug.h         @pablogsal
 Python/remote_debugging.c     @pablogsal
 Modules/_remote_debugging_module.c @pablogsal @ambv @1st1
+
+# gettext
+**/*gettext*                  @tomasr8
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 54ebc914b46..c6171571857 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -527,6 +527,14 @@ jobs:
       config_hash: ${{ needs.build-context.outputs.config-hash }}
       free-threading: ${{ matrix.free-threading }}
 
+  build-ubsan:
+    name: Undefined behavior sanitizer
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
+    uses: ./.github/workflows/reusable-ubsan.yml
+    with:
+      config_hash: ${{ needs.build-context.outputs.config-hash }}
+
   cross-build-linux:
     name: Cross build Linux
     runs-on: ubuntu-latest
diff --git a/.github/workflows/reusable-ubsan.yml b/.github/workflows/reusable-ubsan.yml
new file mode 100644
index 00000000000..cf93932f13b
--- /dev/null
+++ b/.github/workflows/reusable-ubsan.yml
@@ -0,0 +1,74 @@
+name: Reusable Undefined Behavior Sanitizer
+
+on:
+  workflow_call:
+    inputs:
+      config_hash:
+        required: true
+        type: string
+
+env:
+  FORCE_COLOR: 1
+
+jobs:
+  build-ubsan-reusable:
+    name: 'Undefined behavior sanitizer'
+    runs-on: ubuntu-24.04
+    timeout-minutes: 60
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        persist-credentials: false
+    - name: Runner image version
+      run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV"
+    - name: Restore config.cache
+      uses: actions/cache@v4
+      with:
+        path: config.cache
+        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }}
+    - name: Install dependencies
+      run: |
+        sudo ./.github/workflows/posix-deps-apt.sh
+        # Install clang-20
+        wget https://apt.llvm.org/llvm.sh
+        chmod +x llvm.sh
+        sudo ./llvm.sh 20
+    - name: UBSAN option setup
+      run: |
+        echo "UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1" >> "$GITHUB_ENV"
+        echo "CC=clang" >> "$GITHUB_ENV"
+        echo "CXX=clang++" >> "$GITHUB_ENV"
+    - name: Add ccache to PATH
+      run: |
+        echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV"
+    - name: Configure ccache action
+      uses: hendrikmuhs/ccache-action@v1.2
+      with:
+        save: ${{ github.event_name == 'push' }}
+        max-size: "200M"
+    - name: Configure CPython
+      run: >-
+        ./configure
+        --config-cache
+        --with-undefined-behavior-sanitizer
+        --with-pydebug
+    - name: Set up UBSAN log after configuration
+      run: |
+        echo "UBSAN_OPTIONS=${UBSAN_OPTIONS}:log_path=${GITHUB_WORKSPACE}/ubsan_log" >> "$GITHUB_ENV"
+    - name: Build CPython
+      run: make -j4
+    - name: Display build info
+      run: make pythoninfo
+    - name: Tests
+      run: ./python -m test -j4
+    - name: Display UBSAN logs
+      if: always()
+      run: find "${GITHUB_WORKSPACE}" -name 'ubsan_log.*' | xargs head -n 1000
+    - name: Archive UBSAN logs
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: >-
+          ubsan-logs
+        path: ubsan_log.*
+        if-no-files-found: ignore
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 822a8a9f4e5..86410c46d1d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -34,6 +34,13 @@ repos:
         name: Run Black on Tools/jit/
         files: ^Tools/jit/
 
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.5.5
+    hooks:
+      - id: remove-tabs
+        types: [python]
+        exclude: ^Tools/c-analyzer/cpython/_parser.py
+
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
     hooks:
diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst
index 49dbc8d71cc..ab9f9c4539a 100644
--- a/Doc/c-api/arg.rst
+++ b/Doc/c-api/arg.rst
@@ -686,6 +686,12 @@ Building values
    ``p`` (:class:`bool`) [int]
       Convert a C :c:expr:`int` to a Python :class:`bool` object.
 
+      Be aware that this format requires an ``int`` argument.
+      Unlike most other contexts in C, variadic arguments are not coerced to
+      a suitable type automatically.
+      You can convert another type (for example, a pointer or a float) to a
+      suitable ``int`` value using ``(x) ? 1 : 0`` or ``!!x``.
+
       .. versionadded:: 3.14
 
    ``c`` (:class:`bytes` of length 1) [char]
diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index 885dbeb7530..a750cda3e2d 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -749,6 +749,16 @@ Exception Classes
    .. versionadded:: 3.2
 
 
+.. c:function:: int PyExceptionClass_Check(PyObject *ob)
+
+   Return non-zero if *ob* is an exception class, zero otherwise. This function always succeeds.
+
+
+.. c:function:: const char *PyExceptionClass_Name(PyObject *ob)
+
+   Return :c:member:`~PyTypeObject.tp_name` of the exception class *ob*.
+
+
 Exception Objects
 =================
 
diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst
index 4c8212f2f5e..3d331e6ec12 100644
--- a/Doc/c-api/extension-modules.rst
+++ b/Doc/c-api/extension-modules.rst
@@ -242,6 +242,6 @@ in the following ways:
 * Single-phase modules support module lookup functions like
   :c:func:`PyState_FindModule`.
 
-.. [#testsinglephase] ``_testsinglephase`` is an internal module used \
-   in CPython's self-test suite; your installation may or may not \
+.. [#testsinglephase] ``_testsinglephase`` is an internal module used
+   in CPython's self-test suite; your installation may or may not
    include it.
diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst
index 63b78f67767..5fb8567ef8c 100644
--- a/Doc/c-api/function.rst
+++ b/Doc/c-api/function.rst
@@ -95,6 +95,13 @@ There are a few functions specific to Python functions.
 
    .. versionadded:: 3.12
 
+
+.. c:function:: PyObject* PyFunction_GetKwDefaults(PyObject *op)
+
+   Return the keyword-only argument default values of the function object *op*. This can be a
+   dictionary of arguments or ``NULL``.
+
+
 .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op)
 
    Return the closure associated with the function object *op*. This can be ``NULL``
@@ -123,6 +130,19 @@ There are a few functions specific to Python functions.
    Raises :exc:`SystemError` and returns ``-1`` on failure.
 
 
+.. c:function:: PyObject *PyFunction_GET_CODE(PyObject *op)
+                PyObject *PyFunction_GET_GLOBALS(PyObject *op)
+                PyObject *PyFunction_GET_MODULE(PyObject *op)
+                PyObject *PyFunction_GET_DEFAULTS(PyObject *op)
+                PyObject *PyFunction_GET_KW_DEFAULTS(PyObject *op)
+                PyObject *PyFunction_GET_CLOSURE(PyObject *op)
+                PyObject *PyFunction_GET_ANNOTATIONS(PyObject *op)
+
+   These functions are similar to their ``PyFunction_Get*`` counterparts, but
+   do not do type checking. Passing anything other than an instance of
+   :c:data:`PyFunction_Type` is undefined behavior.
+
+
 .. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback)
 
    Register *callback* as a function watcher for the current interpreter.
diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst
index 3106bf9808f..41fd4ea14ef 100644
--- a/Doc/c-api/init.rst
+++ b/Doc/c-api/init.rst
@@ -1250,7 +1250,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`.
 .. c:function:: void PyInterpreterState_Clear(PyInterpreterState *interp)
 
    Reset all information in an interpreter state object.  There must be
-   an :term:`attached thread state` for the the interpreter.
+   an :term:`attached thread state` for the interpreter.
 
    .. audit-event:: cpython.PyInterpreterState_Clear "" c.PyInterpreterState_Clear
 
diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst
index 25d9e62e387..2d0bda76697 100644
--- a/Doc/c-api/long.rst
+++ b/Doc/c-api/long.rst
@@ -439,7 +439,7 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
    All *n_bytes* of the buffer are written: large buffers are padded with
    zeroes.
 
-   If the returned value is greater than than *n_bytes*, the value was
+   If the returned value is greater than *n_bytes*, the value was
    truncated: as many of the lowest bits of the value as could fit are written,
    and the higher bits are ignored. This matches the typical behavior
    of a C-style downcast.
diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst
index b23f016f9b0..57a0728d4e9 100644
--- a/Doc/c-api/refcounting.rst
+++ b/Doc/c-api/refcounting.rst
@@ -210,7 +210,7 @@ of Python objects.
 
         Py_SETREF(dst, src);
 
-   That arranges to set *dst* to *src* _before_ releasing the reference
+   That arranges to set *dst* to *src* *before* releasing the reference
    to the old value of *dst*, so that any code triggered as a side-effect
    of *dst* getting torn down no longer believes *dst* points
    to a valid object.
diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst
index 2176b8e492f..5bdbff4e0ad 100644
--- a/Doc/c-api/type.rst
+++ b/Doc/c-api/type.rst
@@ -282,6 +282,10 @@ Type Objects
    and other places where a method's defining class cannot be passed using the
    :c:type:`PyCMethod` calling convention.
 
+   The returned reference is :term:`borrowed <borrowed reference>` from *type*,
+   and will be valid as long as you hold a reference to *type*.
+   Do not release it with :c:func:`Py_DECREF` or similar.
+
    .. versionadded:: 3.11
 
 .. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result)
diff --git a/Doc/conf.py b/Doc/conf.py
index b08f5452901..161c2986441 100644
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -79,6 +79,10 @@ version, release = import_module('patchlevel').get_version_info()
 rst_epilog = f"""
 .. |python_version_literal| replace:: ``Python {version}``
 .. |python_x_dot_y_literal| replace:: ``python{version}``
+.. |python_x_dot_y_t_literal| replace:: ``python{version}t``
+.. |python_x_dot_y_t_literal_config| replace:: ``python{version}t-config``
+.. |x_dot_y_b2_literal| replace:: ``{version}.0b2``
+.. |applications_python_version_literal| replace:: ``/Applications/Python {version}/``
 .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}``
 
 .. Apparently this how you hack together a formatted link:
diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat
index f5f02f0a79c..144c5608e07 100644
--- a/Doc/data/refcounts.dat
+++ b/Doc/data/refcounts.dat
@@ -963,21 +963,45 @@ PyFunction_Check:PyObject*:o:0:
 PyFunction_GetAnnotations:PyObject*::0:
 PyFunction_GetAnnotations:PyObject*:op:0:
 
+PyFunction_GET_ANNOTATIONS:PyObject*::0:
+PyFunction_GET_ANNOTATIONS:PyObject*:op:0:
+
 PyFunction_GetClosure:PyObject*::0:
 PyFunction_GetClosure:PyObject*:op:0:
 
+PyFunction_GET_CLOSURE:PyObject*::0:
+PyFunction_GET_CLOSURE:PyObject*:op:0:
+
 PyFunction_GetCode:PyObject*::0:
 PyFunction_GetCode:PyObject*:op:0:
 
+PyFunction_GET_CODE:PyObject*::0:
+PyFunction_GET_CODE:PyObject*:op:0:
+
 PyFunction_GetDefaults:PyObject*::0:
 PyFunction_GetDefaults:PyObject*:op:0:
 
+PyFunction_GET_DEFAULTS:PyObject*::0:
+PyFunction_GET_DEFAULTS:PyObject*:op:0:
+
+PyFunction_GetKwDefaults:PyObject*::0:
+PyFunction_GetKwDefaults:PyObject*:op:0:
+
+PyFunction_GET_KW_DEFAULTS:PyObject*::0:
+PyFunction_GET_KW_DEFAULTS:PyObject*:op:0:
+
 PyFunction_GetGlobals:PyObject*::0:
 PyFunction_GetGlobals:PyObject*:op:0:
 
+PyFunction_GET_GLOBALS:PyObject*::0:
+PyFunction_GET_GLOBALS:PyObject*:op:0:
+
 PyFunction_GetModule:PyObject*::0:
 PyFunction_GetModule:PyObject*:op:0:
 
+PyFunction_GET_MODULE:PyObject*::0:
+PyFunction_GET_MODULE:PyObject*:op:0:
+
 PyFunction_New:PyObject*::+1:
 PyFunction_New:PyObject*:code:+1:
 PyFunction_New:PyObject*:globals:+1:
@@ -2385,6 +2409,10 @@ PyType_GetFlags:PyTypeObject*:type:0:
 PyType_GetName:PyObject*::+1:
 PyType_GetName:PyTypeObject*:type:0:
 
+PyType_GetModuleByDef:PyObject*::0:
+PyType_GetModuleByDef:PyTypeObject*:type:0:
+PyType_GetModuleByDef:PyModuleDef*:def::
+
 PyType_GetQualName:PyObject*::+1:
 PyType_GetQualName:PyTypeObject*:type:0:
 
diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst
index f14690de4f8..3bbee33bd50 100644
--- a/Doc/extending/newtypes_tutorial.rst
+++ b/Doc/extending/newtypes_tutorial.rst
@@ -277,7 +277,7 @@ be an instance of a subclass.
    The explicit cast to ``CustomObject *`` above is needed because we defined
    ``Custom_dealloc`` to take a ``PyObject *`` argument, as the ``tp_dealloc``
    function pointer expects to receive a ``PyObject *`` argument.
-   By assigning to the the ``tp_dealloc`` slot of a type, we declare
+   By assigning to the ``tp_dealloc`` slot of a type, we declare
    that it can only be called with instances of our ``CustomObject``
    class, so the cast to ``(CustomObject *)`` is safe.
    This is object-oriented polymorphism, in C!
diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst
index 175bb5dc831..02b45879ccf 100644
--- a/Doc/howto/free-threading-extensions.rst
+++ b/Doc/howto/free-threading-extensions.rst
@@ -6,8 +6,8 @@
 C API Extension Support for Free Threading
 ******************************************
 
-Starting with the 3.13 release, CPython has experimental support for running
-with the :term:`global interpreter lock` (GIL) disabled in a configuration
+Starting with the 3.13 release, CPython has support for running with
+the :term:`global interpreter lock` (GIL) disabled in a configuration
 called :term:`free threading`.  This document describes how to adapt C API
 extensions to support free threading.
 
diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst
index c33cef2c8e9..24069617c47 100644
--- a/Doc/howto/free-threading-python.rst
+++ b/Doc/howto/free-threading-python.rst
@@ -1,18 +1,21 @@
 .. _freethreading-python-howto:
 
-**********************************************
-Python experimental support for free threading
-**********************************************
+*********************************
+Python support for free threading
+*********************************
 
-Starting with the 3.13 release, CPython has experimental support for a build of
+Starting with the 3.13 release, CPython has support for a build of
 Python called :term:`free threading` where the :term:`global interpreter lock`
 (GIL) is disabled.  Free-threaded execution allows for full utilization of the
 available processing power by running threads in parallel on available CPU cores.
 While not all software will benefit from this automatically, programs
 designed with threading in mind will run faster on multi-core hardware.
 
-**The free-threaded mode is experimental** and work is ongoing to improve it:
-expect some bugs and a substantial single-threaded performance hit.
+The free-threaded mode is working and continues to be improved, but
+there is some additional overhead in single-threaded workloads compared
+to the regular build. Additionally, third-party packages, in particular ones
+with an :term:`extension module`, may not be ready for use in a
+free-threaded build, in which case importing them will re-enable the :term:`GIL`.
 
 This document describes the implications of free threading
 for Python code.  See :ref:`freethreading-extensions-howto` for information on
@@ -43,7 +46,7 @@ Identifying free-threaded Python
 ================================
 
 To check if the current interpreter supports free-threading, :option:`python -VV <-V>`
-and :data:`sys.version` contain "experimental free-threading build".
+and :data:`sys.version` contain "free-threading build".
 The new :func:`sys._is_gil_enabled` function can be used to check whether
 the GIL is actually disabled in the running process.
 
diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst
index b4f3463afee..78e56e0c64f 100644
--- a/Doc/howto/functional.rst
+++ b/Doc/howto/functional.rst
@@ -602,7 +602,7 @@ generators:
   raise an exception inside the generator; the exception is raised by the
   ``yield`` expression where the generator's execution is paused.
 
-* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the
+* :meth:`~generator.close` sends a :exc:`GeneratorExit` exception to the
   generator to terminate the iteration.  On receiving this exception, the
   generator's code must either raise :exc:`GeneratorExit` or
   :exc:`StopIteration`; catching the exception and doing anything else is
diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst
index b2109b15039..fbc426ba1d7 100644
--- a/Doc/howto/isolating-extensions.rst
+++ b/Doc/howto/isolating-extensions.rst
@@ -453,7 +453,7 @@ Avoiding ``PyObject_New``
 
 GC-tracked objects need to be allocated using GC-aware functions.
 
-If you use use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`:
+If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`:
 
 - Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible.
   That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with::
diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst
index e543f6d5657..7486a378dbb 100644
--- a/Doc/howto/regex.rst
+++ b/Doc/howto/regex.rst
@@ -1016,7 +1016,9 @@ extension.  This regular expression matches ``foo.bar`` and
 Now, consider complicating the problem a bit; what if you want to match
 filenames where the extension is not ``bat``? Some incorrect attempts:
 
-``.*[.][^b].*$``  The first attempt above tries to exclude ``bat`` by requiring
+``.*[.][^b].*$``
+
+The first attempt above tries to exclude ``bat`` by requiring
 that the first character of the extension is not a ``b``.  This is wrong,
 because the pattern also doesn't match ``foo.bar``.
 
@@ -1043,7 +1045,9 @@ confusing.
 
 A negative lookahead cuts through all this confusion:
 
-``.*[.](?!bat$)[^.]*$``  The negative lookahead means: if the expression ``bat``
+``.*[.](?!bat$)[^.]*$``
+
+The negative lookahead means: if the expression ``bat``
 doesn't match at this point, try the rest of the pattern; if ``bat$`` does
 match, the whole pattern will fail.  The trailing ``$`` is required to ensure
 that something like ``sample.batch``, where the extension only starts with
diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst
index 17f126cc065..a03d88092db 100644
--- a/Doc/library/argparse.rst
+++ b/Doc/library/argparse.rst
@@ -955,7 +955,7 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are:
 
 .. index:: single: + (plus); in argparse module
 
-* ``'+'``. Just like ``'*'``, all command-line args present are gathered into a
+* ``'+'``. Just like ``'*'``, all command-line arguments present are gathered into a
   list.  Additionally, an error message will be generated if there wasn't at
   least one command-line argument present.  For example::
 
diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst
index 3c8d9ab111e..dd92765038c 100644
--- a/Doc/library/concurrent.futures.rst
+++ b/Doc/library/concurrent.futures.rst
@@ -265,7 +265,7 @@ Each worker's interpreter is isolated from all the other interpreters.
 "Isolated" means each interpreter has its own runtime state and
 operates completely independently.  For example, if you redirect
 :data:`sys.stdout` in one interpreter, it will not be automatically
-redirected any other interpreter.  If you import a module in one
+redirected to any other interpreter.  If you import a module in one
 interpreter, it is not automatically imported in any other.  You
 would need to import the module separately in interpreter where
 you need it.  In fact, each module imported in an interpreter is
@@ -287,7 +287,7 @@ efficient alternative is to serialize with :mod:`pickle` and then send
 the bytes over a shared :mod:`socket <socket>` or
 :func:`pipe <os.pipe>`.
 
-.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=(), shared=None)
+.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=())
 
    A :class:`ThreadPoolExecutor` subclass that executes calls asynchronously
    using a pool of at most *max_workers* threads.  Each thread runs
@@ -305,20 +305,9 @@ the bytes over a shared :mod:`socket <socket>` or
    interpreter.
 
    .. note::
-      Functions defined in the ``__main__`` module cannot be pickled
-      and thus cannot be used.
-
-   .. note::
       The executor may replace uncaught exceptions from *initializer*
       with :class:`~concurrent.futures.interpreter.ExecutionFailed`.
 
-   The optional *shared* argument is a :class:`dict` of objects that all
-   interpreters in the pool share.  The *shared* items are added to each
-   interpreter's ``__main__`` module.  Not all objects are shareable.
-   Shareable objects include the builtin singletons, :class:`str`
-   and :class:`bytes`, and :class:`memoryview`.  See :pep:`734`
-   for more info.
-
    Other caveats from parent :class:`ThreadPoolExecutor` apply here.
 
 :meth:`~Executor.submit` and :meth:`~Executor.map` work like normal,
@@ -326,10 +315,6 @@ except the worker serializes the callable and arguments using
 :mod:`pickle` when sending them to its interpreter.  The worker
 likewise serializes the return value when sending it back.
 
-.. note::
-   Functions defined in the ``__main__`` module cannot be pickled
-   and thus cannot be used.
-
 When a worker's current task raises an uncaught exception, the worker
 always tries to preserve the exception as-is.  If that is successful
 then it also sets the ``__cause__`` to a corresponding
diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst
index 2ee4450698a..846cece3761 100644
--- a/Doc/library/ctypes.rst
+++ b/Doc/library/ctypes.rst
@@ -882,7 +882,7 @@ invalid non-\ ``NULL`` pointers would crash Python)::
 Thread safety without the GIL
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-In Python 3.13, the :term:`GIL` may be disabled on :term:`experimental free threaded <free threading>` builds.
+From Python 3.13 onward, the :term:`GIL` can be disabled on :term:`free threaded <free threading>` builds.
 In ctypes, reads and writes to a single object concurrently is safe, but not across multiple objects:
 
    .. code-block:: pycon
@@ -2965,7 +2965,7 @@ fields, or any other data types containing pointer type fields.
    .. attribute:: is_anonymous
 
       True if this field is anonymous, that is, it contains nested sub-fields
-      that should be be merged into a containing structure or union.
+      that should be merged into a containing structure or union.
 
 
 .. _ctypes-arrays-pointers:
diff --git a/Doc/library/dialog.rst b/Doc/library/dialog.rst
index 191e0da1210..e0693e8eb6e 100644
--- a/Doc/library/dialog.rst
+++ b/Doc/library/dialog.rst
@@ -220,7 +220,7 @@ is the base class for dialogs defined in other supporting modules.
 
 .. class:: Dialog(master=None, **options)
 
-   .. method:: show(color=None, **options)
+   .. method:: show(**options)
 
       Render the Dialog window.
 
diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
index 44767b5dd2d..11685a32f48 100644
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -1094,14 +1094,6 @@ iterations of the loop.
    .. versionadded:: 3.14
 
 
-.. opcode:: LOAD_CONST_IMMORTAL (consti)
-
-   Pushes ``co_consts[consti]`` onto the stack.
-   Can be used when the constant value is known to be immortal.
-
-   .. versionadded:: 3.14
-
-
 .. opcode:: LOAD_NAME (namei)
 
    Pushes the value associated with ``co_names[namei]`` onto the stack.
diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst
index 219fad0d2f6..f49885b8785 100644
--- a/Doc/library/email.header.rst
+++ b/Doc/library/email.header.rst
@@ -178,16 +178,36 @@ The :mod:`email.header` module also provides the following convenient functions.
    Decode a message header value without converting the character set. The header
    value is in *header*.
 
-   This function returns a list of ``(decoded_string, charset)`` pairs containing
-   each of the decoded parts of the header.  *charset* is ``None`` for non-encoded
-   parts of the header, otherwise a lower case string containing the name of the
-   character set specified in the encoded string.
+   For historical reasons, this function may return either:
 
-   Here's an example::
+   1. A list of pairs containing each of the decoded parts of the header,
+      ``(decoded_bytes, charset)``, where *decoded_bytes* is always an instance of
+      :class:`bytes`, and *charset* is either:
+
+        - A lower case string containing the name of the character set specified.
+
+        - ``None`` for non-encoded parts of the header.
+
+   2. A list of length 1 containing a pair ``(string, None)``, where
+      *string* is always an instance of :class:`str`.
+
+   An :exc:`email.errors.HeaderParseError` may be raised when certain decoding
+   errors occur (e.g. a base64 decoding exception).
+
+   Here are examples:
 
       >>> from email.header import decode_header
       >>> decode_header('=?iso-8859-1?q?p=F6stal?=')
       [(b'p\xf6stal', 'iso-8859-1')]
+      >>> decode_header('unencoded_string')
+      [('unencoded_string', None)]
+      >>> decode_header('bar =?utf-8?B?ZsOzbw==?=')
+      [(b'bar ', None), (b'f\xc3\xb3o', 'utf-8')]
+
+   .. note::
+
+       This function exists for backwards compatibility only. For
+       new code, we recommend using :class:`email.headerregistry.HeaderRegistry`.
 
 
 .. function:: make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' ')
@@ -203,3 +223,7 @@ The :mod:`email.header` module also provides the following convenient functions.
    :class:`Header` instance.  Optional *maxlinelen*, *header_name*, and
    *continuation_ws* are as in the :class:`Header` constructor.
 
+   .. note::
+
+       This function exists for backwards compatibility only, and is
+       not recommended for use in new code.
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index bb72032891e..9806ae80905 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -1048,7 +1048,7 @@ their subgroups based on the types of the contained exceptions.
    subclasses that need a different constructor signature need to
    override that rather than :meth:`~object.__init__`. For example, the following
    defines an exception group subclass which accepts an exit_code and
-   and constructs the group's message from it. ::
+   constructs the group's message from it. ::
 
       class Errors(ExceptionGroup):
          def __new__(cls, errors, exit_code):
diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst
index 5058b85bffb..1977f4d3ba3 100644
--- a/Doc/library/faulthandler.rst
+++ b/Doc/library/faulthandler.rst
@@ -90,7 +90,7 @@ An error will be printed instead of the stack.
 
 Additionally, some compilers do not support :term:`CPython's <CPython>`
 implementation of C stack dumps. As a result, a different error may be printed
-instead of the stack, even if the the operating system supports dumping stacks.
+instead of the stack, even if the operating system supports dumping stacks.
 
 .. note::
 
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index 2ecce3dba5a..80bd1275973 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -1839,15 +1839,15 @@ are always available.  They are listed here in alphabetical order.
    ``range(start, stop, step)``.  The *start* and *step* arguments default to
    ``None``.
 
+   Slice objects have read-only data attributes :attr:`!start`,
+   :attr:`!stop`, and :attr:`!step` which merely return the argument
+   values (or their default).  They have no other explicit functionality;
+   however, they are used by NumPy and other third-party packages.
+
    .. attribute:: slice.start
    .. attribute:: slice.stop
    .. attribute:: slice.step
 
-      Slice objects have read-only data attributes :attr:`!start`,
-      :attr:`!stop`, and :attr:`!step` which merely return the argument
-      values (or their default).  They have no other explicit functionality;
-      however, they are used by NumPy and other third-party packages.
-
    Slice objects are also generated when extended indexing syntax is used.  For
    example: ``a[start:stop:step]`` or ``a[start:stop, i]``.  See
    :func:`itertools.islice` for an alternate version that returns an
diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst
index f8c71005a53..96cca3073fe 100644
--- a/Doc/library/logging.config.rst
+++ b/Doc/library/logging.config.rst
@@ -586,7 +586,7 @@ configuration dictionary for the handler named ``foo``, and later (once that
 handler has been configured) it points to the configured handler instance.
 Thus, ``cfg://handlers.foo`` could resolve to either a dictionary or a handler
 instance. In general, it is wise to name handlers in a way such that dependent
-handlers are configured _after_ any handlers they depend on; that allows
+handlers are configured *after* any handlers they depend on; that allows
 something like ``cfg://handlers.foo`` to be used in configuring a handler that
 depends on handler ``foo``. If that dependent handler were named ``bar``,
 problems would result, because the configuration of ``bar`` would be attempted
diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst
index 8f3aa1dfdd0..d74ef73ee28 100644
--- a/Doc/library/logging.handlers.rst
+++ b/Doc/library/logging.handlers.rst
@@ -1059,6 +1059,15 @@ possible, while any potentially slow operations (such as sending an email via
    .. note:: If you are using :mod:`multiprocessing`, you should avoid using
       :class:`~queue.SimpleQueue` and instead use :class:`multiprocessing.Queue`.
 
+   .. warning::
+
+      The :mod:`multiprocessing` module uses an internal logger created and
+      accessed via :func:`~multiprocessing.get_logger`.
+      :class:`multiprocessing.Queue` will log ``DEBUG`` level messages upon
+      items being queued. If those log messages are processed by a
+      :class:`QueueHandler` using the same :class:`multiprocessing.Queue` instance,
+      it will cause a deadlock or infinite recursion.
+
    .. method:: emit(record)
 
       Enqueues the result of preparing the LogRecord. Should an exception
diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst
index 4e20c07331a..8fca79b23e4 100644
--- a/Doc/library/mmap.rst
+++ b/Doc/library/mmap.rst
@@ -269,7 +269,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
 
       Resizing a map created with *access* of :const:`ACCESS_READ` or
       :const:`ACCESS_COPY`, will raise a :exc:`TypeError` exception.
-      Resizing a map created with with *trackfd* set to ``False``,
+      Resizing a map created with *trackfd* set to ``False``,
       will raise a :exc:`ValueError` exception.
 
       **On Windows**: Resizing the map will raise an :exc:`OSError` if there are other
diff --git a/Doc/library/netrc.rst b/Doc/library/netrc.rst
index f6260383b2b..74c97e8c9a9 100644
--- a/Doc/library/netrc.rst
+++ b/Doc/library/netrc.rst
@@ -24,12 +24,14 @@ the Unix :program:`ftp` program and other FTP clients.
    a :exc:`FileNotFoundError` exception will be raised.
    Parse errors will raise :exc:`NetrcParseError` with diagnostic
    information including the file name, line number, and terminating token.
+
    If no argument is specified on a POSIX system, the presence of passwords in
    the :file:`.netrc` file will raise a :exc:`NetrcParseError` if the file
    ownership or permissions are insecure (owned by a user other than the user
    running the process, or accessible for read or write by any other user).
    This implements security behavior equivalent to that of ftp and other
-   programs that use :file:`.netrc`.
+   programs that use :file:`.netrc`. Such security checks are not available
+   on platforms that do not support :func:`os.getuid`.
 
    .. versionchanged:: 3.4 Added the POSIX permission check.
 
diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 86351e65dc4..47986a2d960 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -1985,7 +1985,7 @@ The :mod:`pathlib.types` module provides types for static type checking.
 
       If *follow_symlinks* is ``False``, return ``True`` only if the path
       is a file (without following symlinks); return ``False`` if the path
-      is a directory or other other non-file, or if it doesn't exist.
+      is a directory or other non-file, or if it doesn't exist.
 
    .. method:: is_symlink()
 
diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst
index 20b8f6bcf19..47d24b6f7d0 100644
--- a/Doc/library/pkgutil.rst
+++ b/Doc/library/pkgutil.rst
@@ -69,8 +69,8 @@ support.
 
    Yield :term:`finder` objects for the given module name.
 
-   If fullname contains a ``'.'``, the finders will be for the package
-   containing fullname, otherwise they will be all registered top level
+   If *fullname* contains a ``'.'``, the finders will be for the package
+   containing *fullname*, otherwise they will be all registered top level
    finders (i.e. those on both :data:`sys.meta_path` and :data:`sys.path_hooks`).
 
    If the named module is in a package, that package is imported as a side
diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst
index c78dfe1aafa..2dde40c9d92 100644
--- a/Doc/library/shutil.rst
+++ b/Doc/library/shutil.rst
@@ -47,6 +47,13 @@ Directory and files operations
    0, only the contents from the current file position to the end of the file will
    be copied.
 
+   :func:`copyfileobj` will *not* guarantee that the destination stream has
+   been flushed on completion of the copy. If you want to read from the
+   destination at the completion of the copy operation (for example, reading
+   the contents of a temporary file that has been copied from an HTTP stream),
+   you must ensure that you have called :meth:`~io.IOBase.flush` or
+   :meth:`~io.IOBase.close` on the file-like object before attempting to read
+   the destination file.
 
 .. function:: copyfile(src, dst, *, follow_symlinks=True)
 
@@ -327,6 +334,10 @@ Directory and files operations
    The deprecated *onerror* is similar to *onexc*, except that the third
    parameter it receives is the tuple returned from :func:`sys.exc_info`.
 
+   .. seealso::
+      :ref:`shutil-rmtree-example` for an example of handling the removal
+      of a directory tree that contains read-only files.
+
    .. audit-event:: shutil.rmtree path,dir_fd shutil.rmtree
 
    .. versionchanged:: 3.3
diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst
index 753f12460b8..7fb629f7d2f 100644
--- a/Doc/library/socketserver.rst
+++ b/Doc/library/socketserver.rst
@@ -543,7 +543,7 @@ objects that simplify communication by providing the standard file interface)::
 
 The difference is that the ``readline()`` call in the second handler will call
 ``recv()`` multiple times until it encounters a newline character, while the
-the first handler had to use a ``recv()`` loop to accumulate data until a
+first handler had to use a ``recv()`` loop to accumulate data until a
 newline itself.  If it had just used a single ``recv()`` without the loop it
 would just have returned what has been received so far from the client.
 TCP is stream based: data arrives in the order it was sent, but there no
diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index 12761baf792..641e1f1de03 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -507,6 +507,15 @@ Module constants
    Version number of the runtime SQLite library as a :class:`tuple` of
    :class:`integers <int>`.
 
+.. data:: SQLITE_KEYWORDS
+
+   A :class:`tuple` containing all SQLite keywords.
+
+   This constant is only available if Python was compiled with SQLite
+   3.24.0 or greater.
+
+   .. versionadded:: next
+
 .. data:: threadsafety
 
    Integer constant required by the DB-API 2.0, stating the level of thread
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index 55e442b20ff..1626a89a073 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -1185,6 +1185,15 @@ always available. Unless explicitly noted otherwise, all variables are read-only
    ``cache_tag`` is set to ``None``, it indicates that module caching should
    be disabled.
 
+   *supports_isolated_interpreters* is a boolean value indicating whether
+   this implementation supports multiple isolated interpreters.
+   It is ``True`` for CPython on most platforms.  Platforms with
+   this support implement the low-level :mod:`!_interpreters` module.
+
+   .. seealso::
+
+      :pep:`684`, :pep:`734`, and :mod:`concurrent.interpreters`.
+
    :data:`sys.implementation` may contain additional attributes specific to
    the Python implementation.  These non-standard attributes must start with
    an underscore, and are not described here.  Regardless of its contents,
@@ -1194,6 +1203,9 @@ always available. Unless explicitly noted otherwise, all variables are read-only
 
    .. versionadded:: 3.3
 
+   .. versionchanged:: 3.14
+      Added ``supports_isolated_interpreters`` field.
+
    .. note::
 
       The addition of new required attributes must go through the normal PEP
@@ -1933,6 +1945,22 @@ always available. Unless explicitly noted otherwise, all variables are read-only
    interpreter is pre-release (alpha, beta, or release candidate) then the
    local and remote interpreters must be the same exact version.
 
+   .. audit-event:: sys.remote_exec pid script_path
+
+      When the code is executed in the remote process, an
+      :ref:`auditing event <auditing>` ``sys.remote_exec`` is raised with
+      the *pid* and the path to the script file.
+      This event is raised in the process that called :func:`sys.remote_exec`.
+
+   .. audit-event:: cpython.remote_debugger_script script_path
+
+      When the script is executed in the remote process, an
+      :ref:`auditing event <auditing>`
+      ``cpython.remote_debugger_script`` is raised
+      with the path in the remote process.
+      This event is raised in the remote process, not the one
+      that called :func:`sys.remote_exec`.
+
    .. availability:: Unix, Windows.
    .. versionadded:: 3.14
 
diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst
index 7edcdcabdce..cabb41442f8 100644
--- a/Doc/library/threading.rst
+++ b/Doc/library/threading.rst
@@ -102,7 +102,7 @@ CPU-bound tasks, as only one thread can execute Python bytecode at a time.
 Despite this, threads remain a useful tool for achieving concurrency in many
 scenarios.
 
-As of Python 3.13, experimental :term:`free-threaded <free threading>` builds
+As of Python 3.13, :term:`free-threaded <free threading>` builds
 can disable the GIL, enabling true parallel execution of threads, but this
 feature is not available by default (see :pep:`703`).
 
@@ -621,7 +621,7 @@ since it is impossible to detect the termination of alien threads.
       an error to :meth:`~Thread.join` a thread before it has been started
       and attempts to do so raise the same exception.
 
-      If an attempt is made to join a running daemonic thread in in late stages
+      If an attempt is made to join a running daemonic thread in late stages
       of :term:`Python finalization <interpreter shutdown>` :meth:`!join`
       raises a :exc:`PythonFinalizationError`.
 
diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index 92d58024e84..6698e6d3f43 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -193,43 +193,52 @@ The :mod:`uuid` module defines the following functions:
 
 .. function:: uuid1(node=None, clock_seq=None)
 
-   Generate a UUID from a host ID, sequence number, and the current time. If *node*
-   is not given, :func:`getnode` is used to obtain the hardware address. If
-   *clock_seq* is given, it is used as the sequence number; otherwise a random
-   14-bit sequence number is chosen.
+   Generate a UUID from a host ID, sequence number, and the current time
+   according to :rfc:`RFC 9562, §5.1 <9562#section-5.1>`.
+
+   When *node* is not specified, :func:`getnode` is used to obtain the hardware
+   address as a 48-bit positive integer. When a sequence number *clock_seq* is
+   not specified, a pseudo-random 14-bit positive integer is generated.
+
+   If *node* or *clock_seq* exceed their expected bit count,
+   only their least significant bits are kept.
 
 
 .. function:: uuid3(namespace, name)
 
    Generate a UUID based on the MD5 hash of a namespace identifier (which is a
    UUID) and a name (which is a :class:`bytes` object or a string
-   that will be encoded using UTF-8).
+   that will be encoded using UTF-8)
+   according to :rfc:`RFC 9562, §5.3 <9562#section-5.3>`.
 
 
 .. function:: uuid4()
 
-   Generate a random UUID.
+   Generate a random UUID using a cryptographically secure method
+   according to :rfc:`RFC 9562, §5.4 <9562#section-5.4>`.
 
 
 .. function:: uuid5(namespace, name)
 
    Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a
    UUID) and a name (which is a :class:`bytes` object or a string
-   that will be encoded using UTF-8).
+   that will be encoded using UTF-8)
+   according to :rfc:`RFC 9562, §5.5 <9562#section-5.5>`.
 
 
 .. function:: uuid6(node=None, clock_seq=None)
 
    Generate a UUID from a sequence number and the current time according to
-   :rfc:`9562`.
+   :rfc:`RFC 9562, §5.6 <9562#section-5.6>`.
+
    This is an alternative to :func:`uuid1` to improve database locality.
 
    When *node* is not specified, :func:`getnode` is used to obtain the hardware
    address as a 48-bit positive integer. When a sequence number *clock_seq* is
    not specified, a pseudo-random 14-bit positive integer is generated.
 
-   If *node* or *clock_seq* exceed their expected bit count, only their least
-   significant bits are kept.
+   If *node* or *clock_seq* exceed their expected bit count,
+   only their least significant bits are kept.
 
    .. versionadded:: 3.14
 
diff --git a/Doc/library/zoneinfo.rst b/Doc/library/zoneinfo.rst
index a57f3b8b3e8..53d8e2598ec 100644
--- a/Doc/library/zoneinfo.rst
+++ b/Doc/library/zoneinfo.rst
@@ -195,7 +195,7 @@ The ``ZoneInfo`` class
 
 The ``ZoneInfo`` class has two alternate constructors:
 
-.. classmethod:: ZoneInfo.from_file(fobj, /, key=None)
+.. classmethod:: ZoneInfo.from_file(file_obj, /, key=None)
 
     Constructs a ``ZoneInfo`` object from a file-like object returning bytes
     (e.g. a file opened in binary mode or an :class:`io.BytesIO` object).
@@ -325,7 +325,7 @@ The behavior of a ``ZoneInfo`` file depends on how it was constructed:
        >>> a is b
        False
 
-3. ``ZoneInfo.from_file(fobj, /, key=None)``: When constructed from a file, the
+3. ``ZoneInfo.from_file(file_obj, /, key=None)``: When constructed from a file, the
    ``ZoneInfo`` object raises an exception on pickling. If an end user wants to
    pickle a ``ZoneInfo`` constructed from a file, it is recommended that they
    use a wrapper type or a custom serialization function: either serializing by
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst
index 32a2e266262..4a099e81dac 100644
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -262,6 +262,8 @@ Booleans (:class:`bool`)
    a string, the strings ``"False"`` or ``"True"`` are returned, respectively.
 
 
+.. _datamodel-float:
+
 :class:`numbers.Real` (:class:`float`)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst
index 2a550b504ca..24544a055c3 100644
--- a/Doc/reference/expressions.rst
+++ b/Doc/reference/expressions.rst
@@ -134,8 +134,7 @@ Literals
 Python supports string and bytes literals and various numeric literals:
 
 .. productionlist:: python-grammar
-   literal: `stringliteral` | `bytesliteral`
-          : | `integer` | `floatnumber` | `imagnumber`
+   literal: `stringliteral` | `bytesliteral` | `NUMBER`
 
 Evaluation of a literal yields an object of the given type (string, bytes,
 integer, floating-point number, complex number) with the given value.  The value
@@ -406,8 +405,9 @@ brackets or curly braces.
 Variables used in the generator expression are evaluated lazily when the
 :meth:`~generator.__next__` method is called for the generator object (in the same
 fashion as normal generators).  However, the iterable expression in the
-leftmost :keyword:`!for` clause is immediately evaluated, so that an error
-produced by it will be emitted at the point where the generator expression
+leftmost :keyword:`!for` clause is immediately evaluated, and the
+:term:`iterator` is immediately created for that iterable, so that an error
+produced while creating the iterator will be emitted at the point where the generator expression
 is defined, rather than at the point where the first value is retrieved.
 Subsequent :keyword:`!for` clauses and any filter condition in the leftmost
 :keyword:`!for` clause cannot be evaluated in the enclosing scope as they may
@@ -625,8 +625,10 @@ is already executing raises a :exc:`ValueError` exception.
 
 .. method:: generator.close()
 
-   Raises a :exc:`GeneratorExit` at the point where the generator function was
-   paused.  If the generator function catches the exception and returns a
+   Raises a :exc:`GeneratorExit` exception at the point where the generator
+   function was paused (equivalent to calling ``throw(GeneratorExit)``).
+   The exception is raised by the yield expression where the generator was paused.
+   If the generator function catches the exception and returns a
    value, this value is returned from :meth:`close`.  If the generator function
    is already closed, or raises :exc:`GeneratorExit` (by not catching the
    exception), :meth:`close` returns :const:`None`.  If the generator yields a
@@ -1023,7 +1025,7 @@ series of :term:`arguments <argument>`:
                 :   ["," `keywords_arguments`]
                 : | `starred_and_keywords` ["," `keywords_arguments`]
                 : | `keywords_arguments`
-   positional_arguments: positional_item ("," positional_item)*
+   positional_arguments: `positional_item` ("," `positional_item`)*
    positional_item: `assignment_expression` | "*" `expression`
    starred_and_keywords: ("*" `expression` | `keyword_item`)
                 : ("," "*" `expression` | "," `keyword_item`)*
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index b22eb4db794..567c70111c2 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -922,11 +922,20 @@ Numeric literals
    floating-point literal, hexadecimal literal
    octal literal, binary literal, decimal literal, imaginary literal, complex literal
 
-There are three types of numeric literals: integers, floating-point numbers, and
-imaginary numbers.  There are no complex literals (complex numbers can be formed
-by adding a real number and an imaginary number).
+:data:`~token.NUMBER` tokens represent numeric literals, of which there are
+three types: integers, floating-point numbers, and imaginary numbers.
 
-Note that numeric literals do not include a sign; a phrase like ``-1`` is
+.. grammar-snippet::
+   :group: python-grammar
+
+   NUMBER: `integer` | `floatnumber` | `imagnumber`
+
+The numeric value of a numeric literal is the same as if it were passed as a
+string to the :class:`int`, :class:`float` or :class:`complex` class
+constructor, respectively.
+Note that not all valid inputs for those constructors are also valid literals.
+
+Numeric literals do not include a sign; a phrase like ``-1`` is
 actually an expression composed of the unary operator '``-``' and the literal
 ``1``.
 
@@ -940,38 +949,67 @@ actually an expression composed of the unary operator '``-``' and the literal
 .. _integers:
 
 Integer literals
-----------------
+^^^^^^^^^^^^^^^^
 
-Integer literals are described by the following lexical definitions:
+Integer literals denote whole numbers. For example::
 
-.. productionlist:: python-grammar
-   integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger`
-   decinteger: `nonzerodigit` (["_"] `digit`)* | "0"+ (["_"] "0")*
-   bininteger: "0" ("b" | "B") (["_"] `bindigit`)+
-   octinteger: "0" ("o" | "O") (["_"] `octdigit`)+
-   hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+
-   nonzerodigit: "1"..."9"
-   digit: "0"..."9"
-   bindigit: "0" | "1"
-   octdigit: "0"..."7"
-   hexdigit: `digit` | "a"..."f" | "A"..."F"
+   7
+   3
+   2147483647
 
 There is no limit for the length of integer literals apart from what can be
-stored in available memory.
+stored in available memory::
+
+   7922816251426433759354395033679228162514264337593543950336
+
+Underscores can be used to group digits for enhanced readability,
+and are ignored for determining the numeric value of the literal.
+For example, the following literals are equivalent::
+
+   100_000_000_000
+   100000000000
+   1_00_00_00_00_000
+
+Underscores can only occur between digits.
+For example, ``_123``, ``321_``, and ``123__321`` are *not* valid literals.
 
-Underscores are ignored for determining the numeric value of the literal.  They
-can be used to group digits for enhanced readability.  One underscore can occur
-between digits, and after base specifiers like ``0x``.
+Integers can be specified in binary (base 2), octal (base 8), or hexadecimal
+(base 16) using the prefixes ``0b``, ``0o`` and ``0x``, respectively.
+Hexadecimal digits 10 through 15 are represented by letters ``A``-``F``,
+case-insensitive.  For example::
 
-Note that leading zeros in a non-zero decimal number are not allowed. This is
-for disambiguation with C-style octal literals, which Python used before version
-3.0.
+   0b100110111
+   0b_1110_0101
+   0o177
+   0o377
+   0xdeadbeef
+   0xDead_Beef
 
-Some examples of integer literals::
+An underscore can follow the base specifier.
+For example, ``0x_1f`` is a valid literal, but ``0_x1f`` and ``0x__1f`` are
+not.
 
-   7     2147483647                        0o177    0b100110111
-   3     79228162514264337593543950336     0o377    0xdeadbeef
-         100_000_000_000                   0b_1110_0101
+Leading zeros in a non-zero decimal number are not allowed.
+For example, ``0123`` is not a valid literal.
+This is for disambiguation with C-style octal literals, which Python used
+before version 3.0.
+
+Formally, integer literals are described by the following lexical definitions:
+
+.. grammar-snippet::
+   :group: python-grammar
+
+   integer:      `decinteger` | `bininteger` | `octinteger` | `hexinteger` | `zerointeger`
+   decinteger:   `nonzerodigit` (["_"] `digit`)*
+   bininteger:   "0" ("b" | "B") (["_"] `bindigit`)+
+   octinteger:   "0" ("o" | "O") (["_"] `octdigit`)+
+   hexinteger:   "0" ("x" | "X") (["_"] `hexdigit`)+
+   zerointeger:  "0"+ (["_"] "0")*
+   nonzerodigit: "1"..."9"
+   digit:        "0"..."9"
+   bindigit:     "0" | "1"
+   octdigit:     "0"..."7"
+   hexdigit:     `digit` | "a"..."f" | "A"..."F"
 
 .. versionchanged:: 3.6
    Underscores are now allowed for grouping purposes in literals.
@@ -984,26 +1022,58 @@ Some examples of integer literals::
 .. _floating:
 
 Floating-point literals
------------------------
+^^^^^^^^^^^^^^^^^^^^^^^
 
-Floating-point literals are described by the following lexical definitions:
+Floating-point (float) literals, such as ``3.14`` or ``1.5``, denote
+:ref:`approximations of real numbers <datamodel-float>`.
 
-.. productionlist:: python-grammar
-   floatnumber: `pointfloat` | `exponentfloat`
-   pointfloat: [`digitpart`] `fraction` | `digitpart` "."
-   exponentfloat: (`digitpart` | `pointfloat`) `exponent`
-   digitpart: `digit` (["_"] `digit`)*
-   fraction: "." `digitpart`
-   exponent: ("e" | "E") ["+" | "-"] `digitpart`
+They consist of *integer* and *fraction* parts, each composed of decimal digits.
+The parts are separated by a decimal point, ``.``::
+
+   2.71828
+   4.0
+
+Unlike in integer literals, leading zeros are allowed in the numeric parts.
+For example, ``077.010`` is legal, and denotes the same number as ``77.01``.
+
+As in integer literals, single underscores may occur between digits to help
+readability::
+
+   96_485.332_123
+   3.14_15_93
 
-Note that the integer and exponent parts are always interpreted using radix 10.
-For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The
-allowed range of floating-point literals is implementation-dependent.  As in
-integer literals, underscores are supported for digit grouping.
+Either of these parts, but not both, can be empty. For example::
 
-Some examples of floating-point literals::
+   10.  # (equivalent to 10.0)
+   .001  # (equivalent to 0.001)
 
-   3.14    10.    .001    1e100    3.14e-10    0e0    3.14_15_93
+Optionally, the integer and fraction may be followed by an *exponent*:
+the letter ``e`` or ``E``, followed by an optional sign, ``+`` or ``-``,
+and a number in the same format as the integer and fraction parts.
+The ``e`` or ``E`` represents "times ten raised to the power of"::
+
+   1.0e3  # (represents 1.0×10³, or 1000.0)
+   1.166e-5  # (represents 1.166×10⁻⁵, or 0.00001166)
+   6.02214076e+23  # (represents 6.02214076×10²³, or 602214076000000000000000.)
+
+In floats with only integer and exponent parts, the decimal point may be
+omitted::
+
+   1e3  # (equivalent to 1.e3 and 1.0e3)
+   0e0  # (equivalent to 0.)
+
+Formally, floating-point literals are described by the following
+lexical definitions:
+
+.. grammar-snippet::
+   :group: python-grammar
+
+   floatnumber:
+      | `digitpart` "." [`digitpart`] [`exponent`]
+      | "." `digitpart` [`exponent`]
+      | `digitpart` `exponent`
+   digitpart: `digit` (["_"] `digit`)*
+   exponent:  ("e" | "E") ["+" | "-"] `digitpart`
 
 .. versionchanged:: 3.6
    Underscores are now allowed for grouping purposes in literals.
@@ -1014,20 +1084,62 @@ Some examples of floating-point literals::
 .. _imaginary:
 
 Imaginary literals
-------------------
+^^^^^^^^^^^^^^^^^^
 
-Imaginary literals are described by the following lexical definitions:
+Python has :ref:`complex number <typesnumeric>` objects, but no complex
+literals.
+Instead, *imaginary literals* denote complex numbers with a zero
+real part.
 
-.. productionlist:: python-grammar
-   imagnumber: (`floatnumber` | `digitpart`) ("j" | "J")
+For example, in math, the complex number 3+4.2\ *i* is written
+as the real number 3 added to the imaginary number 4.2\ *i*.
+Python uses a similar syntax, except the imaginary unit is written as ``j``
+rather than *i*::
+
+   3+4.2j
+
+This is an expression composed
+of the :ref:`integer literal <integers>` ``3``,
+the :ref:`operator <operators>` '``+``',
+and the :ref:`imaginary literal <imaginary>` ``4.2j``.
+Since these are three separate tokens, whitespace is allowed between them::
 
-An imaginary literal yields a complex number with a real part of 0.0.  Complex
-numbers are represented as a pair of floating-point numbers and have the same
-restrictions on their range.  To create a complex number with a nonzero real
-part, add a floating-point number to it, e.g., ``(3+4j)``.  Some examples of
-imaginary literals::
+   3 + 4.2j
 
-   3.14j   10.j    10j     .001j   1e100j   3.14e-10j   3.14_15_93j
+No whitespace is allowed *within* each token.
+In particular, the ``j`` suffix may not be separated from the number
+before it.
+
+The number before the ``j`` has the same syntax as a floating-point literal.
+Thus, the following are valid imaginary literals::
+
+   4.2j
+   3.14j
+   10.j
+   .001j
+   1e100j
+   3.14e-10j
+   3.14_15_93j
+
+Unlike in a floating-point literal, the decimal point can be omitted if the
+imaginary number only has an integer part.
+The number is still evaluated as a floating-point number, not an integer::
+
+   10j
+   0j
+   1000000000000000000000000j   # equivalent to 1e+24j
+
+The ``j`` suffix is case-insensitive.
+That means you can use ``J`` instead::
+
+   3.14J   # equivalent to 3.14j
+
+Formally, imaginary literals are described by the following lexical definition:
+
+.. grammar-snippet::
+   :group: python-grammar
+
+   imagnumber: (`floatnumber` | `digitpart`) ("j" | "J")
 
 
 .. _operators:
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 40a46a62031..cad49e2deeb 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -653,7 +653,7 @@ Miscellaneous options
      .. versionadded:: 3.13
 
    * :samp:`-X thread_inherit_context={0,1}` causes :class:`~threading.Thread`
-     to, by default, use a copy of context of of the caller of
+     to, by default, use a copy of context of the caller of
      ``Thread.start()`` when starting.  Otherwise, threads will start
      with an empty context.  If unset, the value of this option defaults
      to ``1`` on free-threaded builds and to ``0`` otherwise.  See also
@@ -669,6 +669,13 @@ Miscellaneous options
 
      .. versionadded:: 3.14
 
+   * :samp:`-X tlbc={0,1}` enables (1, the default) or disables (0) thread-local
+     bytecode in builds configured with :option:`--disable-gil`.  When disabled,
+     this also disables the specializing interpreter.  See also
+     :envvar:`PYTHON_TLBC`.
+
+     .. versionadded:: 3.14
+
    It also allows passing arbitrary values and retrieving them through the
    :data:`sys._xoptions` dictionary.
 
@@ -1277,7 +1284,7 @@ conflict.
 .. envvar:: PYTHON_THREAD_INHERIT_CONTEXT
 
    If this variable is set to ``1`` then :class:`~threading.Thread` will,
-   by default, use a copy of context of of the caller of ``Thread.start()``
+   by default, use a copy of context of the caller of ``Thread.start()``
    when starting.  Otherwise, new threads will start with an empty context.
    If unset, this variable defaults to ``1`` on free-threaded builds and to
    ``0`` otherwise.  See also :option:`-X thread_inherit_context<-X>`.
@@ -1302,6 +1309,16 @@ conflict.
 
    .. versionadded:: 3.13
 
+.. envvar:: PYTHON_TLBC
+
+   If set to ``1``, enables thread-local bytecode. If set to ``0``, thread-local
+   bytecode and the specializing interpreter are disabled.  Only applies to
+   builds configured with :option:`--disable-gil`.
+
+   See also the :option:`-X tlbc <-X>` command-line option.
+
+   .. versionadded:: 3.14
+
 Debug-mode variables
 ~~~~~~~~~~~~~~~~~~~~
 
diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst
index 0b7eaf35a1e..df81a330549 100644
--- a/Doc/using/configure.rst
+++ b/Doc/using/configure.rst
@@ -290,8 +290,8 @@ General Options
 
 .. option:: --disable-gil
 
-   Enables **experimental** support for running Python without the
-   :term:`global interpreter lock` (GIL): free threading build.
+   Enables support for running Python without the :term:`global interpreter
+   lock` (GIL): free threading build.
 
    Defines the ``Py_GIL_DISABLED`` macro and adds ``"t"`` to
    :data:`sys.abiflags`.
diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst
index 7d5c6331bef..0fb28f8c866 100644
--- a/Doc/using/ios.rst
+++ b/Doc/using/ios.rst
@@ -298,9 +298,9 @@ To add Python to an iOS Xcode project:
    * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*;
    * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled*
      (optional, but strongly recommended; this is enabled by default);
-   * ``PYTHONHOME`` for the interpreter is configured to point at the
+   * :envvar:`PYTHONHOME` for the interpreter is configured to point at the
      ``python`` subfolder of your app's bundle; and
-   * The ``PYTHONPATH`` for the interpreter includes:
+   * The :envvar:`PYTHONPATH` for the interpreter includes:
 
      - the ``python/lib/python3.X`` subfolder of your app's bundle,
      - the ``python/lib/python3.X/lib-dynload`` subfolder of your app's bundle, and
@@ -324,7 +324,12 @@ modules in your app, some additional steps will be required:
   the ``lib-dynload`` folder can be copied and adapted for this purpose.
 
 * If you're using a separate folder for third-party packages, ensure that folder
-  is included as part of the ``PYTHONPATH`` configuration in step 10.
+  is included as part of the :envvar:`PYTHONPATH` configuration in step 10.
+
+* If any of the folders that contain third-party packages will contain ``.pth``
+  files, you should add that folder as a *site directory* (using
+  :meth:`site.addsitedir`), rather than adding to :envvar:`PYTHONPATH` or
+  :attr:`sys.path` directly.
 
 Testing a Python package
 ------------------------
diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst
index 4b6c884f3d4..f88f3c2e078 100644
--- a/Doc/using/mac.rst
+++ b/Doc/using/mac.rst
@@ -20,13 +20,6 @@ the Pythons provided by the CPython release team for download from
 the `python.org website <https://www.python.org/downloads/>`_. See
 :ref:`alternative_bundles` for some other options.
 
-.. |usemac_x_dot_y| replace:: 3.13
-.. |usemac_python_x_dot_y_literal| replace:: ``python3.13``
-.. |usemac_python_x_dot_y_t_literal| replace:: ``python3.13t``
-.. |usemac_python_x_dot_y_t_literal_config| replace:: ``python3.13t-config``
-.. |usemac_applications_folder_name| replace:: ``Python 3.13``
-.. |usemac_applications_folder_version| replace:: ``/Applications/Python 3.13/``
-
 .. _getting-osx:
 .. _getting-and-installing-macpython:
 
@@ -64,7 +57,7 @@ Clicking on the **Continue** button brings up the **Read Me** for this installer
 Besides other important information, the **Read Me** documents which Python version is
 going to be installed and on what versions of macOS it is supported. You may need
 to scroll through to read the whole file. By default, this **Read Me** will also be
-installed in |usemac_applications_folder_version| and available to read anytime.
+installed in |applications_python_version_literal| and available to read anytime.
 
 .. image:: mac_installer_02_readme.png
 
@@ -83,7 +76,7 @@ display. For most uses, the standard set of installation operations is appropria
 By pressing the **Customize** button, you can choose to omit or select certain package
 components of the installer. Click on each package name to see a description of
 what it installs.
-To also install support for the optional experimental free-threaded feature,
+To also install support for the optional free-threaded feature,
 see :ref:`install-freethreaded-macos`.
 
 .. image:: mac_installer_05_custom_install.png
@@ -97,7 +90,7 @@ When the installation is complete, the **Summary** window will appear.
 .. image:: mac_installer_06_summary.png
 
 Double-click on the :command:`Install Certificates.command`
-icon or file in the |usemac_applications_folder_version| window to complete the
+icon or file in the |applications_python_version_literal| window to complete the
 installation.
 
 .. image:: mac_installer_07_applications.png
@@ -114,7 +107,7 @@ Close this terminal window and the installer window.
 
 A default install will include:
 
-* A |usemac_applications_folder_name| folder in your :file:`Applications` folder. In here
+* A |python_version_literal| folder in your :file:`Applications` folder. In here
   you find :program:`IDLE`, the development environment that is a standard part of official
   Python distributions; and :program:`Python Launcher`, which handles double-clicking Python
   scripts from the macOS `Finder <https://support.apple.com/en-us/HT201732>`_.
@@ -141,7 +134,7 @@ How to run a Python script
 
 There are two ways to invoke the Python interpreter.
 If you are familiar with using a Unix shell in a terminal
-window, you can invoke |usemac_python_x_dot_y_literal| or ``python3`` optionally
+window, you can invoke |python_x_dot_y_literal| or ``python3`` optionally
 followed by one or more command line options (described in :ref:`using-on-general`).
 The Python tutorial also has a useful section on
 :ref:`using Python interactively from a shell <tut-interac>`.
@@ -160,7 +153,7 @@ for more information.
 To run a Python script file from the terminal window, you can
 invoke the interpreter with the name of the script file:
 
-    |usemac_python_x_dot_y_literal| ``myscript.py``
+    |python_x_dot_y_literal| ``myscript.py``
 
 To run your script from the Finder, you can either:
 
@@ -259,20 +252,20 @@ Advanced Topics
 Installing Free-threaded Binaries
 ---------------------------------
 
-.. versionadded:: 3.13 (Experimental)
-
-.. note::
-
-   Everything described in this section is considered experimental,
-   and should be expected to change in future releases.
+.. versionadded:: 3.13
 
 The ``python.org`` :ref:`Python for macOS <getting-and-installing-macpython>`
 installer package can optionally install an additional build of
-Python |usemac_x_dot_y| that supports :pep:`703`, the experimental free-threading feature
+Python |version| that supports :pep:`703`, the free-threading feature
 (running with the :term:`global interpreter lock` disabled).
 Check the release page on ``python.org`` for possible updated information.
 
-Because this feature is still considered experimental, the support for it
+The free-threaded mode is working and continues to be improved, but
+there is some additional overhead in single-threaded workloads compared
+to the regular build. Additionally, third-party packages, in particular ones
+with an :term:`extension module`, may not be ready for use in a
+free-threaded build, and will re-enable the :term:`GIL`.
+Therefore, the support for free-threading
 is not installed by default. It is packaged as a separate install option,
 available by clicking the **Customize** button on the **Installation Type**
 step of the installer as described above.
@@ -282,46 +275,54 @@ step of the installer as described above.
 If the box next to the **Free-threaded Python** package name is checked,
 a separate :file:`PythonT.framework` will also be installed
 alongside the normal :file:`Python.framework` in :file:`/Library/Frameworks`.
-This configuration allows a free-threaded Python |usemac_x_dot_y| build to co-exist
-on your system with a traditional (GIL only) Python |usemac_x_dot_y| build with
-minimal risk while installing or testing. This installation layout is itself
-experimental and is subject to change in future releases.
+This configuration allows a free-threaded Python |version| build to co-exist
+on your system with a traditional (GIL only) Python |version| build with
+minimal risk while installing or testing. This installation layout may
+change in future releases.
 
 Known cautions and limitations:
 
 - The **UNIX command-line tools** package, which is selected by default,
-  will install links in :file:`/usr/local/bin` for |usemac_python_x_dot_y_t_literal|,
-  the free-threaded interpreter, and |usemac_python_x_dot_y_t_literal_config|,
+  will install links in :file:`/usr/local/bin` for |python_x_dot_y_t_literal|,
+  the free-threaded interpreter, and |python_x_dot_y_t_literal_config|,
   a configuration utility which may be useful for package builders.
   Since :file:`/usr/local/bin` is typically included in your shell ``PATH``,
   in most cases no changes to your ``PATH`` environment variables should
-  be needed to use |usemac_python_x_dot_y_t_literal|.
+  be needed to use |python_x_dot_y_t_literal|.
 
 - For this release, the **Shell profile updater** package and the
-  :file:`Update Shell Profile.command` in |usemac_applications_folder_version|
+  :file:`Update Shell Profile.command` in |applications_python_version_literal|
   do not support the free-threaded package.
 
 - The free-threaded build and the traditional build have separate search
   paths and separate :file:`site-packages` directories so, by default,
   if you need a package available in both builds, it may need to be installed in both.
   The free-threaded package will install a separate instance of :program:`pip` for use
-  with |usemac_python_x_dot_y_t_literal|.
+  with |python_x_dot_y_t_literal|.
 
   - To install a package using :command:`pip` without a :command:`venv`:
 
-        |usemac_python_x_dot_y_t_literal| ``-m pip install <package_name>``
+    .. parsed-literal::
+
+       python\ |version|\ t -m pip install <package_name>
 
 - When working with multiple Python environments, it is usually safest and easiest
   to :ref:`create and use virtual environments <tut-venv>`.
   This can avoid possible command name conflicts and confusion about which Python is in use:
 
-      |usemac_python_x_dot_y_t_literal| ``-m venv <venv_name>``
+  .. parsed-literal::
+
+     python\ |version|\ t -m venv <venv_name>
+
 
   then :command:`activate`.
 
 - To run a free-threaded version of IDLE:
 
-      |usemac_python_x_dot_y_t_literal| ``-m idlelib``
+  .. parsed-literal::
+
+     python\ |version|\ t -m idlelib
+
 
 - The interpreters in both builds respond to the same
   :ref:`PYTHON environment variables <using-on-envvars>`
@@ -337,28 +338,28 @@ Known cautions and limitations:
   thus it only needs to be run once.
 
 - If you cannot depend on the link in ``/usr/local/bin`` pointing to the
-  ``python.org`` free-threaded |usemac_python_x_dot_y_t_literal| (for example, if you want
+  ``python.org`` free-threaded |python_x_dot_y_t_literal| (for example, if you want
   to install your own version there or some other distribution does),
   you can explicitly set your shell ``PATH`` environment variable to
   include the ``PythonT`` framework ``bin`` directory:
 
-  .. code-block:: sh
+  .. parsed-literal::
 
-     export PATH="/Library/Frameworks/PythonT.framework/Versions/3.13/bin":"$PATH"
+     export PATH="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin":"$PATH"
 
   The traditional framework installation by default does something similar,
   except for :file:`Python.framework`. Be aware that having both framework ``bin``
   directories in ``PATH`` can lead to confusion if there are duplicate names
-  like ``python3.13`` in both; which one is actually used depends on the order
+  like |python_x_dot_y_literal| in both; which one is actually used depends on the order
   they appear in ``PATH``. The ``which python3.x`` or ``which python3.xt``
   commands can show which path is being used. Using virtual environments
   can help avoid such ambiguities. Another option might be to create
   a shell :command:`alias` to the desired interpreter, like:
 
-  .. code-block:: sh
+  .. parsed-literal::
 
-     alias py3.13="/Library/Frameworks/Python.framework/Versions/3.13/bin/python3.13"
-     alias py3.13t="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t"
+     alias py\ |version|\ ="/Library/Frameworks/Python.framework/Versions/\ |version|\ /bin/python\ |version|\ "
+     alias py\ |version|\ t="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin/python\ |version|\ t"
 
 Installing using the command line
 ---------------------------------
@@ -369,22 +370,22 @@ the macOS command line :command:`installer` utility lets you select non-default
 options, too. If you are not familiar with :command:`installer`, it can be
 somewhat cryptic (see :command:`man installer` for more information).
 As an example, the following shell snippet shows one way to do it,
-using the ``3.13.0b2`` release and selecting the free-threaded interpreter
+using the |x_dot_y_b2_literal| release and selecting the free-threaded interpreter
 option:
 
-..  code-block:: sh
+.. parsed-literal::
 
-    RELEASE="python-3.13.0b2-macos11.pkg"
+    RELEASE="python-\ |version|\ .0b2-macos11.pkg"
 
     # download installer pkg
-    curl -O https://www.python.org/ftp/python/3.13.0/${RELEASE}
+    curl -O \https://www.python.org/ftp/python/\ |version|\ .0/${RELEASE}
 
     # create installer choicechanges to customize the install:
-    #    enable the PythonTFramework-3.13 package
+    #    enable the PythonTFramework-\ |version|\  package
     #    while accepting the other defaults (install all other packages)
     cat > ./choicechanges.plist <<EOF
     <?xml version="1.0" encoding="UTF-8"?>
-    <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+    <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "\http://www.apple.com/DTDs/PropertyList-1.0.dtd">
     <plist version="1.0">
     <array>
             <dict>
@@ -393,7 +394,7 @@ option:
                     <key>choiceAttribute</key>
                     <string>selected</string>
                     <key>choiceIdentifier</key>
-                    <string>org.python.Python.PythonTFramework-3.13</string>
+                    <string>org.python.Python.PythonTFramework-\ |version|\ </string>
             </dict>
     </array>
     </plist>
@@ -404,19 +405,19 @@ option:
 
 You can then test that both installer builds are now available with something like:
 
-..  code-block:: console
+.. parsed-literal::
 
     $ # test that the free-threaded interpreter was installed if the Unix Command Tools package was enabled
-    $ /usr/local/bin/python3.13t -VV
-    Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun  5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)]
+    $ /usr/local/bin/python\ |version|\ t -VV
+    Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun  5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)]
     $ #    and the traditional interpreter
-    $ /usr/local/bin/python3.13 -VV
-    Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun  5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)]
+    $ /usr/local/bin/python\ |version|\  -VV
+    Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun  5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)]
     $ # test that they are also available without the prefix if /usr/local/bin is on $PATH
-    $ python3.13t -VV
-    Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun  5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)]
-    $ python3.13 -VV
-    Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun  5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)]
+    $ python\ |version|\ t -VV
+    Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun  5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)]
+    $ python\ |version|\  -VV
+    Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun  5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)]
 
 .. note::
 
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 580a3d8154d..ef7c36d8539 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -1996,7 +1996,7 @@ New Deprecations
     (Contributed by Alex Waygood in :gh:`105566` and :gh:`105570`.)
 
   * Deprecate the :func:`typing.no_type_check_decorator` decorator function,
-    to be removed in in Python 3.15.
+    to be removed in Python 3.15.
     After eight years in the :mod:`typing` module,
     it has yet to be supported by any major type checker.
     (Contributed by Alex Waygood in :gh:`106309`.)
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 705bf46d603..a74d414ae4b 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -82,9 +82,10 @@ and improvements in user-friendliness and correctness.
 
 .. PEP-sized items next.
 
+* :ref:`PEP 779: Free-threaded Python is officially supported <whatsnew314-pep779>`
 * :ref:`PEP 649 and 749: deferred evaluation of annotations <whatsnew314-pep649>`
-* :ref:`PEP 734: Multiple Interpreters in the Stdlib <whatsnew314-pep734>`
-* :ref:`PEP 741: Python Configuration C API <whatsnew314-pep741>`
+* :ref:`PEP 734: Multiple interpreters in the stdlib <whatsnew314-pep734>`
+* :ref:`PEP 741: Python configuration C API <whatsnew314-pep741>`
 * :ref:`PEP 750: Template strings <whatsnew314-pep750>`
 * :ref:`PEP 758: Allow except and except* expressions without parentheses <whatsnew314-pep758>`
 * :ref:`PEP 761: Discontinuation of PGP signatures <whatsnew314-pep761>`
@@ -124,9 +125,35 @@ of Python.  See :ref:`below <whatsnew314-refcount>` for details.
 New features
 ============
 
+.. _whatsnew314-pep779:
+
+PEP 779: Free-threaded Python is officially supported
+-----------------------------------------------------
+
+The free-threaded build of Python is now supported and no longer experimental.
+This is the start of phase II where free-threaded Python is officially supported
+but still optional.
+
+We are confident that the project is on the right path, and we appreciate the
+continued dedication from everyone working to make free-threading ready for
+broader adoption across the Python community.
+
+With these recommendations and the acceptance of this PEP, we as the Python
+developer community should broadly advertise that free-threading is a supported
+Python build option now and into the future, and that it will not be removed
+without a proper deprecation schedule.
+
+Any decision to transition to phase III, with free-threading as the default or
+sole build of Python, is still undecided, and dependent on many factors both
+within CPython itself and the community. This decision is for the future.
+
+.. seealso::
+   :pep:`779` and its `acceptance
+   <https://discuss.python.org/t/pep-779-criteria-for-supported-status-for-free-threaded-python/84319/123>`__.
+
 .. _whatsnew314-pep734:
 
-PEP 734: Multiple Interpreters in the Stdlib
+PEP 734: Multiple interpreters in the stdlib
 --------------------------------------------
 
 The CPython runtime supports running multiple copies of Python in the
@@ -251,7 +278,7 @@ As another example, generating HTML attributes from data:
 
    attributes = {"src": "shrubbery.jpg", "alt": "looks nice"}
    template = t"<img {attributes}>"
-   assert html(template) == '<img src="shrubbery.jpg" alt="looks nice" class="looks-nice">'
+   assert html(template) == '<img src="shrubbery.jpg" alt="looks nice" />'
 
 Compared to using an f-string, the ``html`` function has access to template attributes
 containing the original information: static strings, interpolations, and values
@@ -392,7 +419,7 @@ As can be seen, the API is similar to the APIs of the :mod:`!lzma` and
 :mod:`!bz2` modules.
 
 (Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun,
-Victor Stinner, and Rogdham in :gh:`132983`)
+Victor Stinner, and Rogdham in :gh:`132983`.)
 
 .. seealso::
    :pep:`784`.
@@ -727,7 +754,7 @@ Improved error messages
 
 .. _whatsnew314-pep741:
 
-PEP 741: Python Configuration C API
+PEP 741: Python configuration C API
 -----------------------------------
 
 Add a :ref:`PyInitConfig C API <pyinitconfig_api>` to configure the Python
@@ -951,6 +978,23 @@ be specified by the build backend, as it will no longer be determined
 automatically by the C compiler. For a running interpreter, the setting that
 was used at compile time can be found using :func:`sysconfig.get_config_var`.
 
+A new flag has been added, :data:`~sys.flags.context_aware_warnings`.  This
+flag defaults to true for the free-threaded build and false for the GIL-enabled
+build. If the flag is true then the :class:`warnings.catch_warnings` context
+manager uses a context variable for warning filters.  This makes the context
+manager behave predictably when used with multiple threads or asynchronous
+tasks.
+
+A new flag has been added, :data:`~sys.flags.thread_inherit_context`. This flag
+defaults to true for the free-threaded build and false for the GIL-enabled
+build. If the flag is true then threads created with :class:`threading.Thread`
+start with a copy of the :class:`~contextvars.Context` of the caller of
+:meth:`~threading.Thread.start`.  Most significantly, this makes the warning
+filtering context established by :class:`~warnings.catch_warnings` be
+"inherited" by threads (or asyncio tasks) started within that context.  It also
+affects other modules that use context variables, such as the :mod:`decimal`
+context manager.
+
 
 .. _whatsnew314-pyrepl-highlighting:
 
@@ -1001,6 +1045,18 @@ Please report any bugs or major performance regressions that you encounter!
 
 .. seealso:: :pep:`744`
 
+Concurrent safe warnings control
+--------------------------------
+
+The :class:`warnings.catch_warnings` context manager will now optionally
+use a context variable for warning filters.  This is enabled by setting
+the :data:`~sys.flags.context_aware_warnings` flag, either with the ``-X``
+command-line option or an environment variable.  This gives predictable
+warnings control when using :class:`~warnings.catch_warnings` combined with
+multiple threads or asynchronous tasks. The flag defaults to true for the
+free-threaded build and false for the GIL-enabled build.
+
+(Contributed by Neil Schemenauer and Kumar Aditya in :gh:`130010`.)
 
 Other language changes
 ======================
@@ -1259,6 +1315,14 @@ concurrent.futures
   buffer.
   (Contributed by Enzo Bonnal and Josh Rosenberg in :gh:`74028`.)
 
+configparser
+------------
+
+* Security fix: will no longer write config files it cannot read. Attempting
+  to :meth:`configparser.ConfigParser.write` keys containing delimiters or
+  beginning with the section header pattern will raise a
+  :class:`configparser.InvalidWriteError`.
+  (Contributed by Jacob Lincoln in :gh:`129270`.)
 
 contextvars
 -----------
@@ -1722,6 +1786,16 @@ os
   (Contributed by Cody Maloney in :gh:`129205`.)
 
 
+os.path
+-------
+
+* The *strict* parameter to :func:`os.path.realpath` accepts a new value,
+  :data:`os.path.ALLOW_MISSING`.
+  If used, errors other than :exc:`FileNotFoundError` will be re-raised;
+  the resulting path can be missing but it will be free of symlinks.
+  (Contributed by Petr Viktorin for :cve:`2025-4517`.)
+
+
 pathlib
 -------
 
@@ -1910,6 +1984,28 @@ sysconfig
   (Contributed by Xuehai Pan in :gh:`131799`.)
 
 
+tarfile
+-------
+
+* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to
+  avoid path traversal attacks.
+  (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2025-4138`.)
+* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes
+  when a directory was removed or replaced by another kind of file.
+  (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2024-12718`.)
+* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall`
+  now (re-)apply the extraction filter when substituting a link (hard or
+  symbolic) with a copy of another archive member, and when fixing up
+  directory attributes.
+  The former raises a new exception, :exc:`~tarfile.LinkFallbackError`.
+  (Contributed by Petr Viktorin for :cve:`2025-4330` and :cve:`2024-12718`.)
+* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall`
+  no longer extract rejected members when
+  :func:`~tarfile.TarFile.errorlevel` is zero.
+  (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887`
+  and :cve:`2025-4435`.)
+
+
 threading
 ---------
 
@@ -2665,6 +2761,7 @@ New features
   * :c:func:`PyUnicodeWriter_Discard`
   * :c:func:`PyUnicodeWriter_Finish`
   * :c:func:`PyUnicodeWriter_Format`
+  * :c:func:`PyUnicodeWriter_WriteASCII`
   * :c:func:`PyUnicodeWriter_WriteChar`
   * :c:func:`PyUnicodeWriter_WriteRepr`
   * :c:func:`PyUnicodeWriter_WriteStr`
@@ -2747,8 +2844,8 @@ New features
 
 * Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier
   superclass identification, which attempts to resolve the `type checking issue
-  <https://peps.python.org/pep-0630/#type-checking>`__ mentioned in :pep:`630`
-  (:gh:`124153`).
+  <https://peps.python.org/pep-0630/#type-checking>`__ mentioned in :pep:`630`.
+  (Contributed in :gh:`124153`.)
 
 * Add :c:func:`PyUnicode_Equal` function to the limited C API:
   test if two strings are equal.
@@ -2941,7 +3038,7 @@ Deprecated
     :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) <PyUnicodeWriter_WriteSubstring>`.
   * :c:func:`!_PyUnicodeWriter_WriteASCIIString`:
     replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with
-    :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.
+    :c:func:`PyUnicodeWriter_WriteASCII(writer, str) <PyUnicodeWriter_WriteASCII>`.
   * :c:func:`!_PyUnicodeWriter_WriteLatin1String`:
     replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with
     :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.
diff --git a/Include/audit.h b/Include/audit.h
index 793b7077e10..9be54ad4411 100644
--- a/Include/audit.h
+++ b/Include/audit.h
@@ -1,5 +1,5 @@
-#ifndef Py_AUDIT_H
-#define Py_AUDIT_H
+#ifndef _Py_AUDIT_H
+#define _Py_AUDIT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -18,13 +18,13 @@ PyAPI_FUNC(int) PySys_AuditTuple(
 
 
 #ifndef Py_LIMITED_API
-#  define Py_CPYTHON_AUDIT_H
+#  define _Py_CPYTHON_AUDIT_H
 #  include "cpython/audit.h"
-#  undef Py_CPYTHON_AUDIT_H
+#  undef _Py_CPYTHON_AUDIT_H
 #endif
 
 
 #ifdef __cplusplus
 }
 #endif
-#endif /* !Py_AUDIT_H */
+#endif /* !_Py_AUDIT_H */
diff --git a/Include/ceval.h b/Include/ceval.h
index 32ab38972e5..e9df8684996 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -133,13 +133,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
 #define FVS_MASK      0x4
 #define FVS_HAVE_SPEC 0x4
 
-/* Special methods used by LOAD_SPECIAL */
-#define SPECIAL___ENTER__   0
-#define SPECIAL___EXIT__    1
-#define SPECIAL___AENTER__  2
-#define SPECIAL___AEXIT__   3
-#define SPECIAL_MAX   3
-
 #ifndef Py_LIMITED_API
 #  define Py_CPYTHON_CEVAL_H
 #  include "cpython/ceval.h"
diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h
index 3c5c7a8c060..536f9248632 100644
--- a/Include/cpython/audit.h
+++ b/Include/cpython/audit.h
@@ -1,4 +1,4 @@
-#ifndef Py_CPYTHON_AUDIT_H
+#ifndef _Py_CPYTHON_AUDIT_H
 #  error "this header file must not be included directly"
 #endif
 
diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h
index 18249b95bef..598cd330bc9 100644
--- a/Include/cpython/funcobject.h
+++ b/Include/cpython/funcobject.h
@@ -97,11 +97,6 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) {
 }
 #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func))
 
-static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) {
-    return _PyFunction_CAST(func)->func_builtins;
-}
-#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func))
-
 static inline PyObject* PyFunction_GET_MODULE(PyObject *func) {
     return _PyFunction_CAST(func)->func_module;
 }
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index 54d7e622929..be582122118 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -28,10 +28,10 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *);
 #define PyTrace_OPCODE 7
 
 /* Remote debugger support */
-#define MAX_SCRIPT_PATH_SIZE 512
-typedef struct _remote_debugger_support {
+#define Py_MAX_SCRIPT_PATH_SIZE 512
+typedef struct {
     int32_t debugger_pending_call;
-    char debugger_script_path[MAX_SCRIPT_PATH_SIZE];
+    char debugger_script_path[Py_MAX_SCRIPT_PATH_SIZE];
 } _PyRemoteDebuggerSupport;
 
 typedef struct _err_stackitem {
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 239177deb4a..cc2defbdf77 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -239,6 +239,16 @@ static inline void _Py_LeaveRecursiveCall(void)  {
 
 extern _PyInterpreterFrame* _PyEval_GetFrame(void);
 
+extern PyObject * _PyEval_GetGlobalsFromRunningMain(PyThreadState *);
+extern int _PyEval_EnsureBuiltins(
+    PyThreadState *,
+    PyObject *,
+    PyObject **p_builtins);
+extern int _PyEval_EnsureBuiltinsWithModule(
+    PyThreadState *,
+    PyObject *,
+    PyObject **p_builtins);
+
 PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func);
 
 /* Handle signals, pending calls, GIL drop request
@@ -356,6 +366,13 @@ extern int _PyRunRemoteDebugger(PyThreadState *tstate);
 PyAPI_FUNC(_PyStackRef)
 _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef *index_ptr);
 
+/* Special methods used by LOAD_SPECIAL */
+#define SPECIAL___ENTER__   0
+#define SPECIAL___EXIT__    1
+#define SPECIAL___AENTER__  2
+#define SPECIAL___AEXIT__   3
+#define SPECIAL_MAX   3
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h
index 42f06b935bd..62460c5f8fa 100644
--- a/Include/internal/pycore_critical_section.h
+++ b/Include/internal/pycore_critical_section.h
@@ -64,7 +64,7 @@ extern "C" {
 
 # define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)                           \
     if (Py_REFCNT(op) != 1) {                                                    \
-        _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyObject_CAST(op)->ob_mutex); \
+        _PyCriticalSection_AssertHeldObj(_PyObject_CAST(op)); \
     }
 
 #else   /* Py_DEBUG */
@@ -239,6 +239,28 @@ _PyCriticalSection_AssertHeld(PyMutex *mutex)
 #endif
 }
 
+static inline void
+_PyCriticalSection_AssertHeldObj(PyObject *op)
+{
+#ifdef Py_DEBUG
+    PyMutex *mutex = &_PyObject_CAST(op)->ob_mutex;
+    PyThreadState *tstate = _PyThreadState_GET();
+    uintptr_t prev = tstate->critical_section;
+    if (prev & _Py_CRITICAL_SECTION_TWO_MUTEXES) {
+        PyCriticalSection2 *cs = (PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK);
+        _PyObject_ASSERT_WITH_MSG(op,
+            (cs != NULL && (cs->_cs_base._cs_mutex == mutex || cs->_cs_mutex2 == mutex)),
+            "Critical section of object is not held");
+    }
+    else {
+        PyCriticalSection *cs = (PyCriticalSection *)(prev & ~_Py_CRITICAL_SECTION_MASK);
+        _PyObject_ASSERT_WITH_MSG(op,
+            (cs != NULL && cs->_cs_mutex == mutex),
+            "Critical section of object is not held");
+    }
+
+#endif
+}
 #endif /* Py_GIL_DISABLED */
 
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h
index ce3fcb109f4..1b59fa2ef60 100644
--- a/Include/internal/pycore_debug_offsets.h
+++ b/Include/internal/pycore_debug_offsets.h
@@ -368,7 +368,7 @@ typedef struct _Py_DebugOffsets {
         .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug),  \
         .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call),  \
         .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path),  \
-        .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \
+        .debugger_script_path_size = Py_MAX_SCRIPT_PATH_SIZE, \
     }, \
 }
 
diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h
index a30d52d49bd..6e120965956 100644
--- a/Include/internal/pycore_function.h
+++ b/Include/internal/pycore_function.h
@@ -41,6 +41,11 @@ extern PyObject *_Py_set_function_type_params(
 PyAPI_FUNC(int)
 _PyFunction_VerifyStateless(PyThreadState *, PyObject *);
 
+static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) {
+    return _PyFunction_CAST(func)->func_builtins;
+}
+#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func))
+
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
index b8efba74bdc..3c213783cd4 100644
--- a/Include/internal/pycore_long.h
+++ b/Include/internal/pycore_long.h
@@ -112,9 +112,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t);
 // Export for 'math' shared extension
 PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t);
 
-PyAPI_FUNC(PyObject*) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right);
-PyAPI_FUNC(PyObject*) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right);
-PyAPI_FUNC(PyObject*) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(_PyStackRef) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(_PyStackRef) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(_PyStackRef) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right);
 
 // Export for 'binascii' shared extension.
 PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
@@ -213,7 +213,6 @@ _PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) {
     assert(PyLong_Check(b));
     return (a->long_value.lv_tag | b->long_value.lv_tag) < (2 << NON_SIZE_BITS);
 }
-
 static inline bool
 _PyLong_IsZero(const PyLongObject *op)
 {
@@ -313,6 +312,12 @@ _PyLong_FlipSign(PyLongObject *op) {
 #define _PyLong_FALSE_TAG TAG_FROM_SIGN_AND_SIZE(0, 0)
 #define _PyLong_TRUE_TAG TAG_FROM_SIGN_AND_SIZE(1, 1)
 
+static inline int
+_PyLong_CheckExactAndCompact(PyObject *op)  // 1 if op passes PyLong_CheckExact and _PyLong_IsCompact, else 0
+{
+    return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op);  // && short-circuits: the cast is only relied on once the exact-int check has passed
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 50225623fe5..50807e68e9a 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -767,6 +767,27 @@ _Py_TryIncref(PyObject *op)
 #endif
 }
 
+// Enqueue an object to be decref'ed (and so possibly freed) after some delay.
+#ifdef Py_GIL_DISABLED
+PyAPI_FUNC(void) _PyObject_XDecRefDelayed(PyObject *obj);  // only declared here; the definition lives outside this header
+#else
+static inline void _PyObject_XDecRefDelayed(PyObject *obj)
+{
+    Py_XDECREF(obj);  // without Py_GIL_DISABLED nothing is deferred: the decref happens right here (NULL is accepted)
+}
+#endif
+
+#ifdef Py_GIL_DISABLED
+// Same as `Py_XSETREF`, but in free-threading it stores `obj` into `*p_obj` atomically
+// and queues the old object to be decrefed at a safe point using QSBR.
+PyAPI_FUNC(void) _PyObject_XSetRefDelayed(PyObject **p_obj, PyObject *obj);
+#else
+static inline void _PyObject_XSetRefDelayed(PyObject **p_obj, PyObject *obj)
+{
+    Py_XSETREF(*p_obj, obj);  // without Py_GIL_DISABLED this is a plain Py_XSETREF: no delay is involved
+}
+#endif
+
 #ifdef Py_REF_DEBUG
 extern void _PyInterpreterState_FinalizeRefTotal(PyInterpreterState *);
 extern void _Py_FinalizeRefTotal(_PyRuntimeState *);
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index 59c85ddeb3d..dd1bf2d1d2b 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -1072,12 +1072,12 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[267];
 const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
     [BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
-    [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
+    [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG },
     [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
     [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
-    [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
+    [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG },
     [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG },
     [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
@@ -1085,7 +1085,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
     [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
-    [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
+    [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG },
     [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
@@ -1129,7 +1129,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
     [CLEANUP_THROW] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
     [COMPARE_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [COMPARE_OP_FLOAT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG },
-    [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG },
     [COMPARE_OP_STR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG },
     [CONTAINS_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [CONTAINS_OP_DICT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index d3674726997..8b7f12bf03d 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -10,6 +10,7 @@ extern "C" {
 
 #include "pycore_typedefs.h"      // _PyInterpreterFrame
 #include "pycore_uop_ids.h"
+#include "pycore_stackref.h"      // _PyStackRef
 #include <stdbool.h>
 
 
@@ -178,6 +179,7 @@ typedef enum _JitSymType {
     JIT_SYM_KNOWN_VALUE_TAG = 7,
     JIT_SYM_TUPLE_TAG = 8,
     JIT_SYM_TRUTHINESS_TAG = 9,
+    JIT_SYM_COMPACT_INT = 10,
 } JitSymType;
 
 typedef struct _jit_opt_known_class {
@@ -210,6 +212,10 @@ typedef struct {
     uint16_t value;
 } JitOptTruthiness;
 
+typedef struct {
+    uint8_t tag;  // only field; overlays JitOptSymbol.tag when reached through the union's `compact` member (presumably JIT_SYM_COMPACT_INT -- confirm)
+} JitOptCompactInt;
+
 typedef union _jit_opt_symbol {
     uint8_t tag;
     JitOptKnownClass cls;
@@ -217,18 +223,62 @@ typedef union _jit_opt_symbol {
     JitOptKnownVersion version;
     JitOptTuple tuple;
     JitOptTruthiness truthiness;
+    JitOptCompactInt compact;
 } JitOptSymbol;
 
 
+// This mimics the _PyStackRef API: a JitOptSymbol pointer packed together with a "borrowed" flag bit.
+typedef union {
+    uintptr_t bits;  // (uintptr_t)symbol, optionally OR'ed with REF_IS_BORROWED
+} JitOptRef;
+
+#define REF_IS_BORROWED 1  // flag kept in bit 0 of JitOptRef.bits
+
+#define JIT_BITS_TO_PTR_MASKED(REF) ((JitOptSymbol *)(((REF).bits) & (~REF_IS_BORROWED)))  // clear the flag bit, then cast what is left back to a symbol pointer
+
+static inline JitOptSymbol *
+PyJitRef_Unwrap(JitOptRef ref)  // symbol pointer held by ref, with the borrowed bit masked off
+{
+    return JIT_BITS_TO_PTR_MASKED(ref);  // for PyJitRef_NULL (only the flag bit set) the masked value is 0
+}
+
+bool _Py_uop_symbol_is_immortal(JitOptSymbol *sym);
+
+
+static inline JitOptRef
+PyJitRef_Wrap(JitOptSymbol *sym)  // build a ref from a raw symbol pointer
+{
+    return (JitOptRef){.bits=(uintptr_t)sym};  // pointer value stored unchanged; REF_IS_BORROWED is not OR'ed in
+}
+
+static inline JitOptRef
+PyJitRef_Borrow(JitOptRef ref)  // return a copy of ref with the borrowed flag set
+{
+    return (JitOptRef){ .bits = ref.bits | REF_IS_BORROWED };  // ref is passed by value, so the caller's copy is not modified
+}
+
+static const JitOptRef PyJitRef_NULL = {.bits = REF_IS_BORROWED};  // null sentinel: no pointer bits, only the borrowed flag
+
+static inline bool
+PyJitRef_IsNull(JitOptRef ref)  // true only for the PyJitRef_NULL bit pattern
+{
+    return ref.bits == PyJitRef_NULL.bits;  // compares every bit, so a ref whose bits are 0 is NOT reported as null
+}
+
+static inline int
+PyJitRef_IsBorrowed(JitOptRef ref)  // test the borrowed flag of ref
+{
+    return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;  // 1 when the flag bit is set (this includes PyJitRef_NULL), else 0
+}
 
 struct _Py_UOpsAbstractFrame {
     // Max stacklen
     int stack_len;
     int locals_len;
 
-    JitOptSymbol **stack_pointer;
-    JitOptSymbol **stack;
-    JitOptSymbol **locals;
+    JitOptRef *stack_pointer;
+    JitOptRef *stack;
+    JitOptRef *locals;
 };
 
 typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
@@ -251,37 +301,43 @@ typedef struct _JitOptContext {
     // Arena for the symbolic types.
     ty_arena t_arena;
 
-    JitOptSymbol **n_consumed;
-    JitOptSymbol **limit;
-    JitOptSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+    JitOptRef *n_consumed;
+    JitOptRef *limit;
+    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
 } JitOptContext;
 
-extern bool _Py_uop_sym_is_null(JitOptSymbol *sym);
-extern bool _Py_uop_sym_is_not_null(JitOptSymbol *sym);
-extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym);
-extern PyObject *_Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym);
-extern JitOptSymbol *_Py_uop_sym_new_unknown(JitOptContext *ctx);
-extern JitOptSymbol *_Py_uop_sym_new_not_null(JitOptContext *ctx);
-extern JitOptSymbol *_Py_uop_sym_new_type(
+extern bool _Py_uop_sym_is_null(JitOptRef sym);
+extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
+extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);
+extern PyObject *_Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_unknown(JitOptContext *ctx);
+extern JitOptRef _Py_uop_sym_new_not_null(JitOptContext *ctx);
+extern JitOptRef _Py_uop_sym_new_type(
     JitOptContext *ctx, PyTypeObject *typ);
-extern JitOptSymbol *_Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val);
-extern JitOptSymbol *_Py_uop_sym_new_null(JitOptContext *ctx);
-extern bool _Py_uop_sym_has_type(JitOptSymbol *sym);
-extern bool _Py_uop_sym_matches_type(JitOptSymbol *sym, PyTypeObject *typ);
-extern bool _Py_uop_sym_matches_type_version(JitOptSymbol *sym, unsigned int version);
-extern void _Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym);
-extern void _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym);
-extern void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ);
-extern bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version);
-extern void _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val);
-extern bool _Py_uop_sym_is_bottom(JitOptSymbol *sym);
-extern int _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym);
-extern PyTypeObject *_Py_uop_sym_get_type(JitOptSymbol *sym);
-extern bool _Py_uop_sym_is_immortal(JitOptSymbol *sym);
-extern JitOptSymbol *_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args);
-extern JitOptSymbol *_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item);
-extern int _Py_uop_sym_tuple_length(JitOptSymbol *sym);
-extern JitOptSymbol *_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy);
+
+extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val);
+extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val);
+bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym);
+_PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx);
+extern bool _Py_uop_sym_has_type(JitOptRef sym);
+extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ);
+extern bool _Py_uop_sym_matches_type_version(JitOptRef sym, unsigned int version);
+extern void _Py_uop_sym_set_null(JitOptContext *ctx, JitOptRef sym);
+extern void _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptRef sym);
+extern void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef sym, PyTypeObject *typ);
+extern bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef sym, unsigned int version);
+extern void _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef sym, PyObject *const_val);
+extern bool _Py_uop_sym_is_bottom(JitOptRef sym);
+extern int _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef sym);
+extern PyTypeObject *_Py_uop_sym_get_type(JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef *args);
+extern JitOptRef _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef sym, int item);
+extern int _Py_uop_sym_tuple_length(JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value, bool truthy);
+extern bool _Py_uop_sym_is_compact_int(JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx);
+extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx,  JitOptRef sym);
 
 extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
 extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);
@@ -290,7 +346,7 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
     JitOptContext *ctx,
     PyCodeObject *co,
     int curr_stackentries,
-    JitOptSymbol **args,
+    JitOptRef *args,
     int arg_len);
 extern int _Py_uop_frame_pop(JitOptContext *ctx);
 
diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index 02537bdfef8..f3f2ae0a140 100644
--- a/Include/internal/pycore_pymem.h
+++ b/Include/internal/pycore_pymem.h
@@ -88,17 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str);
 extern int _PyMem_DebugEnabled(void);
 
 // Enqueue a pointer to be freed possibly after some delay.
-extern void _PyMem_FreeDelayed(void *ptr);
-
-// Enqueue an object to be freed possibly after some delay
-#ifdef Py_GIL_DISABLED
-PyAPI_FUNC(void) _PyObject_XDecRefDelayed(PyObject *obj);
-#else
-static inline void _PyObject_XDecRefDelayed(PyObject *obj)
-{
-    Py_XDECREF(obj);
-}
-#endif
+extern void _PyMem_FreeDelayed(void *ptr, size_t size);
 
 // Periodically process delayed free requests.
 extern void _PyMem_ProcessDelayed(PyThreadState *tstate);
diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h
index b835c3abaf5..1f9b3fcf777 100644
--- a/Include/internal/pycore_qsbr.h
+++ b/Include/internal/pycore_qsbr.h
@@ -48,8 +48,21 @@ struct _qsbr_thread_state {
     // Thread state (or NULL)
     PyThreadState *tstate;
 
-    // Used to defer advancing write sequence a fixed number of times
-    int deferrals;
+    // Number of held items added by this thread since the last write sequence
+    // advance
+    int deferred_count;
+
+    // Estimate for the amount of memory that is held by this thread since
+    // the last write sequence advance
+    size_t deferred_memory;
+
+    // Amount of memory in mimalloc pages deferred from collection.  When
+    // deferred, they are prevented from being used for a different size class
+    // and in a different thread.
+    size_t deferred_page_memory;
+
+    // True if the deferred memory frees should be processed.
+    bool should_process;
 
     // Is this thread state allocated?
     bool allocated;
@@ -109,11 +122,17 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
 extern uint64_t
 _Py_qsbr_advance(struct _qsbr_shared *shared);
 
-// Batches requests to advance the write sequence. This advances the write
-// sequence every N calls, which reduces overhead but increases time to
-// reclamation. Returns the new goal.
+// Return the next value for the write sequence (current plus the increment).
 extern uint64_t
-_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);
+_Py_qsbr_shared_next(struct _qsbr_shared *shared);
+
+// Return true if deferred memory frees held by QSBR should be processed to
+// determine if they can be safely freed.
+static inline bool
+_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr)
+{
+    return qsbr->should_process;  // only reports the per-thread flag; this helper neither sets nor clears it
+}
 
 // Have the read sequences advanced to the given goal? If this returns true,
 // it safe to reclaim any memory tagged with the goal (or earlier goal).
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index 2b845527cf2..a9432401525 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -13,22 +13,25 @@ extern "C" {
 #define _SET_IP 301
 #define _BINARY_OP 302
 #define _BINARY_OP_ADD_FLOAT 303
-#define _BINARY_OP_ADD_INT 304
-#define _BINARY_OP_ADD_UNICODE 305
-#define _BINARY_OP_EXTEND 306
-#define _BINARY_OP_INPLACE_ADD_UNICODE 307
-#define _BINARY_OP_MULTIPLY_FLOAT 308
-#define _BINARY_OP_MULTIPLY_INT 309
-#define _BINARY_OP_SUBSCR_CHECK_FUNC 310
-#define _BINARY_OP_SUBSCR_DICT 311
-#define _BINARY_OP_SUBSCR_INIT_CALL 312
-#define _BINARY_OP_SUBSCR_LIST_INT 313
-#define _BINARY_OP_SUBSCR_LIST_SLICE 314
-#define _BINARY_OP_SUBSCR_STR_INT 315
-#define _BINARY_OP_SUBSCR_TUPLE_INT 316
-#define _BINARY_OP_SUBTRACT_FLOAT 317
-#define _BINARY_OP_SUBTRACT_INT 318
-#define _BINARY_SLICE 319
+#define _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS 304
+#define _BINARY_OP_ADD_INT 305
+#define _BINARY_OP_ADD_UNICODE 306
+#define _BINARY_OP_EXTEND 307
+#define _BINARY_OP_INPLACE_ADD_UNICODE 308
+#define _BINARY_OP_MULTIPLY_FLOAT 309
+#define _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS 310
+#define _BINARY_OP_MULTIPLY_INT 311
+#define _BINARY_OP_SUBSCR_CHECK_FUNC 312
+#define _BINARY_OP_SUBSCR_DICT 313
+#define _BINARY_OP_SUBSCR_INIT_CALL 314
+#define _BINARY_OP_SUBSCR_LIST_INT 315
+#define _BINARY_OP_SUBSCR_LIST_SLICE 316
+#define _BINARY_OP_SUBSCR_STR_INT 317
+#define _BINARY_OP_SUBSCR_TUPLE_INT 318
+#define _BINARY_OP_SUBTRACT_FLOAT 319
+#define _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS 320
+#define _BINARY_OP_SUBTRACT_INT 321
+#define _BINARY_SLICE 322
 #define _BUILD_INTERPOLATION BUILD_INTERPOLATION
 #define _BUILD_LIST BUILD_LIST
 #define _BUILD_MAP BUILD_MAP
@@ -37,135 +40,140 @@ extern "C" {
 #define _BUILD_STRING BUILD_STRING
 #define _BUILD_TEMPLATE BUILD_TEMPLATE
 #define _BUILD_TUPLE BUILD_TUPLE
-#define _CALL_BUILTIN_CLASS 320
-#define _CALL_BUILTIN_FAST 321
-#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 322
-#define _CALL_BUILTIN_O 323
+#define _CALL_BUILTIN_CLASS 323
+#define _CALL_BUILTIN_FAST 324
+#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 325
+#define _CALL_BUILTIN_O 326
 #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1
 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2
-#define _CALL_ISINSTANCE 324
-#define _CALL_KW_NON_PY 325
-#define _CALL_LEN 326
-#define _CALL_LIST_APPEND 327
-#define _CALL_METHOD_DESCRIPTOR_FAST 328
-#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 329
-#define _CALL_METHOD_DESCRIPTOR_NOARGS 330
-#define _CALL_METHOD_DESCRIPTOR_O 331
-#define _CALL_NON_PY_GENERAL 332
-#define _CALL_STR_1 333
-#define _CALL_TUPLE_1 334
-#define _CALL_TYPE_1 335
-#define _CHECK_AND_ALLOCATE_OBJECT 336
-#define _CHECK_ATTR_CLASS 337
-#define _CHECK_ATTR_METHOD_LAZY_DICT 338
-#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 339
+#define _CALL_ISINSTANCE 327
+#define _CALL_KW_NON_PY 328
+#define _CALL_LEN 329
+#define _CALL_LIST_APPEND 330
+#define _CALL_METHOD_DESCRIPTOR_FAST 331
+#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 332
+#define _CALL_METHOD_DESCRIPTOR_NOARGS 333
+#define _CALL_METHOD_DESCRIPTOR_O 334
+#define _CALL_NON_PY_GENERAL 335
+#define _CALL_STR_1 336
+#define _CALL_TUPLE_1 337
+#define _CALL_TYPE_1 338
+#define _CHECK_AND_ALLOCATE_OBJECT 339
+#define _CHECK_ATTR_CLASS 340
+#define _CHECK_ATTR_METHOD_LAZY_DICT 341
+#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 342
 #define _CHECK_EG_MATCH CHECK_EG_MATCH
 #define _CHECK_EXC_MATCH CHECK_EXC_MATCH
-#define _CHECK_FUNCTION 340
-#define _CHECK_FUNCTION_EXACT_ARGS 341
-#define _CHECK_FUNCTION_VERSION 342
-#define _CHECK_FUNCTION_VERSION_INLINE 343
-#define _CHECK_FUNCTION_VERSION_KW 344
-#define _CHECK_IS_NOT_PY_CALLABLE 345
-#define _CHECK_IS_NOT_PY_CALLABLE_KW 346
-#define _CHECK_MANAGED_OBJECT_HAS_VALUES 347
-#define _CHECK_METHOD_VERSION 348
-#define _CHECK_METHOD_VERSION_KW 349
-#define _CHECK_PEP_523 350
-#define _CHECK_PERIODIC 351
-#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 352
-#define _CHECK_RECURSION_REMAINING 353
-#define _CHECK_STACK_SPACE 354
-#define _CHECK_STACK_SPACE_OPERAND 355
-#define _CHECK_VALIDITY 356
-#define _COMPARE_OP 357
-#define _COMPARE_OP_FLOAT 358
-#define _COMPARE_OP_INT 359
-#define _COMPARE_OP_STR 360
-#define _CONTAINS_OP 361
-#define _CONTAINS_OP_DICT 362
-#define _CONTAINS_OP_SET 363
+#define _CHECK_FUNCTION 343
+#define _CHECK_FUNCTION_EXACT_ARGS 344
+#define _CHECK_FUNCTION_VERSION 345
+#define _CHECK_FUNCTION_VERSION_INLINE 346
+#define _CHECK_FUNCTION_VERSION_KW 347
+#define _CHECK_IS_NOT_PY_CALLABLE 348
+#define _CHECK_IS_NOT_PY_CALLABLE_KW 349
+#define _CHECK_MANAGED_OBJECT_HAS_VALUES 350
+#define _CHECK_METHOD_VERSION 351
+#define _CHECK_METHOD_VERSION_KW 352
+#define _CHECK_PEP_523 353
+#define _CHECK_PERIODIC 354
+#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 355
+#define _CHECK_RECURSION_REMAINING 356
+#define _CHECK_STACK_SPACE 357
+#define _CHECK_STACK_SPACE_OPERAND 358
+#define _CHECK_VALIDITY 359
+#define _COMPARE_OP 360
+#define _COMPARE_OP_FLOAT 361
+#define _COMPARE_OP_INT 362
+#define _COMPARE_OP_STR 363
+#define _CONTAINS_OP 364
+#define _CONTAINS_OP_DICT 365
+#define _CONTAINS_OP_SET 366
 #define _CONVERT_VALUE CONVERT_VALUE
-#define _COPY COPY
+#define _COPY 367
+#define _COPY_1 368
+#define _COPY_2 369
+#define _COPY_3 370
 #define _COPY_FREE_VARS COPY_FREE_VARS
-#define _CREATE_INIT_FRAME 364
+#define _CREATE_INIT_FRAME 371
 #define _DELETE_ATTR DELETE_ATTR
 #define _DELETE_DEREF DELETE_DEREF
 #define _DELETE_FAST DELETE_FAST
 #define _DELETE_GLOBAL DELETE_GLOBAL
 #define _DELETE_NAME DELETE_NAME
 #define _DELETE_SUBSCR DELETE_SUBSCR
-#define _DEOPT 365
+#define _DEOPT 372
 #define _DICT_MERGE DICT_MERGE
 #define _DICT_UPDATE DICT_UPDATE
-#define _DO_CALL 366
-#define _DO_CALL_FUNCTION_EX 367
-#define _DO_CALL_KW 368
+#define _DO_CALL 373
+#define _DO_CALL_FUNCTION_EX 374
+#define _DO_CALL_KW 375
 #define _END_FOR END_FOR
 #define _END_SEND END_SEND
-#define _ERROR_POP_N 369
+#define _ERROR_POP_N 376
 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK
-#define _EXPAND_METHOD 370
-#define _EXPAND_METHOD_KW 371
-#define _FATAL_ERROR 372
+#define _EXPAND_METHOD 377
+#define _EXPAND_METHOD_KW 378
+#define _FATAL_ERROR 379
 #define _FORMAT_SIMPLE FORMAT_SIMPLE
 #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
-#define _FOR_ITER 373
-#define _FOR_ITER_GEN_FRAME 374
-#define _FOR_ITER_TIER_TWO 375
+#define _FOR_ITER 380
+#define _FOR_ITER_GEN_FRAME 381
+#define _FOR_ITER_TIER_TWO 382
 #define _GET_AITER GET_AITER
 #define _GET_ANEXT GET_ANEXT
 #define _GET_AWAITABLE GET_AWAITABLE
 #define _GET_ITER GET_ITER
 #define _GET_LEN GET_LEN
 #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
-#define _GUARD_BINARY_OP_EXTEND 376
-#define _GUARD_CALLABLE_ISINSTANCE 377
-#define _GUARD_CALLABLE_LEN 378
-#define _GUARD_CALLABLE_LIST_APPEND 379
-#define _GUARD_CALLABLE_STR_1 380
-#define _GUARD_CALLABLE_TUPLE_1 381
-#define _GUARD_CALLABLE_TYPE_1 382
-#define _GUARD_DORV_NO_DICT 383
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384
-#define _GUARD_GLOBALS_VERSION 385
-#define _GUARD_IS_FALSE_POP 386
-#define _GUARD_IS_NONE_POP 387
-#define _GUARD_IS_NOT_NONE_POP 388
-#define _GUARD_IS_TRUE_POP 389
-#define _GUARD_KEYS_VERSION 390
-#define _GUARD_NOS_DICT 391
-#define _GUARD_NOS_FLOAT 392
-#define _GUARD_NOS_INT 393
-#define _GUARD_NOS_LIST 394
-#define _GUARD_NOS_NOT_NULL 395
-#define _GUARD_NOS_NULL 396
-#define _GUARD_NOS_TUPLE 397
-#define _GUARD_NOS_UNICODE 398
-#define _GUARD_NOT_EXHAUSTED_LIST 399
-#define _GUARD_NOT_EXHAUSTED_RANGE 400
-#define _GUARD_NOT_EXHAUSTED_TUPLE 401
-#define _GUARD_THIRD_NULL 402
-#define _GUARD_TOS_ANY_SET 403
-#define _GUARD_TOS_DICT 404
-#define _GUARD_TOS_FLOAT 405
-#define _GUARD_TOS_INT 406
-#define _GUARD_TOS_LIST 407
-#define _GUARD_TOS_SLICE 408
-#define _GUARD_TOS_TUPLE 409
-#define _GUARD_TOS_UNICODE 410
-#define _GUARD_TYPE_VERSION 411
-#define _GUARD_TYPE_VERSION_AND_LOCK 412
+#define _GUARD_BINARY_OP_EXTEND 383
+#define _GUARD_CALLABLE_ISINSTANCE 384
+#define _GUARD_CALLABLE_LEN 385
+#define _GUARD_CALLABLE_LIST_APPEND 386
+#define _GUARD_CALLABLE_STR_1 387
+#define _GUARD_CALLABLE_TUPLE_1 388
+#define _GUARD_CALLABLE_TYPE_1 389
+#define _GUARD_DORV_NO_DICT 390
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 391
+#define _GUARD_GLOBALS_VERSION 392
+#define _GUARD_IS_FALSE_POP 393
+#define _GUARD_IS_NONE_POP 394
+#define _GUARD_IS_NOT_NONE_POP 395
+#define _GUARD_IS_TRUE_POP 396
+#define _GUARD_KEYS_VERSION 397
+#define _GUARD_NOS_DICT 398
+#define _GUARD_NOS_FLOAT 399
+#define _GUARD_NOS_INT 400
+#define _GUARD_NOS_LIST 401
+#define _GUARD_NOS_NOT_NULL 402
+#define _GUARD_NOS_NULL 403
+#define _GUARD_NOS_OVERFLOWED 404
+#define _GUARD_NOS_TUPLE 405
+#define _GUARD_NOS_UNICODE 406
+#define _GUARD_NOT_EXHAUSTED_LIST 407
+#define _GUARD_NOT_EXHAUSTED_RANGE 408
+#define _GUARD_NOT_EXHAUSTED_TUPLE 409
+#define _GUARD_THIRD_NULL 410
+#define _GUARD_TOS_ANY_SET 411
+#define _GUARD_TOS_DICT 412
+#define _GUARD_TOS_FLOAT 413
+#define _GUARD_TOS_INT 414
+#define _GUARD_TOS_LIST 415
+#define _GUARD_TOS_OVERFLOWED 416
+#define _GUARD_TOS_SLICE 417
+#define _GUARD_TOS_TUPLE 418
+#define _GUARD_TOS_UNICODE 419
+#define _GUARD_TYPE_VERSION 420
+#define _GUARD_TYPE_VERSION_AND_LOCK 421
 #define _IMPORT_FROM IMPORT_FROM
 #define _IMPORT_NAME IMPORT_NAME
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413
-#define _INIT_CALL_PY_EXACT_ARGS 414
-#define _INIT_CALL_PY_EXACT_ARGS_0 415
-#define _INIT_CALL_PY_EXACT_ARGS_1 416
-#define _INIT_CALL_PY_EXACT_ARGS_2 417
-#define _INIT_CALL_PY_EXACT_ARGS_3 418
-#define _INIT_CALL_PY_EXACT_ARGS_4 419
-#define _INSERT_NULL 420
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 422
+#define _INIT_CALL_PY_EXACT_ARGS 423
+#define _INIT_CALL_PY_EXACT_ARGS_0 424
+#define _INIT_CALL_PY_EXACT_ARGS_1 425
+#define _INIT_CALL_PY_EXACT_ARGS_2 426
+#define _INIT_CALL_PY_EXACT_ARGS_3 427
+#define _INIT_CALL_PY_EXACT_ARGS_4 428
+#define _INSERT_NULL 429
 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
 #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION
 #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD
@@ -175,171 +183,177 @@ extern "C" {
 #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
 #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
 #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
-#define _IS_NONE 421
+#define _IS_NONE 430
 #define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 422
-#define _ITER_CHECK_RANGE 423
-#define _ITER_CHECK_TUPLE 424
-#define _ITER_JUMP_LIST 425
-#define _ITER_JUMP_RANGE 426
-#define _ITER_JUMP_TUPLE 427
-#define _ITER_NEXT_LIST 428
-#define _ITER_NEXT_LIST_TIER_TWO 429
-#define _ITER_NEXT_RANGE 430
-#define _ITER_NEXT_TUPLE 431
-#define _JUMP_TO_TOP 432
+#define _ITER_CHECK_LIST 431
+#define _ITER_CHECK_RANGE 432
+#define _ITER_CHECK_TUPLE 433
+#define _ITER_JUMP_LIST 434
+#define _ITER_JUMP_RANGE 435
+#define _ITER_JUMP_TUPLE 436
+#define _ITER_NEXT_LIST 437
+#define _ITER_NEXT_LIST_TIER_TWO 438
+#define _ITER_NEXT_RANGE 439
+#define _ITER_NEXT_TUPLE 440
+#define _JUMP_TO_TOP 441
 #define _LIST_APPEND LIST_APPEND
 #define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 433
-#define _LOAD_ATTR_CLASS 434
+#define _LOAD_ATTR 442
+#define _LOAD_ATTR_CLASS 443
 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 435
-#define _LOAD_ATTR_METHOD_LAZY_DICT 436
-#define _LOAD_ATTR_METHOD_NO_DICT 437
-#define _LOAD_ATTR_METHOD_WITH_VALUES 438
-#define _LOAD_ATTR_MODULE 439
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441
-#define _LOAD_ATTR_PROPERTY_FRAME 442
-#define _LOAD_ATTR_SLOT 443
-#define _LOAD_ATTR_WITH_HINT 444
+#define _LOAD_ATTR_INSTANCE_VALUE 444
+#define _LOAD_ATTR_METHOD_LAZY_DICT 445
+#define _LOAD_ATTR_METHOD_NO_DICT 446
+#define _LOAD_ATTR_METHOD_WITH_VALUES 447
+#define _LOAD_ATTR_MODULE 448
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 449
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 450
+#define _LOAD_ATTR_PROPERTY_FRAME 451
+#define _LOAD_ATTR_SLOT 452
+#define _LOAD_ATTR_WITH_HINT 453
 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 445
+#define _LOAD_BYTECODE 454
 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
 #define _LOAD_CONST LOAD_CONST
-#define _LOAD_CONST_INLINE 446
-#define _LOAD_CONST_INLINE_BORROW 447
-#define _LOAD_CONST_UNDER_INLINE 448
-#define _LOAD_CONST_UNDER_INLINE_BORROW 449
+#define _LOAD_CONST_INLINE 455
+#define _LOAD_CONST_INLINE_BORROW 456
+#define _LOAD_CONST_UNDER_INLINE 457
+#define _LOAD_CONST_UNDER_INLINE_BORROW 458
 #define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 450
-#define _LOAD_FAST_0 451
-#define _LOAD_FAST_1 452
-#define _LOAD_FAST_2 453
-#define _LOAD_FAST_3 454
-#define _LOAD_FAST_4 455
-#define _LOAD_FAST_5 456
-#define _LOAD_FAST_6 457
-#define _LOAD_FAST_7 458
+#define _LOAD_FAST 459
+#define _LOAD_FAST_0 460
+#define _LOAD_FAST_1 461
+#define _LOAD_FAST_2 462
+#define _LOAD_FAST_3 463
+#define _LOAD_FAST_4 464
+#define _LOAD_FAST_5 465
+#define _LOAD_FAST_6 466
+#define _LOAD_FAST_7 467
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
-#define _LOAD_FAST_BORROW 459
-#define _LOAD_FAST_BORROW_0 460
-#define _LOAD_FAST_BORROW_1 461
-#define _LOAD_FAST_BORROW_2 462
-#define _LOAD_FAST_BORROW_3 463
-#define _LOAD_FAST_BORROW_4 464
-#define _LOAD_FAST_BORROW_5 465
-#define _LOAD_FAST_BORROW_6 466
-#define _LOAD_FAST_BORROW_7 467
+#define _LOAD_FAST_BORROW 468
+#define _LOAD_FAST_BORROW_0 469
+#define _LOAD_FAST_BORROW_1 470
+#define _LOAD_FAST_BORROW_2 471
+#define _LOAD_FAST_BORROW_3 472
+#define _LOAD_FAST_BORROW_4 473
+#define _LOAD_FAST_BORROW_5 474
+#define _LOAD_FAST_BORROW_6 475
+#define _LOAD_FAST_BORROW_7 476
 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 468
-#define _LOAD_GLOBAL_BUILTINS 469
-#define _LOAD_GLOBAL_MODULE 470
+#define _LOAD_GLOBAL 477
+#define _LOAD_GLOBAL_BUILTINS 478
+#define _LOAD_GLOBAL_MODULE 479
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 471
-#define _LOAD_SMALL_INT_0 472
-#define _LOAD_SMALL_INT_1 473
-#define _LOAD_SMALL_INT_2 474
-#define _LOAD_SMALL_INT_3 475
-#define _LOAD_SPECIAL 476
+#define _LOAD_SMALL_INT 480
+#define _LOAD_SMALL_INT_0 481
+#define _LOAD_SMALL_INT_1 482
+#define _LOAD_SMALL_INT_2 483
+#define _LOAD_SMALL_INT_3 484
+#define _LOAD_SPECIAL 485
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
 #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 477
+#define _MAKE_CALLARGS_A_TUPLE 486
 #define _MAKE_CELL MAKE_CELL
 #define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 478
+#define _MAKE_WARM 487
 #define _MAP_ADD MAP_ADD
 #define _MATCH_CLASS MATCH_CLASS
 #define _MATCH_KEYS MATCH_KEYS
 #define _MATCH_MAPPING MATCH_MAPPING
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 479
-#define _MAYBE_EXPAND_METHOD_KW 480
-#define _MONITOR_CALL 481
-#define _MONITOR_CALL_KW 482
-#define _MONITOR_JUMP_BACKWARD 483
-#define _MONITOR_RESUME 484
+#define _MAYBE_EXPAND_METHOD 488
+#define _MAYBE_EXPAND_METHOD_KW 489
+#define _MONITOR_CALL 490
+#define _MONITOR_CALL_KW 491
+#define _MONITOR_JUMP_BACKWARD 492
+#define _MONITOR_RESUME 493
 #define _NOP NOP
-#define _POP_CALL 485
-#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486
-#define _POP_CALL_ONE 487
-#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488
-#define _POP_CALL_TWO 489
-#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490
+#define _POP_CALL 494
+#define _POP_CALL_LOAD_CONST_INLINE_BORROW 495
+#define _POP_CALL_ONE 496
+#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 497
+#define _POP_CALL_TWO 498
+#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 499
 #define _POP_EXCEPT POP_EXCEPT
 #define _POP_ITER POP_ITER
-#define _POP_JUMP_IF_FALSE 491
-#define _POP_JUMP_IF_TRUE 492
+#define _POP_JUMP_IF_FALSE 500
+#define _POP_JUMP_IF_TRUE 501
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE 493
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494
-#define _POP_TWO 495
-#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496
+#define _POP_TOP_FLOAT 502
+#define _POP_TOP_INT 503
+#define _POP_TOP_LOAD_CONST_INLINE 504
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505
+#define _POP_TOP_NOP 506
+#define _POP_TOP_UNICODE 507
+#define _POP_TWO 508
+#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 497
+#define _PUSH_FRAME 510
 #define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 498
-#define _PY_FRAME_GENERAL 499
-#define _PY_FRAME_KW 500
-#define _QUICKEN_RESUME 501
-#define _REPLACE_WITH_TRUE 502
+#define _PUSH_NULL_CONDITIONAL 511
+#define _PY_FRAME_GENERAL 512
+#define _PY_FRAME_KW 513
+#define _QUICKEN_RESUME 514
+#define _REPLACE_WITH_TRUE 515
 #define _RESUME_CHECK RESUME_CHECK
 #define _RETURN_GENERATOR RETURN_GENERATOR
 #define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 503
-#define _SEND 504
-#define _SEND_GEN_FRAME 505
+#define _SAVE_RETURN_OFFSET 516
+#define _SEND 517
+#define _SEND_GEN_FRAME 518
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 506
-#define _STORE_ATTR 507
-#define _STORE_ATTR_INSTANCE_VALUE 508
-#define _STORE_ATTR_SLOT 509
-#define _STORE_ATTR_WITH_HINT 510
+#define _START_EXECUTOR 519
+#define _STORE_ATTR 520
+#define _STORE_ATTR_INSTANCE_VALUE 521
+#define _STORE_ATTR_SLOT 522
+#define _STORE_ATTR_WITH_HINT 523
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 511
-#define _STORE_FAST_0 512
-#define _STORE_FAST_1 513
-#define _STORE_FAST_2 514
-#define _STORE_FAST_3 515
-#define _STORE_FAST_4 516
-#define _STORE_FAST_5 517
-#define _STORE_FAST_6 518
-#define _STORE_FAST_7 519
+#define _STORE_FAST 524
+#define _STORE_FAST_0 525
+#define _STORE_FAST_1 526
+#define _STORE_FAST_2 527
+#define _STORE_FAST_3 528
+#define _STORE_FAST_4 529
+#define _STORE_FAST_5 530
+#define _STORE_FAST_6 531
+#define _STORE_FAST_7 532
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 520
-#define _STORE_SUBSCR 521
-#define _STORE_SUBSCR_DICT 522
-#define _STORE_SUBSCR_LIST_INT 523
-#define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 524
-#define _TO_BOOL 525
+#define _STORE_SLICE 533
+#define _STORE_SUBSCR 534
+#define _STORE_SUBSCR_DICT 535
+#define _STORE_SUBSCR_LIST_INT 536
+#define _SWAP 537
+#define _SWAP_2 538
+#define _SWAP_3 539
+#define _TIER2_RESUME_CHECK 540
+#define _TO_BOOL 541
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
-#define _TO_BOOL_LIST 526
+#define _TO_BOOL_LIST 542
 #define _TO_BOOL_NONE TO_BOOL_NONE
-#define _TO_BOOL_STR 527
+#define _TO_BOOL_STR 543
 #define _UNARY_INVERT UNARY_INVERT
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 528
-#define _UNPACK_SEQUENCE_LIST 529
-#define _UNPACK_SEQUENCE_TUPLE 530
-#define _UNPACK_SEQUENCE_TWO_TUPLE 531
+#define _UNPACK_SEQUENCE 544
+#define _UNPACK_SEQUENCE_LIST 545
+#define _UNPACK_SEQUENCE_TUPLE 546
+#define _UNPACK_SEQUENCE_TWO_TUPLE 547
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
 #define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 531
+#define MAX_UOP_ID 547
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index cd36023c25c..ff7e800aa9b 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -12,7 +12,8 @@ extern "C" {
 #include <stdint.h>
 #include "pycore_uop_ids.h"
 extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];
-extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];
+typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;
+extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];
 extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
 
 extern int _PyUop_num_popped(int opcode, int oparg);
@@ -63,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG,
     [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG,
     [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
+    [_POP_TOP_NOP] = 0,
+    [_POP_TOP_INT] = 0,
+    [_POP_TOP_FLOAT] = 0,
+    [_POP_TOP_UNICODE] = 0,
     [_POP_TWO] = HAS_ESCAPES_FLAG,
     [_PUSH_NULL] = HAS_PURE_FLAG,
     [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG,
@@ -85,18 +90,23 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_GUARD_NOS_INT] = HAS_EXIT_FLAG,
     [_GUARD_TOS_INT] = HAS_EXIT_FLAG,
-    [_BINARY_OP_MULTIPLY_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG,
-    [_BINARY_OP_ADD_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG,
-    [_BINARY_OP_SUBTRACT_INT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG,
+    [_GUARD_NOS_OVERFLOWED] = HAS_EXIT_FLAG,
+    [_GUARD_TOS_OVERFLOWED] = HAS_EXIT_FLAG,
+    [_BINARY_OP_MULTIPLY_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_ADD_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_SUBTRACT_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG,
     [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG,
     [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG,
     [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_ADD_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
-    [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
+    [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG,
     [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
@@ -184,7 +194,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_STORE_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
     [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG,
-    [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
+    [_COMPARE_OP_INT] = HAS_ARG_FLAG,
     [_COMPARE_OP_STR] = HAS_ARG_FLAG,
     [_IS_OP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG,
     [_CONTAINS_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
@@ -288,8 +298,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
+    [_COPY_1] = HAS_PURE_FLAG,
+    [_COPY_2] = HAS_PURE_FLAG,
+    [_COPY_3] = HAS_PURE_FLAG,
     [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
+    [_SWAP_2] = HAS_PURE_FLAG,
+    [_SWAP_3] = HAS_PURE_FLAG,
     [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG,
     [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG,
     [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG,
@@ -323,22 +338,26 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG,
 };
 
-const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {
-    [_LOAD_FAST] = 8,
-    [_LOAD_FAST_BORROW] = 8,
-    [_LOAD_SMALL_INT] = 4,
-    [_STORE_FAST] = 8,
-    [_INIT_CALL_PY_EXACT_ARGS] = 5,
+const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {
+    [_LOAD_FAST] = { 0, 8 },
+    [_LOAD_FAST_BORROW] = { 0, 8 },
+    [_LOAD_SMALL_INT] = { 0, 4 },
+    [_STORE_FAST] = { 0, 8 },
+    [_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 },
+    [_COPY] = { 1, 4 },
+    [_SWAP] = { 2, 4 },
 };
 
 const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_BINARY_OP] = "_BINARY_OP",
     [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT",
+    [_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS] = "_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS",
     [_BINARY_OP_ADD_INT] = "_BINARY_OP_ADD_INT",
     [_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE",
     [_BINARY_OP_EXTEND] = "_BINARY_OP_EXTEND",
     [_BINARY_OP_INPLACE_ADD_UNICODE] = "_BINARY_OP_INPLACE_ADD_UNICODE",
     [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT",
+    [_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS] = "_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS",
     [_BINARY_OP_MULTIPLY_INT] = "_BINARY_OP_MULTIPLY_INT",
     [_BINARY_OP_SUBSCR_CHECK_FUNC] = "_BINARY_OP_SUBSCR_CHECK_FUNC",
     [_BINARY_OP_SUBSCR_DICT] = "_BINARY_OP_SUBSCR_DICT",
@@ -348,6 +367,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_BINARY_OP_SUBSCR_STR_INT] = "_BINARY_OP_SUBSCR_STR_INT",
     [_BINARY_OP_SUBSCR_TUPLE_INT] = "_BINARY_OP_SUBSCR_TUPLE_INT",
     [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT",
+    [_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS] = "_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS",
     [_BINARY_OP_SUBTRACT_INT] = "_BINARY_OP_SUBTRACT_INT",
     [_BINARY_SLICE] = "_BINARY_SLICE",
     [_BUILD_INTERPOLATION] = "_BUILD_INTERPOLATION",
@@ -408,6 +428,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_CONTAINS_OP_SET] = "_CONTAINS_OP_SET",
     [_CONVERT_VALUE] = "_CONVERT_VALUE",
     [_COPY] = "_COPY",
+    [_COPY_1] = "_COPY_1",
+    [_COPY_2] = "_COPY_2",
+    [_COPY_3] = "_COPY_3",
     [_COPY_FREE_VARS] = "_COPY_FREE_VARS",
     [_CREATE_INIT_FRAME] = "_CREATE_INIT_FRAME",
     [_DELETE_ATTR] = "_DELETE_ATTR",
@@ -458,6 +481,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_GUARD_NOS_LIST] = "_GUARD_NOS_LIST",
     [_GUARD_NOS_NOT_NULL] = "_GUARD_NOS_NOT_NULL",
     [_GUARD_NOS_NULL] = "_GUARD_NOS_NULL",
+    [_GUARD_NOS_OVERFLOWED] = "_GUARD_NOS_OVERFLOWED",
     [_GUARD_NOS_TUPLE] = "_GUARD_NOS_TUPLE",
     [_GUARD_NOS_UNICODE] = "_GUARD_NOS_UNICODE",
     [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST",
@@ -469,6 +493,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_GUARD_TOS_FLOAT] = "_GUARD_TOS_FLOAT",
     [_GUARD_TOS_INT] = "_GUARD_TOS_INT",
     [_GUARD_TOS_LIST] = "_GUARD_TOS_LIST",
+    [_GUARD_TOS_OVERFLOWED] = "_GUARD_TOS_OVERFLOWED",
     [_GUARD_TOS_SLICE] = "_GUARD_TOS_SLICE",
     [_GUARD_TOS_TUPLE] = "_GUARD_TOS_TUPLE",
     [_GUARD_TOS_UNICODE] = "_GUARD_TOS_UNICODE",
@@ -572,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_POP_EXCEPT] = "_POP_EXCEPT",
     [_POP_ITER] = "_POP_ITER",
     [_POP_TOP] = "_POP_TOP",
+    [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT",
+    [_POP_TOP_INT] = "_POP_TOP_INT",
     [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE",
     [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW",
+    [_POP_TOP_NOP] = "_POP_TOP_NOP",
+    [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE",
     [_POP_TWO] = "_POP_TWO",
     [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW",
     [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO",
@@ -617,6 +646,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT",
     [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT",
     [_SWAP] = "_SWAP",
+    [_SWAP_2] = "_SWAP_2",
+    [_SWAP_3] = "_SWAP_3",
     [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK",
     [_TO_BOOL] = "_TO_BOOL",
     [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL",
@@ -726,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 2;
         case _POP_TOP:
             return 1;
+        case _POP_TOP_NOP:
+            return 1;
+        case _POP_TOP_INT:
+            return 1;
+        case _POP_TOP_FLOAT:
+            return 1;
+        case _POP_TOP_UNICODE:
+            return 1;
         case _POP_TWO:
             return 2;
         case _PUSH_NULL:
@@ -770,6 +809,10 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 0;
         case _GUARD_TOS_INT:
             return 0;
+        case _GUARD_NOS_OVERFLOWED:
+            return 0;
+        case _GUARD_TOS_OVERFLOWED:
+            return 0;
         case _BINARY_OP_MULTIPLY_INT:
             return 2;
         case _BINARY_OP_ADD_INT:
@@ -786,6 +829,12 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 2;
         case _BINARY_OP_SUBTRACT_FLOAT:
             return 2;
+        case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS:
+            return 2;
+        case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS:
+            return 2;
+        case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS:
+            return 2;
         case _BINARY_OP_ADD_UNICODE:
             return 2;
         case _BINARY_OP_INPLACE_ADD_UNICODE:
@@ -1176,10 +1225,20 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 1;
         case _FORMAT_WITH_SPEC:
             return 2;
+        case _COPY_1:
+            return 0;
+        case _COPY_2:
+            return 0;
+        case _COPY_3:
+            return 0;
         case _COPY:
             return 0;
         case _BINARY_OP:
             return 2;
+        case _SWAP_2:
+            return 0;
+        case _SWAP_3:
+            return 0;
         case _SWAP:
             return 0;
         case _GUARD_IS_TRUE_POP:
diff --git a/Include/pymacro.h b/Include/pymacro.h
index bfe660e8303..b2886ddac5d 100644
--- a/Include/pymacro.h
+++ b/Include/pymacro.h
@@ -73,14 +73,14 @@
 #        else
 #            define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
 #        endif
+#    elif defined(_MSC_VER)
+#        define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
 #    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
 #        define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
 #    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
 #        define _Py_ALIGNED_DEF(N, T)  _Alignas(N) _Alignas(T) T
 #    elif (defined(__GNUC__) || defined(__clang__))
 #        define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
-#    elif defined(_MSC_VER)
-#        define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
 #    else
 #        define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
 #    endif
diff --git a/Include/pyport.h b/Include/pyport.h
index 3eac119bf8e..0675294d5bc 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -49,8 +49,9 @@
 // Static inline functions should use _Py_NULL rather than using directly NULL
 // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer,
 // _Py_NULL is defined as nullptr.
-#if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \
-        || (defined(__cplusplus) && __cplusplus >= 201103)
+#if !defined(_MSC_VER) && \
+    ((defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \
+        || (defined(__cplusplus) && __cplusplus >= 201103))
 #  define _Py_NULL nullptr
 #else
 #  define _Py_NULL NULL
diff --git a/Include/refcount.h b/Include/refcount.h
index ebd1dba6d15..034c453f449 100644
--- a/Include/refcount.h
+++ b/Include/refcount.h
@@ -1,5 +1,5 @@
-#ifndef Py_REFCOUNT_H
-#define Py_REFCOUNT_H
+#ifndef _Py_REFCOUNT_H
+#define _Py_REFCOUNT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -561,4 +561,4 @@ static inline PyObject* _Py_XNewRef(PyObject *obj)
 #ifdef __cplusplus
 }
 #endif
-#endif   // !Py_REFCOUNT_H
+#endif   // !_Py_REFCOUNT_H
diff --git a/InternalDocs/README.md b/InternalDocs/README.md
index 4502902307c..6b1d9198264 100644
--- a/InternalDocs/README.md
+++ b/InternalDocs/README.md
@@ -41,3 +41,10 @@ Program Execution
 - [Garbage Collector Design](garbage_collector.md)
 
 - [Exception Handling](exception_handling.md)
+
+- [Quiescent-State Based Reclamation (QSBR)](qsbr.md)
+
+Modules
+---
+
+- [asyncio](asyncio.md)
diff --git a/InternalDocs/asyncio.md b/InternalDocs/asyncio.md
new file mode 100644
index 00000000000..22159852ca5
--- /dev/null
+++ b/InternalDocs/asyncio.md
@@ -0,0 +1,328 @@
+asyncio
+=======
+
+
+This document describes the working and implementation details of the
+[`asyncio`](https://docs.python.org/3/library/asyncio.html) module.
+
+**The following section describes the implementation details of the C implementation**.
+
+# Task management
+
+## Pre-Python 3.14 implementation
+
+Before Python 3.14, the C implementation of `asyncio` used a
+[`WeakSet`](https://docs.python.org/3/library/weakref.html#weakref.WeakSet)
+to store all the tasks created by the event loop. `WeakSet` was used
+so that the event loop doesn't hold strong references to the tasks,
+allowing them to be garbage collected when they are no longer needed.
+The current task of the event loop was stored in a dict mapping the
+event loop to the current task.
+
+```c
+    /* Dictionary containing tasks that are currently active in
+       all running event loops.  {EventLoop: Task} */
+    PyObject *current_tasks;
+
+    /* WeakSet containing all tasks scheduled to run on event loops. */
+    PyObject *scheduled_tasks;
+```
+
+This implementation had a few drawbacks:
+
+1. **Performance**: Using a `WeakSet` for storing tasks is
+inefficient, as it requires maintaining a full set of weak references
+to tasks along with corresponding weakref callbacks to clean up the
+tasks when they are garbage collected.  This increases the work done
+by the garbage collector, and in applications with a large number of
+tasks, this becomes a bottleneck, with increased memory usage and
+lower performance. Looking up the current task was slow as it required
+a dictionary lookup on the `current_tasks` dict.
+
+2. **Thread safety**: Before Python 3.14, concurrent iteration over
+`WeakSet` was not thread safe[^1]. This meant calling APIs like
+`asyncio.all_tasks()` could lead to inconsistent results or even
+`RuntimeError` if used in multiple threads[^2].
+
+3. **Poor scaling in free-threading**: Using a global `WeakSet` for
+storing all tasks across all threads led to contention when adding
+and removing tasks from the set which is a frequent operation. As such
+it performed poorly in free-threading and did not scale well with the
+number of threads. Similarly, accessing the current task in multiple
+threads did not scale due to contention on the global `current_tasks`
+dictionary.
+
+## Python 3.14 implementation
+
+To address these issues, Python 3.14 implements several changes to
+improve the performance and thread safety of task management.
+
+- **Per-thread double linked list for tasks**: Python 3.14 introduces
+  a per-thread circular doubly linked list implementation for
+  storing tasks. This allows each thread to maintain its own list of
+  tasks and allows for lock free addition and removal of tasks. This
+  design is efficient and thread-safe, and scales well with
+  the number of threads in free-threading. This also allows external
+  introspection tools such as `python -m asyncio pstree` to inspect
+  tasks running in all threads and was implemented as part of [Audit
+  asyncio thread
+  safety](https://github.com/python/cpython/issues/128002).
+
+- **Per-thread current task**: Python 3.14 stores the current task on
+  the current thread state instead of a global dictionary. This
+  allows for faster access to the current task without the need for
+  a dictionary lookup. Each thread maintains its own current task,
+  which is stored in the `PyThreadState` structure. This was
+  implemented in https://github.com/python/cpython/issues/129898.
+
+Storing the current task and list of all tasks per-thread instead of
+storing it per-loop was chosen primarily to support external
+introspection tools such as `python -m asyncio pstree` as looking up
+arbitrary attributes on the loop object is not possible
+externally. Storing data per-thread also makes it easy to support
+third party event loop implementations such as `uvloop`, and is more
+efficient for the single threaded asyncio use-case as it avoids the
+overhead of attribute lookups on the loop object and several other
+calls on the performance critical path of adding and removing tasks
+from the per-loop task list.
+
+## Per-thread double linked list for tasks
+
+This implementation uses a circular doubly linked list to store tasks
+on the thread states.  This is used for all tasks which are instances
+of `asyncio.Task` or subclasses of it; for third-party tasks, a
+fallback `WeakSet` implementation is used. The linked list is
+implemented using an embedded `llist_node` structure within each
+`TaskObj`. By embedding the list node directly into the task object,
+the implementation avoids additional memory allocations for linked
+list nodes.
+
+The `PyThreadState` structure gained a new field `asyncio_tasks_head`,
+which serves as the head of the circular linked list of tasks. This
+allows for lock free addition and removal of tasks from the list.
+
+It is possible that when a thread state is deallocated, there are
+lingering tasks in its list; this can happen if another thread has
+references to the tasks of this thread. Therefore, the
+`PyInterpreterState` structure also gains a new `asyncio_tasks_head`
+field to store any lingering tasks. When a thread state is
+deallocated, any remaining lingering tasks are moved to the
+interpreter state tasks list, and the thread state tasks list is
+cleared.  The `asyncio_tasks_lock` is used to protect the interpreter's
+tasks list from concurrent modifications.
+
+```c
+typedef struct TaskObj {
+    ...
+    struct llist_node asyncio_node;
+} TaskObj;
+
+typedef struct PyThreadState {
+    ...
+    struct llist_node asyncio_tasks_head;
+} PyThreadState;
+
+typedef struct PyInterpreterState {
+    ...
+    struct llist_node asyncio_tasks_head;
+    PyMutex asyncio_tasks_lock;
+} PyInterpreterState;
+```
+
+When a task is created, it is added to the current thread's list of
+tasks by the `register_task` function. When the task is done, it is
+removed from the list by the `unregister_task` function. In
+free-threading, the thread id of the thread which created the task is
+stored in `task_tid` field of the `TaskObj`. This is used to check if
+the task is being removed from the correct thread's task list. If the
+current thread is the same as the thread which created it then no locking
+is required, otherwise in free-threading, the `stop-the-world` pause
+is used to pause all other threads and then safely remove the task
+from the tasks list.
+
+```mermaid
+
+flowchart TD
+    subgraph one["Executing Thread"]
+        A["task = asyncio.create_task(coro())"] -->B("register_task(task)")
+        B --> C{"task->task_state?"}
+        C -->|pending| D["task_step(task)"]
+        C -->|done| F["unregister_task(task)"]
+        C -->|cancelled| F["unregister_task(task)"]
+        D --> C
+        F --> G{"free-threading?"}
+        G --> |false| H["unregister_task_safe(task)"]
+        G --> |true| J{"correct thread? <br>task->task_tid == _Py_ThreadId()"}
+        J --> |true| H
+        J --> |false| I["stop the world <br> pause all threads"]
+        I --> H["unregister_task_safe(task)"]
+    end
+    subgraph two["Thread deallocating"]
+        A1{"thread's task list empty? <br> llist_empty(tstate->asyncio_tasks_head)"}
+        A1 --> |true| B1["deallocate thread<br>free_threadstate(tstate)"]
+        A1 --> |false| C1["add tasks to interpreter's task list<br> llist_concat(&tstate->interp->asyncio_tasks_head,
+        &tstate->asyncio_tasks_head)"]
+        C1 --> B1
+    end
+
+    one --> two
+```
+
+`asyncio.all_tasks` now iterates over the per-thread task lists of all
+threads and the interpreter's task list to get all the tasks. In
+free-threading, this is done by pausing all the threads using the
+`stop-the-world` pause to ensure that no tasks are being added or
+removed while iterating over the lists. This allows for a consistent
+view of all task lists across all threads and is thread safe.
+
+This design allows for lock free execution and scales well in
+free-threading with multiple event loops running in different threads.
+
+## Per-thread current task
+
+This implementation stores the current task in the `PyThreadState`
+structure, which allows for faster access to the current task without
+the need for a dictionary lookup.
+
+```c
+typedef struct PyThreadState {
+    ...
+    PyObject *asyncio_current_loop;
+    PyObject *asyncio_current_task;
+} PyThreadState;
+```
+
+When a task is entered or left, the current task is updated in the
+thread state using `enter_task` and `leave_task` functions. When
+`current_task(loop)` is called where `loop` is the current running
+event loop of the current thread, no locking is required as the
+current task is stored in the thread state and is returned directly
+(general case). Otherwise, if the `loop` is not the current running event
+loop, the `stop-the-world` pause is used to pause all threads in
+free-threading and then by iterating over all the thread states and
+checking if the `loop` matches with `tstate->asyncio_current_loop`,
+the current task is found and returned. If no matching thread state is
+found, `None` is returned.
+
+In free-threading, it avoids contention on a global dictionary as
+threads can access the current task of their running loop without any
+locking.
+
+---
+
+**The following section describes the implementation details of the Python implementation**.
+
+# async generators
+
+This section describes the implementation details of async generators in `asyncio`.
+
+Since async generators are meant to be used from coroutines,
+their finalization (execution of finally blocks) needs
+to be done while the loop is running.
+Most async generators are closed automatically
+when they are fully iterated over and exhausted; however,
+if the async generator is not fully iterated over,
+it may not be closed properly, leading to the `finally` blocks not being executed.
+
+Consider the following code:
+```py
+import asyncio
+
+async def agen():
+    try:
+        yield 1
+    finally:
+        await asyncio.sleep(1)
+        print("finally executed")
+
+
+async def main():
+    async for i in agen():
+        break
+
+loop = asyncio.EventLoop()
+loop.run_until_complete(main())
+```
+
+The above code will not print "finally executed", because the
+async generator `agen` is not fully iterated over
+and it is not closed manually by awaiting `agen.aclose()`.
+
+To solve this, `asyncio` uses the `sys.set_asyncgen_hooks` function to
+set hooks for finalizing async generators as described in
+[PEP 525](https://peps.python.org/pep-0525/).
+
+- **firstiter hook**: When the async generator is iterated over for the first time,
+the *firstiter hook* is called. The async generator is added to `loop._asyncgens` WeakSet
+and the event loop tracks all active async generators.
+
+- **finalizer hook**: When the async generator is about to be finalized,
+the *finalizer hook* is called. The event loop removes the async generator
+from `loop._asyncgens` WeakSet, and schedules the finalization of the async
+generator by creating a task calling `agen.aclose()`. This ensures that the
+finally block is executed while the event loop is running. When the loop is
+shutting down, the loop checks if there are active async generators and if so,
+it similarly schedules the finalization of all active async generators by calling
+`agen.aclose()` on each of them and waits for them to complete before shutting
+down the loop.
+
+This ensures that the async generator's `finally` blocks are executed even
+if the generator is not explicitly closed.
+
+Consider the following example:
+
+```python
+import asyncio
+
+async def agen():
+    try:
+        yield 1
+        yield 2
+    finally:
+        print("executing finally block")
+
+async def main():
+    async for item in agen():
+        print(item)
+        break  # not fully iterated
+
+asyncio.run(main())
+```
+
+```mermaid
+flowchart TD
+    subgraph one["Loop running"]
+        A["asyncio.run(main())"] --> B
+        B["set async generator hooks <br> sys.set_asyncgen_hooks()"] --> C
+        C["async for item in agen"] --> F
+        F{"first iteration?"} --> |true|D
+        F{"first iteration?"} --> |false|H
+        D["calls firstiter hook<br>loop._asyncgen_firstiter_hook(agen)"] --> E
+        E["add agen to WeakSet<br> loop._asyncgens.add(agen)"] --> H
+        H["item = await agen.\_\_anext\_\_()"] --> J
+        J{"StopAsyncIteration?"} --> |true|M
+        J{"StopAsyncIteration?"} --> |false|I
+        I["print(item)"] --> S
+        S{"continue iterating?"} --> |true|C
+        S{"continue iterating?"} --> |false|M
+        M{"agen is no longer referenced?"} --> |true|N
+        M{"agen is no longer referenced?"} --> |false|two
+        N["finalize agen<br>_PyGen_Finalize(agen)"] --> O
+        O["calls finalizer hook<br>loop._asyncgen_finalizer_hook(agen)"] --> P
+        P["remove agen from WeakSet<br>loop._asyncgens.discard(agen)"] --> Q
+        Q["schedule task to close it<br>self.create_task(agen.aclose())"] --> R
+        R["print('executing finally block')"] --> E1
+
+    end
+
+    subgraph two["Loop shutting down"]
+        A1{"check for alive async generators?"} --> |true|B1
+        B1["close all async generators <br> await asyncio.gather\(*\[ag.aclose\(\) for ag in loop._asyncgens\]\)"] --> R
+        A1{"check for alive async generators?"} --> |false|E1
+        E1["loop.close()"]
+    end
+
+```
+
+[^1]: https://github.com/python/cpython/issues/123089
+[^2]: https://github.com/python/cpython/issues/80788
diff --git a/InternalDocs/qsbr.md b/InternalDocs/qsbr.md
new file mode 100644
index 00000000000..1c4a79a7b44
--- /dev/null
+++ b/InternalDocs/qsbr.md
@@ -0,0 +1,129 @@
+# Quiescent-State Based Reclamation (QSBR)
+
+## Introduction
+
+When implementing lock-free data structures, a key challenge is determining
+when it is safe to free memory that has been logically removed from a
+structure. Freeing memory too early can lead to use-after-free bugs if another
+thread is still accessing it. Freeing it too late results in excessive memory
+consumption.
+
+Safe memory reclamation (SMR) schemes address this by delaying the free
+operation until all concurrent read accesses are guaranteed to have completed.
+Quiescent-State Based Reclamation (QSBR) is an SMR scheme used in Python's
+free-threaded build to manage the lifecycle of shared memory.
+
+QSBR requires threads to periodically report that they are in a quiescent
+state. A thread is in a quiescent state if it holds no references to shared
+objects that might be reclaimed. Think of it as a checkpoint where a thread
+signals, "I am not in the middle of any operation that relies on a shared
+resource." In Python, the eval_breaker provides a natural and convenient place
+for threads to report this state.
+
+
+## Use in Free-Threaded Python
+
+While CPython's memory management is dominated by reference counting and a
+tracing garbage collector, these mechanisms are not suitable for all data
+structures. For example, the backing array of a list object is not individually
+reference-counted but may have a shorter lifetime than the `PyListObject` that
+contains it. We could delay reclamation until the next GC run, but we want
+reclamation to be prompt and to run the GC less frequently in the free-threaded
+build, as it requires pausing all threads.
+
+Many operations in the free-threaded build are protected by locks. However, for
+performance-critical code, we want to allow reads to happen concurrently with
+updates. For instance, we want to avoid locking during most list read accesses.
+If a list is resized while another thread is reading it, QSBR provides the
+mechanism to determine when it is safe to free the list's old backing array.
+
+Specific use cases for QSBR include:
+
+* Dictionary keys (`PyDictKeysObject`) and list arrays (`_PyListArray`): When a
+dictionary or list that may be shared between threads is resized, we use QSBR
+to delay freeing the old keys or array until it's safe. For dicts and lists
+that are not shared, their storage can be freed immediately upon resize.
+
+* Mimalloc `mi_page_t`: Non-locking dictionary and list accesses require
+cooperation from the memory allocator. If an object is freed and its memory is
+reused, we must ensure the new object's reference count field is at the same
+memory location. In practice, this means when a mimalloc page (`mi_page_t`)
+becomes empty, we don't immediately allow it to be reused for allocations of a
+different size class. QSBR is used to determine when it's safe to repurpose the
+page or return its memory to the OS.
+
+
+## Implementation Details
+
+
+### Core Implementation
+
+The proposal to add QSBR to Python is contained in
+[GitHub issue 115103](https://github.com/python/cpython/issues/115103).
+Many details of that proposal have been copied here, so they can be kept
+up-to-date with the actual implementation.
+
+Python's QSBR implementation is based on FreeBSD's "Global Unbounded
+Sequences" [^1][^2][^3].  It relies on a few key counters:
+
+* Global Write Sequence (`wr_seq`): A per-interpreter counter, `wr_seq`, is started
+at 1 and incremented by 2 each time it is advanced. This ensures its value is
+always odd, which can be used to distinguish it from other state values. When
+an object needs to be reclaimed, `wr_seq` is advanced, and the object is tagged
+with this new sequence number.
+
+* Per-Thread Read Sequence: Each thread has a local read sequence counter. When
+a thread reaches a quiescent state (e.g., at the eval_breaker), it copies the
+current global `wr_seq` to its local counter.
+
+* Global Read Sequence (`rd_seq`): This per-interpreter value stores the minimum
+of all per-thread read sequence counters (excluding detached threads). It is
+updated by a "polling" operation.
+
+To free an object, the following steps are taken:
+
+1. Advance the global `wr_seq`.
+
+2. Add the object's pointer to a deferred-free list, tagging it with the new
+   `wr_seq` value as its qsbr_goal.
+
+Periodically, a polling mechanism processes this deferred-free list:
+
+1. The minimum read sequence value across all active threads is calculated and
+   stored as the global `rd_seq`.
+
+2. For each item on the deferred-free list, if its qsbr_goal is less than or
+   equal to the new `rd_seq`, its memory is freed, and it is removed from the
+   list. Otherwise, it remains on the list for a future attempt.
+
+
+### Deferred Advance Optimization
+
+To reduce memory contention from frequent updates to the global `wr_seq`, its
+advancement is sometimes deferred. Instead of incrementing `wr_seq` on every
+reclamation request, each thread tracks its number of deferrals locally. Once
+the deferral count reaches a limit (QSBR_DEFERRED_LIMIT, currently 10), the
+thread advances the global `wr_seq` and resets its local count.
+
+When an object is added to the deferred-free list, its qsbr_goal is set to
+`wr_seq` + 2. By setting the goal to the next sequence value, we ensure it's safe
+to defer the global counter advancement. This optimization improves runtime
+speed but may increase peak memory usage by slightly delaying when memory can
+be reclaimed.
+
+
+## Limitations
+
+Determining the `rd_seq` requires scanning over all thread states. This operation
+could become a bottleneck in applications with a very large number of threads
+(e.g., >1,000). Future work may address this with more advanced mechanisms,
+such as a tree-based structure or incremental scanning. For now, the
+implementation prioritizes simplicity, with plans for refinement if
+multi-threaded benchmarks reveal performance issues.
+
+
+## References
+
+[^1]: https://youtu.be/ZXUIFj4nRjk?t=694
+[^2]: https://people.kernel.org/joelfernandes/gus-vs-rcu
+[^3]: http://bxr.su/FreeBSD/sys/kern/subr_smr.c#44
diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py
index 71f619024e5..bc35823f701 100644
--- a/Lib/_pydatetime.py
+++ b/Lib/_pydatetime.py
@@ -2164,7 +2164,7 @@ class datetime(date):
         By default, the fractional part is omitted if self.microsecond == 0.
 
         If self.tzinfo is not None, the UTC offset is also attached, giving
-        giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'.
+        a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'.
 
         Optional argument sep specifies the separator between date and
         time, default 'T'.
diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py
index 842599bd187..0589a0f437e 100644
--- a/Lib/_pyrepl/base_eventqueue.py
+++ b/Lib/_pyrepl/base_eventqueue.py
@@ -87,7 +87,7 @@ class BaseEventQueue:
             if isinstance(k, dict):
                 self.keymap = k
             else:
-                self.insert(Event('key', k, self.flush_buf()))
+                self.insert(Event('key', k, bytes(self.flush_buf())))
                 self.keymap = self.compiled_keymap
 
         elif self.buf and self.buf[0] == 27:  # escape
@@ -96,7 +96,7 @@ class BaseEventQueue:
             # the docstring in keymap.py
             trace('unrecognized escape sequence, propagating...')
             self.keymap = self.compiled_keymap
-            self.insert(Event('key', '\033', bytearray(b'\033')))
+            self.insert(Event('key', '\033', b'\033'))
             for _c in self.flush_buf()[1:]:
                 self.push(_c)
 
@@ -106,5 +106,5 @@ class BaseEventQueue:
             except UnicodeError:
                 return
             else:
-                self.insert(Event('key', decoded, self.flush_buf()))
+                self.insert(Event('key', decoded, bytes(self.flush_buf())))
             self.keymap = self.compiled_keymap
diff --git a/Lib/_strptime.py b/Lib/_strptime.py
index ae67949626d..cdc55e8daaf 100644
--- a/Lib/_strptime.py
+++ b/Lib/_strptime.py
@@ -14,6 +14,7 @@ import os
 import time
 import locale
 import calendar
+import re
 from re import compile as re_compile
 from re import sub as re_sub
 from re import IGNORECASE
@@ -41,6 +42,29 @@ def _findall(haystack, needle):
         yield i
         i += len(needle)
 
+def _fixmonths(months):
+    yield from months
+    # The lower case of 'İ' ('\u0130') is 'i\u0307'.
+    # The re module only supports 1-to-1 character matching in
+    # case-insensitive mode.
+    for s in months:
+        if 'i\u0307' in s:
+            yield s.replace('i\u0307', '\u0130')
+
+lzh_TW_alt_digits = (
+    # 〇:一:二:三:四:五:六:七:八:九
+    '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db',
+    '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d',
+    # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九
+    '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db',
+    '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d',
+    # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九
+    '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db',
+    '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d',
+    # 卅:卅一
+    '\u5345', '\u5345\u4e00')
+
+
 class LocaleTime(object):
     """Stores and handles locale-specific information related to time.
 
@@ -84,6 +108,7 @@ class LocaleTime(object):
         self.__calc_weekday()
         self.__calc_month()
         self.__calc_am_pm()
+        self.__calc_alt_digits()
         self.__calc_timezone()
         self.__calc_date_time()
         if _getlang() != self.lang:
@@ -119,9 +144,43 @@ class LocaleTime(object):
             am_pm.append(time.strftime("%p", time_tuple).lower().strip())
         self.am_pm = am_pm
 
+    def __calc_alt_digits(self):
+        # Set self.LC_alt_digits by using time.strftime().
+
+        # The magic data should contain all decimal digits.
+        time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0))
+        s = time.strftime("%x%X", time_tuple)
+        if s.isascii():
+            # Fast path -- all digits are ASCII.
+            self.LC_alt_digits = ()
+            return
+
+        digits = ''.join(sorted(set(re.findall(r'\d', s))))
+        if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9:
+            # All 10 decimal digits from the same set.
+            if digits.isascii():
+                # All digits are ASCII.
+                self.LC_alt_digits = ()
+                return
+
+            self.LC_alt_digits = [a + b for a in digits for b in digits]
+            # Test whether the numbers contain leading zero.
+            time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0))
+            if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2):
+                self.LC_alt_digits[:10] = digits
+            return
+
+        # Either non-Gregorian calendar or non-decimal numbers.
+        if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s):
+            # lzh_TW
+            self.LC_alt_digits = lzh_TW_alt_digits
+            return
+
+        self.LC_alt_digits = None
+
     def __calc_date_time(self):
-        # Set self.date_time, self.date, & self.time by using
-        # time.strftime().
+        # Set self.LC_date_time, self.LC_date, self.LC_time and
+        # self.LC_time_ampm by using time.strftime().
 
         # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
         # overloaded numbers is minimized.  The order in which searches for
@@ -129,26 +188,32 @@ class LocaleTime(object):
         # possible ambiguity for what something represents.
         time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
         time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0))
-        replacement_pairs = [
+        replacement_pairs = []
+
+        # Non-ASCII digits
+        if self.LC_alt_digits or self.LC_alt_digits is None:
+            for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'),
+                         (44, '%OM'), (55, '%OS'), (17, '%Od'),
+                         (3, '%Om'), (2, '%Ow'), (10, '%OI')]:
+                if self.LC_alt_digits is None:
+                    s = chr(0x660 + n // 10) + chr(0x660 + n % 10)
+                    replacement_pairs.append((s, d))
+                    if n < 10:
+                        replacement_pairs.append((s[1], d))
+                elif len(self.LC_alt_digits) > n:
+                    replacement_pairs.append((self.LC_alt_digits[n], d))
+                else:
+                    replacement_pairs.append((time.strftime(d, time_tuple), d))
+        replacement_pairs += [
             ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
             ('44', '%M'), ('55', '%S'), ('76', '%j'),
             ('17', '%d'), ('03', '%m'), ('3', '%m'),
             # '3' needed for when no leading zero.
             ('2', '%w'), ('10', '%I'),
-            # Non-ASCII digits
-            ('\u0661\u0669\u0669\u0669', '%Y'),
-            ('\u0669\u0669', '%Oy'),
-            ('\u0662\u0662', '%OH'),
-            ('\u0664\u0664', '%OM'),
-            ('\u0665\u0665', '%OS'),
-            ('\u0661\u0667', '%Od'),
-            ('\u0660\u0663', '%Om'),
-            ('\u0663', '%Om'),
-            ('\u0662', '%Ow'),
-            ('\u0661\u0660', '%OI'),
         ]
+
         date_time = []
-        for directive in ('%c', '%x', '%X'):
+        for directive in ('%c', '%x', '%X', '%r'):
             current_format = time.strftime(directive, time_tuple).lower()
             current_format = current_format.replace('%', '%%')
             # The month and the day of the week formats are treated specially
@@ -172,9 +237,10 @@ class LocaleTime(object):
                     if tz:
                         current_format = current_format.replace(tz, "%Z")
             # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
-            current_format = re_sub(r'\d(?<![0-9])',
-                                    lambda m: chr(0x0660 + int(m[0])),
-                                    current_format)
+            if not current_format.isascii() and self.LC_alt_digits is None:
+                current_format = re_sub(r'\d(?<![0-9])',
+                                        lambda m: chr(0x0660 + int(m[0])),
+                                        current_format)
             for old, new in replacement_pairs:
                 current_format = current_format.replace(old, new)
             # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -189,6 +255,7 @@ class LocaleTime(object):
         self.LC_date_time = date_time[0]
         self.LC_date = date_time[1]
         self.LC_time = date_time[2]
+        self.LC_time_ampm = date_time[3]
 
     def __find_month_format(self, directive):
         """Find the month format appropriate for the current locale.
@@ -213,7 +280,7 @@ class LocaleTime(object):
                 full_indices &= indices
             indices = set(_findall(datetime, self.a_month[m]))
             if abbr_indices is None:
-                abbr_indices = indices
+                abbr_indices = set(indices)
             else:
                 abbr_indices &= indices
             if not full_indices and not abbr_indices:
@@ -241,7 +308,7 @@ class LocaleTime(object):
             if self.f_weekday[wd] != self.a_weekday[wd]:
                 indices = set(_findall(datetime, self.a_weekday[wd]))
             if abbr_indices is None:
-                abbr_indices = indices
+                abbr_indices = set(indices)
             else:
                 abbr_indices &= indices
             if not full_indices and not abbr_indices:
@@ -288,8 +355,10 @@ class TimeRE(dict):
             # The " [1-9]" part of the regex is to make %c from ANSI C work
             'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
             'f': r"(?P<f>[0-9]{1,6})",
-            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
+            'H': r"(?P<H>2[0-3]|[0-1]\d|\d| \d)",
+            'k': r"(?P<H>2[0-3]|[0-1]\d|\d| \d)",
             'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
+            'l': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
             'G': r"(?P<G>\d\d\d\d)",
             'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
             'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
@@ -305,23 +374,56 @@ class TimeRE(dict):
             'z': r"(?P<z>([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?",
             'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
             'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
-            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
-            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
+            'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'),
+            'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'),
             'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
             'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
                                         for tz in tz_names),
                                 'Z'),
             '%': '%'}
-        for d in 'dmyHIMS':
-            mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
-        mapping['Ow'] = r'(?P<w>\d)'
+        if self.locale_time.LC_alt_digits is None:
+            for d in 'dmyCHIMS':
+                mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
+            mapping['Ow'] = r'(?P<w>\d)'
+        else:
+            mapping.update({
+                'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd',
+                                     '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'),
+                'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm',
+                                     '1[0-2]|0[1-9]|[1-9]'),
+                'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w',
+                                     '[0-6]'),
+                'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y',
+                                     '[0-9][0-9]'),
+                'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C',
+                                     '[0-9][0-9]'),
+                'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H',
+                                     '2[0-3]|[0-1][0-9]|[0-9]'),
+                'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I',
+                                     '1[0-2]|0[1-9]|[1-9]'),
+                'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M',
+                                     '[0-5][0-9]|[0-9]'),
+                'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S',
+                                     '6[0-1]|[0-5][0-9]|[0-9]'),
+            })
+        mapping.update({
+            'e': mapping['d'],
+            'Oe': mapping['Od'],
+            'P': mapping['p'],
+            'Op': mapping['p'],
+            'W': mapping['U'].replace('U', 'W'),
+        })
         mapping['W'] = mapping['U'].replace('U', 'W')
+
         base.__init__(mapping)
+        base.__setitem__('T', self.pattern('%H:%M:%S'))
+        base.__setitem__('R', self.pattern('%H:%M'))
+        base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm))
         base.__setitem__('X', self.pattern(self.locale_time.LC_time))
         base.__setitem__('x', self.pattern(self.locale_time.LC_date))
         base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
 
-    def __seqToRE(self, to_convert, directive):
+    def __seqToRE(self, to_convert, directive, altregex=None):
         """Convert a list to a regex string for matching a directive.
 
         Want possible matching values to be from longest to shortest.  This
@@ -337,8 +439,9 @@ class TimeRE(dict):
         else:
             return ''
         regex = '|'.join(re_escape(stuff) for stuff in to_convert)
-        regex = '(?P<%s>%s' % (directive, regex)
-        return '%s)' % regex
+        if altregex is not None:
+            regex += '|' + altregex
+        return '(?P<%s>%s)' % (directive, regex)
 
     def pattern(self, format):
         """Return regex pattern for the format string.
@@ -365,7 +468,7 @@ class TimeRE(dict):
                     nonlocal day_of_month_in_format
                     day_of_month_in_format = True
             return self[format_char]
-        format = re_sub(r'%([OE]?\\?.?)', repl, format)
+        format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format)
         if day_of_month_in_format and not year_in_format:
             import warnings
             warnings.warn("""\
@@ -467,6 +570,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
     # values
     weekday = julian = None
     found_dict = found.groupdict()
+    if locale_time.LC_alt_digits:
+        def parse_int(s):
+            try:
+                return locale_time.LC_alt_digits.index(s)
+            except ValueError:
+                return int(s)
+    else:
+        parse_int = int
+
     for group_key in found_dict.keys():
         # Directives not explicitly handled below:
         #   c, x, X
@@ -474,30 +586,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
         #   U, W
         #      worthless without day of the week
         if group_key == 'y':
-            year = int(found_dict['y'])
-            # Open Group specification for strptime() states that a %y
-            #value in the range of [00, 68] is in the century 2000, while
-            #[69,99] is in the century 1900
-            if year <= 68:
-                year += 2000
+            year = parse_int(found_dict['y'])
+            if 'C' in found_dict:
+                century = parse_int(found_dict['C'])
+                year += century * 100
             else:
-                year += 1900
+                # Open Group specification for strptime() states that a %y
+                #value in the range of [00, 68] is in the century 2000, while
+                #[69,99] is in the century 1900
+                if year <= 68:
+                    year += 2000
+                else:
+                    year += 1900
         elif group_key == 'Y':
             year = int(found_dict['Y'])
         elif group_key == 'G':
             iso_year = int(found_dict['G'])
         elif group_key == 'm':
-            month = int(found_dict['m'])
+            month = parse_int(found_dict['m'])
         elif group_key == 'B':
             month = locale_time.f_month.index(found_dict['B'].lower())
         elif group_key == 'b':
             month = locale_time.a_month.index(found_dict['b'].lower())
         elif group_key == 'd':
-            day = int(found_dict['d'])
+            day = parse_int(found_dict['d'])
         elif group_key == 'H':
-            hour = int(found_dict['H'])
+            hour = parse_int(found_dict['H'])
         elif group_key == 'I':
-            hour = int(found_dict['I'])
+            hour = parse_int(found_dict['I'])
             ampm = found_dict.get('p', '').lower()
             # If there was no AM/PM indicator, we'll treat this like AM
             if ampm in ('', locale_time.am_pm[0]):
@@ -513,9 +629,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
                 if hour != 12:
                     hour += 12
         elif group_key == 'M':
-            minute = int(found_dict['M'])
+            minute = parse_int(found_dict['M'])
         elif group_key == 'S':
-            second = int(found_dict['S'])
+            second = parse_int(found_dict['S'])
         elif group_key == 'f':
             s = found_dict['f']
             # Pad to always return microseconds.
diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py
index a7dfb91515a..c83a1573ccd 100644
--- a/Lib/annotationlib.py
+++ b/Lib/annotationlib.py
@@ -27,6 +27,9 @@ class Format(enum.IntEnum):
 
 
 _sentinel = object()
+# Following `NAME_ERROR_MSG` in `ceval_macros.h`:
+_NAME_ERROR_MSG = "name '{name:.200}' is not defined"
+
 
 # Slots shared by ForwardRef and _Stringifier. The __forward__ names must be
 # preserved for compatibility with the old typing.ForwardRef class. The remaining
@@ -184,7 +187,7 @@ class ForwardRef:
             elif is_forwardref_format:
                 return self
             else:
-                raise NameError(arg)
+                raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg)
         else:
             code = self.__forward_code__
             try:
@@ -939,48 +942,49 @@ def get_annotations(
     if not eval_str:
         return dict(ann)
 
-    if isinstance(obj, type):
-        # class
-        obj_globals = None
-        module_name = getattr(obj, "__module__", None)
-        if module_name:
-            module = sys.modules.get(module_name, None)
-            if module:
-                obj_globals = getattr(module, "__dict__", None)
-        obj_locals = dict(vars(obj))
-        unwrap = obj
-    elif isinstance(obj, types.ModuleType):
-        # module
-        obj_globals = getattr(obj, "__dict__")
-        obj_locals = None
-        unwrap = None
-    elif callable(obj):
-        # this includes types.Function, types.BuiltinFunctionType,
-        # types.BuiltinMethodType, functools.partial, functools.singledispatch,
-        # "class funclike" from Lib/test/test_inspect... on and on it goes.
-        obj_globals = getattr(obj, "__globals__", None)
-        obj_locals = None
-        unwrap = obj
-    else:
-        obj_globals = obj_locals = unwrap = None
-
-    if unwrap is not None:
-        while True:
-            if hasattr(unwrap, "__wrapped__"):
-                unwrap = unwrap.__wrapped__
-                continue
-            if functools := sys.modules.get("functools"):
-                if isinstance(unwrap, functools.partial):
-                    unwrap = unwrap.func
+    if globals is None or locals is None:
+        if isinstance(obj, type):
+            # class
+            obj_globals = None
+            module_name = getattr(obj, "__module__", None)
+            if module_name:
+                module = sys.modules.get(module_name, None)
+                if module:
+                    obj_globals = getattr(module, "__dict__", None)
+            obj_locals = dict(vars(obj))
+            unwrap = obj
+        elif isinstance(obj, types.ModuleType):
+            # module
+            obj_globals = getattr(obj, "__dict__")
+            obj_locals = None
+            unwrap = None
+        elif callable(obj):
+            # this includes types.Function, types.BuiltinFunctionType,
+            # types.BuiltinMethodType, functools.partial, functools.singledispatch,
+            # "class funclike" from Lib/test/test_inspect... on and on it goes.
+            obj_globals = getattr(obj, "__globals__", None)
+            obj_locals = None
+            unwrap = obj
+        else:
+            obj_globals = obj_locals = unwrap = None
+
+        if unwrap is not None:
+            while True:
+                if hasattr(unwrap, "__wrapped__"):
+                    unwrap = unwrap.__wrapped__
                     continue
-            break
-        if hasattr(unwrap, "__globals__"):
-            obj_globals = unwrap.__globals__
-
-    if globals is None:
-        globals = obj_globals
-    if locals is None:
-        locals = obj_locals
+                if functools := sys.modules.get("functools"):
+                    if isinstance(unwrap, functools.partial):
+                        unwrap = unwrap.func
+                        continue
+                break
+            if hasattr(unwrap, "__globals__"):
+                obj_globals = unwrap.__globals__
+
+        if globals is None:
+            globals = obj_globals
+        if locals is None:
+            locals = obj_locals
 
     # "Inject" type parameters into the local namespace
     # (unless they are shadowed by assignments *in* the local namespace),
diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py
index f12b4ac33cd..cbb60ce80c1 100644
--- a/Lib/concurrent/futures/interpreter.py
+++ b/Lib/concurrent/futures/interpreter.py
@@ -1,56 +1,39 @@
 """Implements InterpreterPoolExecutor."""
 
-import contextlib
-import pickle
+from concurrent import interpreters
+import sys
 import textwrap
 from . import thread as _thread
-import _interpreters
-import _interpqueues
+import traceback
 
 
-class ExecutionFailed(_interpreters.InterpreterError):
-    """An unhandled exception happened during execution."""
-
-    def __init__(self, excinfo):
-        msg = excinfo.formatted
-        if not msg:
-            if excinfo.type and excinfo.msg:
-                msg = f'{excinfo.type.__name__}: {excinfo.msg}'
-            else:
-                msg = excinfo.type.__name__ or excinfo.msg
-        super().__init__(msg)
-        self.excinfo = excinfo
-
-    def __str__(self):
+def do_call(results, func, args, kwargs):
+    try:
+        return func(*args, **kwargs)
+    except BaseException as exc:
+        # Send the captured exception out on the results queue,
+        # but still leave it unhandled for the interpreter to handle.
         try:
-            formatted = self.excinfo.errdisplay
-        except Exception:
-            return super().__str__()
-        else:
-            return textwrap.dedent(f"""
-{super().__str__()}
-
-Uncaught in the interpreter:
-
-{formatted}
-                """.strip())
+            results.put(exc)
+        except interpreters.NotShareableError:
+            # The exception is not shareable.
+            print('exception is not shareable:', file=sys.stderr)
+            traceback.print_exception(exc)
+            results.put(None)
+        raise  # re-raise
 
 
 class WorkerContext(_thread.WorkerContext):
 
     @classmethod
-    def prepare(cls, initializer, initargs, shared):
+    def prepare(cls, initializer, initargs):
         def resolve_task(fn, args, kwargs):
             if isinstance(fn, str):
                 # XXX Circle back to this later.
                 raise TypeError('scripts not supported')
             else:
-                # Functions defined in the __main__ module can't be pickled,
-                # so they can't be used here.  In the future, we could possibly
-                # borrow from multiprocessing to work around this.
                 task = (fn, args, kwargs)
-                data = pickle.dumps(task)
-            return data
+            return task
 
         if initializer is not None:
             try:
@@ -62,68 +45,24 @@ class WorkerContext(_thread.WorkerContext):
         else:
             initdata = None
         def create_context():
-            return cls(initdata, shared)
+            return cls(initdata)
         return create_context, resolve_task
 
-    @classmethod
-    @contextlib.contextmanager
-    def _capture_exc(cls, resultsid):
-        try:
-            yield
-        except BaseException as exc:
-            # Send the captured exception out on the results queue,
-            # but still leave it unhandled for the interpreter to handle.
-            _interpqueues.put(resultsid, (None, exc))
-            raise  # re-raise
-
-    @classmethod
-    def _send_script_result(cls, resultsid):
-        _interpqueues.put(resultsid, (None, None))
-
-    @classmethod
-    def _call(cls, func, args, kwargs, resultsid):
-        with cls._capture_exc(resultsid):
-            res = func(*args or (), **kwargs or {})
-        # Send the result back.
-        with cls._capture_exc(resultsid):
-            _interpqueues.put(resultsid, (res, None))
-
-    @classmethod
-    def _call_pickled(cls, pickled, resultsid):
-        with cls._capture_exc(resultsid):
-            fn, args, kwargs = pickle.loads(pickled)
-        cls._call(fn, args, kwargs, resultsid)
-
-    def __init__(self, initdata, shared=None):
+    def __init__(self, initdata):
         self.initdata = initdata
-        self.shared = dict(shared) if shared else None
-        self.interpid = None
-        self.resultsid = None
+        self.interp = None
+        self.results = None
 
     def __del__(self):
-        if self.interpid is not None:
+        if self.interp is not None:
             self.finalize()
 
-    def _exec(self, script):
-        assert self.interpid is not None
-        excinfo = _interpreters.exec(self.interpid, script, restrict=True)
-        if excinfo is not None:
-            raise ExecutionFailed(excinfo)
-
     def initialize(self):
-        assert self.interpid is None, self.interpid
-        self.interpid = _interpreters.create(reqrefs=True)
+        assert self.interp is None, self.interp
+        self.interp = interpreters.create()
         try:
-            _interpreters.incref(self.interpid)
-
             maxsize = 0
-            self.resultsid = _interpqueues.create(maxsize)
-
-            self._exec(f'from {__name__} import WorkerContext')
-
-            if self.shared:
-                _interpreters.set___main___attrs(
-                                    self.interpid, self.shared, restrict=True)
+            self.results = interpreters.create_queue(maxsize)
 
             if self.initdata:
                 self.run(self.initdata)
@@ -132,53 +71,25 @@ class WorkerContext(_thread.WorkerContext):
             raise  # re-raise
 
     def finalize(self):
-        interpid = self.interpid
-        resultsid = self.resultsid
-        self.resultsid = None
-        self.interpid = None
-        if resultsid is not None:
-            try:
-                _interpqueues.destroy(resultsid)
-            except _interpqueues.QueueNotFoundError:
-                pass
-        if interpid is not None:
-            try:
-                _interpreters.decref(interpid)
-            except _interpreters.InterpreterNotFoundError:
-                pass
+        interp = self.interp
+        results = self.results
+        self.results = None
+        self.interp = None
+        if results is not None:
+            del results
+        if interp is not None:
+            interp.close()
 
     def run(self, task):
-        data = task
-        script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})'
-
         try:
-            self._exec(script)
-        except ExecutionFailed as exc:
-            exc_wrapper = exc
-        else:
-            exc_wrapper = None
-
-        # Return the result, or raise the exception.
-        while True:
-            try:
-                obj = _interpqueues.get(self.resultsid)
-            except _interpqueues.QueueNotFoundError:
+            return self.interp.call(do_call, self.results, *task)
+        except interpreters.ExecutionFailed as wrapper:
+            # Wait for the exception data to show up.
+            exc = self.results.get()
+            if exc is None:
+                # The exception must have been not shareable.
                 raise  # re-raise
-            except _interpqueues.QueueError:
-                continue
-            except ModuleNotFoundError:
-                # interpreters._queues doesn't exist, which means
-                # QueueEmpty doesn't.  Act as though it does.
-                continue
-            else:
-                break
-        (res, exc), unboundop = obj
-        assert unboundop is None, unboundop
-        if exc is not None:
-            assert res is None, res
-            assert exc_wrapper is not None
-            raise exc from exc_wrapper
-        return res
+            raise exc from wrapper
 
 
 class BrokenInterpreterPool(_thread.BrokenThreadPool):
@@ -192,11 +103,11 @@ class InterpreterPoolExecutor(_thread.ThreadPoolExecutor):
     BROKEN = BrokenInterpreterPool
 
     @classmethod
-    def prepare_context(cls, initializer, initargs, shared):
-        return WorkerContext.prepare(initializer, initargs, shared)
+    def prepare_context(cls, initializer, initargs):
+        return WorkerContext.prepare(initializer, initargs)
 
     def __init__(self, max_workers=None, thread_name_prefix='',
-                 initializer=None, initargs=(), shared=None):
+                 initializer=None, initargs=()):
         """Initializes a new InterpreterPoolExecutor instance.
 
         Args:
@@ -206,8 +117,6 @@ class InterpreterPoolExecutor(_thread.ThreadPoolExecutor):
             initializer: A callable or script used to initialize
                 each worker interpreter.
             initargs: A tuple of arguments to pass to the initializer.
-            shared: A mapping of shareabled objects to be inserted into
-                each worker interpreter.
         """
         super().__init__(max_workers, thread_name_prefix,
-                         initializer, initargs, shared=shared)
+                         initializer, initargs)
diff --git a/Lib/configparser.py b/Lib/configparser.py
index 239fda60a02..18af1eadaad 100644
--- a/Lib/configparser.py
+++ b/Lib/configparser.py
@@ -1218,11 +1218,14 @@ class RawConfigParser(MutableMapping):
 
     def _validate_key_contents(self, key):
         """Raises an InvalidWriteError for any keys containing
-        delimiters or that match the section header pattern"""
+        delimiters or that begins with the section header pattern"""
         if re.match(self.SECTCRE, key):
-            raise InvalidWriteError("Cannot write keys matching section pattern")
-        if any(delim in key for delim in self._delimiters):
-            raise InvalidWriteError("Cannot write key that contains delimiters")
+            raise InvalidWriteError(
+                f"Cannot write key {key}; begins with section pattern")
+        for delim in self._delimiters:
+            if delim in key:
+                raise InvalidWriteError(
+                    f"Cannot write key {key}; contains delimiter {delim}")
 
     def _validate_value_types(self, *, section="", option="", value=""):
         """Raises a TypeError for illegal non-string values.
diff --git a/Lib/difflib.py b/Lib/difflib.py
index 18801a9b19e..487936dbf47 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -78,8 +78,8 @@ class SequenceMatcher:
     sequences.  As a rule of thumb, a .ratio() value over 0.6 means the
     sequences are close matches:
 
-    >>> print(round(s.ratio(), 3))
-    0.866
+    >>> print(round(s.ratio(), 2))
+    0.87
     >>>
 
     If you're only interested in where the sequences match,
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 113a81f4131..220a84a7454 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -59,16 +59,22 @@ _max_append = email.quoprimime._max_append
 def decode_header(header):
     """Decode a message header value without converting charset.
 
-    Returns a list of (string, charset) pairs containing each of the decoded
-    parts of the header.  Charset is None for non-encoded parts of the header,
-    otherwise a lower-case string containing the name of the character set
-    specified in the encoded string.
+    For historical reasons, this function may return either:
+
+    1. A list of length 1 containing a pair (str, None).
+    2. A list of (bytes, charset) pairs containing each of the decoded
+       parts of the header.  Charset is None for non-encoded parts of the header,
+       otherwise a lower-case string containing the name of the character set
+       specified in the encoded string.
 
     header may be a string that may or may not contain RFC2047 encoded words,
     or it may be a Header object.
 
     An email.errors.HeaderParseError may be raised when certain decoding error
     occurs (e.g. a base64 decoding exception).
+
+    This function exists for backwards compatibility only. For new code, we
+    recommend using email.headerregistry.HeaderRegistry instead.
     """
     # If it is a Header object, we can just return the encoded chunks.
     if hasattr(header, '_chunks'):
@@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
     This function takes one of those sequence of pairs and returns a Header
     instance.  Optional maxlinelen, header_name, and continuation_ws are as in
     the Header constructor.
+
+    This function exists for backwards compatibility only, and is not
+    recommended for use in new code.
     """
     h = Header(maxlinelen=maxlinelen, header_name=header_name,
                continuation_ws=continuation_ws)
diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py
index c506d654523..df164ca5b95 100644
--- a/Lib/encodings/palmos.py
+++ b/Lib/encodings/palmos.py
@@ -201,7 +201,7 @@ decoding_table = (
     '\u02dc'   #  0x98 -> SMALL TILDE
     '\u2122'   #  0x99 -> TRADE MARK SIGN
     '\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
-    '\x9b'     #  0x9B -> <control>
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
     '\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
     '\x9d'     #  0x9D -> <control>
     '\x9e'     #  0x9E -> <control>
diff --git a/Lib/http/server.py b/Lib/http/server.py
index ef10d185932..a2ffbe2e44d 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -115,7 +115,7 @@ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
 class HTTPServer(socketserver.TCPServer):
 
     allow_reuse_address = True    # Seems to make sense in testing environment
-    allow_reuse_port = True
+    allow_reuse_port = False
 
     def server_bind(self):
         """Override server_bind to store the server name."""
diff --git a/Lib/idlelib/NEWS2x.txt b/Lib/idlelib/NEWS2x.txt
index 6751ca5f111..3721193007e 100644
--- a/Lib/idlelib/NEWS2x.txt
+++ b/Lib/idlelib/NEWS2x.txt
@@ -1,6 +1,6 @@
 What's New in IDLE 2.7? (Merged into 3.1 before 2.7 release.)
 =======================
-*Release date: XX-XXX-2010*
+*Release date: 03-Jul-2010*
 
 - idle.py modified and simplified to better support developing experimental
   versions of IDLE which are not installed in the standard location.
diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py
index 5c3c4424934..c5860d53b1b 100644
--- a/Lib/logging/__init__.py
+++ b/Lib/logging/__init__.py
@@ -1475,8 +1475,6 @@ class Logger(Filterer):
     level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels.
     There is no arbitrary limit to the depth of nesting.
     """
-    _tls = threading.local()
-
     def __init__(self, name, level=NOTSET):
         """
         Initialize the logger with a name and an optional level.
@@ -1673,19 +1671,14 @@ class Logger(Filterer):
         This method is used for unpickled records received from a socket, as
         well as those created locally. Logger-level filtering is applied.
         """
-        if self._is_disabled():
+        if self.disabled:
             return
-
-        self._tls.in_progress = True
-        try:
-            maybe_record = self.filter(record)
-            if not maybe_record:
-                return
-            if isinstance(maybe_record, LogRecord):
-                record = maybe_record
-            self.callHandlers(record)
-        finally:
-            self._tls.in_progress = False
+        maybe_record = self.filter(record)
+        if not maybe_record:
+            return
+        if isinstance(maybe_record, LogRecord):
+            record = maybe_record
+        self.callHandlers(record)
 
     def addHandler(self, hdlr):
         """
@@ -1773,7 +1766,7 @@ class Logger(Filterer):
         """
         Is this logger enabled for level 'level'?
         """
-        if self._is_disabled():
+        if self.disabled:
             return False
 
         try:
@@ -1823,11 +1816,6 @@ class Logger(Filterer):
                        if isinstance(item, Logger) and item.parent is self and
                        _hierlevel(item) == 1 + _hierlevel(item.parent))
 
-    def _is_disabled(self):
-        # We need to use getattr as it will only be set the first time a log
-        # message is recorded on any given thread
-        return self.disabled or getattr(self._tls, 'in_progress', False)
-
     def __repr__(self):
         level = getLevelName(self.getEffectiveLevel())
         return '<%s %s (%s)>' % (self.__class__.__name__, self.name, level)
diff --git a/Lib/logging/config.py b/Lib/logging/config.py
index c994349fd6e..3d9aa00fa52 100644
--- a/Lib/logging/config.py
+++ b/Lib/logging/config.py
@@ -1018,7 +1018,7 @@ def listen(port=DEFAULT_LOGGING_CONFIG_PORT, verify=None):
         """
 
         allow_reuse_address = True
-        allow_reuse_port = True
+        allow_reuse_port = False
 
         def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT,
                      handler=None, ready=None, verify=None):
diff --git a/Lib/multiprocessing/forkserver.py b/Lib/multiprocessing/forkserver.py
index 681af2610e9..c91891ff162 100644
--- a/Lib/multiprocessing/forkserver.py
+++ b/Lib/multiprocessing/forkserver.py
@@ -222,6 +222,10 @@ def main(listener_fd, alive_r, preload, main_path=None, sys_path=None,
             except ImportError:
                 pass
 
+        # gh-135335: flush stdout/stderr in case any of the preloaded modules
+        # wrote to them, otherwise children might inherit buffered data
+        util._flush_std_streams()
+
     util._close_stdin()
 
     sig_r, sig_w = os.pipe()
diff --git a/Lib/netrc.py b/Lib/netrc.py
index b285fd8e357..2f502c1d533 100644
--- a/Lib/netrc.py
+++ b/Lib/netrc.py
@@ -7,6 +7,19 @@ import os, stat
 __all__ = ["netrc", "NetrcParseError"]
 
 
+def _can_security_check():
+    # On WASI, getuid() is indicated as a stub but it may also be missing.
+    return os.name == 'posix' and hasattr(os, 'getuid')
+
+
+def _getpwuid(uid):
+    try:
+        import pwd
+        return pwd.getpwuid(uid)[0]
+    except (ImportError, LookupError):
+        return f'uid {uid}'
+
+
 class NetrcParseError(Exception):
     """Exception raised on syntax errors in the .netrc file."""
     def __init__(self, msg, filename=None, lineno=None):
@@ -142,21 +155,15 @@ class netrc:
             self._security_check(fp, default_netrc, self.hosts[entryname][0])
 
     def _security_check(self, fp, default_netrc, login):
-        if os.name == 'posix' and default_netrc and login != "anonymous":
+        if _can_security_check() and default_netrc and login != "anonymous":
             prop = os.fstat(fp.fileno())
-            if prop.st_uid != os.getuid():
-                import pwd
-                try:
-                    fowner = pwd.getpwuid(prop.st_uid)[0]
-                except KeyError:
-                    fowner = 'uid %s' % prop.st_uid
-                try:
-                    user = pwd.getpwuid(os.getuid())[0]
-                except KeyError:
-                    user = 'uid %s' % os.getuid()
+            current_user_id = os.getuid()
+            if prop.st_uid != current_user_id:
+                fowner = _getpwuid(prop.st_uid)
+                user = _getpwuid(current_user_id)
                 raise NetrcParseError(
-                    (f"~/.netrc file owner ({fowner}, {user}) does not match"
-                     " current user"))
+                    f"~/.netrc file owner ({fowner}) does not match"
+                    f" current user ({user})")
             if (prop.st_mode & (stat.S_IRWXG | stat.S_IRWXO)):
                 raise NetrcParseError(
                     "~/.netrc access too permissive: access"
diff --git a/Lib/platform.py b/Lib/platform.py
index 077db81264a..e7f180fc5ac 100644
--- a/Lib/platform.py
+++ b/Lib/platform.py
@@ -1144,7 +1144,7 @@ def _sys_version(sys_version=None):
         # CPython
         cpython_sys_version_parser = re.compile(
             r'([\w.+]+)\s*'  # "version<space>"
-            r'(?:experimental free-threading build\s+)?' # "free-threading-build<space>"
+            r'(?:free-threading build\s+)?' # "free-threading-build<space>"
             r'\(#?([^,]+)'  # "(#buildno"
             r'(?:,\s*([\w ]*)'  # ", builddate"
             r'(?:,\s*([\w :]*))?)?\)\s*'  # ", buildtime)<space>"
diff --git a/Lib/pprint.py b/Lib/pprint.py
index 1e611481b51..92a2c543ac2 100644
--- a/Lib/pprint.py
+++ b/Lib/pprint.py
@@ -653,6 +653,40 @@ class PrettyPrinter:
             del context[objid]
             return "{%s}" % ", ".join(components), readable, recursive
 
+        if (issubclass(typ, list) and r is list.__repr__) or \
+           (issubclass(typ, tuple) and r is tuple.__repr__):
+            if issubclass(typ, list):
+                if not object:
+                    return "[]", True, False
+                format = "[%s]"
+            elif len(object) == 1:
+                format = "(%s,)"
+            else:
+                if not object:
+                    return "()", True, False
+                format = "(%s)"
+            objid = id(object)
+            if maxlevels and level >= maxlevels:
+                return format % "...", False, objid in context
+            if objid in context:
+                return _recursion(object), False, True
+            context[objid] = 1
+            readable = True
+            recursive = False
+            components = []
+            append = components.append
+            level += 1
+            for o in object:
+                orepr, oreadable, orecur = self.format(
+                    o, context, maxlevels, level)
+                append(orepr)
+                if not oreadable:
+                    readable = False
+                if orecur:
+                    recursive = True
+            del context[objid]
+            return format % ", ".join(components), readable, recursive
+
         if issubclass(typ, _collections.abc.MappingView) and r in self._view_reprs:
             objid = id(object)
             if maxlevels and level >= maxlevels:
@@ -689,40 +723,6 @@ class PrettyPrinter:
             del context[objid]
             return typ.__name__ + '([%s])' % ", ".join(components), readable, recursive
 
-        if (issubclass(typ, list) and r is list.__repr__) or \
-           (issubclass(typ, tuple) and r is tuple.__repr__):
-            if issubclass(typ, list):
-                if not object:
-                    return "[]", True, False
-                format = "[%s]"
-            elif len(object) == 1:
-                format = "(%s,)"
-            else:
-                if not object:
-                    return "()", True, False
-                format = "(%s)"
-            objid = id(object)
-            if maxlevels and level >= maxlevels:
-                return format % "...", False, objid in context
-            if objid in context:
-                return _recursion(object), False, True
-            context[objid] = 1
-            readable = True
-            recursive = False
-            components = []
-            append = components.append
-            level += 1
-            for o in object:
-                orepr, oreadable, orecur = self.format(
-                    o, context, maxlevels, level)
-                append(orepr)
-                if not oreadable:
-                    readable = False
-                if orecur:
-                    recursive = True
-            del context[objid]
-            return format % ", ".join(components), readable, recursive
-
         rep = repr(object)
         return rep, (rep and not rep.startswith('<')), False
 
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
index 7528178fdca..d508fb70ea4 100644
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -1812,7 +1812,6 @@ def writedocs(dir, pkgpath='', done=None):
 
 
 def _introdoc():
-    import textwrap
     ver = '%d.%d' % sys.version_info[:2]
     if os.environ.get('PYTHON_BASIC_REPL'):
         pyrepl_keys = ''
@@ -2170,7 +2169,6 @@ module "pydoc_data.topics" could not be found.
         if more_xrefs:
             xrefs = (xrefs or '') + ' ' + more_xrefs
         if xrefs:
-            import textwrap
             text = 'Related help topics: ' + ', '.join(xrefs.split()) + '\n'
             wrapped_text = textwrap.wrap(text, 72)
             doc += '\n%s\n' % '\n'.join(wrapped_text)
diff --git a/Lib/reprlib.py b/Lib/reprlib.py
index 441d1be4bde..ab18247682b 100644
--- a/Lib/reprlib.py
+++ b/Lib/reprlib.py
@@ -181,7 +181,22 @@ class Repr:
         return s
 
     def repr_int(self, x, level):
-        s = builtins.repr(x) # XXX Hope this isn't too slow...
+        try:
+            s = builtins.repr(x)
+        except ValueError as exc:
+            assert 'sys.set_int_max_str_digits()' in str(exc)
+            # Those imports must be deferred due to Python's build system
+            # where the reprlib module is imported before the math module.
+            import math, sys
+            # Integers with more than sys.get_int_max_str_digits() digits
+            # are rendered differently as their repr() raises a ValueError.
+            # See https://github.com/python/cpython/issues/135487.
+            k = 1 + int(math.log10(abs(x)))
+            # Note: math.log10(abs(x)) may be overestimated or underestimated,
+            # but for simplicity, we do not compute the exact number of digits.
+            max_digits = sys.get_int_max_str_digits()
+            return (f'<{x.__class__.__name__} instance with roughly {k} '
+                    f'digits (limit at {max_digits}) at 0x{id(x):x}>')
         if len(s) > self.maxlong:
             i = max(0, (self.maxlong-3)//2)
             j = max(0, self.maxlong-3-i)
diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py
index 9e74b49ee82..35344ecceff 100644
--- a/Lib/sqlite3/__main__.py
+++ b/Lib/sqlite3/__main__.py
@@ -63,17 +63,21 @@ class SqliteInteractiveConsole(InteractiveConsole):
         if source[0] == ".":
             match source[1:].strip():
                 case "version":
-                    print(f"{sqlite3.sqlite_version}")
+                    print(sqlite3.sqlite_version)
                 case "help":
-                    print("Enter SQL code and press enter.")
+                    t = theme.syntax
+                    print(f"Enter SQL code or one of the below commands, and press enter.\n\n"
+                          f"{t.builtin}.version{t.reset}    Print underlying SQLite library version\n"
+                          f"{t.builtin}.help{t.reset}       Print this help message\n"
+                          f"{t.builtin}.quit{t.reset}       Exit the CLI, equivalent to CTRL-D\n")
                 case "quit":
                     sys.exit(0)
                 case "":
                     pass
                 case _ as unknown:
                     t = theme.traceback
-                    self.write(f'{t.type}Error{t.reset}:{t.message} unknown'
-                               f'command or invalid arguments:  "{unknown}".\n{t.reset}')
+                    self.write(f'{t.type}Error{t.reset}: {t.message}unknown '
+                               f'command: "{unknown}"{t.reset}\n')
         else:
             if not sqlite3.complete_statement(source):
                 return True
diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py
index 75f31d858d3..a1259ff1d63 100644
--- a/Lib/test/_test_multiprocessing.py
+++ b/Lib/test/_test_multiprocessing.py
@@ -6801,6 +6801,35 @@ class _TestSpawnedSysPath(BaseTestCase):
         self.assertEqual(child_sys_path[1:], sys.path[1:])
         self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}")
 
+    def test_std_streams_flushed_after_preload(self):
+        # gh-135335: Check fork server flushes standard streams after
+        # preloading modules
+        if multiprocessing.get_start_method() != "forkserver":
+            self.skipTest("forkserver specific test")
+
+        # Create a test module in the temporary directory on the child's path
+        # TODO: This can all be simplified once gh-126631 is fixed and we can
+        #       use __main__ instead of a module.
+        dirname = os.path.join(self._temp_dir, 'preloaded_module')
+        init_name = os.path.join(dirname, '__init__.py')
+        os.mkdir(dirname)
+        with open(init_name, "w") as f:
+            cmd = '''if 1:
+                import sys
+                print('stderr', end='', file=sys.stderr)
+                print('stdout', end='', file=sys.stdout)
+            '''
+            f.write(cmd)
+
+        name = os.path.join(os.path.dirname(__file__), 'mp_preload_flush.py')
+        env = {'PYTHONPATH': self._temp_dir}
+        _, out, err = test.support.script_helper.assert_python_ok(name, **env)
+
+        # Check stderr first, as it is more likely to be useful to see in the
+        # event of a failure.
+        self.assertEqual(err.decode().rstrip(), 'stderr')
+        self.assertEqual(out.decode().rstrip(), 'stdout')
+
 
 class MiscTestCase(unittest.TestCase):
     def test__all__(self):
diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py
index 08b638e4b8d..6884ac0dbe6 100644
--- a/Lib/test/audit-tests.py
+++ b/Lib/test/audit-tests.py
@@ -643,6 +643,34 @@ def test_assert_unicode():
     else:
         raise RuntimeError("Expected sys.audit(9) to fail.")
 
+def test_sys_remote_exec():
+    import tempfile
+
+    pid = os.getpid()
+    event_pid = -1
+    event_script_path = ""
+    remote_event_script_path = ""
+    def hook(event, args):
+        if event not in ["sys.remote_exec", "cpython.remote_debugger_script"]:
+            return
+        print(event, args)
+        match event:
+            case "sys.remote_exec":
+                nonlocal event_pid, event_script_path
+                event_pid = args[0]
+                event_script_path = args[1]
+            case "cpython.remote_debugger_script":
+                nonlocal remote_event_script_path
+                remote_event_script_path = args[0]
+
+    sys.addaudithook(hook)
+    with tempfile.NamedTemporaryFile(mode='w+', delete=True) as tmp_file:
+        tmp_file.write("a = 1+1\n")
+        tmp_file.flush()
+        sys.remote_exec(pid, tmp_file.name)
+        assertEqual(event_pid, pid)
+        assertEqual(event_script_path, tmp_file.name)
+        assertEqual(remote_event_script_path, tmp_file.name)
 
 if __name__ == "__main__":
     from test.support import suppress_msvcrt_asserts
diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index 0d9c059a938..a2d01b157ac 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -190,6 +190,12 @@ class Regrtest:
 
         strip_py_suffix(tests)
 
+        exclude_tests = set()
+        if self.exclude:
+            for arg in self.cmdline_args:
+                exclude_tests.add(arg)
+            self.cmdline_args = []
+
         if self.pgo:
             # add default PGO tests if no tests are specified
             setup_pgo_tests(self.cmdline_args, self.pgo_extended)
@@ -200,17 +206,15 @@ class Regrtest:
         if self.tsan_parallel:
             setup_tsan_parallel_tests(self.cmdline_args)
 
-        exclude_tests = set()
-        if self.exclude:
-            for arg in self.cmdline_args:
-                exclude_tests.add(arg)
-            self.cmdline_args = []
-
         alltests = findtests(testdir=self.test_dir,
                              exclude=exclude_tests)
 
         if not self.fromfile:
             selected = tests or self.cmdline_args
+            if exclude_tests:
+                # Support "--pgo/--tsan -x test_xxx" command
+                selected = [name for name in selected
+                            if name not in exclude_tests]
             if selected:
                 selected = split_test_packages(selected)
             else:
diff --git a/Lib/test/libregrtest/single.py b/Lib/test/libregrtest/single.py
index 57d7b649d2e..958a915626a 100644
--- a/Lib/test/libregrtest/single.py
+++ b/Lib/test/libregrtest/single.py
@@ -283,7 +283,7 @@ def _runtest(result: TestResult, runtests: RunTests) -> None:
     try:
         setup_tests(runtests)
 
-        if output_on_failure:
+        if output_on_failure or runtests.pgo:
             support.verbose = True
 
             stream = io.StringIO()
diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py
index d984a735bdf..3545c5f999f 100644
--- a/Lib/test/libregrtest/tsan.py
+++ b/Lib/test/libregrtest/tsan.py
@@ -8,7 +8,7 @@ TSAN_TESTS = [
     'test_capi.test_pyatomic',
     'test_code',
     'test_ctypes',
-    # 'test_concurrent_futures',  # gh-130605: too many data races
+    'test_concurrent_futures',
     'test_enum',
     'test_functools',
     'test_httpservers',
diff --git a/Lib/test/mp_preload_flush.py b/Lib/test/mp_preload_flush.py
new file mode 100644
index 00000000000..3501554d366
--- /dev/null
+++ b/Lib/test/mp_preload_flush.py
@@ -0,0 +1,15 @@
+import multiprocessing
+import sys
+
+modname = 'preloaded_module'  # module the fork server is asked to preload
+if __name__ == '__main__':
+    if modname in sys.modules:
+        raise AssertionError(f'{modname!r} is already in sys.modules')
+    multiprocessing.set_start_method('forkserver')
+    multiprocessing.set_forkserver_preload([modname])
+    for _ in range(2):
+        p = multiprocessing.Process()
+        p.start()
+        p.join()
+elif modname not in sys.modules:  # reached when not run as __main__
+    raise AssertionError(f'{modname!r} is not in sys.modules')
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 48e74adcce3..fd39d3f7c95 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -46,6 +46,7 @@ __all__ = [
     # sys
     "MS_WINDOWS", "is_jython", "is_android", "is_emscripten", "is_wasi",
     "is_apple_mobile", "check_impl_detail", "unix_shell", "setswitchinterval",
+    "support_remote_exec_only",
     # os
     "get_pagesize",
     # network
@@ -2332,6 +2333,7 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds):
         qualname = f"{name}"
     msg = f"cannot create '{re.escape(qualname)}' instances"
     testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds)
+    testcase.assertRaisesRegex(TypeError, msg, tp.__new__, tp, *args, **kwds)
 
 def get_recursion_depth():
     """Get the recursion depth of the caller function.
@@ -3069,6 +3071,27 @@ def is_libssl_fips_mode():
         return False  # more of a maybe, unless we add this to the _ssl module.
     return get_fips_mode() != 0
 
+def _supports_remote_attaching():
+    PROCESS_VM_READV_SUPPORTED = False
+
+    try:
+        from _remote_debugging import PROCESS_VM_READV_SUPPORTED
+    except ImportError:
+        pass
+
+    return PROCESS_VM_READV_SUPPORTED
+
+def _support_remote_exec_only_impl():
+    if not sys.is_remote_debug_enabled():
+        return unittest.skip("Remote debugging is not enabled")
+    if sys.platform not in ("darwin", "linux", "win32"):
+        return unittest.skip("Test only runs on Linux, Windows and macOS")
+    if sys.platform == "linux" and not _supports_remote_attaching():
+        return unittest.skip("Test only runs on Linux with process_vm_readv support")
+    return _id
+
+def support_remote_exec_only(test):
+    return _support_remote_exec_only_impl()(test)
 
 class EqualToForwardRef:
     """Helper to ease use of annotationlib.ForwardRef in tests.
diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py
index ad3ebbfdff6..a32d5d81d2b 100644
--- a/Lib/test/test__interpreters.py
+++ b/Lib/test/test__interpreters.py
@@ -485,6 +485,21 @@ class CommonTests(TestBase):
         msg = r'_interpreters.run_func\(\) argument 3 must be dict, not int'
         with self.assertRaisesRegex(TypeError, msg):
             _interpreters.run_func(self.id, lambda: None, shared=1)
+        # See https://github.com/python/cpython/issues/135855
+        msg = r'_interpreters.set___main___attrs\(\) argument 2 must be dict, not int'
+        with self.assertRaisesRegex(TypeError, msg):
+            _interpreters.set___main___attrs(self.id, 1)
+
+    def test_invalid_shared_none(self):
+        msg = r'must be dict, not None'
+        with self.assertRaisesRegex(TypeError, msg):
+            _interpreters.exec(self.id, 'a', shared=None)
+        with self.assertRaisesRegex(TypeError, msg):
+            _interpreters.run_string(self.id, 'a', shared=None)
+        with self.assertRaisesRegex(TypeError, msg):
+            _interpreters.run_func(self.id, lambda: None, shared=None)
+        with self.assertRaisesRegex(TypeError, msg):
+            _interpreters.set___main___attrs(self.id, None)
 
     def test_invalid_shared_encoding(self):
         # See https://github.com/python/cpython/issues/127196
diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py
index fe091e52a86..ae0e73f08c5 100644
--- a/Lib/test/test_annotationlib.py
+++ b/Lib/test/test_annotationlib.py
@@ -1650,9 +1650,11 @@ class TestForwardRefClass(unittest.TestCase):
         with support.swap_attr(builtins, "int", dict):
             self.assertIs(ForwardRef("int").evaluate(), dict)
 
-        with self.assertRaises(NameError):
+        with self.assertRaises(NameError, msg="name 'doesntexist' is not defined") as exc:
             ForwardRef("doesntexist").evaluate()
 
+        self.assertEqual(exc.exception.name, "doesntexist")
+
     def test_fwdref_invalid_syntax(self):
         fr = ForwardRef("if")
         with self.assertRaises(SyntaxError):
diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py
index 5f9eb381f60..077765fcda2 100644
--- a/Lib/test/test_audit.py
+++ b/Lib/test/test_audit.py
@@ -322,6 +322,14 @@ class AuditTest(unittest.TestCase):
         if returncode:
             self.fail(stderr)
 
+    @support.support_remote_exec_only
+    @support.cpython_only
+    def test_sys_remote_exec(self):
+        returncode, events, stderr = self.run_python("test_sys_remote_exec")
+        self.assertTrue(any(["sys.remote_exec" in event for event in events]))
+        self.assertTrue(any(["cpython.remote_debugger_script" in event for event in events]))
+        if returncode:
+            self.fail(stderr)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_build_details.py b/Lib/test/test_build_details.py
index 05ce163a337..ba4b8c5aa9b 100644
--- a/Lib/test/test_build_details.py
+++ b/Lib/test/test_build_details.py
@@ -117,12 +117,26 @@ class CPythonBuildDetailsTests(unittest.TestCase, FormatTestsBase):
     # Override generic format tests with tests for our specific implemenation.
 
     @needs_installed_python
-    @unittest.skipIf(is_android or is_apple_mobile, 'Android and iOS run tests via a custom testbed method that changes sys.executable')
+    @unittest.skipIf(
+        is_android or is_apple_mobile,
+        'Android and iOS run tests via a custom testbed method that changes sys.executable'
+    )
     def test_base_interpreter(self):
         value = self.key('base_interpreter')
 
         self.assertEqual(os.path.realpath(value), os.path.realpath(sys.executable))
 
+    @needs_installed_python
+    @unittest.skipIf(
+        is_android or is_apple_mobile,
+        "Android and iOS run tests via a custom testbed method that doesn't ship headers"
+    )
+    def test_c_api(self):
+        value = self.key('c_api')
+        self.assertTrue(os.path.exists(os.path.join(value['headers'], 'Python.h')))
+        version = sysconfig.get_config_var('VERSION')
+        self.assertTrue(os.path.exists(os.path.join(value['pkgconfig_path'], f'python-{version}.pc')))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index d221aa5e1d9..14fe3355239 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -2991,7 +2991,8 @@ class TestType(unittest.TestCase):
 
 def load_tests(loader, tests, pattern):
     from doctest import DocTestSuite
-    tests.addTest(DocTestSuite(builtins))
+    if sys.float_repr_style == 'short':
+        tests.addTest(DocTestSuite(builtins))
     return tests
 
 if __name__ == "__main__":
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index f74694a7a74..ef950f5df04 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -413,11 +413,13 @@ class CAPITest(unittest.TestCase):
 
     @support.requires_resource('cpu')
     @support.skip_emscripten_stack_overflow()
+    @support.skip_wasi_stack_overflow()
     def test_trashcan_python_class1(self):
         self.do_test_trashcan_python_class(list)
 
     @support.requires_resource('cpu')
     @support.skip_emscripten_stack_overflow()
+    @support.skip_wasi_stack_overflow()
     def test_trashcan_python_class2(self):
         from _testcapi import MyList
         self.do_test_trashcan_python_class(MyList)
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index bf22ef2a592..e4c9a463855 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -407,12 +407,12 @@ class TestUops(unittest.TestCase):
             x = 0
             for i in range(m):
                 for j in MyIter(n):
-                    x += 1000*i + j
+                    x += j
             return x
 
-        x = testfunc(TIER2_THRESHOLD, TIER2_THRESHOLD)
+        x = testfunc(TIER2_THRESHOLD, 2)
 
-        self.assertEqual(x, sum(range(TIER2_THRESHOLD)) * TIER2_THRESHOLD * 1001)
+        self.assertEqual(x, sum(range(TIER2_THRESHOLD)) * 2)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -678,7 +678,7 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertLessEqual(len(guard_nos_float_count), 1)
         # TODO gh-115506: this assertion may change after propagating constants.
         # We'll also need to verify that propagation actually occurs.
-        self.assertIn("_BINARY_OP_ADD_FLOAT", uops)
+        self.assertIn("_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS", uops)
 
     def test_float_subtract_constant_propagation(self):
         def testfunc(n):
@@ -700,7 +700,7 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertLessEqual(len(guard_nos_float_count), 1)
         # TODO gh-115506: this assertion may change after propagating constants.
         # We'll also need to verify that propagation actually occurs.
-        self.assertIn("_BINARY_OP_SUBTRACT_FLOAT", uops)
+        self.assertIn("_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS", uops)
 
     def test_float_multiply_constant_propagation(self):
         def testfunc(n):
@@ -722,7 +722,7 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertLessEqual(len(guard_nos_float_count), 1)
         # TODO gh-115506: this assertion may change after propagating constants.
         # We'll also need to verify that propagation actually occurs.
-        self.assertIn("_BINARY_OP_MULTIPLY_FLOAT", uops)
+        self.assertIn("_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS", uops)
 
     def test_add_unicode_propagation(self):
         def testfunc(n):
@@ -1381,6 +1381,21 @@ class TestUopsOptimization(unittest.TestCase):
         # Removed guard
         self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)
 
+    def test_method_guards_removed_or_reduced(self):
+        def testfunc(n):
+            result = 0
+            for i in range(n):
+                result += test_bound_method(i)  # module-level bound method; returns its argument
+            return result
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, sum(range(TIER2_THRESHOLD)))
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_PUSH_FRAME", uops)
+        # Expect the strength-reduced check, and no method-version guard at all.
+        self.assertIn("_CHECK_FUNCTION_VERSION_INLINE", uops)
+        self.assertNotIn("_CHECK_METHOD_VERSION", uops)
+
     def test_jit_error_pops(self):
         """
         Tests that the correct number of pops are inserted into the
@@ -1953,6 +1968,49 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertNotIn("_GUARD_NOS_INT", uops)
         self.assertNotIn("_GUARD_TOS_INT", uops)
 
+    def test_call_len_known_length_small_int(self):
+        def testfunc(n):
+            x = 0
+            for _ in range(n):
+                t = (1, 2, 3, 4, 5)  # constant 5-tuple
+                if len(t) == 5:
+                    x += 1
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)  # the branch was taken on every iteration
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # When the length is < _PY_NSMALLPOSINTS, the len() call is replaced
+        # with just an inline load.
+        self.assertNotIn("_CALL_LEN", uops)
+        self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops)
+        self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops)
+        self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops)
+
+    def test_call_len_known_length(self):
+        def testfunc(n):
+            class C:
+                t = tuple(range(300))  # 300 elements, read below as C.t
+
+            x = 0
+            for _ in range(n):
+                if len(C.t) == 300:  # comparison + guard removed
+                    x += 1
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)  # the branch was taken on every iteration
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # When the length is >= _PY_NSMALLPOSINTS, we cannot replace
+        # the len() call with an inline load, but knowing the exact
+        # length allows us to optimize more code, such as conditionals
+        # in this case
+        self.assertIn("_CALL_LEN", uops)
+        self.assertNotIn("_COMPARE_OP_INT", uops)
+        self.assertNotIn("_GUARD_IS_TRUE_POP", uops)
+
     def test_get_len_with_const_tuple(self):
         def testfunc(n):
             x = 0.0
@@ -2247,6 +2305,20 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertNotIn("_LOAD_ATTR_METHOD_NO_DICT", uops)
         self.assertNotIn("_LOAD_ATTR_METHOD_LAZY_DICT", uops)
 
+    def test_float_op_refcount_elimination(self):
+        def testfunc(args):
+            a, b, n = args  # two floats and the iteration count, packed in one tuple
+            c = 0.0
+            for _ in range(n):
+                c += a + b
+            return c
+
+        res, ex = self._run_with_optimizer(testfunc, (0.1, 0.1, TIER2_THRESHOLD))
+        self.assertAlmostEqual(res, TIER2_THRESHOLD * (0.1 + 0.1))  # repeated float adds: compare approximately
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS", uops)
+
     def test_remove_guard_for_slice_list(self):
         def f(n):
             for i in range(n):
@@ -2290,9 +2362,104 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertNotIn("_GUARD_TOS_INT", uops)
         self.assertNotIn("_GUARD_NOS_INT", uops)
 
+    def test_attr_promotion_failure(self):
+        # Crash regression test only: no particular uops are expected.
+        # The script below just has to run to completion in a subprocess.
+        script_helper.assert_python_ok('-c', textwrap.dedent("""
+        import _testinternalcapi
+        import _opcode
+        import email
+
+        def get_first_executor(func):
+            code = func.__code__
+            co_code = code.co_code
+            for i in range(0, len(co_code), 2):
+                try:
+                    return _opcode.get_executor(code, i)
+                except ValueError:
+                    pass
+            return None
+
+        def testfunc(n):
+            for _ in range(n):
+                email.jit_testing = None
+                prompt = email.jit_testing
+                del email.jit_testing
+
+
+        testfunc(_testinternalcapi.TIER2_THRESHOLD)
+        ex = get_first_executor(testfunc)
+        assert ex is not None
+        """))
+
+    def test_pop_top_specialize_none(self):
+        def testfunc(n):
+            for _ in range(n):
+                global_identity(None)  # result (None) is discarded
+
+        testfunc(TIER2_THRESHOLD)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        self.assertIn("_POP_TOP_NOP", uops)  # the pop variant expected for a discarded None
+
+    def test_pop_top_specialize_int(self):
+        def testfunc(n):
+            for _ in range(n):
+                global_identity(100000)  # result (an int) is discarded
+
+        testfunc(TIER2_THRESHOLD)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        self.assertIn("_POP_TOP_INT", uops)  # the pop variant expected for a discarded int
+
+    def test_pop_top_specialize_float(self):
+        def testfunc(n):
+            for _ in range(n):
+                global_identity(1e6)  # result (a float) is discarded
+
+        testfunc(TIER2_THRESHOLD)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        self.assertIn("_POP_TOP_FLOAT", uops)  # the pop variant expected for a discarded float
+
+
+    def test_unary_negative_long_float_type(self):
+        def testfunc(n):
+            for _ in range(n):
+                a = 9397
+                f = 9397.0
+                x = -a + -a  # add two negated ints
+                y = -f + -f  # add two negated floats
+
+        testfunc(TIER2_THRESHOLD)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        self.assertNotIn("_GUARD_TOS_INT", uops)  # no int or float type guards may remain in the trace
+        self.assertNotIn("_GUARD_NOS_INT", uops)
+        self.assertNotIn("_GUARD_TOS_FLOAT", uops)
+        self.assertNotIn("_GUARD_NOS_FLOAT", uops)
 
 def global_identity(x):
     return x
 
+class TestObject:  # helper for the bound-method call used by the optimizer tests
+    def test(self, *args, **kwargs):
+        return args[0]  # echo the first positional argument; kwargs are ignored
+
+test_object = TestObject()
+test_bound_method = TestObject.test.__get__(test_object)  # bind test() to test_object explicitly
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_concurrent_futures/test_init.py b/Lib/test/test_concurrent_futures/test_init.py
index df640929309..6b8484c0d5f 100644
--- a/Lib/test/test_concurrent_futures/test_init.py
+++ b/Lib/test/test_concurrent_futures/test_init.py
@@ -20,6 +20,10 @@ INITIALIZER_STATUS = 'uninitialized'
 def init(x):
     global INITIALIZER_STATUS
     INITIALIZER_STATUS = x
+    # InterpreterPoolInitializerTest.test_initializer fails
+    # if we don't have a LOAD_GLOBAL.  (It could be any global.)
+    # We will address this separately.
+    INITIALIZER_STATUS  # deliberate no-op read; only here to emit the LOAD_GLOBAL
 
 def get_init_status():
     return INITIALIZER_STATUS
diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py
index 5fd5684e103..844dfdd6fc9 100644
--- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py
+++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py
@@ -2,35 +2,78 @@ import asyncio
 import contextlib
 import io
 import os
-import pickle
+import sys
 import time
 import unittest
-from concurrent.futures.interpreter import (
-    ExecutionFailed, BrokenInterpreterPool,
-)
+from concurrent.futures.interpreter import BrokenInterpreterPool
+from concurrent import interpreters
 from concurrent.interpreters import _queues as queues
 import _interpreters
 from test import support
+from test.support import os_helper
+from test.support import script_helper
 import test.test_asyncio.utils as testasyncio_utils
 
 from .executor import ExecutorTest, mul
 from .util import BaseTestCase, InterpreterPoolMixin, setup_module
 
 
+WINDOWS = sys.platform.startswith('win')
+
+
+@contextlib.contextmanager
+def nonblocking(fd):  # temporarily switch fd to non-blocking mode
+    if not os.get_blocking(fd):
+        yield  # already non-blocking: nothing to change or restore
+        return
+    os.set_blocking(fd, False)
+    try:
+        yield
+    finally:
+        os.set_blocking(fd, True)  # restore the blocking mode found on entry
+
+
+def read_file_with_timeout(fd, nbytes, timeout):
+    """Read up to nbytes from fd, polling for at most timeout seconds.
+
+    Raises TimeoutError if nothing becomes readable in time.
+    """
+    with nonblocking(fd):
+        deadline = time.monotonic() + timeout  # immune to wall-clock jumps
+        while True:
+            try:
+                return os.read(fd, nbytes)
+            except BlockingIOError:
+                if time.monotonic() >= deadline:
+                    raise TimeoutError('nothing to read') from None
+                time.sleep(0.01)  # yield the CPU instead of busy-spinning
+
+
+if not WINDOWS:
+    import select
+    def read_file_with_timeout(fd, nbytes, timeout):
+        """Wait up to timeout seconds with select(), then read from fd."""
+        if fd in select.select([fd], [], [], timeout)[0]:
+            return os.read(fd, nbytes)
+        raise TimeoutError('nothing to read')
+
+
 def noop():
     pass
 
 
 def write_msg(fd, msg):
+    import os  # NOTE(review): local import, presumably for use in worker interpreters -- confirm
     os.write(fd, msg + b'\0')
 
 
-def read_msg(fd):
+def read_msg(fd, timeout=10.0):  # timeout covers only the wait for the first byte
     msg = b''
-    while ch := os.read(fd, 1):
-        if ch == b'\0':
-            return msg
+    ch = read_file_with_timeout(fd, 1, timeout)
+    while ch and ch != b'\0':  # b'' means EOF: stop instead of looping forever
         msg += ch
+        ch = os.read(fd, 1)
+    return msg  # bytes read before the NUL terminator (or before EOF)
 
 
 def get_current_name():
@@ -113,6 +156,38 @@ class InterpreterPoolExecutorTest(
         self.assertEqual(before, b'\0')
         self.assertEqual(after, msg)
 
+    def test_init_with___main___global(self):
+        # See https://github.com/python/cpython/pull/133957#issuecomment-2927415311.
+        text = """if True:
+            from concurrent.futures import InterpreterPoolExecutor
+
+            INITIALIZER_STATUS = 'uninitialized'
+
+            def init(x):
+                global INITIALIZER_STATUS
+                INITIALIZER_STATUS = x
+                INITIALIZER_STATUS
+
+            def get_init_status():
+                return INITIALIZER_STATUS
+
+            if __name__ == "__main__":
+                exe = InterpreterPoolExecutor(initializer=init,
+                                              initargs=('initialized',))
+                fut = exe.submit(get_init_status)
+                print(fut.result())  # 'initialized'
+                exe.shutdown(wait=True)
+                print(INITIALIZER_STATUS)  # 'uninitialized'
+           """
+        with os_helper.temp_dir() as tmpdir:
+            script = script_helper.make_script(tmpdir, 'my-script', text)
+            proc = script_helper.assert_python_ok(script)
+        # First line is the submitted task's result, second the parent's global.
+        printed = proc.out.decode('utf-8').strip().splitlines()
+        self.assertEqual(printed, [
+            'initialized', 'uninitialized',
+        ])
+
     def test_init_closure(self):
         count = 0
         def init1():
@@ -121,10 +196,19 @@ class InterpreterPoolExecutorTest(
             nonlocal count
             count += 1
 
-        with self.assertRaises(pickle.PicklingError):
-            self.executor_type(initializer=init1)
-        with self.assertRaises(pickle.PicklingError):
-            self.executor_type(initializer=init2)
+        with contextlib.redirect_stderr(io.StringIO()) as stderr:
+            with self.executor_type(initializer=init1) as executor:
+                fut = executor.submit(lambda: None)
+        self.assertIn('NotShareableError', stderr.getvalue())
+        with self.assertRaises(BrokenInterpreterPool):
+            fut.result()
+
+        with contextlib.redirect_stderr(io.StringIO()) as stderr:
+            with self.executor_type(initializer=init2) as executor:
+                fut = executor.submit(lambda: None)
+        self.assertIn('NotShareableError', stderr.getvalue())
+        with self.assertRaises(BrokenInterpreterPool):
+            fut.result()
 
     def test_init_instance_method(self):
         class Spam:
@@ -132,26 +216,12 @@ class InterpreterPoolExecutorTest(
                 raise NotImplementedError
         spam = Spam()
 
-        with self.assertRaises(pickle.PicklingError):
-            self.executor_type(initializer=spam.initializer)
-
-    def test_init_shared(self):
-        msg = b'eggs'
-        r, w = self.pipe()
-        script = f"""if True:
-            import os
-            if __name__ != '__main__':
-                import __main__
-                spam = __main__.spam
-            os.write({w}, spam + b'\\0')
-            """
-
-        executor = self.executor_type(shared={'spam': msg})
-        fut = executor.submit(exec, script)
-        fut.result()
-        after = read_msg(r)
-
-        self.assertEqual(after, msg)
+        with contextlib.redirect_stderr(io.StringIO()) as stderr:
+            with self.executor_type(initializer=spam.initializer) as executor:
+                fut = executor.submit(lambda: None)
+        self.assertIn('NotShareableError', stderr.getvalue())
+        with self.assertRaises(BrokenInterpreterPool):
+            fut.result()
 
     @unittest.expectedFailure
     def test_init_exception_in_script(self):
@@ -178,8 +248,6 @@ class InterpreterPoolExecutorTest(
         stderr = stderr.getvalue()
         self.assertIn('ExecutionFailed: Exception: spam', stderr)
         self.assertIn('Uncaught in the interpreter:', stderr)
-        self.assertIn('The above exception was the direct cause of the following exception:',
-                      stderr)
 
     @unittest.expectedFailure
     def test_submit_script(self):
@@ -208,10 +276,14 @@ class InterpreterPoolExecutorTest(
             return spam
 
         executor = self.executor_type()
-        with self.assertRaises(pickle.PicklingError):
-            executor.submit(task1)
-        with self.assertRaises(pickle.PicklingError):
-            executor.submit(task2)
+
+        fut = executor.submit(task1)
+        with self.assertRaises(_interpreters.NotShareableError):
+            fut.result()
+
+        fut = executor.submit(task2)
+        with self.assertRaises(_interpreters.NotShareableError):
+            fut.result()
 
     def test_submit_local_instance(self):
         class Spam:
@@ -219,8 +291,9 @@ class InterpreterPoolExecutorTest(
                 self.value = True
 
         executor = self.executor_type()
-        with self.assertRaises(pickle.PicklingError):
-            executor.submit(Spam)
+        fut = executor.submit(Spam)
+        with self.assertRaises(_interpreters.NotShareableError):
+            fut.result()
 
     def test_submit_instance_method(self):
         class Spam:
@@ -229,8 +302,9 @@ class InterpreterPoolExecutorTest(
         spam = Spam()
 
         executor = self.executor_type()
-        with self.assertRaises(pickle.PicklingError):
-            executor.submit(spam.run)
+        fut = executor.submit(spam.run)
+        with self.assertRaises(_interpreters.NotShareableError):
+            fut.result()
 
     def test_submit_func_globals(self):
         executor = self.executor_type()
@@ -242,13 +316,14 @@ class InterpreterPoolExecutorTest(
 
     @unittest.expectedFailure
     def test_submit_exception_in_script(self):
+        # Scripts are not supported currently.
         fut = self.executor.submit('raise Exception("spam")')
         with self.assertRaises(Exception) as captured:
             fut.result()
         self.assertIs(type(captured.exception), Exception)
         self.assertEqual(str(captured.exception), 'spam')
         cause = captured.exception.__cause__
-        self.assertIs(type(cause), ExecutionFailed)
+        self.assertIs(type(cause), interpreters.ExecutionFailed)
         for attr in ('__name__', '__qualname__', '__module__'):
             self.assertEqual(getattr(cause.excinfo.type, attr),
                              getattr(Exception, attr))
@@ -261,7 +336,7 @@ class InterpreterPoolExecutorTest(
         self.assertIs(type(captured.exception), Exception)
         self.assertEqual(str(captured.exception), 'spam')
         cause = captured.exception.__cause__
-        self.assertIs(type(cause), ExecutionFailed)
+        self.assertIs(type(cause), interpreters.ExecutionFailed)
         for attr in ('__name__', '__qualname__', '__module__'):
             self.assertEqual(getattr(cause.excinfo.type, attr),
                              getattr(Exception, attr))
@@ -269,16 +344,93 @@ class InterpreterPoolExecutorTest(
 
     def test_saturation(self):
         blocker = queues.create()
-        executor = self.executor_type(4, shared=dict(blocker=blocker))
+        executor = self.executor_type(4)
 
         for i in range(15 * executor._max_workers):
-            executor.submit(exec, 'import __main__; __main__.blocker.get()')
-            #executor.submit('blocker.get()')
+            executor.submit(blocker.get)
         self.assertEqual(len(executor._threads), executor._max_workers)
         for i in range(15 * executor._max_workers):
             blocker.put_nowait(None)
         executor.shutdown(wait=True)
 
+    def test_blocking(self):
+        # There is no guarantee that a worker will be created for every
+        # submitted task.  That's because there's a race between:
+        #
+        # * a new worker thread, created when task A was just submitted,
+        #   becoming non-idle when it picks up task A
+        # * after task B is added to the queue, a new worker thread
+        #   is started only if there are no idle workers
+        #   (the check in ThreadPoolExecutor._adjust_thread_count())
+        #
+        # That means we must not block waiting for *all* tasks to report
+        # "ready" before we unblock the known-ready workers.
+        ready = queues.create()  # tasks announce themselves here
+        blocker = queues.create()  # tasks then wait here until released
+
+        def run(taskid, ready, blocker):
+            # There can't be any globals here.
+            ready.put_nowait(taskid)
+            blocker.get()  # blocking
+
+        numtasks = 10
+        futures = []  # collected but never inspected in this test
+        with self.executor_type() as executor:
+            # Request the jobs.
+            for i in range(numtasks):
+                fut = executor.submit(run, i, ready, blocker)
+                futures.append(fut)
+            pending = numtasks
+            while pending > 0:
+                # Wait for any to be ready.
+                done = 0  # tasks that reported ready during this pass
+                for _ in range(pending):
+                    try:
+                        ready.get(timeout=1)  # blocking
+                    except interpreters.QueueEmpty:
+                        pass  # nothing reported within 1s; retried on the next pass
+                    else:
+                        done += 1
+                pending -= done
+                # Unblock the workers.
+                for _ in range(done):
+                    blocker.put_nowait(None)  # one item releases one waiting task
+
+    def test_blocking_with_limited_workers(self):
+        # This is essentially the same as test_blocking,
+        # but we explicitly force a limited number of workers,
+        # instead of it happening implicitly sometimes due to a race.
+        ready = queues.create()  # tasks announce themselves here
+        blocker = queues.create()  # tasks then wait here until released
+
+        def run(taskid, ready, blocker):
+            # There can't be any globals here.
+            ready.put_nowait(taskid)
+            blocker.get()  # blocking
+
+        numtasks = 10
+        futures = []  # collected but never inspected in this test
+        with self.executor_type(4) as executor:  # 4 workers for the 10 tasks
+            # Request the jobs.
+            for i in range(numtasks):
+                fut = executor.submit(run, i, ready, blocker)
+                futures.append(fut)
+            pending = numtasks
+            while pending > 0:
+                # Wait for any to be ready.
+                done = 0  # tasks that reported ready during this pass
+                for _ in range(pending):
+                    try:
+                        ready.get(timeout=1)  # blocking
+                    except interpreters.QueueEmpty:
+                        pass  # nothing reported within 1s; retried on the next pass
+                    else:
+                        done += 1
+                pending -= done
+                # Unblock the workers.
+                for _ in range(done):
+                    blocker.put_nowait(None)  # one item releases one waiting task
+
     @support.requires_gil_enabled("gh-117344: test is flaky without the GIL")
     def test_idle_thread_reuse(self):
         executor = self.executor_type()
@@ -289,12 +441,21 @@ class InterpreterPoolExecutorTest(
         executor.shutdown(wait=True)
 
     def test_pickle_errors_propagate(self):
-        # GH-125864: Pickle errors happen before the script tries to execute, so the
-        # queue used to wait infinitely.
-
+        # GH-125864: Pickle errors happen before the script tries to execute,
+        # so the queue used to wait infinitely.
         fut = self.executor.submit(PickleShenanigans(0))
-        with self.assertRaisesRegex(RuntimeError, "gotcha"):
+        expected = interpreters.NotShareableError
+        with self.assertRaisesRegex(expected, 'args not shareable') as cm:
             fut.result()
+        self.assertRegex(str(cm.exception.__cause__), 'unpickled')
+
+    def test_no_stale_references(self):
+        # Not applicable: weak references don't cross between interpreters.
+        self.skipTest('not applicable')
+
+    def test_free_reference(self):
+        # Not applicable: weak references don't cross between interpreters.
+        self.skipTest('not applicable')
 
 
 class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase):
diff --git a/Lib/test/test_configparser.py b/Lib/test/test_configparser.py
index 23904d17d32..e7364e18742 100644
--- a/Lib/test/test_configparser.py
+++ b/Lib/test/test_configparser.py
@@ -986,12 +986,12 @@ class ConfigParserTestCase(BasicTestCase, unittest.TestCase):
 
     def test_defaults_keyword(self):
         """bpo-23835 fix for ConfigParser"""
-        cf = self.newconfig(defaults={1: 2.4})
-        self.assertEqual(cf[self.default_section]['1'], '2.4')
-        self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.4)
-        cf = self.newconfig(defaults={"A": 5.2})
-        self.assertEqual(cf[self.default_section]['a'], '5.2')
-        self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.2)
+        cf = self.newconfig(defaults={1: 2.5})  # 2.5 is exactly representable, so its str() is unambiguous
+        self.assertEqual(cf[self.default_section]['1'], '2.5')  # int key 1 is looked up as '1'
+        self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.5)
+        cf = self.newconfig(defaults={"A": 5.25})  # 5.25 is exactly representable as well
+        self.assertEqual(cf[self.default_section]['a'], '5.25')  # key "A" is looked up as 'a'
+        self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.25)
 
 
 class ConfigParserTestCaseNoInterpolation(BasicTestCase, unittest.TestCase):
diff --git a/Lib/test/test_ctypes/test_parameters.py b/Lib/test/test_ctypes/test_parameters.py
index f89521cf8b3..46f8ff93efa 100644
--- a/Lib/test/test_ctypes/test_parameters.py
+++ b/Lib/test/test_ctypes/test_parameters.py
@@ -1,3 +1,4 @@
+import sys
 import unittest
 import test.support
 from ctypes import (CDLL, PyDLL, ArgumentError,
@@ -240,7 +241,8 @@ class SimpleTypesTestCase(unittest.TestCase):
         self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^<cparam '[LIQ]' \(20000\)>$")
         self.assertEqual(repr(c_float.from_param(1.5)), "<cparam 'f' (1.5)>")
         self.assertEqual(repr(c_double.from_param(1.5)), "<cparam 'd' (1.5)>")
-        self.assertEqual(repr(c_double.from_param(1e300)), "<cparam 'd' (1e+300)>")
+        if sys.float_repr_style == 'short':
+            self.assertEqual(repr(c_double.from_param(1e300)), "<cparam 'd' (1e+300)>")
         self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^<cparam ('d' \(1.5\)|'g' at 0x[A-Fa-f0-9]+)>$")
         self.assertRegex(repr(c_char_p.from_param(b'hihi')), r"^<cparam 'z' \(0x[A-Fa-f0-9]+\)>$")
         self.assertRegex(repr(c_wchar_p.from_param('hihi')), r"^<cparam 'Z' \(0x[A-Fa-f0-9]+\)>$")
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index 52c38e42eca..60c62430370 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -290,6 +290,38 @@ class DictTest(unittest.TestCase):
             ['Cannot convert dictionary update sequence element #0 to a sequence'],
         )
 
+    def test_update_shared_keys(self):
+        class MyClass: pass
+
+        # Subclass str to enable us to create an object during the
+        # dict.update() call.
+        class MyStr(str):
+            def __hash__(self):
+                return super().__hash__()  # same hash as the plain str
+
+            def __eq__(self, other):
+                # Create an object that shares the same PyDictKeysObject as
+                # obj.__dict__.
+                obj2 = MyClass()
+                obj2.a = "a"
+                obj2.b = "b"
+                obj2.c = "c"  # one attribute more than obj has
+                return super().__eq__(other)
+
+        obj = MyClass()
+        obj.a = "a"
+        obj.b = "b"
+
+        x = {}
+        x[MyStr("a")] = MyStr("a")  # key equal to "a", so update() ends up comparing via MyStr.__eq__
+
+        # gh-132617: this previously raised "dict mutated during update" error
+        x.update(obj.__dict__)
+
+        self.assertEqual(x, {
+            MyStr("a"): "a",
+            "b": "b",
+        })
 
     def test_fromkeys(self):
         self.assertEqual(dict.fromkeys('abc'), {'a':None, 'b':None, 'c':None})
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 8765d121fd0..b8116d073a2 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -2568,6 +2568,18 @@ Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
         self.assertEqual(str(make_header(decode_header(s))),
                          '"Müller T" <T.Mueller@xxx.com>')
 
+    def test_unencoded_ascii(self):
+        # bpo-22833/gh-67022: a header with no encoded words must come
+        # back as a single (str, None) pair rather than (bytes, None).
+        header = 'header without encoded words'
+        self.assertEqual(decode_header(header), [(header, None)])
+
+    def test_unencoded_utf8(self):
+        # bpo-22833/gh-67022: non-ASCII text without encoded words is also
+        # returned as a single (str, None) pair rather than (bytes, None).
+        header = 'header with unexpected non ASCII caract\xe8res'
+        self.assertEqual(decode_header(header), [(header, None)])
+
 
 # Test the MIMEMessage class
 class TestMIMEMessage(TestEmailBase):
diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py
index 221f9db7763..bbc7630fa83 100644
--- a/Lib/test/test_enum.py
+++ b/Lib/test/test_enum.py
@@ -36,7 +36,7 @@ def load_tests(loader, tests, ignore):
                 optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE,
                 ))
     howto_tests = os.path.join(REPO_ROOT, 'Doc/howto/enum.rst')
-    if os.path.exists(howto_tests):
+    if os.path.exists(howto_tests) and sys.float_repr_style == 'short':
         tests.addTests(doctest.DocFileSuite(
                 howto_tests,
                 module_relative=False,
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 175ef531386..57d0656487d 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -1445,6 +1445,7 @@ class ExceptionTests(unittest.TestCase):
         foo()
         support.gc_collect()
 
+    @support.skip_emscripten_stack_overflow()
     @cpython_only
     def test_recursion_normalizing_exception(self):
         import_module("_testinternalcapi")
@@ -1522,6 +1523,7 @@ class ExceptionTests(unittest.TestCase):
         self.assertIn(b'Done.', out)
 
 
+    @support.skip_emscripten_stack_overflow()
     def test_recursion_in_except_handler(self):
 
         def set_relative_recursion_limit(n):
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
index 237d7b5d35e..00518abcb11 100644
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -795,6 +795,8 @@ class FormatTestCase(unittest.TestCase):
         self.assertRaises(ValueError, format, x, '.6,n')
 
     @support.requires_IEEE_754
+    @unittest.skipUnless(sys.float_repr_style == 'short',
+                         "applies only when using short float repr style")
     def test_format_testfile(self):
         with open(format_testfile, encoding="utf-8") as testfile:
             for line in testfile:
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
index c7cc32e0949..1f626d87fa6 100644
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -346,12 +346,12 @@ class FormatTest(unittest.TestCase):
         testcommon(b"%s", memoryview(b"abc"), b"abc")
         # %a will give the equivalent of
         # repr(some_obj).encode('ascii', 'backslashreplace')
-        testcommon(b"%a", 3.14, b"3.14")
+        testcommon(b"%a", 3.25, b"3.25")
         testcommon(b"%a", b"ghi", b"b'ghi'")
         testcommon(b"%a", "jkl", b"'jkl'")
         testcommon(b"%a", "\u0544", b"'\\u0544'")
         # %r is an alias for %a
-        testcommon(b"%r", 3.14, b"3.14")
+        testcommon(b"%r", 3.25, b"3.25")
         testcommon(b"%r", b"ghi", b"b'ghi'")
         testcommon(b"%r", "jkl", b"'jkl'")
         testcommon(b"%r", "\u0544", b"'\\u0544'")
@@ -407,19 +407,19 @@ class FormatTest(unittest.TestCase):
 
         self.assertEqual(format("abc", "\u2007<5"), "abc\u2007\u2007")
         self.assertEqual(format(123, "\u2007<5"), "123\u2007\u2007")
-        self.assertEqual(format(12.3, "\u2007<6"), "12.3\u2007\u2007")
+        self.assertEqual(format(12.5, "\u2007<6"), "12.5\u2007\u2007")
         self.assertEqual(format(0j, "\u2007<4"), "0j\u2007\u2007")
         self.assertEqual(format(1+2j, "\u2007<8"), "(1+2j)\u2007\u2007")
 
         self.assertEqual(format("abc", "\u2007>5"), "\u2007\u2007abc")
         self.assertEqual(format(123, "\u2007>5"), "\u2007\u2007123")
-        self.assertEqual(format(12.3, "\u2007>6"), "\u2007\u200712.3")
+        self.assertEqual(format(12.5, "\u2007>6"), "\u2007\u200712.5")
         self.assertEqual(format(1+2j, "\u2007>8"), "\u2007\u2007(1+2j)")
         self.assertEqual(format(0j, "\u2007>4"), "\u2007\u20070j")
 
         self.assertEqual(format("abc", "\u2007^5"), "\u2007abc\u2007")
         self.assertEqual(format(123, "\u2007^5"), "\u2007123\u2007")
-        self.assertEqual(format(12.3, "\u2007^6"), "\u200712.3\u2007")
+        self.assertEqual(format(12.5, "\u2007^6"), "\u200712.5\u2007")
         self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007")
         self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007")
 
diff --git a/Lib/test/test_free_threading/test_generators.py b/Lib/test/test_free_threading/test_generators.py
new file mode 100644
index 00000000000..d01675eb38b
--- /dev/null
+++ b/Lib/test/test_free_threading/test_generators.py
@@ -0,0 +1,51 @@
+import concurrent.futures
+import unittest
+from threading import Barrier
+from unittest import TestCase
+import random
+import time
+
+from test.support import threading_helper, Py_GIL_DISABLED
+
+threading_helper.requires_working_threading(module=True)
+
+
+def random_sleep():
+    """Sleep for a random interval of 50 to 100 microseconds."""
+    time.sleep(random.randint(50, 100) * 1e-6)
+
+def random_string():  # ten random uppercase hex digits
+    return ''.join([random.choice('0123456789ABCDEF') for _ in range(10)])
+
+def set_gen_name(g, b):
+    b.wait()  # line up with the other workers before writing
+    random_sleep()  # 50-100 us of jitter
+    g.__name__ = random_string()
+    return g.__name__  # re-read the attribute rather than returning the local value
+
+def set_gen_qualname(g, b):
+    b.wait()  # line up with the other workers before writing
+    random_sleep()  # 50-100 us of jitter
+    g.__qualname__ = random_string()
+    return g.__qualname__  # re-read the attribute rather than returning the local value
+
+
+@unittest.skipUnless(Py_GIL_DISABLED, "Enable only in FT build")
+class TestFTGenerators(TestCase):
+    NUM_THREADS = 4
+
+    def concurrent_write_with_func(self, func):
+        # NUM_THREADS workers run func on one shared generator, 1000 rounds.
+        gen = (x for x in range(42))
+        workers = self.NUM_THREADS
+        for _ in range(1000):
+            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
+                b = Barrier(workers)
+                pending = [pool.submit(func, gen, b) for _ in range(workers)]
+                for fut in concurrent.futures.as_completed(pending):
+                    self.assertEqual(len(fut.result()), 10)
+
+    def test_concurrent_write(self):
+        for func in (set_gen_name, set_gen_qualname):
+            with self.subTest(func=func):
+                self.concurrent_write_with_func(func=func)
diff --git a/Lib/test/test_free_threading/test_heapq.py b/Lib/test/test_free_threading/test_heapq.py
index f75fb264c8a..ee7adfb2b78 100644
--- a/Lib/test/test_free_threading/test_heapq.py
+++ b/Lib/test/test_free_threading/test_heapq.py
@@ -3,7 +3,7 @@ import unittest
 import heapq
 
 from enum import Enum
-from threading import Thread, Barrier
+from threading import Thread, Barrier, Lock
 from random import shuffle, randint
 
 from test.support import threading_helper
@@ -178,6 +178,33 @@ class TestHeapq(unittest.TestCase):
         self.assertEqual(len(max_heap), OBJECT_COUNT)
         self.test_heapq.check_max_invariant(max_heap)
 
+    def test_lock_free_list_read(self):
+        n, n_threads = 1_000, 10
+        l = []
+        barrier = Barrier(n_threads * 2)
+
+        count = 0
+        lock = Lock()
+
+        def worker():
+            with lock:
+                nonlocal count
+                x = count
+                count += 1
+
+            barrier.wait()
+            for i in range(n):
+                if x % 2:
+                    heapq.heappush(l, 1)
+                    heapq.heappop(l)
+                else:
+                    try:
+                        l[0]
+                    except IndexError:
+                        pass
+
+        self.run_concurrently(worker, (), n_threads * 2)
+
     @staticmethod
     def is_sorted_ascending(lst):
         """
diff --git a/Lib/test/test_free_threading/test_itertools.py b/Lib/test/test_free_threading/test_itertools.py
index 8360afbf78c..b8663ade1d4 100644
--- a/Lib/test/test_free_threading/test_itertools.py
+++ b/Lib/test/test_free_threading/test_itertools.py
@@ -44,7 +44,10 @@ class ItertoolsThreading(unittest.TestCase):
         def work(it):
             barrier.wait()
             for _ in range(number_of_cycles):
-                _ = next(it)
+                try:
+                    next(it)
+                except StopIteration:
+                    pass
 
         data = (1, 2, 3, 4)
         for it in range(number_of_iterations):
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index f5455705678..58a30c8e6ac 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1336,9 +1336,9 @@ x = (
 
     def test_conversions(self):
         self.assertEqual(f'{3.14:10.10}', '      3.14')
-        self.assertEqual(f'{3.14!s:10.10}', '3.14      ')
-        self.assertEqual(f'{3.14!r:10.10}', '3.14      ')
-        self.assertEqual(f'{3.14!a:10.10}', '3.14      ')
+        self.assertEqual(f'{1.25!s:10.10}', '1.25      ')
+        self.assertEqual(f'{1.25!r:10.10}', '1.25      ')
+        self.assertEqual(f'{1.25!a:10.10}', '1.25      ')
 
         self.assertEqual(f'{"a"}', 'a')
         self.assertEqual(f'{"a"!r}', "'a'")
@@ -1347,7 +1347,7 @@ x = (
         # Conversions can have trailing whitespace after them since it
         # does not provide any significance
         self.assertEqual(f"{3!s  }", "3")
-        self.assertEqual(f'{3.14!s  :10.10}', '3.14      ')
+        self.assertEqual(f'{1.25!s  :10.10}', '1.25      ')
 
         # Not a conversion.
         self.assertEqual(f'{"a!r"}', "a!r")
diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py
index 6411e4318b6..eb01328b6ea 100644
--- a/Lib/test/test_generated_cases.py
+++ b/Lib/test/test_generated_cases.py
@@ -1976,8 +1976,8 @@ class TestGeneratedAbstractCases(unittest.TestCase):
         """
         output = """
         case OP: {
-            JitOptSymbol *arg1;
-            JitOptSymbol *out;
+            JitOptRef arg1;
+            JitOptRef out;
             arg1 = stack_pointer[-1];
             out = EGGS(arg1);
             stack_pointer[-1] = out;
@@ -1985,7 +1985,7 @@ class TestGeneratedAbstractCases(unittest.TestCase):
         }
 
         case OP2: {
-            JitOptSymbol *out;
+            JitOptRef out;
             out = sym_new_not_null(ctx);
             stack_pointer[-1] = out;
             break;
@@ -2010,14 +2010,14 @@ class TestGeneratedAbstractCases(unittest.TestCase):
         """
         output = """
         case OP: {
-            JitOptSymbol *out;
+            JitOptRef out;
             out = sym_new_not_null(ctx);
             stack_pointer[-1] = out;
             break;
         }
 
         case OP2: {
-            JitOptSymbol *out;
+            JitOptRef out;
             out = NULL;
             stack_pointer[-1] = out;
             break;
@@ -2151,7 +2151,7 @@ class TestGeneratedAbstractCases(unittest.TestCase):
         """
         output = """
         case OP: {
-            JitOptSymbol *foo;
+            JitOptRef foo;
             foo = NULL;
             stack_pointer[0] = foo;
             stack_pointer += 1;
@@ -2224,5 +2224,202 @@ class TestGeneratedAbstractCases(unittest.TestCase):
                                     "Inputs must have equal sizes"):
             self.run_cases_test(input, input2, output)
 
+    def test_pure_uop_body_copied_in(self):
+        # Note: any non-escaping call works.
+        # In this case, we use PyStackRef_IsNone.
+        input = """
+        pure op(OP, (foo -- res)) {
+            res = PyStackRef_IsNone(foo);
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = sym_new_known(ctx, foo);
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                res_stackref = PyStackRef_IsNone(foo);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = sym_new_known(ctx, foo);
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+    def test_pure_uop_body_copied_in_deopt(self):
+        # Note: any non-escaping call works.
+        # In this case, we use PyStackRef_IsNull.
+        input = """
+        pure op(OP, (foo -- res)) {
+            DEOPT_IF(PyStackRef_IsNull(foo));
+            res = foo;
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = foo;
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                if (PyStackRef_IsNull(foo)) {
+                    ctx->done = true;
+                    break;
+                }
+                res_stackref = foo;
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = foo;
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+    def test_pure_uop_body_copied_in_error_if(self):
+        # Note: any non-escaping call works.
+        # In this case, we use PyStackRef_IsNull.
+        input = """
+        pure op(OP, (foo -- res)) {
+            ERROR_IF(PyStackRef_IsNull(foo));
+            res = foo;
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = foo;
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                if (PyStackRef_IsNull(foo)) {
+                    goto error;
+                }
+                res_stackref = foo;
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = foo;
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+
+    def test_replace_opcode_uop_body_copied_in_complex(self):
+        input = """
+        pure op(OP, (foo -- res)) {
+            if (foo) {
+                res = PyStackRef_IsNone(foo);
+            }
+            else {
+                res = 1;
+            }
+        }
+        """
+        input2 = """
+        op(OP, (foo -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = sym_new_known(ctx, foo);
+        }
+        """
+        output = """
+        case OP: {
+            JitOptRef foo;
+            JitOptRef res;
+            foo = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, foo)
+            ) {
+                JitOptRef foo_sym = foo;
+                _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                if (foo) {
+                    res_stackref = PyStackRef_IsNone(foo);
+                }
+                else {
+                    res_stackref = 1;
+                }
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
+            res = sym_new_known(ctx, foo);
+            stack_pointer[-1] = res;
+            break;
+        }
+        """
+        self.run_cases_test(input, input2, output)
+
+    def test_replace_opocode_uop_reject_array_effects(self):
+        input = """
+        pure op(OP, (foo[2] -- res)) {
+            if (foo) {
+                res = PyStackRef_IsNone(foo);
+            }
+            else {
+                res = 1;
+            }
+        }
+        """
+        input2 = """
+        op(OP, (foo[2] -- res)) {
+            REPLACE_OPCODE_IF_EVALUATES_PURE(foo);
+            res = sym_new_unknown(ctx);
+        }
+        """
+        output = """
+        """
+        with self.assertRaisesRegex(SyntaxError,
+                                    "Pure evaluation cannot take array-like inputs"):
+            self.run_cases_test(input, input2, output)
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py
index b83ae181718..5bad483ae9d 100644
--- a/Lib/test/test_hashlib.py
+++ b/Lib/test/test_hashlib.py
@@ -208,6 +208,11 @@ class HashLibTestCase(unittest.TestCase):
         return itertools.chain.from_iterable(constructors)
 
     @property
+    def shake_constructors(self):
+        for shake_name in self.shakes:
+            yield from self.constructors_to_test.get(shake_name, ())
+
+    @property
     def is_fips_mode(self):
         return get_fips_mode()
 
@@ -274,7 +279,10 @@ class HashLibTestCase(unittest.TestCase):
                 with self.assertWarnsRegex(DeprecationWarning,
                                            DEPRECATED_STRING_PARAMETER):
                     hashlib.new(digest_name, string=b'')
-                if self._hashlib:
+                # Make sure that _hashlib contains the constructor
+                # to test when using a combination of libcrypto and
+                # interned hash implementations.
+                if self._hashlib and digest_name in self._hashlib._constructors:
                     self._hashlib.new(digest_name, b'')
                     self._hashlib.new(digest_name, data=b'')
                     with self.assertWarnsRegex(DeprecationWarning,
@@ -328,7 +336,8 @@ class HashLibTestCase(unittest.TestCase):
                 with self.subTest(digest_name, args=args, kwds=kwds):
                     with self.assertRaisesRegex(TypeError, errmsg):
                         hashlib.new(digest_name, *args, **kwds)
-                    if self._hashlib:
+                    if (self._hashlib and
+                            digest_name in self._hashlib._constructors):
                         with self.assertRaisesRegex(TypeError, errmsg):
                             self._hashlib.new(digest_name, *args, **kwds)
 
@@ -366,6 +375,16 @@ class HashLibTestCase(unittest.TestCase):
         self.assertIs(constructor, _md5.md5)
         self.assertEqual(sorted(builtin_constructor_cache), ['MD5', 'md5'])
 
+    def test_copy(self):
+        for cons in self.hash_constructors:
+            h1 = cons(os.urandom(16), usedforsecurity=False)
+            h2 = h1.copy()
+            self.assertIs(type(h1), type(h2))
+            self.assertEqual(h1.name, h2.name)
+            size = (16,) if h1.name in self.shakes else ()
+            self.assertEqual(h1.digest(*size), h2.digest(*size))
+            self.assertEqual(h1.hexdigest(*size), h2.hexdigest(*size))
+
     def test_hexdigest(self):
         for cons in self.hash_constructors:
             h = cons(usedforsecurity=False)
@@ -376,21 +395,50 @@ class HashLibTestCase(unittest.TestCase):
                 self.assertIsInstance(h.digest(), bytes)
                 self.assertEqual(hexstr(h.digest()), h.hexdigest())
 
-    def test_digest_length_overflow(self):
-        # See issue #34922
-        large_sizes = (2**29, 2**32-10, 2**32+10, 2**61, 2**64-10, 2**64+10)
-        for cons in self.hash_constructors:
-            h = cons(usedforsecurity=False)
-            if h.name not in self.shakes:
-                continue
-            if HASH is not None and isinstance(h, HASH):
-                # _hashopenssl's take a size_t
-                continue
-            for digest in h.digest, h.hexdigest:
-                self.assertRaises(ValueError, digest, -10)
-                for length in large_sizes:
-                    with self.assertRaises((ValueError, OverflowError)):
-                        digest(length)
+    def test_shakes_zero_digest_length(self):
+        for constructor in self.shake_constructors:
+            with self.subTest(constructor=constructor):
+                h = constructor(b'abcdef', usedforsecurity=False)
+                self.assertEqual(h.digest(0), b'')
+                self.assertEqual(h.hexdigest(0), '')
+
+    def test_shakes_invalid_digest_length(self):
+        # See https://github.com/python/cpython/issues/79103.
+        for constructor in self.shake_constructors:
+            with self.subTest(constructor=constructor):
+                h = constructor(usedforsecurity=False)
+                # Note: digest() and hexdigest() take a signed input and
+                # raise if it is negative; the rationale is that we
+                # internally use PyBytes_FromStringAndSize() and _Py_strhex(),
+                # which both take a Py_ssize_t.
+                for negative_size in (-1, -10, -(1 << 31), -sys.maxsize):
+                    self.assertRaises(ValueError, h.digest, negative_size)
+                    self.assertRaises(ValueError, h.hexdigest, negative_size)
+
+    def test_shakes_overflow_digest_length(self):
+        # See https://github.com/python/cpython/issues/135759.
+
+        exc_types = (OverflowError, ValueError)
+        # HACL* accepts an 'uint32_t' while OpenSSL accepts a 'size_t'.
+        openssl_overflown_sizes = (sys.maxsize + 1, 2 * sys.maxsize)
+        # https://github.com/python/cpython/issues/79103 restricts
+        # the accepted built-in lengths to less than 2 ** 29, even
+        # if OpenSSL accepts such lengths.
+        builtin_overflown_sizes = openssl_overflown_sizes + (
+            2 ** 29, 2 ** 32 - 10, 2 ** 32, 2 ** 32 + 10,
+            2 ** 61, 2 ** 64 - 10, 2 ** 64, 2 ** 64 + 10,
+        )
+
+        for constructor in self.shake_constructors:
+            with self.subTest(constructor=constructor):
+                h = constructor(usedforsecurity=False)
+                if HASH is not None and isinstance(h, HASH):
+                    overflown_sizes = openssl_overflown_sizes
+                else:
+                    overflown_sizes = builtin_overflown_sizes
+                for invalid_size in overflown_sizes:
+                    self.assertRaises(exc_types, h.digest, invalid_size)
+                    self.assertRaises(exc_types, h.hexdigest, invalid_size)
 
     def test_name_attribute(self):
         for cons in self.hash_constructors:
@@ -1009,49 +1057,67 @@ class HashLibTestCase(unittest.TestCase):
 
     def test_sha256_gil(self):
         gil_minsize = hashlib_helper.find_gil_minsize(['_sha2', '_hashlib'])
+        data = b'1' + b'#' * gil_minsize + b'1'
+        expected = hashlib.sha256(data).hexdigest()
+
         m = hashlib.sha256()
         m.update(b'1')
         m.update(b'#' * gil_minsize)
         m.update(b'1')
-        self.assertEqual(
-            m.hexdigest(),
-            '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94'
-        )
+        self.assertEqual(m.hexdigest(), expected)
 
-        m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1')
-        self.assertEqual(
-            m.hexdigest(),
-            '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94'
-        )
+    @threading_helper.reap_threads
+    @threading_helper.requires_working_threading()
+    def test_threaded_hashing_fast(self):
+        # Same as test_threaded_hashing_slow() but only tests some functions
+        # since otherwise test_hashlib.py becomes too slow during development.
+        for name in ['md5', 'sha1', 'sha256', 'sha3_256', 'blake2s']:
+            if constructor := getattr(hashlib, name, None):
+                with self.subTest(name):
+                    self.do_test_threaded_hashing(constructor, is_shake=False)
+        if shake_128 := getattr(hashlib, 'shake_128', None):
+            self.do_test_threaded_hashing(shake_128, is_shake=True)
 
+    @requires_resource('cpu')
     @threading_helper.reap_threads
     @threading_helper.requires_working_threading()
-    def test_threaded_hashing(self):
+    def test_threaded_hashing_slow(self):
+        for algorithm, constructors in self.constructors_to_test.items():
+            is_shake = algorithm in self.shakes
+            for constructor in constructors:
+                with self.subTest(constructor.__name__, is_shake=is_shake):
+                    self.do_test_threaded_hashing(constructor, is_shake)
+
+    def do_test_threaded_hashing(self, constructor, is_shake):
         # Updating the same hash object from several threads at once
         # using data chunk sizes containing the same byte sequences.
         #
         # If the internal locks are working to prevent multiple
         # updates on the same object from running at once, the resulting
         # hash will be the same as doing it single threaded upfront.
-        hasher = hashlib.sha1()
-        num_threads = 5
-        smallest_data = b'swineflu'
-        data = smallest_data * 200000
-        expected_hash = hashlib.sha1(data*num_threads).hexdigest()
-
-        def hash_in_chunks(chunk_size):
-            index = 0
-            while index < len(data):
-                hasher.update(data[index:index + chunk_size])
-                index += chunk_size
+
+        # The data to hash has length s|M|q^N and the chunk size for the i-th
+        # thread is s|M|q^(N-i), where N is the number of threads, M is a fixed
+        # message of small length, and s >= 1 and q >= 2 are small integers.
+        smallest_size, num_threads, s, q = 8, 5, 2, 10
+
+        smallest_data = os.urandom(smallest_size)
+        data = s * smallest_data * (q ** num_threads)
+
+        h1 = constructor(usedforsecurity=False)
+        h2 = constructor(data * num_threads, usedforsecurity=False)
+
+        def update(chunk_size):
+            for index in range(0, len(data), chunk_size):
+                h1.update(data[index:index + chunk_size])
 
         threads = []
-        for threadnum in range(num_threads):
-            chunk_size = len(data) // (10 ** threadnum)
+        for thread_num in range(num_threads):
+            # chunk_size = len(data) // (q ** thread_num)
+            chunk_size = s * smallest_size * q ** (num_threads - thread_num)
             self.assertGreater(chunk_size, 0)
-            self.assertEqual(chunk_size % len(smallest_data), 0)
-            thread = threading.Thread(target=hash_in_chunks,
-                                      args=(chunk_size,))
+            self.assertEqual(chunk_size % smallest_size, 0)
+            thread = threading.Thread(target=update, args=(chunk_size,))
             threads.append(thread)
 
         for thread in threads:
@@ -1059,7 +1125,10 @@ class HashLibTestCase(unittest.TestCase):
         for thread in threads:
             thread.join()
 
-        self.assertEqual(expected_hash, hasher.hexdigest())
+        if is_shake:
+            self.assertEqual(h1.hexdigest(16), h2.hexdigest(16))
+        else:
+            self.assertEqual(h1.hexdigest(), h2.hexdigest())
 
     def test_get_fips_mode(self):
         fips_mode = self.is_fips_mode
diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py
index e584fb417b9..79eb103224b 100644
--- a/Lib/test/test_inspect/test_inspect.py
+++ b/Lib/test/test_inspect/test_inspect.py
@@ -5875,9 +5875,9 @@ class TestSignatureDefinitions(unittest.TestCase):
         self._test_module_has_signatures(operator)
 
     def test_os_module_has_signatures(self):
-        unsupported_signature = {'chmod', 'link', 'utime'}
+        unsupported_signature = {'chmod', 'utime'}
         unsupported_signature |= {name for name in
-            ['get_terminal_size', 'posix_spawn', 'posix_spawnp',
+            ['get_terminal_size', 'link', 'posix_spawn', 'posix_spawnp',
              'register_at_fork', 'startfile']
             if hasattr(os, name)}
         self._test_module_has_signatures(os, unsupported_signature=unsupported_signature)
diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py
index 1403cd145b6..0ee4582b5d1 100644
--- a/Lib/test/test_interpreters/test_api.py
+++ b/Lib/test/test_interpreters/test_api.py
@@ -944,6 +944,22 @@ class TestInterpreterExec(TestBase):
             with self.assertRaisesRegex(InterpreterError, 'unrecognized'):
                 interp.exec('raise Exception("it worked!")')
 
+    def test_list_comprehension(self):
+        # gh-135450: List comprehensions caused an assertion failure
+        # in _PyCode_CheckNoExternalState()
+        import string
+        r_interp, w_interp = self.pipe()
+
+        interp = interpreters.create()
+        interp.exec(f"""if True:
+            import os
+            comp = [str(i) for i in range(10)]
+            os.write({w_interp}, ''.join(comp).encode())
+        """)
+        self.assertEqual(os.read(r_interp, 10).decode(), string.digits)
+        interp.close()
+
+
     # test__interpreters covers the remaining
     # Interpreter.exec() behavior.
 
@@ -1356,6 +1372,187 @@ class TestInterpreterCall(TestBase):
                 with self.assertRaises(interpreters.NotShareableError):
                     interp.call(defs.spam_returns_arg, arg)
 
+    def test_func_in___main___hidden(self):
+        # When a top-level function that uses global variables is called
+        # through Interpreter.call(), it will be pickled, sent over,
+        # and unpickled.  That requires that it be found in the other
+        # interpreter's __main__ module.  However, the original script
+        # that defined the function is only run in the main interpreter,
+        # so pickle.loads() would normally fail.
+        #
+        # We work around this by running the script in the other
+        # interpreter.  However, this is a one-off solution for the sake
+        # of unpickling, so we avoid modifying that interpreter's
+        # __main__ module by running the script in a hidden module.
+        #
+        # In this test we verify that the function runs with the hidden
+        # module as its __globals__ when called in the other interpreter,
+        # and that the interpreter's __main__ module is unaffected.
+        text = dedent("""
+            eggs = True
+
+            def spam(*, explicit=False):
+                if explicit:
+                    import __main__
+                    ns = __main__.__dict__
+                else:
+                    # For now we have to have a LOAD_GLOBAL in the
+                    # function in order for globals() to actually return
+                    # spam.__globals__.  Maybe it doesn't go through pickle?
+                    # XXX We will fix this later.
+                    spam
+                    ns = globals()
+
+                func = ns.get('spam')
+                return [
+                    id(ns),
+                    ns.get('__name__'),
+                    ns.get('__file__'),
+                    id(func),
+                    None if func is None else repr(func),
+                    ns.get('eggs'),
+                    ns.get('ham'),
+                ]
+
+            if __name__ == "__main__":
+                from concurrent import interpreters
+                interp = interpreters.create()
+
+                ham = True
+                print([
+                    [
+                        spam(explicit=True),
+                        spam(),
+                    ],
+                    [
+                        interp.call(spam, explicit=True),
+                        interp.call(spam),
+                    ],
+                ])
+           """)
+        with os_helper.temp_dir() as tempdir:
+            filename = script_helper.make_script(tempdir, 'my-script', text)
+            res = script_helper.assert_python_ok(filename)
+        stdout = res.out.decode('utf-8').strip()
+        local, remote = eval(stdout)
+
+        # In the main interpreter.
+        main, unpickled = local
+        nsid, _, _, funcid, func, _, _ = main
+        self.assertEqual(main, [
+            nsid,
+            '__main__',
+            filename,
+            funcid,
+            func,
+            True,
+            True,
+        ])
+        self.assertIsNot(func, None)
+        self.assertRegex(func, '^<function spam at 0x.*>$')
+        self.assertEqual(unpickled, main)
+
+        # In the subinterpreter.
+        main, unpickled = remote
+        nsid1, _, _, funcid1, _, _, _ = main
+        self.assertEqual(main, [
+            nsid1,
+            '__main__',
+            None,
+            funcid1,
+            None,
+            None,
+            None,
+        ])
+        nsid2, _, _, funcid2, func, _, _ = unpickled
+        self.assertEqual(unpickled, [
+            nsid2,
+            '<fake __main__>',
+            filename,
+            funcid2,
+            func,
+            True,
+            None,
+        ])
+        self.assertIsNot(func, None)
+        self.assertRegex(func, '^<function spam at 0x.*>$')
+        self.assertNotEqual(nsid2, nsid1)
+        self.assertNotEqual(funcid2, funcid1)
+
+    def test_func_in___main___uses_globals(self):
+        # See the note in test_func_in___main___hidden about pickle
+        # and the __main__ module.
+        #
+        # Additionally, the solution to that problem must provide
+        # for global variables on which a pickled function might rely.
+        #
+        # To check that, we run a script that has two global functions
+        # and a global variable in the __main__ module.  One of the
+        # functions sets the global variable and the other returns
+        # the value.
+        #
+        # The script calls those functions multiple times in another
+        # interpreter, to verify the following:
+        #
+        #  * the global variable is properly initialized
+        #  * the global variable retains state between calls
+        #  * the setter modifies that persistent variable
+        #  * the getter uses the variable
+        #  * the calls in the other interpreter do not modify
+        #    the main interpreter
+        #  * those calls don't modify the interpreter's __main__ module
+        #  * the functions and variable do not actually show up in the
+        #    other interpreter's __main__ module
+        text = dedent("""
+            count = 0
+
+            def inc(x=1):
+                global count
+                count += x
+
+            def get_count():
+                return count
+
+            if __name__ == "__main__":
+                counts = []
+                results = [count, counts]
+
+                from concurrent import interpreters
+                interp = interpreters.create()
+
+                val = interp.call(get_count)
+                counts.append(val)
+
+                interp.call(inc)
+                val = interp.call(get_count)
+                counts.append(val)
+
+                interp.call(inc, 3)
+                val = interp.call(get_count)
+                counts.append(val)
+
+                results.append(count)
+
+                modified = {name: interp.call(eval, f'{name!r} in vars()')
+                            for name in ('count', 'inc', 'get_count')}
+                results.append(modified)
+
+                print(results)
+           """)
+        with os_helper.temp_dir() as tempdir:
+            filename = script_helper.make_script(tempdir, 'my-script', text)
+            res = script_helper.assert_python_ok(filename)
+        stdout = res.out.decode('utf-8').strip()
+        before, counts, after, modified = eval(stdout)
+        self.assertEqual(modified, {
+            'count': False,
+            'inc': False,
+            'get_count': False,
+        })
+        self.assertEqual(before, 0)
+        self.assertEqual(after, 0)
+        self.assertEqual(counts, [0, 1, 4])
+
     def test_raises(self):
         interp = interpreters.create()
         with self.assertRaises(ExecutionFailed):
@@ -1414,6 +1611,113 @@ class TestInterpreterCall(TestBase):
             with self.assertRaises(interpreters.NotShareableError):
                 interp.call(func, op, 'eggs!')
 
+    def test_callable_requires_frame(self):
+        # There are various builtin functions that require a current frame.
+        interp = interpreters.create()
+        for call, expected in [  # (callable, *args) -> expected result
+            ((eval, '[1, 2, 3]'),
+                [1, 2, 3]),
+            ((eval, 'sum([1, 2, 3])'),
+                6),
+            ((exec, '...'),
+                None),
+        ]:
+            with self.subTest(str(call)):
+                res = interp.call(*call)
+                self.assertEqual(res, expected)
+
+        result_not_pickleable = [  # calls expected to raise NotShareableError
+            globals,
+            locals,
+            vars,
+        ]
+        for func, expectedtype in {  # builtin -> type of a successful result
+            globals: dict,
+            locals: dict,
+            vars: dict,
+            dir: list,
+        }.items():
+            with self.subTest(str(func)):
+                if func in result_not_pickleable:
+                    with self.assertRaises(interpreters.NotShareableError):
+                        interp.call(func)
+                else:
+                    res = interp.call(func)  # only dir() reaches this branch
+                    self.assertIsInstance(res, expectedtype)
+                    self.assertIn('__builtins__', res)
+
+    def test_globals_from_builtins(self):
+        # The builtins exec(), eval(), globals(), locals(), vars(),
+        # and dir() each run relative to the target interpreter's
+        # __main__ module, when called directly.  However,
+        # globals(), locals(), and vars() don't work when called
+        # directly so we don't check them.
+        from _frozen_importlib import BuiltinImporter
+        interp = interpreters.create()
+
+        names = interp.call(dir)  # names in a fresh interpreter's __main__
+        self.assertEqual(names, [
+            '__builtins__',
+            '__doc__',
+            '__loader__',
+            '__name__',
+            '__package__',
+            '__spec__',
+        ])
+
+        values = {name: interp.call(eval, name)
+                  for name in names if name != '__builtins__'}
+        self.assertEqual(values, {
+            '__name__': '__main__',
+            '__doc__': None,
+            '__spec__': None,  # It wasn't imported, so no module spec?
+            '__package__': None,
+            '__loader__': BuiltinImporter,
+        })
+        with self.assertRaises(ExecutionFailed):  # 'spam' is not defined yet
+            interp.call(eval, 'spam')
+
+        interp.call(exec, f'assert dir() == {names}')
+
+        # Update the interpreter's __main__.
+        interp.prepare_main(spam=42)
+        expected = names + ['spam']  # dir() is sorted; 'spam' follows '__*'
+
+        names = interp.call(dir)
+        self.assertEqual(names, expected)
+
+        value = interp.call(eval, 'spam')
+        self.assertEqual(value, 42)
+
+        interp.call(exec, f'assert dir() == {expected}, dir()')
+
+    def test_globals_from_stateless_func(self):
+        # A stateless func, which doesn't depend on any globals,
+        # doesn't go through pickle, so it runs in __main__.
+        def set_global(name, value):
+            globals()[name] = value
+
+        def get_global(name):
+            return globals().get(name)
+
+        interp = interpreters.create()
+
+        modname = interp.call(get_global, '__name__')
+        self.assertEqual(modname, '__main__')
+
+        res = interp.call(get_global, 'spam')
+        self.assertIsNone(res)  # 'spam' has not been defined there yet
+
+        interp.exec('spam = True')  # define it from inside the interpreter
+        res = interp.call(get_global, 'spam')
+        self.assertTrue(res)
+
+        interp.call(set_global, 'spam', 42)  # overwrite it via a called func
+        res = interp.call(get_global, 'spam')
+        self.assertEqual(res, 42)
+
+        interp.exec('assert spam == 42, repr(spam)')  # exec'd code sees it too
+
     def test_call_in_thread(self):
         interp = interpreters.create()
 
diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py
index 3e982d76e86..cb17340f581 100644
--- a/Lib/test/test_interpreters/test_queues.py
+++ b/Lib/test/test_interpreters/test_queues.py
@@ -208,18 +208,64 @@ class TestQueueOps(TestBase):
         self.assertIs(after, True)
 
     def test_full(self):
-        expected = [False, False, False, True, False, False, False]
-        actual = []
-        queue = queues.create(3)
-        for _ in range(3):
-            actual.append(queue.full())
-            queue.put(None)
-        actual.append(queue.full())
-        for _ in range(3):
-            queue.get()
-            actual.append(queue.full())
+        for maxsize in [1, 3, 11]:
+            with self.subTest(f'maxsize={maxsize}'):
+                num_to_add = maxsize
+                expected = [False] * (num_to_add * 2 + 3)  # one per full() sample
+                expected[maxsize] = True  # sampled right after the last put
+                expected[maxsize + 1] = True  # sampled just before the first get
+
+                queue = queues.create(maxsize)
+                actual = []
+                empty = [queue.empty()]
+
+                for _ in range(num_to_add):
+                    actual.append(queue.full())  # sample before each put
+                    queue.put_nowait(None)
+                actual.append(queue.full())
+                with self.assertRaises(queues.QueueFull):
+                    queue.put_nowait(None)
+                empty.append(queue.empty())
+
+                for _ in range(num_to_add):
+                    actual.append(queue.full())  # sample before each get
+                    queue.get_nowait()
+                actual.append(queue.full())
+                with self.assertRaises(queues.QueueEmpty):
+                    queue.get_nowait()
+                actual.append(queue.full())  # sample again after the failed get
+                empty.append(queue.empty())
 
-        self.assertEqual(actual, expected)
+                self.assertEqual(actual, expected)
+                self.assertEqual(empty, [True, False, True])
+
+        # no max size: default, zero and negative maxsize never report full
+        for args in [(), (0,), (-1,), (-10,)]:
+            with self.subTest(f'maxsize={args[0]}' if args else '<default>'):
+                num_to_add = 13
+                expected = [False] * (num_to_add * 2 + 3)  # one per full() sample
+
+                queue = queues.create(*args)
+                actual = []
+                empty = [queue.empty()]
+
+                for _ in range(num_to_add):
+                    actual.append(queue.full())
+                    queue.put_nowait(None)
+                actual.append(queue.full())
+                empty.append(queue.empty())
+
+                for _ in range(num_to_add):
+                    actual.append(queue.full())
+                    queue.get_nowait()
+                actual.append(queue.full())
+                with self.assertRaises(queues.QueueEmpty):
+                    queue.get_nowait()
+                actual.append(queue.full())
+                empty.append(queue.empty())
+
+                self.assertEqual(actual, expected)
+                self.assertEqual(empty, [True, False, True])
 
     def test_qsize(self):
         expected = [0, 1, 2, 3, 2, 3, 2, 1, 0, 1, 0]
diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py
index 9ea2679c77e..30f9bb33316 100644
--- a/Lib/test/test_json/test_tool.py
+++ b/Lib/test/test_json/test_tool.py
@@ -270,7 +270,7 @@ class TestMain(unittest.TestCase):
             (r'" \"foo\" "', f'{t.string}" \\"foo\\" "{t.reset}'),
             ('"α"', f'{t.string}"\\u03b1"{t.reset}'),
             ('123', f'{t.number}123{t.reset}'),
-            ('-1.2345e+23', f'{t.number}-1.2345e+23{t.reset}'),
+            ('-1.25e+23', f'{t.number}-1.25e+23{t.reset}'),
             (r'{"\\": ""}',
              f'''\
 {ob}
diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py
index fa5b1e43816..3819965ed2c 100644
--- a/Lib/test/test_logging.py
+++ b/Lib/test/test_logging.py
@@ -1036,7 +1036,7 @@ class TestTCPServer(ControlMixin, ThreadingTCPServer):
     """
 
     allow_reuse_address = True
-    allow_reuse_port = True
+    allow_reuse_port = False
 
     def __init__(self, addr, handler, poll_interval=0.5,
                  bind_and_activate=True):
@@ -4214,89 +4214,6 @@ class ConfigDictTest(BaseTest):
         handler = logging.getHandlerByName('custom')
         self.assertEqual(handler.custom_kwargs, custom_kwargs)
 
-    # See gh-91555 and gh-90321
-    @support.requires_subprocess()
-    def test_deadlock_in_queue(self):
-        queue = multiprocessing.Queue()
-        handler = logging.handlers.QueueHandler(queue)
-        logger = multiprocessing.get_logger()
-        level = logger.level
-        try:
-            logger.setLevel(logging.DEBUG)
-            logger.addHandler(handler)
-            logger.debug("deadlock")
-        finally:
-            logger.setLevel(level)
-            logger.removeHandler(handler)
-
-    def test_recursion_in_custom_handler(self):
-        class BadHandler(logging.Handler):
-            def __init__(self):
-                super().__init__()
-            def emit(self, record):
-                logger.debug("recurse")
-        logger = logging.getLogger("test_recursion_in_custom_handler")
-        logger.addHandler(BadHandler())
-        logger.setLevel(logging.DEBUG)
-        logger.debug("boom")
-
-    @threading_helper.requires_working_threading()
-    def test_thread_supression_noninterference(self):
-        lock = threading.Lock()
-        logger = logging.getLogger("test_thread_supression_noninterference")
-
-        # Block on the first call, allow others through
-        #
-        # NOTE: We need to bypass the base class's lock, otherwise that will
-        #       block multiple calls to the same handler itself.
-        class BlockOnceHandler(TestHandler):
-            def __init__(self, barrier):
-                super().__init__(support.Matcher())
-                self.barrier = barrier
-
-            def createLock(self):
-                self.lock = None
-
-            def handle(self, record):
-                self.emit(record)
-
-            def emit(self, record):
-                if self.barrier:
-                    barrier = self.barrier
-                    self.barrier = None
-                    barrier.wait()
-                    with lock:
-                        pass
-                super().emit(record)
-                logger.info("blow up if not supressed")
-
-        barrier = threading.Barrier(2)
-        handler = BlockOnceHandler(barrier)
-        logger.addHandler(handler)
-        logger.setLevel(logging.DEBUG)
-
-        t1 = threading.Thread(target=logger.debug, args=("1",))
-        with lock:
-
-            # Ensure first thread is blocked in the handler, hence supressing logging...
-            t1.start()
-            barrier.wait()
-
-            # ...but the second thread should still be able to log...
-            t2 = threading.Thread(target=logger.debug, args=("2",))
-            t2.start()
-            t2.join(timeout=3)
-
-            self.assertEqual(len(handler.buffer), 1)
-            self.assertTrue(handler.matches(levelno=logging.DEBUG, message='2'))
-
-            # The first thread should still be blocked here
-            self.assertTrue(t1.is_alive())
-
-        # Now the lock has been released the first thread should complete
-        t1.join()
-        self.assertEqual(len(handler.buffer), 2)
-        self.assertTrue(handler.matches(levelno=logging.DEBUG, message='1'))
 
 class ManagerTest(BaseTest):
     def test_manager_loggerclass(self):
diff --git a/Lib/test/test_netrc.py b/Lib/test/test_netrc.py
index 81e11a293cc..9d720f62710 100644
--- a/Lib/test/test_netrc.py
+++ b/Lib/test/test_netrc.py
@@ -1,11 +1,7 @@
 import netrc, os, unittest, sys, textwrap
+from test import support
 from test.support import os_helper
 
-try:
-    import pwd
-except ImportError:
-    pwd = None
-
 temp_filename = os_helper.TESTFN
 
 class NetrcTestCase(unittest.TestCase):
@@ -269,9 +265,14 @@ class NetrcTestCase(unittest.TestCase):
             machine bar.domain.com login foo password pass
             """, '#pass')
 
+    @unittest.skipUnless(support.is_wasi, 'WASI only test')
+    def test_security_on_WASI(self):
+        self.assertFalse(netrc._can_security_check())  # must report False here
+        self.assertEqual(netrc._getpwuid(0), 'uid 0')  # expects a 'uid N' label
+        self.assertEqual(netrc._getpwuid(123456), 'uid 123456')
 
     @unittest.skipUnless(os.name == 'posix', 'POSIX only test')
-    @unittest.skipIf(pwd is None, 'security check requires pwd module')
+    @unittest.skipUnless(hasattr(os, 'getuid'), "os.getuid is required")
     @os_helper.skip_unless_working_chmod
     def test_security(self):
         # This test is incomplete since we are normally not run as root and
diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py
index e6ffd2b0ffe..e476e472780 100644
--- a/Lib/test/test_optparse.py
+++ b/Lib/test/test_optparse.py
@@ -615,9 +615,9 @@ Options:
         self.parser.add_option(
             "-p", "--prob",
             help="blow up with probability PROB [default: %default]")
-        self.parser.set_defaults(prob=0.43)
+        self.parser.set_defaults(prob=0.25)
         expected_help = self.help_prefix + \
-            "  -p PROB, --prob=PROB  blow up with probability PROB [default: 0.43]\n"
+            "  -p PROB, --prob=PROB  blow up with probability PROB [default: 0.25]\n"
         self.assertHelp(self.parser, expected_help)
 
     def test_alt_expand(self):
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 88b5b0e6e35..5217037ae9d 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -13,7 +13,6 @@ import itertools
 import locale
 import os
 import pickle
-import platform
 import select
 import selectors
 import shutil
@@ -1919,6 +1918,10 @@ class MakedirTests(unittest.TestCase):
         support.is_wasi,
         "WASI's umask is a stub."
     )
+    @unittest.skipIf(
+        support.is_emscripten,
+        "TODO: Fails in buildbot; see #135783"
+    )
     def test_mode(self):
         with os_helper.temp_umask(0o002):
             base = os_helper.TESTFN
@@ -4291,13 +4294,8 @@ class EventfdTests(unittest.TestCase):
 @unittest.skipIf(sys.platform == "android", "gh-124873: Test is flaky on Android")
 @support.requires_linux_version(2, 6, 30)
 class TimerfdTests(unittest.TestCase):
-    # 1 ms accuracy is reliably achievable on every platform except Android
-    # emulators, where we allow 10 ms (gh-108277).
-    if sys.platform == "android" and platform.android_ver().is_emulator:
-        CLOCK_RES_PLACES = 2
-    else:
-        CLOCK_RES_PLACES = 3
-
+    # gh-126112: Use 10 ms to tolerate slow buildbots
+    CLOCK_RES_PLACES = 2  # 10 ms
     CLOCK_RES = 10 ** -CLOCK_RES_PLACES
     CLOCK_RES_NS = 10 ** (9 - CLOCK_RES_PLACES)
 
diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py
index 13356b4cfe0..b2e2cdb3338 100644
--- a/Lib/test/test_pathlib/test_pathlib.py
+++ b/Lib/test/test_pathlib/test_pathlib.py
@@ -2954,7 +2954,13 @@ class PathTest(PurePathTest):
         else:
             # ".." segments are normalized first on Windows, so this path is stat()able.
             self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") })
-        self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)})
+        if sys.platform == "emscripten":
+            # Emscripten will return ELOOP if there are 49 or more ..'s.
+            # Can remove when https://github.com/emscripten-core/emscripten/pull/24591 is merged.
+            NDOTDOTS = 48
+        else:
+            NDOTDOTS = 50
+        self.assertEqual(set(p.glob("/".join([".."] * NDOTDOTS))), { P(self.base, *[".."] * NDOTDOTS)})
 
     def test_glob_inaccessible(self):
         P = self.cls
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index ef596630b93..3d7300e1480 100644
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -718,9 +718,9 @@ class TestTranforms(BytecodeTestCase):
         self.assertEqual(format('x = %d!', 1234), 'x = 1234!')
         self.assertEqual(format('x = %x!', 1234), 'x = 4d2!')
         self.assertEqual(format('x = %f!', 1234), 'x = 1234.000000!')
-        self.assertEqual(format('x = %s!', 1234.5678901), 'x = 1234.5678901!')
-        self.assertEqual(format('x = %f!', 1234.5678901), 'x = 1234.567890!')
-        self.assertEqual(format('x = %d!', 1234.5678901), 'x = 1234!')
+        self.assertEqual(format('x = %s!', 1234.0000625), 'x = 1234.0000625!')
+        self.assertEqual(format('x = %f!', 1234.0000625), 'x = 1234.000063!')
+        self.assertEqual(format('x = %d!', 1234.0000625), 'x = 1234!')
         self.assertEqual(format('x = %s%% %%%%', 1234), 'x = 1234% %%')
         self.assertEqual(format('x = %s!', '%% %s'), 'x = %% %s!')
         self.assertEqual(format('x = %s, y = %d', 12, 34), 'x = 12, y = 34')
diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py
index 21d097dbb55..7529c853f9c 100644
--- a/Lib/test/test_perf_profiler.py
+++ b/Lib/test/test_perf_profiler.py
@@ -506,9 +506,12 @@ def _is_perf_version_at_least(major, minor):
     # The output of perf --version looks like "perf version 6.7-3" but
     # it can also be perf version "perf version 5.15.143", or even include
     # a commit hash in the version string, like "6.12.9.g242e6068fd5c"
+    #
+    # PermissionError is raised if perf does not exist on the Windows Subsystem
+    # for Linux, see #134987
     try:
         output = subprocess.check_output(["perf", "--version"], text=True)
-    except (subprocess.CalledProcessError, FileNotFoundError):
+    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
         return False
     version = output.split()[2]
     version = version.split("-")[0]
diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py
index 0c84d3d3bfd..41c337ade7e 100644
--- a/Lib/test/test_pprint.py
+++ b/Lib/test/test_pprint.py
@@ -458,7 +458,7 @@ class QueryTestCase(unittest.TestCase):
                 return super().__new__(Temperature, celsius_degrees)
             def __repr__(self):
                 kelvin_degrees = self + 273.15
-                return f"{kelvin_degrees}°K"
+                return f"{kelvin_degrees:.2f}°K"
         self.assertEqual(pprint.pformat(Temperature(1000)), '1273.15°K')
 
     def test_sorted_dict(self):
diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py
index 31ebcb3b8b0..0217ebd132b 100644
--- a/Lib/test/test_random.py
+++ b/Lib/test/test_random.py
@@ -151,6 +151,7 @@ class TestBasicOps:
         # Exception raised if size of sample exceeds that of population
         self.assertRaises(ValueError, self.gen.sample, population, N+1)
         self.assertRaises(ValueError, self.gen.sample, [], -1)
+        self.assertRaises(TypeError, self.gen.sample, population, 1.0)
 
     def test_sample_distribution(self):
         # For the entire allowable range of 0 <= k <= N, validate that
@@ -268,6 +269,7 @@ class TestBasicOps:
             choices(data, range(4), k=5),
             choices(k=5, population=data, weights=range(4)),
             choices(k=5, population=data, cum_weights=range(4)),
+            choices(data, k=MyIndex(5)),
         ]:
             self.assertEqual(len(sample), 5)
             self.assertEqual(type(sample), list)
@@ -378,122 +380,40 @@ class TestBasicOps:
             self.assertEqual(x1, x2)
             self.assertEqual(y1, y2)
 
+    @support.requires_IEEE_754
+    def test_53_bits_per_float(self):
+        span = 2 ** 53  # scale random() up to a 53-bit integer
+        cum = 0
+        for i in range(100):
+            cum |= int(self.gen.random() * span)  # OR together every bit seen
+        self.assertEqual(cum, span-1)  # each of the 53 bits was set at least once
+
     def test_getrandbits(self):
+        getrandbits = self.gen.getrandbits
         # Verify ranges
         for k in range(1, 1000):
-            self.assertTrue(0 <= self.gen.getrandbits(k) < 2**k)
-        self.assertEqual(self.gen.getrandbits(0), 0)
+            self.assertTrue(0 <= getrandbits(k) < 2**k)
+        self.assertEqual(getrandbits(0), 0)
 
         # Verify all bits active
-        getbits = self.gen.getrandbits
         for span in [1, 2, 3, 4, 31, 32, 32, 52, 53, 54, 119, 127, 128, 129]:
             all_bits = 2**span-1
             cum = 0
             cpl_cum = 0
             for i in range(100):
-                v = getbits(span)
+                v = getrandbits(span)
                 cum |= v
                 cpl_cum |= all_bits ^ v
             self.assertEqual(cum, all_bits)
             self.assertEqual(cpl_cum, all_bits)
 
         # Verify argument checking
-        self.assertRaises(TypeError, self.gen.getrandbits)
-        self.assertRaises(TypeError, self.gen.getrandbits, 1, 2)
-        self.assertRaises(ValueError, self.gen.getrandbits, -1)
-        self.assertRaises(OverflowError, self.gen.getrandbits, 1<<1000)
-        self.assertRaises(ValueError, self.gen.getrandbits, -1<<1000)
-        self.assertRaises(TypeError, self.gen.getrandbits, 10.1)
-
-    def test_pickling(self):
-        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
-            state = pickle.dumps(self.gen, proto)
-            origseq = [self.gen.random() for i in range(10)]
-            newgen = pickle.loads(state)
-            restoredseq = [newgen.random() for i in range(10)]
-            self.assertEqual(origseq, restoredseq)
-
-    def test_bug_1727780(self):
-        # verify that version-2-pickles can be loaded
-        # fine, whether they are created on 32-bit or 64-bit
-        # platforms, and that version-3-pickles load fine.
-        files = [("randv2_32.pck", 780),
-                 ("randv2_64.pck", 866),
-                 ("randv3.pck", 343)]
-        for file, value in files:
-            with open(support.findfile(file),"rb") as f:
-                r = pickle.load(f)
-            self.assertEqual(int(r.random()*1000), value)
-
-    def test_bug_9025(self):
-        # Had problem with an uneven distribution in int(n*random())
-        # Verify the fix by checking that distributions fall within expectations.
-        n = 100000
-        randrange = self.gen.randrange
-        k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n))
-        self.assertTrue(0.30 < k/n < .37, (k/n))
-
-    def test_randbytes(self):
-        # Verify ranges
-        for n in range(1, 10):
-            data = self.gen.randbytes(n)
-            self.assertEqual(type(data), bytes)
-            self.assertEqual(len(data), n)
-
-        self.assertEqual(self.gen.randbytes(0), b'')
-
-        # Verify argument checking
-        self.assertRaises(TypeError, self.gen.randbytes)
-        self.assertRaises(TypeError, self.gen.randbytes, 1, 2)
-        self.assertRaises(ValueError, self.gen.randbytes, -1)
-        self.assertRaises(OverflowError, self.gen.randbytes, 1<<1000)
-        self.assertRaises((ValueError, OverflowError), self.gen.randbytes, -1<<1000)
-        self.assertRaises(TypeError, self.gen.randbytes, 1.0)
-
-    def test_mu_sigma_default_args(self):
-        self.assertIsInstance(self.gen.normalvariate(), float)
-        self.assertIsInstance(self.gen.gauss(), float)
-
-
-try:
-    random.SystemRandom().random()
-except NotImplementedError:
-    SystemRandom_available = False
-else:
-    SystemRandom_available = True
-
-@unittest.skipUnless(SystemRandom_available, "random.SystemRandom not available")
-class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase):
-    gen = random.SystemRandom()
-
-    def test_autoseed(self):
-        # Doesn't need to do anything except not fail
-        self.gen.seed()
-
-    def test_saverestore(self):
-        self.assertRaises(NotImplementedError, self.gen.getstate)
-        self.assertRaises(NotImplementedError, self.gen.setstate, None)
-
-    def test_seedargs(self):
-        # Doesn't need to do anything except not fail
-        self.gen.seed(100)
-
-    def test_gauss(self):
-        self.gen.gauss_next = None
-        self.gen.seed(100)
-        self.assertEqual(self.gen.gauss_next, None)
-
-    def test_pickling(self):
-        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
-            self.assertRaises(NotImplementedError, pickle.dumps, self.gen, proto)
-
-    def test_53_bits_per_float(self):
-        # This should pass whenever a C double has 53 bit precision.
-        span = 2 ** 53
-        cum = 0
-        for i in range(100):
-            cum |= int(self.gen.random() * span)
-        self.assertEqual(cum, span-1)
+        self.assertRaises(TypeError, getrandbits)
+        self.assertRaises(TypeError, getrandbits, 1, 2)
+        self.assertRaises(ValueError, getrandbits, -1)
+        self.assertRaises(OverflowError, getrandbits, 1<<1000)
+        self.assertRaises(ValueError, getrandbits, -1<<1000)
+        self.assertRaises(TypeError, getrandbits, 10.1)
 
     def test_bigrand(self):
         # The randrange routine should build-up the required number of bits
@@ -572,6 +492,10 @@ class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase):
             randrange(1000, step=100)
         with self.assertRaises(TypeError):
             randrange(1000, None, step=100)
+        with self.assertRaises(TypeError):
+            randrange(1000, step=MyIndex(1))
+        with self.assertRaises(TypeError):
+            randrange(1000, None, step=MyIndex(1))
 
     def test_randbelow_logic(self, _log=log, int=int):
         # check bitcount transition points:  2**i and 2**(i+1)-1
@@ -594,6 +518,116 @@ class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase):
             self.assertEqual(k, numbits)        # note the stronger assertion
             self.assertTrue(2**k > n > 2**(k-1))   # note the stronger assertion
 
+    def test_randrange_index(self):
+        randrange = self.gen.randrange  # NOTE(review): MyIndex is defined elsewhere; presumably an __index__ wrapper - confirm
+        self.assertIn(randrange(MyIndex(5)), range(5))
+        self.assertIn(randrange(MyIndex(2), MyIndex(7)), range(2, 7))
+        self.assertIn(randrange(MyIndex(5), MyIndex(15), MyIndex(2)), range(5, 15, 2))
+
+    def test_randint(self):
+        randint = self.gen.randint
+        self.assertIn(randint(2, 5), (2, 3, 4, 5))  # both endpoints are allowed
+        self.assertEqual(randint(2, 2), 2)  # single-value range
+        self.assertIn(randint(MyIndex(2), MyIndex(5)), (2, 3, 4, 5))
+        self.assertEqual(randint(MyIndex(2), MyIndex(2)), 2)
+
+        self.assertRaises(ValueError, randint, 5, 2)  # low > high
+        self.assertRaises(TypeError, randint)  # wrong number of arguments
+        self.assertRaises(TypeError, randint, 2)
+        self.assertRaises(TypeError, randint, 2, 5, 1)
+        self.assertRaises(TypeError, randint, 2.0, 5)  # floats are rejected
+        self.assertRaises(TypeError, randint, 2, 5.0)
+
+    def test_pickling(self):
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            state = pickle.dumps(self.gen, proto)
+            origseq = [self.gen.random() for i in range(10)]
+            newgen = pickle.loads(state)
+            restoredseq = [newgen.random() for i in range(10)]
+            self.assertEqual(origseq, restoredseq)
+
+    def test_bug_1727780(self):
+        # verify that version-2-pickles can be loaded
+        # fine, whether they are created on 32-bit or 64-bit
+        # platforms, and that version-3-pickles load fine.
+        files = [("randv2_32.pck", 780),
+                 ("randv2_64.pck", 866),
+                 ("randv3.pck", 343)]
+        for file, value in files:
+            with open(support.findfile(file),"rb") as f:
+                r = pickle.load(f)
+            self.assertEqual(int(r.random()*1000), value)
+
+    def test_bug_9025(self):
+        # Had problem with an uneven distribution in int(n*random())
+        # Verify the fix by checking that distributions fall within expectations.
+        n = 100000
+        randrange = self.gen.randrange
+        k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n))
+        self.assertTrue(0.30 < k/n < .37, (k/n))
+
+    def test_randrange_bug_1590891(self):
+        start = 1000000000000
+        stop = -100000000000000000000
+        step = -200
+        x = self.gen.randrange(start, stop, step)
+        self.assertTrue(stop < x <= start)
+        self.assertEqual((x+stop)%step, 0)
+
+    def test_randbytes(self):
+        # Verify ranges
+        for n in range(1, 10):
+            data = self.gen.randbytes(n)
+            self.assertEqual(type(data), bytes)
+            self.assertEqual(len(data), n)
+
+        self.assertEqual(self.gen.randbytes(0), b'')
+
+        # Verify argument checking
+        self.assertRaises(TypeError, self.gen.randbytes)
+        self.assertRaises(TypeError, self.gen.randbytes, 1, 2)
+        self.assertRaises(ValueError, self.gen.randbytes, -1)
+        self.assertRaises(OverflowError, self.gen.randbytes, 1<<1000)
+        self.assertRaises((ValueError, OverflowError), self.gen.randbytes, -1<<1000)
+        self.assertRaises(TypeError, self.gen.randbytes, 1.0)
+
+    def test_mu_sigma_default_args(self):
+        self.assertIsInstance(self.gen.normalvariate(), float)
+        self.assertIsInstance(self.gen.gauss(), float)
+
+
+try:
+    random.SystemRandom().random()
+except NotImplementedError:
+    SystemRandom_available = False
+else:
+    SystemRandom_available = True
+
+@unittest.skipUnless(SystemRandom_available, "random.SystemRandom not available")
+class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase):
+    gen = random.SystemRandom()
+
+    def test_autoseed(self):
+        # Doesn't need to do anything except not fail
+        self.gen.seed()
+
+    def test_saverestore(self):
+        self.assertRaises(NotImplementedError, self.gen.getstate)
+        self.assertRaises(NotImplementedError, self.gen.setstate, None)
+
+    def test_seedargs(self):
+        # Doesn't need to do anything except not fail
+        self.gen.seed(100)
+
+    def test_gauss(self):
+        self.gen.gauss_next = None
+        self.gen.seed(100)
+        self.assertEqual(self.gen.gauss_next, None)
+
+    def test_pickling(self):
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            self.assertRaises(NotImplementedError, pickle.dumps, self.gen, proto)
+
 
 class TestRawMersenneTwister(unittest.TestCase):
     @test.support.cpython_only
@@ -779,38 +813,6 @@ class MersenneTwister_TestBasicOps(TestBasicOps, unittest.TestCase):
         seed = (1 << (10000 * 8)) - 1  # about 10K bytes
         self.gen.seed(seed)
 
-    def test_53_bits_per_float(self):
-        # This should pass whenever a C double has 53 bit precision.
-        span = 2 ** 53
-        cum = 0
-        for i in range(100):
-            cum |= int(self.gen.random() * span)
-        self.assertEqual(cum, span-1)
-
-    def test_bigrand(self):
-        # The randrange routine should build-up the required number of bits
-        # in stages so that all bit positions are active.
-        span = 2 ** 500
-        cum = 0
-        for i in range(100):
-            r = self.gen.randrange(span)
-            self.assertTrue(0 <= r < span)
-            cum |= r
-        self.assertEqual(cum, span-1)
-
-    def test_bigrand_ranges(self):
-        for i in [40,80, 160, 200, 211, 250, 375, 512, 550]:
-            start = self.gen.randrange(2 ** (i-2))
-            stop = self.gen.randrange(2 ** i)
-            if stop <= start:
-                continue
-            self.assertTrue(start <= self.gen.randrange(start, stop) < stop)
-
-    def test_rangelimits(self):
-        for start, stop in [(-2,0), (-(2**60)-2,-(2**60)), (2**60,2**60+2)]:
-            self.assertEqual(set(range(start,stop)),
-                set([self.gen.randrange(start,stop) for i in range(100)]))
-
     def test_getrandbits(self):
         super().test_getrandbits()
 
@@ -848,27 +850,6 @@ class MersenneTwister_TestBasicOps(TestBasicOps, unittest.TestCase):
         self.assertEqual(self.gen.randrange(2**99),
                          97904845777343510404718956115)
 
-    def test_randbelow_logic(self, _log=log, int=int):
-        # check bitcount transition points:  2**i and 2**(i+1)-1
-        # show that: k = int(1.001 + _log(n, 2))
-        # is equal to or one greater than the number of bits in n
-        for i in range(1, 1000):
-            n = 1 << i # check an exact power of two
-            numbits = i+1
-            k = int(1.00001 + _log(n, 2))
-            self.assertEqual(k, numbits)
-            self.assertEqual(n, 2**(k-1))
-
-            n += n - 1      # check 1 below the next power of two
-            k = int(1.00001 + _log(n, 2))
-            self.assertIn(k, [numbits, numbits+1])
-            self.assertTrue(2**k > n > 2**(k-2))
-
-            n -= n >> 15     # check a little farther below the next power of two
-            k = int(1.00001 + _log(n, 2))
-            self.assertEqual(k, numbits)        # note the stronger assertion
-            self.assertTrue(2**k > n > 2**(k-1))   # note the stronger assertion
-
     def test_randbelow_without_getrandbits(self):
         # Random._randbelow() can only use random() when the built-in one
         # has been overridden but no new getrandbits() method was supplied.
@@ -903,14 +884,6 @@ class MersenneTwister_TestBasicOps(TestBasicOps, unittest.TestCase):
             self.gen._randbelow_without_getrandbits(n, maxsize=maxsize)
             self.assertEqual(random_mock.call_count, 2)
 
-    def test_randrange_bug_1590891(self):
-        start = 1000000000000
-        stop = -100000000000000000000
-        step = -200
-        x = self.gen.randrange(start, stop, step)
-        self.assertTrue(stop < x <= start)
-        self.assertEqual((x+stop)%step, 0)
-
     def test_choices_algorithms(self):
         # The various ways of specifying weights should produce the same results
         choices = self.gen.choices
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index a43d2678ebd..5bc3c5924b0 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -2346,6 +2346,17 @@ class ArgsTestCase(BaseTestCase):
         output = self.run_tests('-j1', '-v', testname, env=env, isolated=False)
         check(output)
 
+    def test_pgo_exclude(self):
+        # Get PGO tests
+        output = self.run_tests('--pgo', '--list-tests')
+        pgo_tests = output.strip().split()
+
+        # Exclude test_re
+        output = self.run_tests('--pgo', '--list-tests', '-x', 'test_re')
+        tests = output.strip().split()
+        self.assertNotIn('test_re', tests)
+        self.assertEqual(len(tests), len(pgo_tests) - 1)
+
 
 class TestUtils(unittest.TestCase):
     def test_format_duration(self):
diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py
index 16623654c29..22a55b57c07 100644
--- a/Lib/test/test_reprlib.py
+++ b/Lib/test/test_reprlib.py
@@ -151,14 +151,38 @@ class ReprTests(unittest.TestCase):
         eq(r(frozenset({1, 2, 3, 4, 5, 6, 7})), "frozenset({1, 2, 3, 4, 5, 6, ...})")
 
     def test_numbers(self):
-        eq = self.assertEqual
-        eq(r(123), repr(123))
-        eq(r(123), repr(123))
-        eq(r(1.0/3), repr(1.0/3))
-
-        n = 10**100
-        expected = repr(n)[:18] + "..." + repr(n)[-19:]
-        eq(r(n), expected)
+        for x in [123, 1.0 / 3]:
+            self.assertEqual(r(x), repr(x))
+
+        max_digits = sys.get_int_max_str_digits()
+        for k in [100, max_digits - 1]:
+            with self.subTest(f'10 ** {k}', k=k):
+                n = 10 ** k
+                expected = repr(n)[:18] + "..." + repr(n)[-19:]
+                self.assertEqual(r(n), expected)
+
+        def re_msg(n, d):
+            return (rf'<{n.__class__.__name__} instance with roughly {d} '
+                    rf'digits \(limit at {max_digits}\) at 0x[a-f0-9]+>')
+
+        k = max_digits
+        with self.subTest(f'10 ** {k}', k=k):
+            n = 10 ** k
+            self.assertRaises(ValueError, repr, n)
+            self.assertRegex(r(n), re_msg(n, k + 1))
+
+        for k in [max_digits + 1, 2 * max_digits]:
+            self.assertGreater(k, 100)
+            with self.subTest(f'10 ** {k}', k=k):
+                n = 10 ** k
+                self.assertRaises(ValueError, repr, n)
+                self.assertRegex(r(n), re_msg(n, k + 1))
+            with self.subTest(f'10 ** {k} - 1', k=k):
+                n = 10 ** k - 1
+                # Here, since math.log10(10 ** k - 1) == math.log10(10 ** k),
+                # the digit count of n (really k) is overestimated as k + 1.
+                self.assertRaises(ValueError, repr, n)
+                self.assertRegex(r(n), re_msg(n, k + 1))
 
     def test_instance(self):
         eq = self.assertEqual
@@ -373,20 +397,20 @@ class ReprTests(unittest.TestCase):
                 'object': {
                     1: 'two',
                     b'three': [
-                        (4.5, 6.7),
+                        (4.5, 6.25),
                         [set((8, 9)), frozenset((10, 11))],
                     ],
                 },
                 'tests': (
                     (dict(indent=None), '''\
-                        {1: 'two', b'three': [(4.5, 6.7), [{8, 9}, frozenset({10, 11})]]}'''),
+                        {1: 'two', b'three': [(4.5, 6.25), [{8, 9}, frozenset({10, 11})]]}'''),
                     (dict(indent=False), '''\
                         {
                         1: 'two',
                         b'three': [
                         (
                         4.5,
-                        6.7,
+                        6.25,
                         ),
                         [
                         {
@@ -406,7 +430,7 @@ class ReprTests(unittest.TestCase):
                          b'three': [
                           (
                            4.5,
-                           6.7,
+                           6.25,
                           ),
                           [
                            {
@@ -426,7 +450,7 @@ class ReprTests(unittest.TestCase):
                         b'three': [
                         (
                         4.5,
-                        6.7,
+                        6.25,
                         ),
                         [
                         {
@@ -446,7 +470,7 @@ class ReprTests(unittest.TestCase):
                          b'three': [
                           (
                            4.5,
-                           6.7,
+                           6.25,
                           ),
                           [
                            {
@@ -466,7 +490,7 @@ class ReprTests(unittest.TestCase):
                             b'three': [
                                 (
                                     4.5,
-                                    6.7,
+                                    6.25,
                                 ),
                                 [
                                     {
@@ -494,7 +518,7 @@ class ReprTests(unittest.TestCase):
                         b'three': [
                         (
                         4.5,
-                        6.7,
+                        6.25,
                         ),
                         [
                         {
@@ -514,7 +538,7 @@ class ReprTests(unittest.TestCase):
                         -->b'three': [
                         -->-->(
                         -->-->-->4.5,
-                        -->-->-->6.7,
+                        -->-->-->6.25,
                         -->-->),
                         -->-->[
                         -->-->-->{
@@ -534,7 +558,7 @@ class ReprTests(unittest.TestCase):
                         ....b'three': [
                         ........(
                         ............4.5,
-                        ............6.7,
+                        ............6.25,
                         ........),
                         ........[
                         ............{
diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py
index d993e28c4bb..720fa3c4c1e 100644
--- a/Lib/test/test_sqlite3/test_cli.py
+++ b/Lib/test/test_sqlite3/test_cli.py
@@ -138,7 +138,7 @@ class InteractiveSession(unittest.TestCase):
         self.assertEndsWith(out, self.PS1)
         self.assertEqual(out.count(self.PS1), 2)
         self.assertEqual(out.count(self.PS2), 0)
-        self.assertIn("Error", err)
+        self.assertIn('Error: unknown command: "', err)
         # test "unknown_command" is pointed out in the error message
         self.assertIn("unknown_command", err)
 
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 0dd619dd7c8..8250b0aef09 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -3319,7 +3319,8 @@ class TestNormalDistC(unittest.TestCase, TestNormalDist):
 def load_tests(loader, tests, ignore):
     """Used for doctest/unittest integration."""
     tests.addTests(doctest.DocTestSuite())
-    tests.addTests(doctest.DocTestSuite(statistics))
+    if sys.float_repr_style == 'short':
+        tests.addTests(doctest.DocTestSuite(statistics))
     return tests
 
 
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
index d6a7bd0da59..2584fbf72d3 100644
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -1231,10 +1231,10 @@ class StrTest(string_tests.StringLikeTest,
         self.assertEqual('{0:\x00^6}'.format(3), '\x00\x003\x00\x00\x00')
         self.assertEqual('{0:<6}'.format(3), '3     ')
 
-        self.assertEqual('{0:\x00<6}'.format(3.14), '3.14\x00\x00')
-        self.assertEqual('{0:\x01<6}'.format(3.14), '3.14\x01\x01')
-        self.assertEqual('{0:\x00^6}'.format(3.14), '\x003.14\x00')
-        self.assertEqual('{0:^6}'.format(3.14), ' 3.14 ')
+        self.assertEqual('{0:\x00<6}'.format(3.25), '3.25\x00\x00')
+        self.assertEqual('{0:\x01<6}'.format(3.25), '3.25\x01\x01')
+        self.assertEqual('{0:\x00^6}'.format(3.25), '\x003.25\x00')
+        self.assertEqual('{0:^6}'.format(3.25), ' 3.25 ')
 
         self.assertEqual('{0:\x00<12}'.format(3+2.0j), '(3+2j)\x00\x00\x00\x00\x00\x00')
         self.assertEqual('{0:\x01<12}'.format(3+2.0j), '(3+2j)\x01\x01\x01\x01\x01\x01')
diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py
index 268230f6da7..0241e543cd7 100644
--- a/Lib/test/test_strptime.py
+++ b/Lib/test/test_strptime.py
@@ -221,14 +221,16 @@ class StrptimeTests(unittest.TestCase):
         self.assertRaises(ValueError, _strptime._strptime_time, data_string="%d",
                           format="%A")
         for bad_format in ("%", "% ", "%\n"):
-            with self.assertRaisesRegex(ValueError, "stray % in format "):
+            with (self.subTest(format=bad_format),
+                  self.assertRaisesRegex(ValueError, "stray % in format ")):
                 _strptime._strptime_time("2005", bad_format)
-        for bad_format in ("%e", "%Oe", "%O", "%O ", "%Ee", "%E", "%E ",
-                           "%.", "%+", "%_", "%~", "%\\",
+        for bad_format in ("%i", "%Oi", "%O", "%O ", "%Ee", "%E", "%E ",
+                           "%.", "%+", "%~", "%\\",
                            "%O.", "%O+", "%O_", "%O~", "%O\\"):
             directive = bad_format[1:].rstrip()
-            with self.assertRaisesRegex(ValueError,
-                    f"'{re.escape(directive)}' is a bad directive in format "):
+            with (self.subTest(format=bad_format),
+                  self.assertRaisesRegex(ValueError,
+                    f"'{re.escape(directive)}' is a bad directive in format ")):
                 _strptime._strptime_time("2005", bad_format)
 
         msg_week_no_year_or_weekday = r"ISO week directive '%V' must be used with " \
@@ -335,6 +337,15 @@ class StrptimeTests(unittest.TestCase):
             self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0))
             self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0))
 
+    @run_with_locales('LC_TIME', 'az_AZ', 'ber_DZ', 'ber_MA', 'crh_UA')
+    def test_month_locale2(self):
+        # Test for month directives
+        # Month name contains 'İ' ('\u0130')
+        self.roundtrip('%B', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0))
+        self.roundtrip('%b', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0))
+        self.roundtrip('%B', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0))
+        self.roundtrip('%b', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0))
+
     def test_day(self):
         # Test for day directives
         self.roundtrip('%d %Y', 2)
@@ -480,13 +491,11 @@ class StrptimeTests(unittest.TestCase):
     # * Year is not included: ha_NG.
     # * Use non-Gregorian calendar: lo_LA, thai, th_TH.
     #   On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
-    #
-    # BUG: Generates regexp that does not match the current date and time
-    # for lzh_TW.
     @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
                       'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG',
                       'csb_PL', 'br_FR', 'gez_ET', 'brx_IN',
-                      'my_MM', 'or_IN', 'shn_MM', 'az_IR')
+                      'my_MM', 'or_IN', 'shn_MM', 'az_IR',
+                      'byn_ER', 'wal_ET', 'lzh_TW')
     def test_date_time_locale(self):
         # Test %c directive
         loc = locale.getlocale(locale.LC_TIME)[0]
@@ -525,11 +534,9 @@ class StrptimeTests(unittest.TestCase):
 
     # NB: Does not roundtrip because use non-Gregorian calendar:
     # lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
-    # BUG: Generates regexp that does not match the current date
-    # for lzh_TW.
     @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
                       'he_IL', 'eu_ES', 'ar_AE',
-                      'az_IR', 'my_MM', 'or_IN', 'shn_MM')
+                      'az_IR', 'my_MM', 'or_IN', 'shn_MM', 'lzh_TW')
     def test_date_locale(self):
         # Test %x directive
         now = time.time()
@@ -546,7 +553,7 @@ class StrptimeTests(unittest.TestCase):
     # NB: Dates before 1969 do not roundtrip on many locales, including C.
     @unittest.skipIf(support.linked_to_musl(), "musl libc issue, bpo-46390")
     @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
-                      'eu_ES', 'ar_AE', 'my_MM', 'shn_MM')
+                      'eu_ES', 'ar_AE', 'my_MM', 'shn_MM', 'lzh_TW')
     def test_date_locale2(self):
         # Test %x directive
         loc = locale.getlocale(locale.LC_TIME)[0]
@@ -562,11 +569,11 @@ class StrptimeTests(unittest.TestCase):
     #   norwegian, nynorsk.
     # * Hours are in 12-hour notation without AM/PM indication: hy_AM,
     #   ms_MY, sm_WS.
-    # BUG: Generates regexp that does not match the current time for lzh_TW.
     @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
                       'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET',
                       'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO',
-                      'ti_ET', 'tig_ER', 'wal_ET')
+                      'ti_ET', 'tig_ER', 'wal_ET', 'lzh_TW',
+                      'ar_SA', 'bg_BG')
     def test_time_locale(self):
         # Test %X directive
         loc = locale.getlocale(locale.LC_TIME)[0]
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 39e62027f03..486bf10a0b5 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -869,12 +869,7 @@ class SysModuleTest(unittest.TestCase):
     def assert_raise_on_new_sys_type(self, sys_attr):
         # Users are intentionally prevented from creating new instances of
         # sys.flags, sys.version_info, and sys.getwindowsversion.
-        arg = sys_attr
-        attr_type = type(sys_attr)
-        with self.assertRaises(TypeError):
-            attr_type(arg)
-        with self.assertRaises(TypeError):
-            attr_type.__new__(attr_type, arg)
+        support.check_disallow_instantiation(self, type(sys_attr), sys_attr)
 
     def test_sys_flags_no_instantiation(self):
         self.assert_raise_on_new_sys_type(sys.flags)
@@ -1074,6 +1069,7 @@ class SysModuleTest(unittest.TestCase):
         self.assertHasAttr(sys.implementation, 'version')
         self.assertHasAttr(sys.implementation, 'hexversion')
         self.assertHasAttr(sys.implementation, 'cache_tag')
+        self.assertHasAttr(sys.implementation, 'supports_isolated_interpreters')
 
         version = sys.implementation.version
         self.assertEqual(version[:2], (version.major, version.minor))
@@ -1087,6 +1083,15 @@ class SysModuleTest(unittest.TestCase):
         self.assertEqual(sys.implementation.name,
                          sys.implementation.name.lower())
 
+        # https://peps.python.org/pep-0734
+        sii = sys.implementation.supports_isolated_interpreters
+        self.assertIsInstance(sii, bool)
+        if test.support.check_impl_detail(cpython=True):
+            if test.support.is_emscripten or test.support.is_wasi:
+                self.assertFalse(sii)
+            else:
+                self.assertTrue(sii)
+
     @test.support.cpython_only
     def test_debugmallocstats(self):
         # Test sys._debugmallocstats()
@@ -1943,22 +1948,7 @@ class SizeofTest(unittest.TestCase):
         self.assertEqual(out, b"")
         self.assertEqual(err, b"")
 
-
-def _supports_remote_attaching():
-    PROCESS_VM_READV_SUPPORTED = False
-
-    try:
-        from _remote_debugging import PROCESS_VM_READV_SUPPORTED
-    except ImportError:
-        pass
-
-    return PROCESS_VM_READV_SUPPORTED
-
-@unittest.skipIf(not sys.is_remote_debug_enabled(), "Remote debugging is not enabled")
-@unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux" and sys.platform != "win32",
-                    "Test only runs on Linux, Windows and MacOS")
-@unittest.skipIf(sys.platform == "linux" and not _supports_remote_attaching(),
-                    "Test only runs on Linux with process_vm_readv support")
+@test.support.support_remote_exec_only
 @test.support.cpython_only
 class TestRemoteExec(unittest.TestCase):
     def tearDown(self):
@@ -2117,7 +2107,7 @@ print("Remote script executed successfully!")
         returncode, stdout, stderr = self._run_remote_exec_test(script, prologue=prologue)
         self.assertEqual(returncode, 0)
         self.assertIn(b"Remote script executed successfully!", stdout)
-        self.assertIn(b"Audit event: remote_debugger_script, arg: ", stdout)
+        self.assertIn(b"Audit event: cpython.remote_debugger_script, arg: ", stdout)
         self.assertEqual(stderr, b"")
 
     def test_remote_exec_with_exception(self):
diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py
index 2c0df9376ab..2eb8de4b29f 100644
--- a/Lib/test/test_sysconfig.py
+++ b/Lib/test/test_sysconfig.py
@@ -711,8 +711,8 @@ class TestSysConfig(unittest.TestCase, VirtualEnvironmentMixin):
             ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase', 'installed_base', 'installed_platbase'}
 
         for key in ignore_keys:
-            json_config_vars.pop(key)
-            system_config_vars.pop(key)
+            json_config_vars.pop(key, None)
+            system_config_vars.pop(key, None)
 
         self.assertEqual(system_config_vars, json_config_vars)
 
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index 125c2744698..13b55d0f0a2 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -1284,12 +1284,6 @@ class ThreadTests(BaseTestCase):
 
     @cpython_only
     def test_finalize_daemon_thread_hang(self):
-        if support.check_sanitizer(thread=True, memory=True):
-            # the thread running `time.sleep(100)` below will still be alive
-            # at process exit
-            self.skipTest(
-                    "https://github.com/python/cpython/issues/124878 - Known"
-                    " race condition that TSAN identifies.")
         # gh-87135: tests that daemon threads hang during finalization
         script = textwrap.dedent('''
             import os
diff --git a/Lib/test/test_tstring.py b/Lib/test/test_tstring.py
index e72a1ea5417..aabae385567 100644
--- a/Lib/test/test_tstring.py
+++ b/Lib/test/test_tstring.py
@@ -219,6 +219,7 @@ class TestTString(unittest.TestCase, TStringBaseCase):
             ("t'{lambda:1}'", "t-string: lambda expressions are not allowed "
                               "without parentheses"),
             ("t'{x:{;}}'", "t-string: expecting a valid expression after '{'"),
+            ("t'{1:d\n}'", "t-string: newlines are not allowed in format specifiers")
         ):
             with self.subTest(case), self.assertRaisesRegex(SyntaxError, err):
                 eval(case)
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
index a117413301b..fc26e71ffcb 100644
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -2,7 +2,7 @@
 
 from test.support import (
     run_with_locale, cpython_only, no_rerun,
-    MISSING_C_DOCSTRINGS, EqualToForwardRef,
+    MISSING_C_DOCSTRINGS, EqualToForwardRef, check_disallow_instantiation,
 )
 from test.support.script_helper import assert_python_ok
 from test.support.import_helper import import_fresh_module
@@ -517,8 +517,8 @@ class TypesTests(unittest.TestCase):
         # and a number after the decimal.  This is tricky, because
         # a totally empty format specifier means something else.
         # So, just use a sign flag
-        test(1e200, '+g', '+1e+200')
-        test(1e200, '+', '+1e+200')
+        test(1.25e200, '+g', '+1.25e+200')
+        test(1.25e200, '+', '+1.25e+200')
 
         test(1.1e200, '+g', '+1.1e+200')
         test(1.1e200, '+', '+1.1e+200')
@@ -1148,8 +1148,7 @@ class UnionTests(unittest.TestCase):
                              msg='Check for union reference leak.')
 
     def test_instantiation(self):
-        with self.assertRaises(TypeError):
-            types.UnionType()
+        check_disallow_instantiation(self, types.UnionType)
         self.assertIs(int, types.UnionType[int])
         self.assertIs(int, types.UnionType[int, int])
         self.assertEqual(int | str, types.UnionType[int, str])
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index bc1030eea60..1d889ae7cf4 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1569,6 +1569,7 @@ class Pathname_Tests(unittest.TestCase):
                     urllib.request.url2pathname,
                     url, require_scheme=True)
 
+    @unittest.skipIf(support.is_emscripten, "Fixed by https://github.com/emscripten-core/emscripten/pull/24593")
     def test_url2pathname_resolve_host(self):
         fn = urllib.request.url2pathname
         sep = os.path.sep
diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py
index 90a356fbb8e..8130c739af2 100644
--- a/Lib/xmlrpc/server.py
+++ b/Lib/xmlrpc/server.py
@@ -578,7 +578,7 @@ class SimpleXMLRPCServer(socketserver.TCPServer,
     """
 
     allow_reuse_address = True
-    allow_reuse_port = True
+    allow_reuse_port = False
 
     # Warning: this is for debugging purposes only! Never set this to True in
     # production code, as will be sending out sensitive information (exception
diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py
index b77dc0ed391..3ffdb4c8371 100644
--- a/Lib/zoneinfo/_zoneinfo.py
+++ b/Lib/zoneinfo/_zoneinfo.py
@@ -75,12 +75,12 @@ class ZoneInfo(tzinfo):
         return obj
 
     @classmethod
-    def from_file(cls, fobj, /, key=None):
+    def from_file(cls, file_obj, /, key=None):
         obj = super().__new__(cls)
         obj._key = key
         obj._file_path = None
-        obj._load_file(fobj)
-        obj._file_repr = repr(fobj)
+        obj._load_file(file_obj)
+        obj._file_repr = repr(file_obj)
 
         # Disable pickling for objects created from files
         obj.__reduce__ = obj._file_reduce
diff --git a/Misc/ACKS b/Misc/ACKS
index d4557a03eb5..6ab50763fea 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -943,6 +943,7 @@ Anton Kasyanov
 Lou Kates
 Makoto Kato
 Irit Katriel
+Kattni
 Hiroaki Kawai
 Dmitry Kazakov
 Brian Kearns
@@ -1480,6 +1481,7 @@ Jean-François Piéronne
 Oleg Plakhotnyuk
 Anatoliy Platonov
 Marcel Plch
+Stefan Pochmann
 Kirill Podoprigora
 Remi Pointel
 Jon Poler
diff --git a/Misc/NEWS.d/3.14.0a6.rst b/Misc/NEWS.d/3.14.0a6.rst
index bafd8845de6..d8840b6f283 100644
--- a/Misc/NEWS.d/3.14.0a6.rst
+++ b/Misc/NEWS.d/3.14.0a6.rst
@@ -1325,7 +1325,7 @@ variable.
 .. nonce: d75n8U
 .. section: Core and Builtins
 
-Adapt :func:`reversed` for use in the free-theading build. The
+Adapt :func:`reversed` for use in the free-threading build. The
 :func:`reversed` is still not thread-safe in the sense that concurrent
 iterations may see the same object, but they will not corrupt the
 interpreter state.
diff --git a/Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst b/Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst
new file mode 100644
index 00000000000..f41c8744b8a
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-05-24-16-59-20.gh-issue-134632.i0W2hc.rst
@@ -0,0 +1,3 @@
+Fixed ``build-details.json`` generation to use ``INCLUDEPY``, in order to
+reference the ``pythonX.Y`` subdirectory of the include directory, as
+required in :pep:`739`, instead of the top-level include directory.
diff --git a/Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst b/Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst
new file mode 100644
index 00000000000..c84663b1466
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-06-14-10-32-11.gh-issue-135497.ajlV4F.rst
@@ -0,0 +1 @@
+Fix the detection of ``MAXLOGNAME`` in the ``configure.ac`` script.
diff --git a/Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst b/Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst
new file mode 100644
index 00000000000..3eb0805b9ce
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-06-16-07-20-28.gh-issue-119132.fcI8s7.rst
@@ -0,0 +1 @@
+Remove "experimental" tag from the CPython free-threading build.
diff --git a/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst
new file mode 100644
index 00000000000..21a2c87d344
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst
@@ -0,0 +1 @@
+Fix building with MSVC when passing option ``/std:clatest``.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst
new file mode 100644
index 00000000000..aadaf2169fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst
@@ -0,0 +1 @@
+Automatically constant evaluate bytecode operations marked as pure in the JIT optimizer.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst
new file mode 100644
index 00000000000..5f9e1553ae7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-23-14-54-07.gh-issue-134584.y-WDjf.rst
@@ -0,0 +1 @@
+Add a reference count elimination pass to the JIT compiler. Patch by Ken Jin.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst
new file mode 100644
index 00000000000..53aef541e64
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-27-20-29-00.gh-issue-132617.EmUfQQ.rst
@@ -0,0 +1,3 @@
+Fix :meth:`dict.update` modification check that could incorrectly raise a
+"dict mutated during update" error when a different dictionary was modified
+that happens to share the same underlying keys object.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst
new file mode 100644
index 00000000000..0e68c793e5e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-02-20-13-37.gh-issue-131798.JQRFvR.rst
@@ -0,0 +1 @@
+Optimize ``_CHECK_METHOD_VERSION`` into ``_CHECK_FUNCTION_VERSION_INLINE`` in JIT-compiled code.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst
new file mode 100644
index 00000000000..a9501c13c95
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst
@@ -0,0 +1,2 @@
+Limit excess memory usage in the :term:`free threading` build when a
+large dictionary or list is resized and accessed by multiple threads.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst
new file mode 100644
index 00000000000..6a9d9c683f9
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst
@@ -0,0 +1 @@
+Optimize ``_UNARY_NEGATIVE`` in JIT-compiled code.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst
new file mode 100644
index 00000000000..7965169d46e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-08-14-24-29.gh-issue-131798.qfw91T.rst
@@ -0,0 +1 @@
+Optimize ``_CALL_LEN`` in the JIT when the length is known. Patch by Tomas Roun.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst
new file mode 100644
index 00000000000..d90153c9684
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-11-15-08-10.gh-issue-127319.OVGFSZ.rst
@@ -0,0 +1,3 @@
+Set the ``allow_reuse_port`` class variable to ``False`` on the XMLRPC,
+logging, and HTTP servers. This matches the behavior in prior Python
+releases, which is to not allow port reuse.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst
new file mode 100644
index 00000000000..6efe2a47bac
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-02-31-42.gh-issue-135543.6b0HOF.rst
@@ -0,0 +1,2 @@
+Emit ``sys.remote_exec`` audit event when :func:`sys.remote_exec` is called
+and migrate ``remote_debugger_script`` to ``cpython.remote_debugger_script``.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst
new file mode 100644
index 00000000000..22dda2a3e97
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst
@@ -0,0 +1 @@
+Sorting randomly ordered lists will often run a bit faster, thanks to a new scheme for picking minimum run lengths from Stefan Pochmann, which arranges for the merge tree to be as evenly balanced as is possible.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst
new file mode 100644
index 00000000000..a65a0c85fa6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-17-12-50-48.gh-issue-135608.PnHckD.rst
@@ -0,0 +1 @@
+Fix a crash in the JIT involving attributes of modules.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst
new file mode 100644
index 00000000000..089d00c77da
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-12-19-13.gh-issue-135379.TCvGpj.rst
@@ -0,0 +1,3 @@
+Changes specialization of ``BINARY_OP`` for ints to only specialize for
+"compact" ints. This streamlines the fast path at the cost of fewer
+specializations when very large integers are used.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst
new file mode 100644
index 00000000000..b6e953a7719
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst
@@ -0,0 +1,2 @@
+Restrict the trashcan mechanism to GC'ed objects and untrack them while in
+the trashcan to prevent the GC and trashcan mechanisms conflicting.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst
new file mode 100644
index 00000000000..715ac7dc925
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst
@@ -0,0 +1 @@
+Specialize :opcode:`POP_TOP` in the JIT compiler by specializing for reference lifetime and type. This will also enable easier top of stack caching in the JIT compiler.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst
new file mode 100644
index 00000000000..ce29ddecefe
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst
@@ -0,0 +1,2 @@
+Non-blocking mutex lock attempts now return immediately when the lock is busy
+instead of briefly spinning in the :term:`free threading` build.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst
new file mode 100644
index 00000000000..70b3e99425d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst
@@ -0,0 +1,2 @@
+Differentiate between t-strings and f-strings in syntax error for newlines
+in format specifiers of single-quoted interpolated strings.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst
new file mode 100644
index 00000000000..ecbd8fda9a5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst
@@ -0,0 +1,2 @@
+Perform more aggressive control-flow optimizations on the machine code
+templates emitted by the experimental JIT compiler.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst
new file mode 100644
index 00000000000..99734d63c5d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst
@@ -0,0 +1,2 @@
+Fix error message for ``cls.__new__(cls, ...)`` where ``cls`` is a
+non-instantiable builtin or extension type (with ``tp_new`` set to ``NULL``).
diff --git a/Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst b/Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst
new file mode 100644
index 00000000000..129ff74189b
--- /dev/null
+++ b/Misc/NEWS.d/next/Documentation/2025-06-10-17-02-06.gh-issue-135171.quHvts.rst
@@ -0,0 +1,2 @@
+Document that the :term:`iterator` for the leftmost :keyword:`!for` clause
+in the generator expression is created immediately.
diff --git a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst b/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst
deleted file mode 100644
index e8f5ba56fcc..00000000000
--- a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-Ignore log messages generated during handling of log messages, to avoid
-deadlock or infinite recursion.
diff --git a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst
index 38d5c311b1d..943e4addebc 100644
--- a/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst
+++ b/Misc/NEWS.d/next/Library/2025-05-05-03-14-08.gh-issue-133390.AuTggn.rst
@@ -1 +1,2 @@
-Support keyword completion in the :mod:`sqlite3` command-line interface.
+Support keyword completion in the :mod:`sqlite3` command-line interface and add
+:data:`sqlite3.SQLITE_KEYWORDS` constant.
diff --git a/Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst b/Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst
new file mode 100644
index 00000000000..4de7b4cceca
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-06-17-34-18.gh-issue-133934.yT1r68.rst
@@ -0,0 +1 @@
+Improve :mod:`sqlite3` CLI's ``.help`` message.
diff --git a/Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst b/Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst
new file mode 100644
index 00000000000..466ba0d232c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-10-21-42-04.gh-issue-135335.WnUqb_.rst
@@ -0,0 +1,2 @@
+:mod:`multiprocessing`: Flush ``stdout`` and ``stderr`` after preloading
+modules in the ``forkserver``.
diff --git a/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst b/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst
new file mode 100644
index 00000000000..3ef51fa31df
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst
@@ -0,0 +1,2 @@
+Fix :meth:`!reprlib.Repr.repr_int` when given integers with more than
+:func:`sys.get_int_max_str_digits` digits. Patch by Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst b/Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst
new file mode 100644
index 00000000000..d3e81de9dbf
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-14-14-19-13.gh-issue-135497.1pzwdA.rst
@@ -0,0 +1 @@
+Fix :func:`os.getlogin` failing for longer usernames on BSD-based platforms.
diff --git a/Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst b/Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst
new file mode 100644
index 00000000000..d374220d02f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-15-03-03-22.gh-issue-65697.COdwZd.rst
@@ -0,0 +1 @@
+:mod:`configparser`'s error message when attempting to write an invalid key is now more helpful.
diff --git a/Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst b/Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst
new file mode 100644
index 00000000000..ee743f16113
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-16-15-03-03.gh-issue-135561.mJCN8D.rst
@@ -0,0 +1,2 @@
+Fix a crash on DEBUG builds when an HACL* HMAC routine fails. Patch by
+Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst b/Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst
new file mode 100644
index 00000000000..c5e5d5b4f8d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-17-22-44-19.gh-issue-119180.Ogv8Nj.rst
@@ -0,0 +1,2 @@
+Only fetch globals and locals if necessary in
+:func:`annotationlib.get_annotations`.
diff --git a/Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst b/Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst
new file mode 100644
index 00000000000..eabf5ea4aaa
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-17-23-13-56.gh-issue-135557.Bfcy4v.rst
@@ -0,0 +1,2 @@
+Fix races on :mod:`heapq` updates and :class:`list` reads on the :term:`free threaded <free threading>`
+build.
diff --git a/Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst b/Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst
new file mode 100644
index 00000000000..5fbd751467d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-18-11-43-17.gh-issue-135646.r7ekEn.rst
@@ -0,0 +1 @@
+Raise consistent :exc:`NameError` exceptions in :meth:`annotationlib.ForwardRef.evaluate`.
diff --git a/Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst b/Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst
new file mode 100644
index 00000000000..a7764a0105b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-18-13-58-13.gh-issue-135645.109nff.rst
@@ -0,0 +1,2 @@
+Added ``supports_isolated_interpreters`` field to
+:data:`sys.implementation`.
diff --git a/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst
new file mode 100644
index 00000000000..268d7eccdab
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-20-16-28-47.gh-issue-135759.jne0Zi.rst
@@ -0,0 +1,4 @@
+:mod:`hashlib`: reject negative digest lengths in OpenSSL-based SHAKE objects
+by raising a :exc:`ValueError`. Previously, negative lengths were implicitly
+rejected by raising a :exc:`MemoryError` or a :exc:`SystemError`.
+Patch by Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst
new file mode 100644
index 00000000000..2bb15cb6d9c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst
@@ -0,0 +1 @@
+Speed up :mod:`pprint` for :class:`list` and :class:`tuple`.
diff --git a/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst b/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst
new file mode 100644
index 00000000000..0f4a68bf745
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst
@@ -0,0 +1,2 @@
+:mod:`netrc`: skip security checks if :func:`os.getuid` is missing.
+Patch by Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst
new file mode 100644
index 00000000000..5b9d89caae7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst
@@ -0,0 +1,3 @@
+:mod:`netrc`: improve the error message when the security check for the
+ownership of the default configuration file ``~/.netrc`` fails. Patch by
+Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst b/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst
new file mode 100644
index 00000000000..fcf495bdceb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst
@@ -0,0 +1,3 @@
+Raise :exc:`TypeError` instead of :exc:`SystemError` when
+:func:`!_interpreters.set___main___attrs` is passed a non-dict object.
+Patch by Brian Schubert.
diff --git a/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst b/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst
new file mode 100644
index 00000000000..969cf2dfa40
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst
@@ -0,0 +1,3 @@
+Fix a crash of :class:`types.SimpleNamespace` on :term:`free threading` builds,
+when several threads were calling its :meth:`~object.__repr__` method at the
+same time.
diff --git a/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst
new file mode 100644
index 00000000000..ba2fae49fdc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst
@@ -0,0 +1,2 @@
+Fix :func:`time.strptime` for ``%c`` and ``%x`` formats on locales byn_ER,
+wal_ET and lzh_TW, and for ``%X`` format on locales ar_SA, bg_BG and lzh_TW.
diff --git a/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst
new file mode 100644
index 00000000000..998b3cd85b1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst
@@ -0,0 +1 @@
+In the palmos encoding, make byte ``0x9b`` decode to ``›`` (U+203A - SINGLE RIGHT-POINTING ANGLE QUOTATION MARK).
diff --git a/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst
new file mode 100644
index 00000000000..9859df7cf6a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst
@@ -0,0 +1,3 @@
+Fix parsing month names containing "İ" (U+0130, LATIN CAPITAL LETTER I WITH
+DOT ABOVE) in :func:`time.strptime`. This affects locales az_AZ, ber_DZ,
+ber_MA and crh_UA.
diff --git a/Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst b/Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst
new file mode 100644
index 00000000000..2c9ecc51829
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2025-06-14-13-20-17.gh-issue-135489.Uh0yVO.rst
@@ -0,0 +1 @@
+Show verbose output for failing tests during the PGO profiling step with ``--enable-optimizations``.
diff --git a/Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst b/Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst
new file mode 100644
index 00000000000..5b7485ce2d6
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2025-06-17-08-48-08.gh-issue-132815.CY1Esu.rst
@@ -0,0 +1 @@
+Fix test__opcode: add ``JUMP_BACKWARD`` to specialization stats.
diff --git a/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst b/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst
new file mode 100644
index 00000000000..832d1fe033e
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst
@@ -0,0 +1,2 @@
+Fix regrtest to support excluding tests from ``--pgo`` tests. Patch by
+Victor Stinner.
diff --git a/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst
new file mode 100644
index 00000000000..8dc007431f3
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst
@@ -0,0 +1 @@
+The iOS testbed now handles the ``app_packages`` folder as a site directory.
diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst
new file mode 100644
index 00000000000..1c0b3825c71
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst
@@ -0,0 +1 @@
+Stubs for ``strip`` are now provided as part of an iOS install.
diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json
index 10a5c646d2a..738b3390885 100644
--- a/Misc/sbom.spdx.json
+++ b/Misc/sbom.spdx.json
@@ -930,11 +930,11 @@
       "checksums": [
         {
           "algorithm": "SHA1",
-          "checksumValue": "4a0bdb9496d49bbfa3ad50bb7854d8f099e84891"
+          "checksumValue": "682b069d3888f3e8f8cc90f1a49bac9d1a5903d2"
         },
         {
           "algorithm": "SHA256",
-          "checksumValue": "b1a45149239ee7af7de769a3e9339950d47c199bb9eaa10edce8a00fde603b12"
+          "checksumValue": "8551d2c3fde03b92c6fab5febde00347bd9184ea0085077976863c7836e9669d"
         }
       ],
       "fileName": "Modules/_hacl/python_hacl_namespaces.h"
diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c
index d28e5708b44..66338805007 100644
--- a/Modules/_ctypes/_ctypes_test.c
+++ b/Modules/_ctypes/_ctypes_test.c
@@ -23,7 +23,7 @@
 #  define _Py_thread_local __thread
 #endif
 
-#if defined(Py_FFI_SUPPORT_C_COMPLEX)
+#if defined(_Py_FFI_SUPPORT_C_COMPLEX)
 #  include <complex.h>            // csqrt()
 #  undef I                        // for _ctypes_test_generated.c.h
 #endif
@@ -457,7 +457,7 @@ EXPORT(double) my_sqrt(double a)
     return sqrt(a);
 }
 
-#if defined(Py_FFI_SUPPORT_C_COMPLEX)
+#if defined(_Py_FFI_SUPPORT_C_COMPLEX)
 EXPORT(double complex) my_csqrt(double complex a)
 {
     return csqrt(a);
diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c
index 163b9264261..547e2471a1c 100644
--- a/Modules/_ctypes/cfield.c
+++ b/Modules/_ctypes/cfield.c
@@ -759,7 +759,7 @@ d_get(void *ptr, Py_ssize_t size)
     return PyFloat_FromDouble(val);
 }
 
-#if defined(Py_FFI_SUPPORT_C_COMPLEX)
+#if defined(_Py_FFI_SUPPORT_C_COMPLEX)
 
 /* We don't use _Complex types here, using arrays instead, as the C11+
    standard says: "Each complex type has the same representation and alignment
@@ -1599,7 +1599,7 @@ for base_code, base_c_type in [
     ///////////////////////////////////////////////////////////////////////////
 
     TABLE_ENTRY_SW(d, &ffi_type_double);
-#if defined(Py_FFI_SUPPORT_C_COMPLEX)
+#if defined(_Py_FFI_SUPPORT_C_COMPLEX)
     if (Py_FFI_COMPLEX_AVAILABLE) {
         TABLE_ENTRY(D, &ffi_type_complex_double);
         TABLE_ENTRY(F, &ffi_type_complex_float);
diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h
index 6a45c11e61a..5b4f97d43b8 100644
--- a/Modules/_ctypes/ctypes.h
+++ b/Modules/_ctypes/ctypes.h
@@ -23,7 +23,7 @@
 
 // Do we support C99 complex types in ffi?
 // For Apple's libffi, this must be determined at runtime (see gh-128156).
-#if defined(Py_FFI_SUPPORT_C_COMPLEX)
+#if defined(_Py_FFI_SUPPORT_C_COMPLEX)
 #   if USING_APPLE_OS_LIBFFI && defined(__has_builtin)
 #       if __has_builtin(__builtin_available)
 #           define Py_FFI_COMPLEX_AVAILABLE __builtin_available(macOS 10.15, *)
diff --git a/Modules/_hacl/python_hacl_namespaces.h b/Modules/_hacl/python_hacl_namespaces.h
index 1c2f7fea5c8..d0b4500395e 100644
--- a/Modules/_hacl/python_hacl_namespaces.h
+++ b/Modules/_hacl/python_hacl_namespaces.h
@@ -2,95 +2,43 @@
 #define _PYTHON_HACL_NAMESPACES_H
 
 /*
- * C's excuse for namespaces: Use globally unique names to avoid linkage
- * conflicts with builds linking or dynamically loading other code potentially
- * using HACL* libraries.
+ * Use globally unique names to avoid linkage conflicts with builds linking
+ * or dynamically loading other code potentially using HACL* libraries.
  *
- * Something like this to generate new entries for the list:
- *
- * nm *.o | grep Hacl | cut -c 20- | sort | uniq | grep -v _Py_LibHacl_ | egrep ^_ | sed 's/_\(.*\)/#define \1 _Py_LibHacl_\1/g'
- */
+ * Assuming that the current working directory is Modules/_hacl,
+ * use the following command to generate a list of candidates:
 
-#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0
+    nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \
+        | sed -e 's/^_Py_LibHacl_//g' \
+        | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' \
+        | sort -u
 
-#define Hacl_Hash_SHA2_state_sha2_224_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224_s
-#define Hacl_Hash_SHA2_state_sha2_224 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224
-#define Hacl_Hash_SHA2_state_sha2_256 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_256
-#define Hacl_Hash_SHA2_state_sha2_384_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384_s
-#define Hacl_Hash_SHA2_state_sha2_384 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384
-#define Hacl_Hash_SHA2_state_sha2_512 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_512
-#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256
-#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224
-#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512
-#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384
-#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256
-#define Hacl_Hash_SHA2_copy_224 _Py_LibHacl_Hacl_Hash_SHA2_copy_224
-#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512
-#define Hacl_Hash_SHA2_copy_384 _Py_LibHacl_Hacl_Hash_SHA2_copy_384
-#define Hacl_Hash_SHA2_init_256 _Py_LibHacl_Hacl_Hash_SHA2_init_256
-#define Hacl_Hash_SHA2_init_224 _Py_LibHacl_Hacl_Hash_SHA2_init_224
-#define Hacl_Hash_SHA2_init_512 _Py_LibHacl_Hacl_Hash_SHA2_init_512
-#define Hacl_Hash_SHA2_init_384 _Py_LibHacl_Hacl_Hash_SHA2_init_384
-#define Hacl_SHA2_Scalar32_sha512_init _Py_LibHacl_Hacl_SHA2_Scalar32_sha512_init
-#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256
-#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224
-#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512
-#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384
-#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256
-#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224
-#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512
-#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384
-#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256
-#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224
-#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512
-#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384
-#define Hacl_Hash_SHA2_sha256 _Py_LibHacl_Hacl_Hash_SHA2_sha256
-#define Hacl_Hash_SHA2_sha224 _Py_LibHacl_Hacl_Hash_SHA2_sha224
-#define Hacl_Hash_SHA2_sha512 _Py_LibHacl_Hacl_Hash_SHA2_sha512
-#define Hacl_Hash_SHA2_sha384 _Py_LibHacl_Hacl_Hash_SHA2_sha384
+ * Compare the entries to add as follows:
 
-#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc
-#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init
-#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update
-#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest
-#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free
-#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy
-#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash
-
-#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc
-#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init
-#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update
-#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest
-#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free
-#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy
-#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash
-
-#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3
-#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3
-#define Hacl_Impl_SHA3_absorb_inner _Py_LibHacl_Hacl_Impl_SHA3_absorb_inner
-#define Hacl_Impl_SHA3_keccak _Py_LibHacl_Hacl_Impl_SHA3_keccak
-#define Hacl_Impl_SHA3_loadState _Py_LibHacl_Hacl_Impl_SHA3_loadState
-#define Hacl_Impl_SHA3_squeeze _Py_LibHacl_Hacl_Impl_SHA3_squeeze
-#define Hacl_Impl_SHA3_state_permute _Py_LibHacl_Hacl_Impl_SHA3_state_permute
-#define Hacl_SHA3_sha3_224 _Py_LibHacl_Hacl_SHA3_sha3_224
-#define Hacl_SHA3_sha3_256 _Py_LibHacl_Hacl_SHA3_sha3_256
-#define Hacl_SHA3_sha3_384 _Py_LibHacl_Hacl_SHA3_sha3_384
-#define Hacl_SHA3_sha3_512 _Py_LibHacl_Hacl_SHA3_sha3_512
-#define Hacl_SHA3_shake128_hacl _Py_LibHacl_Hacl_SHA3_shake128_hacl
-#define Hacl_SHA3_shake256_hacl _Py_LibHacl_Hacl_SHA3_shake256_hacl
-#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len
-#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy
-#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest
-#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free
-#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg
-#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len
-#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake
-#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_
-#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc
-#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset
-#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update
-#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze
+    diff -y --suppress-common-lines \
+        <(grep -P '^#define (?!_PY.+_H)' python_hacl_namespaces.h | sort -u) \
+        <(nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \
+            | sed -e 's/^_Py_LibHacl_//g' \
+            | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' | sort -u)
+ */
 
+// --- Utils ------------------------------------------------------------------
+#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0
+// --- HASH-BLAKE-2b ----------------------------------------------------------
+#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy
+#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest
+#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish
+#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free
+#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key
+#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params
+#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info
+#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init
+#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc
+#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key
+#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key
+#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset
+#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key
+#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params
 #define Hacl_Hash_Blake2b_Simd256_copy _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy
 #define Hacl_Hash_Blake2b_Simd256_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy_internal_state
 #define Hacl_Hash_Blake2b_Simd256_digest _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_digest
@@ -104,7 +52,6 @@
 #define Hacl_Hash_Blake2b_Simd256_malloc _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc
 #define Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key
 #define Hacl_Hash_Blake2b_Simd256_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key
-#define Hacl_Hash_Blake2b_Simd256_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key0
 #define Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key
 #define Hacl_Hash_Blake2b_Simd256_reset _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset
 #define Hacl_Hash_Blake2b_Simd256_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset_with_key
@@ -115,23 +62,24 @@
 #define Hacl_Hash_Blake2b_Simd256_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_last_no_inline
 #define Hacl_Hash_Blake2b_Simd256_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi
 #define Hacl_Hash_Blake2b_Simd256_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi_no_inline
-#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy
-#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest
-#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish
-#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free
-#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key
-#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params
-#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info
-#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init
-#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc
-#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key
-#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key
-#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset
-#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key
-#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params
 #define Hacl_Hash_Blake2b_update _Py_LibHacl_Hacl_Hash_Blake2b_update
 #define Hacl_Hash_Blake2b_update_last _Py_LibHacl_Hacl_Hash_Blake2b_update_last
 #define Hacl_Hash_Blake2b_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_update_multi
+// --- HASH-BLAKE-2s ----------------------------------------------------------
+#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy
+#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest
+#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish
+#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free
+#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key
+#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params
+#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info
+#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init
+#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc
+#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key
+#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key
+#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset
+#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key
+#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params
 #define Hacl_Hash_Blake2s_Simd128_copy _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy
 #define Hacl_Hash_Blake2s_Simd128_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy_internal_state
 #define Hacl_Hash_Blake2s_Simd128_digest _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_digest
@@ -145,7 +93,6 @@
 #define Hacl_Hash_Blake2s_Simd128_malloc _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc
 #define Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key
 #define Hacl_Hash_Blake2s_Simd128_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key
-#define Hacl_Hash_Blake2s_Simd128_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key0
 #define Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key
 #define Hacl_Hash_Blake2s_Simd128_reset _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset
 #define Hacl_Hash_Blake2s_Simd128_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset_with_key
@@ -156,37 +103,54 @@
 #define Hacl_Hash_Blake2s_Simd128_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_last_no_inline
 #define Hacl_Hash_Blake2s_Simd128_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi
 #define Hacl_Hash_Blake2s_Simd128_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi_no_inline
-#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy
-#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest
-#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish
-#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free
-#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key
-#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params
-#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info
-#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init
-#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc
-#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key
-#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key
-#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset
-#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key
-#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params
 #define Hacl_Hash_Blake2s_update _Py_LibHacl_Hacl_Hash_Blake2s_update
 #define Hacl_Hash_Blake2s_update_last _Py_LibHacl_Hacl_Hash_Blake2s_update_last
 #define Hacl_Hash_Blake2s_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_update_multi
+// --- HASH-MD5 ---------------------------------------------------------------
+#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy
+#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest
 #define Hacl_Hash_MD5_finish _Py_LibHacl_Hacl_Hash_MD5_finish
+#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free
+#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash
 #define Hacl_Hash_MD5_hash_oneshot _Py_LibHacl_Hacl_Hash_MD5_hash_oneshot
+#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init
+#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc
 #define Hacl_Hash_MD5_reset _Py_LibHacl_Hacl_Hash_MD5_reset
+#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update
 #define Hacl_Hash_MD5_update_last _Py_LibHacl_Hacl_Hash_MD5_update_last
 #define Hacl_Hash_MD5_update_multi _Py_LibHacl_Hacl_Hash_MD5_update_multi
+// --- HASH-SHA-1 -------------------------------------------------------------
+#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy
+#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest
 #define Hacl_Hash_SHA1_finish _Py_LibHacl_Hacl_Hash_SHA1_finish
+#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free
+#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash
 #define Hacl_Hash_SHA1_hash_oneshot _Py_LibHacl_Hacl_Hash_SHA1_hash_oneshot
+#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init
+#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc
 #define Hacl_Hash_SHA1_reset _Py_LibHacl_Hacl_Hash_SHA1_reset
+#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update
 #define Hacl_Hash_SHA1_update_last _Py_LibHacl_Hacl_Hash_SHA1_update_last
 #define Hacl_Hash_SHA1_update_multi _Py_LibHacl_Hacl_Hash_SHA1_update_multi
+// --- HASH-SHA-2 -------------------------------------------------------------
+#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256
+#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512
+#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224
+#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256
+#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384
+#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512
+#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224
+#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256
+#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384
+#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512
 #define Hacl_Hash_SHA2_hash_224 _Py_LibHacl_Hacl_Hash_SHA2_hash_224
 #define Hacl_Hash_SHA2_hash_256 _Py_LibHacl_Hacl_Hash_SHA2_hash_256
 #define Hacl_Hash_SHA2_hash_384 _Py_LibHacl_Hacl_Hash_SHA2_hash_384
 #define Hacl_Hash_SHA2_hash_512 _Py_LibHacl_Hacl_Hash_SHA2_hash_512
+#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224
+#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256
+#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384
+#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512
 #define Hacl_Hash_SHA2_reset_224 _Py_LibHacl_Hacl_Hash_SHA2_reset_224
 #define Hacl_Hash_SHA2_reset_256 _Py_LibHacl_Hacl_Hash_SHA2_reset_256
 #define Hacl_Hash_SHA2_reset_384 _Py_LibHacl_Hacl_Hash_SHA2_reset_384
@@ -207,10 +171,25 @@
 #define Hacl_Hash_SHA2_sha512_init _Py_LibHacl_Hacl_Hash_SHA2_sha512_init
 #define Hacl_Hash_SHA2_sha512_update_last _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_last
 #define Hacl_Hash_SHA2_sha512_update_nblocks _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_nblocks
+#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224
+#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256
+#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384
+#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512
+// --- HASH-SHA-3 -------------------------------------------------------------
 #define Hacl_Hash_SHA3_absorb_inner_32 _Py_LibHacl_Hacl_Hash_SHA3_absorb_inner_32
+#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len
+#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy
+#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest
+#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free
+#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg
+#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len
+#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_
+#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake
 #define Hacl_Hash_SHA3_keccak_piln _Py_LibHacl_Hacl_Hash_SHA3_keccak_piln
 #define Hacl_Hash_SHA3_keccak_rndc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rndc
 #define Hacl_Hash_SHA3_keccak_rotc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rotc
+#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc
+#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset
 #define Hacl_Hash_SHA3_sha3_224 _Py_LibHacl_Hacl_Hash_SHA3_sha3_224
 #define Hacl_Hash_SHA3_sha3_256 _Py_LibHacl_Hacl_Hash_SHA3_sha3_256
 #define Hacl_Hash_SHA3_sha3_384 _Py_LibHacl_Hacl_Hash_SHA3_sha3_384
@@ -220,37 +199,39 @@
 #define Hacl_Hash_SHA3_shake128_absorb_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_absorb_nblocks
 #define Hacl_Hash_SHA3_shake128_squeeze_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_squeeze_nblocks
 #define Hacl_Hash_SHA3_shake256 _Py_LibHacl_Hacl_Hash_SHA3_shake256
+#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze
 #define Hacl_Hash_SHA3_state_free _Py_LibHacl_Hacl_Hash_SHA3_state_free
 #define Hacl_Hash_SHA3_state_malloc _Py_LibHacl_Hacl_Hash_SHA3_state_malloc
-
-// Streaming HMAC
+#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update
+#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3
+#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3
+// --- STREAMING-MAC ----------------------------------------------------------
+#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy
+#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest
+#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free
+#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl
 #define Hacl_Streaming_HMAC_index_of_state _Py_LibHacl_Hacl_Streaming_HMAC_index_of_state
 #define Hacl_Streaming_HMAC_malloc_ _Py_LibHacl_Hacl_Streaming_HMAC_malloc_
-#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl
 #define Hacl_Streaming_HMAC_reset _Py_LibHacl_Hacl_Streaming_HMAC_reset
-#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update
-#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest
-#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy
-#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free
 #define Hacl_Streaming_HMAC_s1 _Py_LibHacl_Hacl_Streaming_HMAC_s1
 #define Hacl_Streaming_HMAC_s2 _Py_LibHacl_Hacl_Streaming_HMAC_s2
-
-// HMAC-MD5
+#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update
+// --- HMAC-MD5 ---------------------------------------------------------------
 #define Hacl_HMAC_compute_md5 _Py_LibHacl_Hacl_HMAC_compute_md5
-// HMAC-SHA-1
+// --- HMAC-SHA-1 -------------------------------------------------------------
 #define Hacl_HMAC_compute_sha1 _Py_LibHacl_Hacl_HMAC_compute_sha1
-// HMAC-SHA-2
+// --- HMAC-SHA-2 -------------------------------------------------------------
 #define Hacl_HMAC_compute_sha2_224 _Py_LibHacl_Hacl_HMAC_compute_sha2_224
 #define Hacl_HMAC_compute_sha2_256 _Py_LibHacl_Hacl_HMAC_compute_sha2_256
 #define Hacl_HMAC_compute_sha2_384 _Py_LibHacl_Hacl_HMAC_compute_sha2_384
 #define Hacl_HMAC_compute_sha2_512 _Py_LibHacl_Hacl_HMAC_compute_sha2_512
-// HMAC-SHA-3
+// --- HMAC-SHA-3 -------------------------------------------------------------
 #define Hacl_HMAC_compute_sha3_224 _Py_LibHacl_Hacl_HMAC_compute_sha3_224
 #define Hacl_HMAC_compute_sha3_256 _Py_LibHacl_Hacl_HMAC_compute_sha3_256
 #define Hacl_HMAC_compute_sha3_384 _Py_LibHacl_Hacl_HMAC_compute_sha3_384
 #define Hacl_HMAC_compute_sha3_512 _Py_LibHacl_Hacl_HMAC_compute_sha3_512
-// HMAC-BLAKE
-#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32
+// --- HMAC-BLAKE-2 -----------------------------------------------------------
 #define Hacl_HMAC_compute_blake2b_32 _Py_LibHacl_Hacl_HMAC_compute_blake2b_32
+#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32
 
 #endif  // _PYTHON_HACL_NAMESPACES_H
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index ce9603d5db8..90a7391ebb0 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -255,7 +255,8 @@ py_hashentry_table_new(void) {
     return NULL;
 }
 
-/* Module state */
+// --- Module state -----------------------------------------------------------
+
 static PyModuleDef _hashlibmodule;
 
 typedef struct {
@@ -277,35 +278,33 @@ get_hashlib_state(PyObject *module)
     return (_hashlibstate *)state;
 }
 
+// --- Module objects ---------------------------------------------------------
+
 typedef struct {
-    PyObject_HEAD
+    HASHLIB_OBJECT_HEAD
     EVP_MD_CTX *ctx;    /* OpenSSL message digest context */
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;      /* OpenSSL context lock */
 } HASHobject;
 
 #define HASHobject_CAST(op) ((HASHobject *)(op))
 
 typedef struct {
-    PyObject_HEAD
+    HASHLIB_OBJECT_HEAD
     HMAC_CTX *ctx;            /* OpenSSL hmac context */
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;  /* HMAC context lock */
 } HMACobject;
 
 #define HMACobject_CAST(op) ((HMACobject *)(op))
 
-#include "clinic/_hashopenssl.c.h"
+// --- Module clinic configuration --------------------------------------------
+
 /*[clinic input]
 module _hashlib
-class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASH_type"
-class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASHXOF_type"
-class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMAC_type"
+class _hashlib.HASH "HASHobject *" "&PyType_Type"
+class _hashlib.HASHXOF "HASHobject *" "&PyType_Type"
+class _hashlib.HMAC "HMACobject *" "&PyType_Type"
 [clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=eb805ce4b90b1b31]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6b5c9ce5c28bdc58]*/
 
+#include "clinic/_hashopenssl.c.h"
 
 /* LCOV_EXCL_START */
 
@@ -700,9 +699,9 @@ static int
 _hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p)
 {
     int result;
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     result = EVP_MD_CTX_copy(new_ctx_p, self->ctx);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (result == 0) {
         notify_smart_ssl_error_occurred_in(Py_STRINGIFY(EVP_MD_CTX_copy));
         return -1;
@@ -802,27 +801,13 @@ _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj)
 {
     int result;
     Py_buffer view;
-
     GET_BUFFER_VIEW_OR_ERROUT(obj, &view);
-
-    if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        result = _hashlib_HASH_hash(self, view.buf, view.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        result = _hashlib_HASH_hash(self, view.buf, view.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, view.len,
+        result = _hashlib_HASH_hash(self, view.buf, view.len)
+    );
     PyBuffer_Release(&view);
-
-    if (result == -1)
-        return NULL;
-    Py_RETURN_NONE;
+    return result < 0 ? NULL : Py_None;
 }
 
 static PyMethodDef HASH_methods[] = {
@@ -938,8 +923,18 @@ _hashlib_HASHXOF_digest_impl(HASHobject *self, Py_ssize_t length)
 /*[clinic end generated code: output=dcb09335dd2fe908 input=3eb034ce03c55b21]*/
 {
     EVP_MD_CTX *temp_ctx;
-    PyObject *retval = PyBytes_FromStringAndSize(NULL, length);
+    PyObject *retval;
 
+    if (length < 0) {
+        PyErr_SetString(PyExc_ValueError, "negative digest length");
+        return NULL;
+    }
+
+    if (length == 0) {
+        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
+    }
+
+    retval = PyBytes_FromStringAndSize(NULL, length);
     if (retval == NULL) {
         return NULL;
     }
@@ -986,9 +981,18 @@ _hashlib_HASHXOF_hexdigest_impl(HASHobject *self, Py_ssize_t length)
     EVP_MD_CTX *temp_ctx;
     PyObject *retval;
 
+    if (length < 0) {
+        PyErr_SetString(PyExc_ValueError, "negative digest length");
+        return NULL;
+    }
+
+    if (length == 0) {
+        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
+    }
+
     digest = (unsigned char*)PyMem_Malloc(length);
     if (digest == NULL) {
-        PyErr_NoMemory();
+        (void)PyErr_NoMemory();
         return NULL;
     }
 
@@ -1125,15 +1129,12 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj,
     }
 
     if (view.buf && view.len) {
-        if (view.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            result = _hashlib_HASH_hash(self, view.buf, view.len);
-            Py_END_ALLOW_THREADS
-        } else {
-            result = _hashlib_HASH_hash(self, view.buf, view.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            view.len,
+            result = _hashlib_HASH_hash(self, view.buf, view.len)
+        );
         if (result == -1) {
             assert(PyErr_Occurred());
             Py_CLEAR(self);
@@ -1794,9 +1795,9 @@ static int
 locked_HMAC_CTX_copy(HMAC_CTX *new_ctx_p, HMACobject *self)
 {
     int result;
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     result = HMAC_CTX_copy(new_ctx_p, self->ctx);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (result == 0) {
         notify_smart_ssl_error_occurred_in(Py_STRINGIFY(HMAC_CTX_copy));
         return -1;
@@ -1827,24 +1828,12 @@ _hmac_update(HMACobject *self, PyObject *obj)
     Py_buffer view = {0};
 
     GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0);
-
-    if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        r = HMAC_Update(self->ctx,
-                        (const unsigned char *)view.buf,
-                        (size_t)view.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        r = HMAC_Update(self->ctx,
-                        (const unsigned char *)view.buf,
-                        (size_t)view.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, view.len,
+        r = HMAC_Update(
+            self->ctx, (const unsigned char *)view.buf, (size_t)view.len
+        )
+    );
     PyBuffer_Release(&view);
 
     if (r == 0) {
diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c
index 7784cdcd9ff..560fe431fca 100644
--- a/Modules/_heapqmodule.c
+++ b/Modules/_heapqmodule.c
@@ -12,6 +12,7 @@ annotated by François Pinard, and converted to C by Raymond Hettinger.
 
 #include "Python.h"
 #include "pycore_list.h"          // _PyList_ITEMS(), _PyList_AppendTakeRef()
+#include "pycore_pyatomic_ft_wrappers.h"
 
 #include "clinic/_heapqmodule.c.h"
 
@@ -59,8 +60,8 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
         arr = _PyList_ITEMS(heap);
         parent = arr[parentpos];
         newitem = arr[pos];
-        arr[parentpos] = newitem;
-        arr[pos] = parent;
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem);
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent);
         pos = parentpos;
     }
     return 0;
@@ -108,8 +109,8 @@ siftup(PyListObject *heap, Py_ssize_t pos)
         /* Move the smaller child up. */
         tmp1 = arr[childpos];
         tmp2 = arr[pos];
-        arr[childpos] = tmp2;
-        arr[pos] = tmp1;
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2);
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1);
         pos = childpos;
     }
     /* Bubble it up to its final resting place (by sifting its parents down). */
@@ -172,8 +173,9 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t))
     if (!n)
         return lastelt;
     returnitem = PyList_GET_ITEM(heap, 0);
-    PyList_SET_ITEM(heap, 0, lastelt);
-    if (siftup_func((PyListObject *)heap, 0)) {
+    PyListObject *list = _PyList_CAST(heap);
+    FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], lastelt);
+    if (siftup_func(list, 0)) {
         Py_DECREF(returnitem);
         return NULL;
     }
@@ -208,8 +210,9 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec
     }
 
     returnitem = PyList_GET_ITEM(heap, 0);
-    PyList_SET_ITEM(heap, 0, Py_NewRef(item));
-    if (siftup_func((PyListObject *)heap, 0)) {
+    PyListObject *list = _PyList_CAST(heap);
+    FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item));
+    if (siftup_func(list, 0)) {
         Py_DECREF(returnitem);
         return NULL;
     }
@@ -284,8 +287,9 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item)
     }
 
     returnitem = PyList_GET_ITEM(heap, 0);
-    PyList_SET_ITEM(heap, 0, Py_NewRef(item));
-    if (siftup((PyListObject *)heap, 0)) {
+    PyListObject *list = _PyList_CAST(heap);
+    FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item));
+    if (siftup(list, 0)) {
         Py_DECREF(returnitem);
         return NULL;
     }
@@ -437,8 +441,8 @@ siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
         arr = _PyList_ITEMS(heap);
         parent = arr[parentpos];
         newitem = arr[pos];
-        arr[parentpos] = newitem;
-        arr[pos] = parent;
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem);
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent);
         pos = parentpos;
     }
     return 0;
@@ -486,8 +490,8 @@ siftup_max(PyListObject *heap, Py_ssize_t pos)
         /* Move the smaller child up. */
         tmp1 = arr[childpos];
         tmp2 = arr[pos];
-        arr[childpos] = tmp2;
-        arr[pos] = tmp1;
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2);
+        FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1);
         pos = childpos;
     }
     /* Bubble it up to its final resting place (by sifting its parents down). */
@@ -621,8 +625,9 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item)
     }
 
     returnitem = PyList_GET_ITEM(heap, 0);
-    PyList_SET_ITEM(heap, 0, Py_NewRef(item));
-    if (siftup_max((PyListObject *)heap, 0) < 0) {
+    PyListObject *list = _PyList_CAST(heap);
+    FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item));
+    if (siftup_max(list, 0) < 0) {
         Py_DECREF(returnitem);
         return NULL;
     }
diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c
index ee5e2b005e0..9c1f8615161 100644
--- a/Modules/_interpchannelsmodule.c
+++ b/Modules/_interpchannelsmodule.c
@@ -3614,8 +3614,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg)
 {
     module_state *state = get_module_state(mod);
     assert(state != NULL);
-    (void)traverse_module_state(state, visit, arg);
-    return 0;
+    return traverse_module_state(state, visit, arg);
 }
 
 static int
@@ -3625,8 +3624,7 @@ module_clear(PyObject *mod)
     assert(state != NULL);
 
     // Now we clear the module state.
-    (void)clear_module_state(state);
-    return 0;
+    return clear_module_state(state);
 }
 
 static void
diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c
index e22709d5119..e5afe746f90 100644
--- a/Modules/_interpqueuesmodule.c
+++ b/Modules/_interpqueuesmodule.c
@@ -707,8 +707,11 @@ _queue_is_full(_queue *queue, int *p_is_full)
         return err;
     }
 
-    assert(queue->items.count <= queue->items.maxsize);
-    *p_is_full = queue->items.count == queue->items.maxsize;
+    assert(queue->items.maxsize <= 0
+           || queue->items.count <= queue->items.maxsize);
+    *p_is_full = queue->items.maxsize > 0
+        ? queue->items.count == queue->items.maxsize
+        : 0;
 
     _queue_unlock(queue);
     return 0;
@@ -1949,8 +1952,7 @@ static int
 module_traverse(PyObject *mod, visitproc visit, void *arg)
 {
     module_state *state = get_module_state(mod);
-    (void)traverse_module_state(state, visit, arg);
-    return 0;
+    return traverse_module_state(state, visit, arg);
 }
 
 static int
@@ -1959,8 +1961,7 @@ module_clear(PyObject *mod)
     module_state *state = get_module_state(mod);
 
     // Now we clear the module state.
-    (void)clear_module_state(state);
-    return 0;
+    return clear_module_state(state);
 }
 
 static void
diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c
index fdfb3e6dd34..e7feaa7f186 100644
--- a/Modules/_interpretersmodule.c
+++ b/Modules/_interpretersmodule.c
@@ -601,6 +601,7 @@ _make_call(struct interp_call *call,
         unwrap_not_shareable(tstate, failure);
         return -1;
     }
+    assert(!_PyErr_Occurred(tstate));
 
     // Make the call.
     PyObject *resobj = PyObject_Call(func, args, kwargs);
@@ -1038,8 +1039,8 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs)
     PyObject *id, *updates;
     int restricted = 0;
     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
-                                     "OO|$p:" MODULE_NAME_STR ".set___main___attrs",
-                                     kwlist, &id, &updates, &restricted))
+                                     "OO!|$p:" MODULE_NAME_STR ".set___main___attrs",
+                                     kwlist, &id, &PyDict_Type, &updates, &restricted))
     {
         return NULL;
     }
@@ -1053,16 +1054,14 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs)
     }
 
     // Check the updates.
-    if (updates != Py_None) {
-        Py_ssize_t size = PyObject_Size(updates);
-        if (size < 0) {
-            return NULL;
-        }
-        if (size == 0) {
-            PyErr_SetString(PyExc_ValueError,
-                            "arg 2 must be a non-empty mapping");
-            return NULL;
-        }
+    Py_ssize_t size = PyDict_Size(updates);
+    if (size < 0) {
+        return NULL;
+    }
+    if (size == 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "arg 2 must be a non-empty dict");
+        return NULL;
     }
 
     _PyXI_session *session = _PyXI_NewSession();
@@ -1707,8 +1706,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg)
 {
     module_state *state = get_module_state(mod);
     assert(state != NULL);
-    (void)traverse_module_state(state, visit, arg);
-    return 0;
+    return traverse_module_state(state, visit, arg);
 }
 
 static int
@@ -1716,8 +1714,7 @@ module_clear(PyObject *mod)
 {
     module_state *state = get_module_state(mod);
     assert(state != NULL);
-    (void)clear_module_state(state);
-    return 0;
+    return clear_module_state(state);
 }
 
 static void
diff --git a/Modules/_opcode.c b/Modules/_opcode.c
index c295f7b3152..ef271b6ef56 100644
--- a/Modules/_opcode.c
+++ b/Modules/_opcode.c
@@ -5,7 +5,7 @@
 #include "Python.h"
 #include "compile.h"
 #include "opcode.h"
-#include "pycore_ceval.h"
+#include "pycore_ceval.h"           // SPECIAL_MAX
 #include "pycore_code.h"
 #include "pycore_compile.h"
 #include "pycore_intrinsics.h"
diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c
index 19f12c3b02e..c2421cac6bd 100644
--- a/Modules/_remote_debugging_module.c
+++ b/Modules/_remote_debugging_module.c
@@ -39,6 +39,8 @@
  * ============================================================================ */
 
 #define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset)))
+#define CLEAR_PTR_TAG(ptr) ((uintptr_t)(ptr) & ~Py_TAG_BITS)  // ptr as uintptr_t with the Py_TAG_BITS bits cleared
+#define GET_MEMBER_NO_TAG(type, obj, offset) ((type)CLEAR_PTR_TAG(*(type*)((char*)(obj) + (offset))))  // like GET_MEMBER, tag bits cleared; fully parenthesized
 
 /* Size macros for opaque buffers */
 #define SIZEOF_BYTES_OBJ sizeof(PyBytesObject)
@@ -212,6 +214,8 @@ typedef struct {
 #endif
 } RemoteUnwinderObject;
 
+#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op))
+
 typedef struct
 {
     int lineno;
@@ -243,6 +247,13 @@ module _remote_debugging
  * FORWARD DECLARATIONS
  * ============================================================================ */
 
+static inline int
+is_frame_valid(
+    RemoteUnwinderObject *unwinder,
+    uintptr_t frame_addr,
+    uintptr_t code_object_addr
+);
+
 static int
 parse_tasks_in_set(
     RemoteUnwinderObject *unwinder,
@@ -734,8 +745,7 @@ parse_task_name(
         return NULL;
     }
 
-    uintptr_t task_name_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name);
-    task_name_addr &= ~Py_TAG_BITS;
+    uintptr_t task_name_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name);
 
     // The task name can be a long or a string so we need to check the type
     char task_name_obj[SIZEOF_PYOBJECT];
@@ -798,8 +808,7 @@ static int parse_task_awaited_by(
         return -1;
     }
 
-    uintptr_t task_ab_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by);
-    task_ab_addr &= ~Py_TAG_BITS;
+    uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by);
 
     if ((void*)task_ab_addr == NULL) {
         return 0;
@@ -849,8 +858,7 @@ handle_yield_from_frame(
         return -1;
     }
 
-    uintptr_t stackpointer_addr = GET_MEMBER(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer);
-    stackpointer_addr &= ~Py_TAG_BITS;
+    uintptr_t stackpointer_addr = GET_MEMBER_NO_TAG(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer);
 
     if ((void*)stackpointer_addr != NULL) {
         uintptr_t gi_await_addr;
@@ -917,6 +925,11 @@ parse_coro_chain(
         return -1;
     }
 
+    int8_t frame_state = GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state);
+    if (frame_state == FRAME_CLEARED) {
+        return 0;
+    }
+
     uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type);
 
     PyObject* name = NULL;
@@ -936,7 +949,7 @@ parse_coro_chain(
     }
     Py_DECREF(name);
 
-    if (GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state) == FRAME_SUSPENDED_YIELD_FROM) {
+    if (frame_state == FRAME_SUSPENDED_YIELD_FROM) {
         return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to);
     }
 
@@ -981,8 +994,7 @@ create_task_result(
         goto error;
     }
 
-    coro_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro);
-    coro_addr &= ~Py_TAG_BITS;
+    coro_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro);
 
     if ((void*)coro_addr != NULL) {
         if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) {
@@ -1816,10 +1828,10 @@ parse_frame_from_chunks(
 
     char *frame = (char *)frame_ptr;
     *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
-
-    if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER ||
-        !GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable)) {
-        return 0;
+    uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable);
+    int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
+    if (frame_valid != 1) {
+        return frame_valid;
     }
 
     uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
@@ -1832,9 +1844,7 @@ parse_frame_from_chunks(
     }
 #endif
 
-    return parse_code_object(
-        unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable),
-        instruction_pointer, previous_frame, tlbc_index);
+    return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index);
 }
 
 /* ============================================================================
@@ -2077,6 +2087,33 @@ find_running_task_and_coro(
  * FRAME PARSING FUNCTIONS
  * ============================================================================ */
 
+/* Classify a frame by owner: 1 for generator/thread frames, 0 when there is no
+ * code object or the frame is C-stack/interpreter owned, -1 (error set) else. */
+static inline int
+is_frame_valid(
+    RemoteUnwinderObject *unwinder,
+    uintptr_t frame_addr,
+    uintptr_t code_object_addr
+) {
+    if ((void*)code_object_addr == NULL) {
+        return 0;
+    }
+
+    // frame_addr is dereferenced directly; fetch the owner tag a single time.
+    char owner = GET_MEMBER(char, (void*)frame_addr, unwinder->debug_offsets.interpreter_frame.owner);
+    switch (owner) {
+    case FRAME_OWNED_BY_CSTACK:
+    case FRAME_OWNED_BY_INTERPRETER:
+        return 0;  // C frame
+    case FRAME_OWNED_BY_GENERATOR:
+    case FRAME_OWNED_BY_THREAD:
+        return 1;
+    }
+    PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner);
+    set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame");
+    return -1;
+}
+
 static int
 parse_frame_object(
     RemoteUnwinderObject *unwinder,
@@ -2098,13 +2135,10 @@ parse_frame_object(
     }
 
     *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
-
-    if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER) {
-        return 0;
-    }
-
-    if ((void*)GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable) == NULL) {
-        return 0;
+    uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable);
+    int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
+    if (frame_valid != 1) {
+        return frame_valid;
     }
 
     uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
@@ -2117,9 +2151,7 @@ parse_frame_object(
     }
 #endif
 
-    return parse_code_object(
-        unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable),
-        instruction_pointer, previous_frame, tlbc_index);
+    return parse_code_object(unwinder, result, code_object,instruction_pointer, previous_frame, tlbc_index);
 }
 
 static int
@@ -2144,26 +2176,10 @@ parse_async_frame_object(
     }
 
     *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
-
-    *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable);
-    // Strip tag bits for consistent comparison
-    *code_object &= ~Py_TAG_BITS;
-    assert(code_object != NULL);
-    if ((void*)*code_object == NULL) {
-        return 0;
-    }
-
-    if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK ||
-        GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
-        return 0;  // C frame
-    }
-
-    if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR
-        && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) {
-        PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n",
-                    GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner));
-        set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame");
-        return -1;
+    *code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable);
+    int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, *code_object);
+    if (frame_valid != 1) {
+        return frame_valid;
     }
 
     uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
@@ -2899,8 +2915,9 @@ static PyMethodDef RemoteUnwinder_methods[] = {
 };
 
 static void
-RemoteUnwinder_dealloc(RemoteUnwinderObject *self)
+RemoteUnwinder_dealloc(PyObject *op)
 {
+    RemoteUnwinderObject *self = RemoteUnwinder_CAST(op);
     PyTypeObject *tp = Py_TYPE(self);
     if (self->code_object_cache) {
         _Py_hashtable_destroy(self->code_object_cache);
diff --git a/Modules/_stat.c b/Modules/_stat.c
index f11ca7d23b4..1dabf2f6d5b 100644
--- a/Modules/_stat.c
+++ b/Modules/_stat.c
@@ -57,7 +57,7 @@ typedef unsigned short mode_t;
  * Only the names are defined by POSIX but not their value. All common file
  * types seems to have the same numeric value on all platforms, though.
  *
- * pyport.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK
+ * fileutils.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK
  */
 
 #ifndef S_IFBLK
@@ -86,7 +86,7 @@ typedef unsigned short mode_t;
 
 
 /* S_ISXXX()
- * pyport.h defines S_ISDIR(), S_ISREG() and S_ISCHR()
+ * fileutils.h defines S_ISDIR(), S_ISREG() and S_ISCHR()
  */
 
 #ifndef S_ISBLK
diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c
index 42243023a45..6313abf5485 100644
--- a/Modules/_testcapi/long.c
+++ b/Modules/_testcapi/long.c
@@ -228,7 +228,7 @@ pylongwriter_create(PyObject *module, PyObject *args)
             goto error;
         }
 
-        if (num < 0 || num >= PyLong_BASE) {
+        if (num < 0 || num >= (long)PyLong_BASE) {
             PyErr_SetString(PyExc_ValueError, "digit doesn't fit into digit");
             goto error;
         }
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 281c5b41137..71fffedee14 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -2424,7 +2424,7 @@ test_critical_sections(PyObject *module, PyObject *Py_UNUSED(args))
 
 
 // Used by `finalize_thread_hang`.
-#ifdef _POSIX_THREADS
+#if defined(_POSIX_THREADS) && !defined(__wasi__)
 static void finalize_thread_hang_cleanup_callback(void *Py_UNUSED(arg)) {
     // Should not reach here.
     Py_FatalError("pthread thread termination was triggered unexpectedly");
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 804cb4e4d1c..fdf22a0c994 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -21,6 +21,7 @@
 #include "pycore_fileutils.h"     // _Py_normpath()
 #include "pycore_flowgraph.h"     // _PyCompile_OptimizeCfg()
 #include "pycore_frame.h"         // _PyInterpreterFrame
+#include "pycore_function.h"      // _PyFunction_GET_BUILTINS
 #include "pycore_gc.h"            // PyGC_Head
 #include "pycore_hashtable.h"     // _Py_hashtable_new()
 #include "pycore_import.h"        // _PyImport_ClearExtension()
@@ -1022,7 +1023,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs)
             globalsns = PyFunction_GET_GLOBALS(codearg);
         }
         if (builtinsns == NULL) {
-            builtinsns = PyFunction_GET_BUILTINS(codearg);
+            builtinsns = _PyFunction_GET_BUILTINS(codearg);
         }
         codearg = PyFunction_GET_CODE(codearg);
     }
@@ -1190,7 +1191,7 @@ verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs)
             globalsns = PyFunction_GET_GLOBALS(codearg);
         }
         if (builtinsns == NULL) {
-            builtinsns = PyFunction_GET_BUILTINS(codearg);
+            builtinsns = _PyFunction_GET_BUILTINS(codearg);
         }
         codearg = PyFunction_GET_CODE(codearg);
     }
diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c
index 8d678412fe7..8d8cb992b0e 100644
--- a/Modules/_testinternalcapi/test_lock.c
+++ b/Modules/_testinternalcapi/test_lock.c
@@ -57,7 +57,10 @@ lock_thread(void *arg)
     _Py_atomic_store_int(&test_data->started, 1);
 
     PyMutex_Lock(m);
-    assert(m->_bits == 1);
+    // gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set
+    // (m->_bits == 3) due to bucket collisions in the parking lot hash table
+    // between this mutex and the `test_data.done` event.
+    assert(m->_bits == 1 || m->_bits == 3);
 
     PyMutex_Unlock(m);
     assert(m->_bits == 0);
diff --git a/Modules/blake2module.c b/Modules/blake2module.c
index 2ce8c0cd3d7..163f238a426 100644
--- a/Modules/blake2module.c
+++ b/Modules/blake2module.c
@@ -2,6 +2,7 @@
  * Written in 2013 by Dmitry Chestnykh <dmitry@codingrobots.com>
  * Modified for CPython by Christian Heimes <christian@python.org>
  * Updated to use HACL* by Jonathan Protzenko <jonathan@protzenko.fr>
+ * Additional work by Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
  *
  * To the extent possible under law, the author have dedicated all
  * copyright and related and neighboring rights to this software to
@@ -42,25 +43,25 @@
 
 // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't
 // great; but when compiling a universal2 binary, autoconf will set
-// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be
-// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor
-// symbols.
+// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64.
+// However, if we're on macOS ARM64, we disable these preprocessor symbols.
 #if defined(__APPLE__) && defined(__arm64__)
-#  undef HACL_CAN_COMPILE_SIMD128
-#  undef HACL_CAN_COMPILE_SIMD256
+#  undef _Py_HACL_CAN_COMPILE_VEC128
+#  undef _Py_HACL_CAN_COMPILE_VEC256
 #endif
 
-// Small mismatch between the variable names Python defines as part of configure
-// at the ones HACL* expects to be set in order to enable those headers.
-#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
-#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256
+// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable
+// the corresponding SIMD instructions so we need to "forward" the values
+// we just deduced above.
+#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128
+#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256
 
 #include "_hacl/Hacl_Hash_Blake2s.h"
 #include "_hacl/Hacl_Hash_Blake2b.h"
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h"
 #endif
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h"
 #endif
 
@@ -87,7 +88,7 @@ blake2_get_state(PyObject *module)
     return (Blake2State *)state;
 }
 
-#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256)
+#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256)
 static inline Blake2State *
 blake2_get_state_from_type(PyTypeObject *module)
 {
@@ -180,7 +181,7 @@ blake2module_init_cpu_features(Blake2State *state)
 #undef ECX_SSE3
 #undef EBX_AVX2
 
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
     // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
     state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
 #else
@@ -190,7 +191,7 @@ blake2module_init_cpu_features(Blake2State *state)
     state->can_run_simd128 = false;
 #endif
 
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
     // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
     state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
 #else
@@ -331,18 +332,18 @@ is_blake2s(blake2_impl impl)
 static inline blake2_impl
 type_to_impl(PyTypeObject *type)
 {
-#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256)
+#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256)
     Blake2State *st = blake2_get_state_from_type(type);
 #endif
     if (!strcmp(type->tp_name, blake2b_type_spec.name)) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         return st->can_run_simd256 ? Blake2b_256 : Blake2b;
 #else
         return Blake2b;
 #endif
     }
     else if (!strcmp(type->tp_name, blake2s_type_spec.name)) {
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         return st->can_run_simd128 ? Blake2s_128 : Blake2s;
 #else
         return Blake2s;
@@ -352,33 +353,34 @@ type_to_impl(PyTypeObject *type)
 }
 
 typedef struct {
-    PyObject_HEAD
+    HASHLIB_OBJECT_HEAD
     union {
         Hacl_Hash_Blake2s_state_t *blake2s_state;
         Hacl_Hash_Blake2b_state_t *blake2b_state;
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state;
 #endif
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state;
 #endif
     };
     blake2_impl impl;
-    bool use_mutex;
-    PyMutex mutex;
 } Blake2Object;
 
 #define _Blake2Object_CAST(op)  ((Blake2Object *)(op))
 
-#include "clinic/blake2module.c.h"
+// --- Module clinic configuration --------------------------------------------
 
 /*[clinic input]
 module _blake2
-class _blake2.blake2b "Blake2Object *" "&PyBlake2_BLAKE2bType"
-class _blake2.blake2s "Blake2Object *" "&PyBlake2_BLAKE2sType"
+class _blake2.blake2b "Blake2Object *" "&PyType_Type"
+class _blake2.blake2s "Blake2Object *" "&PyType_Type"
 [clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b7526666bd18af83]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=86b0972b0c41b3d0]*/
+
+#include "clinic/blake2module.c.h"
 
+// --- BLAKE-2 object interface -----------------------------------------------
 
 static Blake2Object *
 new_Blake2Object(PyTypeObject *type)
@@ -422,18 +424,18 @@ new_Blake2Object(PyTypeObject *type)
     } while (0)
 
 static void
-update(Blake2Object *self, uint8_t *buf, Py_ssize_t len)
+blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len)
 {
     switch (self->impl) {
         // blake2b_256_state and blake2s_128_state must be if'd since
         // otherwise this results in an unresolved symbol at link-time.
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,
                         self->blake2b_256_state, buf, len);
             return;
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,
                         self->blake2s_128_state, buf, len);
@@ -553,12 +555,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size,
     // Ensure that the states are NULL-initialized in case of an error.
     // See: py_blake2_clear() for more details.
     switch (self->impl) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             self->blake2b_256_state = NULL;
             break;
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             self->blake2s_128_state = NULL;
             break;
@@ -621,12 +623,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size,
     } while (0)
 
     switch (self->impl) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             BLAKE2_MALLOC(Blake2b_Simd256, self->blake2b_256_state);
             break;
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             BLAKE2_MALLOC(Blake2s_Simd128, self->blake2s_128_state);
             break;
@@ -646,14 +648,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size,
     if (data != NULL) {
         Py_buffer buf;
         GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error);
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            Py_BEGIN_ALLOW_THREADS
-            update(self, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update(self, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            blake2_update_unlocked(self, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
@@ -744,7 +744,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size,
 }
 
 static int
-blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy)
+blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy)
 {
     assert(cpy != NULL);
 #define BLAKE2_COPY(TYPE, STATE_ATTR)                                       \
@@ -756,12 +756,12 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy)
     } while (0)
 
     switch (self->impl) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             BLAKE2_COPY(Blake2b_Simd256, blake2b_256_state);
             break;
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             BLAKE2_COPY(Blake2s_Simd128, blake2s_128_state);
             break;
@@ -787,23 +787,25 @@ error:
 /*[clinic input]
 _blake2.blake2b.copy
 
+    cls: defining_class
+
 Return a copy of the hash object.
 [clinic start generated code]*/
 
 static PyObject *
-_blake2_blake2b_copy_impl(Blake2Object *self)
-/*[clinic end generated code: output=622d1c56b91c50d8 input=e383c2d199fd8a2e]*/
+_blake2_blake2b_copy_impl(Blake2Object *self, PyTypeObject *cls)
+/*[clinic end generated code: output=5f8ea31c56c52287 input=f38f3475e9aec98d]*/
 {
     int rc;
     Blake2Object *cpy;
 
-    if ((cpy = new_Blake2Object(Py_TYPE(self))) == NULL) {
+    if ((cpy = new_Blake2Object(cls)) == NULL) {
         return NULL;
     }
 
-    ENTER_HASHLIB(self);
-    rc = blake2_blake2b_copy_locked(self, cpy);
-    LEAVE_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
+    rc = blake2_blake2b_copy_unlocked(self, cpy);
+    HASHLIB_RELEASE_LOCK(self);
     if (rc < 0) {
         Py_DECREF(cpy);
         return NULL;
@@ -825,25 +827,12 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data)
 /*[clinic end generated code: output=99330230068e8c99 input=ffc4aa6a6a225d31]*/
 {
     Py_buffer buf;
-
     GET_BUFFER_VIEW_OR_ERROUT(data, &buf);
-
-    if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        update(self, buf.buf, buf.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    }
-    else {
-        update(self, buf.buf, buf.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, buf.len,
+        blake2_update_unlocked(self, buf.buf, buf.len)
+    );
     PyBuffer_Release(&buf);
-
     Py_RETURN_NONE;
 }
 
@@ -851,12 +840,12 @@ static uint8_t
 blake2_blake2b_compute_digest(Blake2Object *self, uint8_t *digest)
 {
     switch (self->impl) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             return Hacl_Hash_Blake2b_Simd256_digest(
                 self->blake2b_256_state, digest);
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             return Hacl_Hash_Blake2s_Simd128_digest(
                 self->blake2s_128_state, digest);
@@ -881,9 +870,9 @@ _blake2_blake2b_digest_impl(Blake2Object *self)
 /*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/
 {
     uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES];
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     digest_length = blake2_blake2b_compute_digest(self, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return PyBytes_FromStringAndSize((const char *)digest, digest_length);
 }
 
@@ -898,9 +887,9 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self)
 /*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/
 {
     uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES];
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     digest_length = blake2_blake2b_compute_digest(self, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return _Py_strhex((const char *)digest, digest_length);
 }
 
@@ -934,11 +923,11 @@ static Hacl_Hash_Blake2b_index
 hacl_get_blake2_info(Blake2Object *self)
 {
     switch (self->impl) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             return Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state);
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             return Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state);
 #endif
@@ -986,12 +975,12 @@ py_blake2_clear(PyObject *op)
     } while (0)
 
     switch (self->impl) {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
         case Blake2b_256:
             BLAKE2_FREE(Blake2b_Simd256, self->blake2b_256_state);
             break;
 #endif
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
         case Blake2s_128:
             BLAKE2_FREE(Blake2s_Simd128, self->blake2s_128_state);
             break;
diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h
index 9e9cd56e569..97d010d03a4 100644
--- a/Modules/clinic/blake2module.c.h
+++ b/Modules/clinic/blake2module.c.h
@@ -434,15 +434,19 @@ PyDoc_STRVAR(_blake2_blake2b_copy__doc__,
 "Return a copy of the hash object.");
 
 #define _BLAKE2_BLAKE2B_COPY_METHODDEF    \
-    {"copy", (PyCFunction)_blake2_blake2b_copy, METH_NOARGS, _blake2_blake2b_copy__doc__},
+    {"copy", _PyCFunction_CAST(_blake2_blake2b_copy), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _blake2_blake2b_copy__doc__},
 
 static PyObject *
-_blake2_blake2b_copy_impl(Blake2Object *self);
+_blake2_blake2b_copy_impl(Blake2Object *self, PyTypeObject *cls);
 
 static PyObject *
-_blake2_blake2b_copy(PyObject *self, PyObject *Py_UNUSED(ignored))
+_blake2_blake2b_copy(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
-    return _blake2_blake2b_copy_impl((Blake2Object *)self);
+    if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) {
+        PyErr_SetString(PyExc_TypeError, "copy() takes no arguments");
+        return NULL;
+    }
+    return _blake2_blake2b_copy_impl((Blake2Object *)self, cls);
 }
 
 PyDoc_STRVAR(_blake2_blake2b_update__doc__,
@@ -502,4 +506,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
     return _blake2_blake2b_hexdigest_impl((Blake2Object *)self);
 }
-/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=60a4abbcb8950fe5 input=a9049054013a1b77]*/
diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h
index 121be2c0758..1f631ff406e 100644
--- a/Modules/clinic/sha3module.c.h
+++ b/Modules/clinic/sha3module.c.h
@@ -6,7 +6,7 @@ preserve
 #  include "pycore_gc.h"          // PyGC_Head
 #  include "pycore_runtime.h"     // _Py_ID()
 #endif
-#include "pycore_long.h"          // _PyLong_UnsignedLong_Converter()
+#include "pycore_abstract.h"      // _PyNumber_Index()
 #include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
 
 PyDoc_STRVAR(py_sha3_new__doc__,
@@ -100,15 +100,19 @@ PyDoc_STRVAR(_sha3_sha3_224_copy__doc__,
 "Return a copy of the hash object.");
 
 #define _SHA3_SHA3_224_COPY_METHODDEF    \
-    {"copy", (PyCFunction)_sha3_sha3_224_copy, METH_NOARGS, _sha3_sha3_224_copy__doc__},
+    {"copy", _PyCFunction_CAST(_sha3_sha3_224_copy), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sha3_sha3_224_copy__doc__},
 
 static PyObject *
-_sha3_sha3_224_copy_impl(SHA3object *self);
+_sha3_sha3_224_copy_impl(SHA3object *self, PyTypeObject *cls);
 
 static PyObject *
-_sha3_sha3_224_copy(PyObject *self, PyObject *Py_UNUSED(ignored))
+_sha3_sha3_224_copy(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
-    return _sha3_sha3_224_copy_impl((SHA3object *)self);
+    if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) {
+        PyErr_SetString(PyExc_TypeError, "copy() takes no arguments");
+        return NULL;
+    }
+    return _sha3_sha3_224_copy_impl((SHA3object *)self, cls);
 }
 
 PyDoc_STRVAR(_sha3_sha3_224_digest__doc__,
@@ -179,7 +183,7 @@ PyDoc_STRVAR(_sha3_shake_128_digest__doc__,
     {"digest", _PyCFunction_CAST(_sha3_shake_128_digest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_digest__doc__},
 
 static PyObject *
-_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length);
+_sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length);
 
 static PyObject *
 _sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@@ -213,15 +217,24 @@ _sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs,
     };
     #undef KWTUPLE
     PyObject *argsbuf[1];
-    unsigned long length;
+    Py_ssize_t length;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
             /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
     if (!args) {
         goto exit;
     }
-    if (!_PyLong_UnsignedLong_Converter(args[0], &length)) {
-        goto exit;
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(args[0]);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        length = ival;
     }
     return_value = _sha3_shake_128_digest_impl((SHA3object *)self, length);
 
@@ -239,7 +252,7 @@ PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__,
     {"hexdigest", _PyCFunction_CAST(_sha3_shake_128_hexdigest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__},
 
 static PyObject *
-_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length);
+_sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length);
 
 static PyObject *
 _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@@ -273,19 +286,28 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t narg
     };
     #undef KWTUPLE
     PyObject *argsbuf[1];
-    unsigned long length;
+    Py_ssize_t length;
 
     args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
             /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
     if (!args) {
         goto exit;
     }
-    if (!_PyLong_UnsignedLong_Converter(args[0], &length)) {
-        goto exit;
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(args[0]);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        length = ival;
     }
     return_value = _sha3_shake_128_hexdigest_impl((SHA3object *)self, length);
 
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=65e437799472b89f input=a9049054013a1b77]*/
+/*[clinic end generated code: output=48be77f8a31e8a3e input=a9049054013a1b77]*/
diff --git a/Modules/clinic/socketmodule.c.h b/Modules/clinic/socketmodule.c.h
index 573903be87e..0cedab597db 100644
--- a/Modules/clinic/socketmodule.c.h
+++ b/Modules/clinic/socketmodule.c.h
@@ -7,7 +7,7 @@ preserve
 #  include "pycore_runtime.h"     // _Py_ID()
 #endif
 #include "pycore_long.h"          // _PyLong_UInt16_Converter()
-#include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()
+#include "pycore_modsupport.h"    // _PyArg_CheckPositional()
 
 PyDoc_STRVAR(_socket_socket_close__doc__,
 "close($self, /)\n"
@@ -29,6 +29,170 @@ _socket_socket_close(PyObject *s, PyObject *Py_UNUSED(ignored))
     return _socket_socket_close_impl((PySocketSockObject *)s);
 }
 
+PyDoc_STRVAR(_socket_socket_send__doc__,
+"send($self, data, flags=0, /)\n"
+"--\n"
+"\n"
+"Send a data string to the socket.\n"
+"\n"
+"For the optional flags argument, see the Unix manual.\n"
+"Return the number of bytes sent; this may be less than len(data) if the network is busy.");
+
+#define _SOCKET_SOCKET_SEND_METHODDEF    \
+    {"send", _PyCFunction_CAST(_socket_socket_send), METH_FASTCALL, _socket_socket_send__doc__},
+
+static PyObject *
+_socket_socket_send_impl(PySocketSockObject *s, Py_buffer *pbuf, int flags);
+
+static PyObject *
+_socket_socket_send(PyObject *s, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    Py_buffer pbuf = {NULL, NULL};
+    int flags = 0;
+
+    if (!_PyArg_CheckPositional("send", nargs, 1, 2)) {
+        goto exit;
+    }
+    if (PyObject_GetBuffer(args[0], &pbuf, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    flags = PyLong_AsInt(args[1]);
+    if (flags == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional:
+    return_value = _socket_socket_send_impl((PySocketSockObject *)s, &pbuf, flags);
+
+exit:
+    /* Cleanup for pbuf */
+    if (pbuf.obj) {
+       PyBuffer_Release(&pbuf);
+    }
+
+    return return_value;
+}
+
+PyDoc_STRVAR(_socket_socket_sendall__doc__,
+"sendall($self, data, flags=0, /)\n"
+"--\n"
+"\n"
+"Send a data string to the socket.\n"
+"\n"
+"For the optional flags argument, see the Unix manual.\n"
+"This calls send() repeatedly until all data is sent.\n"
+"If an error occurs, it\'s impossible to tell how much data has been sent.");
+
+#define _SOCKET_SOCKET_SENDALL_METHODDEF    \
+    {"sendall", _PyCFunction_CAST(_socket_socket_sendall), METH_FASTCALL, _socket_socket_sendall__doc__},
+
+static PyObject *
+_socket_socket_sendall_impl(PySocketSockObject *s, Py_buffer *pbuf,
+                            int flags);
+
+static PyObject *
+_socket_socket_sendall(PyObject *s, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    Py_buffer pbuf = {NULL, NULL};
+    int flags = 0;
+
+    if (!_PyArg_CheckPositional("sendall", nargs, 1, 2)) {
+        goto exit;
+    }
+    if (PyObject_GetBuffer(args[0], &pbuf, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    flags = PyLong_AsInt(args[1]);
+    if (flags == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional:
+    return_value = _socket_socket_sendall_impl((PySocketSockObject *)s, &pbuf, flags);
+
+exit:
+    /* Cleanup for pbuf */
+    if (pbuf.obj) {
+       PyBuffer_Release(&pbuf);
+    }
+
+    return return_value;
+}
+
+#if defined(CMSG_LEN)
+
+PyDoc_STRVAR(_socket_socket_sendmsg__doc__,
+"sendmsg($self, buffers, ancdata=<unrepresentable>, flags=0,\n"
+"        address=<unrepresentable>, /)\n"
+"--\n"
+"\n"
+"Send normal and ancillary data to the socket.\n"
+"\n"
+"It gathering the non-ancillary data from a series of buffers\n"
+"and concatenating it into a single message.\n"
+"The buffers argument specifies the non-ancillary\n"
+"data as an iterable of bytes-like objects (e.g. bytes objects).\n"
+"The ancdata argument specifies the ancillary data (control messages)\n"
+"as an iterable of zero or more tuples (cmsg_level, cmsg_type,\n"
+"cmsg_data), where cmsg_level and cmsg_type are integers specifying the\n"
+"protocol level and protocol-specific type respectively, and cmsg_data\n"
+"is a bytes-like object holding the associated data.  The flags\n"
+"argument defaults to 0 and has the same meaning as for send().  If\n"
+"address is supplied and not None, it sets a destination address for\n"
+"the message.  The return value is the number of bytes of non-ancillary\n"
+"data sent.");
+
+#define _SOCKET_SOCKET_SENDMSG_METHODDEF    \
+    {"sendmsg", _PyCFunction_CAST(_socket_socket_sendmsg), METH_FASTCALL, _socket_socket_sendmsg__doc__},
+
+static PyObject *
+_socket_socket_sendmsg_impl(PySocketSockObject *s, PyObject *data_arg,
+                            PyObject *cmsg_arg, int flags,
+                            PyObject *addr_arg);
+
+static PyObject *
+_socket_socket_sendmsg(PyObject *s, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    PyObject *data_arg;
+    PyObject *cmsg_arg = NULL;
+    int flags = 0;
+    PyObject *addr_arg = NULL;
+
+    if (!_PyArg_CheckPositional("sendmsg", nargs, 1, 4)) {
+        goto exit;
+    }
+    data_arg = args[0];
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    cmsg_arg = args[1];
+    if (nargs < 3) {
+        goto skip_optional;
+    }
+    flags = PyLong_AsInt(args[2]);
+    if (flags == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (nargs < 4) {
+        goto skip_optional;
+    }
+    addr_arg = args[3];
+skip_optional:
+    return_value = _socket_socket_sendmsg_impl((PySocketSockObject *)s, data_arg, cmsg_arg, flags, addr_arg);
+
+exit:
+    return return_value;
+}
+
+#endif /* defined(CMSG_LEN) */
+
 static int
 sock_initobj_impl(PySocketSockObject *self, int family, int type, int proto,
                   PyObject *fdobj);
@@ -359,6 +523,10 @@ exit:
 
 #endif /* (defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS)) */
 
+#ifndef _SOCKET_SOCKET_SENDMSG_METHODDEF
+    #define _SOCKET_SOCKET_SENDMSG_METHODDEF
+#endif /* !defined(_SOCKET_SOCKET_SENDMSG_METHODDEF) */
+
 #ifndef _SOCKET_INET_NTOA_METHODDEF
     #define _SOCKET_INET_NTOA_METHODDEF
 #endif /* !defined(_SOCKET_INET_NTOA_METHODDEF) */
@@ -370,4 +538,4 @@ exit:
 #ifndef _SOCKET_IF_INDEXTONAME_METHODDEF
     #define _SOCKET_IF_INDEXTONAME_METHODDEF
 #endif /* !defined(_SOCKET_IF_INDEXTONAME_METHODDEF) */
-/*[clinic end generated code: output=07776dd21d1e3b56 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0376c46b76ae2bce input=a9049054013a1b77]*/
diff --git a/Modules/hashlib.h b/Modules/hashlib.h
index e82ec92be25..9a7e72f34a7 100644
--- a/Modules/hashlib.h
+++ b/Modules/hashlib.h
@@ -34,47 +34,78 @@
 
 /*
  * Helper code to synchronize access to the hash object when the GIL is
- * released around a CPU consuming hashlib operation. All code paths that
- * access a mutable part of obj must be enclosed in an ENTER_HASHLIB /
- * LEAVE_HASHLIB block or explicitly acquire and release the lock inside
- * a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for
- * an operation.
+ * released around a CPU consuming hashlib operation.
  *
- * These only drop the GIL if the lock acquisition itself is likely to
- * block. Thus the non-blocking acquire gating the GIL release for a
- * blocking lock acquisition. The intent of these macros is to surround
- * the assumed always "fast" operations that you aren't releasing the
- * GIL around.  Otherwise use code similar to what you see in hash
- * function update() methods.
+ * Code accessing a mutable part of the hash object must be enclosed in
+ * a HASHLIB_{ACQUIRE,RELEASE}_LOCK block or explicitly acquire and release
+ * the mutex inside a Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block if
+ * it needs to release the GIL for an operation.
  */
 
-#include "pythread.h"
-#define ENTER_HASHLIB(obj) \
-    if ((obj)->use_mutex) { \
-        PyMutex_Lock(&(obj)->mutex); \
-    }
-#define LEAVE_HASHLIB(obj) \
-    if ((obj)->use_mutex) { \
-        PyMutex_Unlock(&(obj)->mutex); \
-    }
+#define HASHLIB_OBJECT_HEAD                                             \
+    PyObject_HEAD                                                       \
+    /* Guard against race conditions during incremental update(). */    \
+    PyMutex mutex;
 
-#ifdef Py_GIL_DISABLED
-#define HASHLIB_INIT_MUTEX(obj) \
-    do { \
-        (obj)->mutex = (PyMutex){0}; \
-        (obj)->use_mutex = true; \
+#define HASHLIB_INIT_MUTEX(OBJ)         \
+    do {                                \
+        (OBJ)->mutex = (PyMutex){0};    \
     } while (0)
-#else
-#define HASHLIB_INIT_MUTEX(obj) \
-    do { \
-        (obj)->mutex = (PyMutex){0}; \
-        (obj)->use_mutex = false; \
+
+#define HASHLIB_ACQUIRE_LOCK(OBJ)   PyMutex_Lock(&(OBJ)->mutex)
+#define HASHLIB_RELEASE_LOCK(OBJ)   PyMutex_Unlock(&(OBJ)->mutex)
+
+/*
+ * Message length above which the GIL is to be released
+ * when performing hashing operations.
+ */
+#define HASHLIB_GIL_MINSIZE         2048
+
+// Macros for executing code while conditionally holding the GIL.
+//
+// These only drop the GIL if the lock acquisition itself is likely to
+// block. Thus the non-blocking acquire gating the GIL release for a
+// blocking lock acquisition. The intent of these macros is to surround
+// the assumed always "fast" operations that you aren't releasing the
+// GIL around.
+
+/*
+ * Execute a suite of C statements 'STATEMENTS'.
+ *
+ * The GIL is held unless 'SIZE' exceeds the HASHLIB_GIL_MINSIZE threshold.
+ */
+#define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS)    \
+    do {                                                            \
+        if ((SIZE) > HASHLIB_GIL_MINSIZE) {                         \
+            Py_BEGIN_ALLOW_THREADS                                  \
+            STATEMENTS;                                             \
+            Py_END_ALLOW_THREADS                                    \
+        }                                                           \
+        else {                                                      \
+            STATEMENTS;                                             \
+        }                                                           \
     } while (0)
-#endif
 
-/* TODO(gpshead): We should make this a module or class attribute
- * to allow the user to optimize based on the platform they're using. */
-#define HASHLIB_GIL_MINSIZE 2048
+/*
+ * Lock 'OBJ' and execute a suite of C statements 'STATEMENTS'.
+ *
+ * The GIL is held unless 'SIZE' exceeds the HASHLIB_GIL_MINSIZE threshold.
+ */
+#define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \
+    do {                                                            \
+        if ((SIZE) > HASHLIB_GIL_MINSIZE) {                         \
+            Py_BEGIN_ALLOW_THREADS                                  \
+            HASHLIB_ACQUIRE_LOCK(OBJ);                              \
+            STATEMENTS;                                             \
+            HASHLIB_RELEASE_LOCK(OBJ);                              \
+            Py_END_ALLOW_THREADS                                    \
+        }                                                           \
+        else {                                                      \
+            HASHLIB_ACQUIRE_LOCK(OBJ);                              \
+            STATEMENTS;                                             \
+            HASHLIB_RELEASE_LOCK(OBJ);                              \
+        }                                                           \
+    } while (0)
 
 static inline int
 _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string)
diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c
index b404d5732ec..95e400231bb 100644
--- a/Modules/hmacmodule.c
+++ b/Modules/hmacmodule.c
@@ -31,14 +31,15 @@
 #endif
 
 #if defined(__APPLE__) && defined(__arm64__)
-#  undef HACL_CAN_COMPILE_SIMD128
-#  undef HACL_CAN_COMPILE_SIMD256
+#  undef _Py_HACL_CAN_COMPILE_VEC128
+#  undef _Py_HACL_CAN_COMPILE_VEC256
 #endif
 
-// Small mismatch between the variable names Python defines as part of configure
-// at the ones HACL* expects to be set in order to enable those headers.
-#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
-#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256
+// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable
+// the corresponding SIMD instructions so we need to "forward" the values
+// we just deduced above.
+#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128
+#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256
 
 #include "_hacl/Hacl_HMAC.h"
 #include "_hacl/Hacl_Streaming_HMAC.h"  // Hacl_Agile_Hash_* identifiers
@@ -216,105 +217,6 @@ typedef struct py_hmac_hacl_api {
 #endif
 
 /*
- * Call the HACL* HMAC-HASH update function on the given data.
- *
- * The magnitude of 'LEN' is not checked and thus 'LEN' must be
- * safely convertible to a uint32_t value.
- */
-#define Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN)          \
-    Hacl_Streaming_HMAC_update(HACL_STATE, BUF, (uint32_t)(LEN))
-
-/*
- * Call the HACL* HMAC-HASH update function on the given data.
- *
- * On DEBUG builds, the 'ERRACTION' statements are executed if
- * the update() call returned a non-successful HACL* exit code.
- *
- * The buffer 'BUF' and its length 'LEN' are left untouched.
- *
- * The formal signature of this macro is:
- *
- *     (HACL_HMAC_state *, uint8_t *, uint32_t, PyObject *, (C statements))
- */
-#ifndef NDEBUG
-#define Py_HMAC_HACL_UPDATE_ONCE(                                           \
-    HACL_STATE, BUF, LEN,                                                   \
-    ALGORITHM, ERRACTION                                                    \
-)                                                                           \
-    do {                                                                    \
-        Py_CHECK_HACL_UINT32_T_LENGTH(LEN);                                 \
-        hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \
-        if (_hacl_convert_errno(code, (ALGORITHM)) < 0) {                   \
-            ERRACTION;                                                      \
-        }                                                                   \
-    } while (0)
-#else
-#define Py_HMAC_HACL_UPDATE_ONCE(                                   \
-    HACL_STATE, BUF, LEN,                                           \
-    _ALGORITHM, _ERRACTION                                          \
-)                                                                   \
-    do {                                                            \
-        (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN));     \
-    } while (0)
-#endif
-
-/*
- * Repetivively call the HACL* HMAC-HASH update function on the given
- * data until the buffer length 'LEN' is strictly less than UINT32_MAX.
- *
- * On builds with PY_SSIZE_T_MAX <= UINT32_MAX, this is a no-op.
- *
- * The buffer 'BUF' (resp. 'LEN') is advanced (resp. decremented)
- * by UINT32_MAX after each update. On DEBUG builds, each update()
- * call is verified and the 'ERRACTION' statements are executed if
- * a non-successful HACL* exit code is being returned.
- *
- * In particular, 'BUF' and 'LEN' must be variable names and not
- * expressions on their own.
- *
- * The formal signature of this macro is:
- *
- *     (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements))
- */
-#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32
-#define Py_HMAC_HACL_UPDATE_LOOP(                                   \
-    HACL_STATE, BUF, LEN,                                           \
-    ALGORITHM, ERRACTION                                            \
-)                                                                   \
-    do {                                                            \
-        while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) {           \
-            Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX,   \
-                                     ALGORITHM, ERRACTION);         \
-            BUF += UINT32_MAX;                                      \
-            LEN -= UINT32_MAX;                                      \
-        }                                                           \
-    } while (0)
-#else
-#define Py_HMAC_HACL_UPDATE_LOOP(   \
-    HACL_STATE, BUF, LEN,           \
-    _ALGORITHM, _ERRACTION          \
-)
-#endif
-
-/*
- * Perform the HMAC-HASH update() operation in a streaming fashion.
- *
- * The formal signature of this macro is:
- *
- *     (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements))
- */
-#define Py_HMAC_HACL_UPDATE(                            \
-    HACL_STATE, BUF, LEN,                               \
-    ALGORITHM, ERRACTION                                \
-)                                                       \
-    do {                                                \
-        Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN,  \
-                                 ALGORITHM, ERRACTION); \
-        Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN,  \
-                                 ALGORITHM, ERRACTION); \
-    } while (0)
-
-/*
  * HMAC underlying hash function static information.
  */
 typedef struct py_hmac_hinfo {
@@ -382,11 +284,7 @@ get_hmacmodule_state_by_cls(PyTypeObject *cls)
 typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state;
 
 typedef struct HMACObject {
-    PyObject_HEAD
-
-    bool use_mutex;
-    PyMutex mutex;
-
+    HASHLIB_OBJECT_HEAD
     // Hash function information
     PyObject *name;         // rendered name (exact unicode object)
     HMAC_Hash_Kind kind;    // can be used for runtime dispatch (must be known)
@@ -464,7 +362,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind)
 {
     switch (kind) {
         case Py_hmac_kind_hmac_blake2s_32: {
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
             if (state->can_run_simd128) {
                 return Py_hmac_kind_hmac_vectorized_blake2s_32;
             }
@@ -472,7 +370,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind)
             return kind;
         }
         case Py_hmac_kind_hmac_blake2b_32: {
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
             if (state->can_run_simd256) {
                 return Py_hmac_kind_hmac_vectorized_blake2b_32;
             }
@@ -491,38 +389,40 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind)
  * Otherwise, this sets an appropriate exception and returns -1.
  */
 static int
-_hacl_convert_errno(hacl_errno_t code, PyObject *algorithm)
+_hacl_convert_errno(hacl_errno_t code)
 {
+    assert(PyGILState_GetThisThreadState() != NULL);
+    if (code == Hacl_Streaming_Types_Success) {
+        return 0;
+    }
+
+    PyGILState_STATE gstate = PyGILState_Ensure();
     switch (code) {
-        case Hacl_Streaming_Types_Success: {
-            return 0;
-        }
         case Hacl_Streaming_Types_InvalidAlgorithm: {
-            // only makes sense if an algorithm is known at call time
-            assert(algorithm != NULL);
-            assert(PyUnicode_CheckExact(algorithm));
-            PyErr_Format(PyExc_ValueError, "invalid algorithm: %U", algorithm);
-            return -1;
+            PyErr_SetString(PyExc_ValueError, "invalid HACL* algorithm");
+            break;
         }
         case Hacl_Streaming_Types_InvalidLength: {
             PyErr_SetString(PyExc_ValueError, "invalid length");
-            return -1;
+            break;
         }
         case Hacl_Streaming_Types_MaximumLengthExceeded: {
             PyErr_SetString(PyExc_OverflowError, "maximum length exceeded");
-            return -1;
+            break;
         }
         case Hacl_Streaming_Types_OutOfMemory: {
             PyErr_NoMemory();
-            return -1;
+            break;
         }
         default: {
             PyErr_Format(PyExc_RuntimeError,
-                         "HACL* internal routine failed with error code: %d",
+                         "HACL* internal routine failed with error code: %u",
                          code);
-            return -1;
+            break;
         }
     }
+    PyGILState_Release(gstate);
+    return -1;
 }
 
 /*
@@ -536,7 +436,7 @@ _hacl_hmac_state_new(HMAC_Hash_Kind kind, uint8_t *key, uint32_t len)
     assert(kind != Py_hmac_kind_hash_unknown);
     HACL_HMAC_state *state = NULL;
     hacl_errno_t retcode = Hacl_Streaming_HMAC_malloc_(kind, key, len, &state);
-    if (_hacl_convert_errno(retcode, NULL) < 0) {
+    if (_hacl_convert_errno(retcode) < 0) {
         assert(state == NULL);
         return NULL;
     }
@@ -554,6 +454,51 @@ _hacl_hmac_state_free(HACL_HMAC_state *state)
     }
 }
 
+/*
+ * Call the HACL* HMAC-HASH update function on the given data.
+ *
+ * On DEBUG builds, the update() call is verified.
+ *
+ * Return 0 on success; otherwise, set an exception and return -1 on failure.
+ */
+static int
+_hacl_hmac_state_update_once(HACL_HMAC_state *state,
+                             uint8_t *buf, uint32_t len)
+{
+#ifndef NDEBUG
+    hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, len);
+    return _hacl_convert_errno(code);
+#else
+    (void)Hacl_Streaming_HMAC_update(state, buf, len);
+    return 0;
+#endif
+}
+
+/*
+ * Perform the HMAC-HASH update() operation in a streaming fashion.
+ *
+ * On DEBUG builds, each update() call is verified.
+ *
+ * Return 0 on success; otherwise, set an exception and return -1 on failure.
+ */
+static int
+_hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len)
+{
+    assert(len >= 0);
+#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32
+    while (len > UINT32_MAX_AS_SSIZE_T) {
+        if (_hacl_hmac_state_update_once(state, buf, UINT32_MAX) < 0) {
+            assert(PyErr_Occurred());
+            return -1;
+        }
+        buf += UINT32_MAX;
+        len -= UINT32_MAX;
+    }
+#endif
+    Py_CHECK_HACL_UINT32_T_LENGTH(len);
+    return _hacl_hmac_state_update_once(state, buf, (uint32_t)len);
+}
+
 /* Static information used to construct the hash table. */
 static const py_hmac_hinfo py_hmac_static_hinfo[] = {
 #define Py_HMAC_HINFO_HACL_API(HACL_HID)                                \
@@ -784,45 +729,6 @@ hmac_new_initial_state(HMACObject *self, uint8_t *key, Py_ssize_t len)
     return self->state == NULL ? -1 : 0;
 }
 
-/*
- * Feed initial data.
- *
- * This function MUST only be called by the HMAC object constructor
- * and after hmac_set_hinfo() and hmac_new_initial_state() have been
- * called, lest the behaviour is undefined.
- *
- * Return 0 on success; otherwise, set an exception and return -1 on failure.
- */
-static int
-hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len)
-{
-    assert(self->name != NULL);
-    assert(self->state != NULL);
-    if (len == 0) {
-        // do nothing if the buffer is empty
-        return 0;
-    }
-
-    if (len < HASHLIB_GIL_MINSIZE) {
-        Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, return -1);
-        return 0;
-    }
-
-    int res = 0;
-    Py_BEGIN_ALLOW_THREADS
-        Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, goto error);
-        goto done;
-#ifndef NDEBUG
-error:
-        res = -1;
-#else
-        Py_UNREACHABLE();
-#endif
-done:
-    Py_END_ALLOW_THREADS
-    return res;
-}
-
 /*[clinic input]
 _hmac.new
 
@@ -869,7 +775,12 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj,
     if (msgobj != NULL && msgobj != Py_None) {
         Py_buffer msg;
         GET_BUFFER_VIEW_OR_ERROR(msgobj, &msg, goto error);
-        rc = hmac_feed_initial_data(self, msg.buf, msg.len);
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            msg.len,
+            rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len)
+        );
         PyBuffer_Release(&msg);
 #ifndef NDEBUG
         if (rc < 0) {
@@ -946,12 +857,12 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls)
         return NULL;
     }
 
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     /* copy hash information */
     hmac_copy_hinfo(copy, self);
     /* copy internal state */
     int rc = hmac_copy_state(copy, self);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
 
     if (rc < 0) {
         Py_DECREF(copy);
@@ -963,78 +874,6 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls)
     return (PyObject *)copy;
 }
 
-/*
- * Update the HMAC object with the given buffer.
- *
- * This unconditionally acquires the lock on the HMAC object.
- *
- * On DEBUG builds, each update() call is verified.
- *
- * Return 0 on success; otherwise, set an exception and return -1 on failure.
- */
-static int
-hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len)
-{
-    int res = 0;
-    Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);  // unconditionally acquire a lock
-        Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error);
-        goto done;
-#ifndef NDEBUG
-error:
-        res = -1;
-#else
-        Py_UNREACHABLE();
-#endif
-done:
-        PyMutex_Unlock(&self->mutex);
-    Py_END_ALLOW_THREADS
-    return res;
-}
-
-/*
- * Update the HMAC object with the given buffer.
- *
- * This conditionally acquires the lock on the HMAC object.
- *
- * On DEBUG builds, each update() call is verified.
- *
- * Return 0 on success; otherwise, set an exception and return -1 on failure.
- */
-static int
-hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len)
-{
-    ENTER_HASHLIB(self);  // conditionally acquire a lock
-    Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error);
-    LEAVE_HASHLIB(self);
-    return 0;
-
-#ifndef NDEBUG
-error:
-    LEAVE_HASHLIB(self);
-    return -1;
-#else
-    Py_UNREACHABLE();
-#endif
-}
-
-/*
- * Update the internal HMAC state with the given buffer.
- *
- * Return 0 on success; otherwise, set an exception and return -1 on failure.
- */
-static inline int
-hmac_update_state(HMACObject *self, uint8_t *buf, Py_ssize_t len)
-{
-    assert(buf != 0);
-    assert(len >= 0);
-    return len == 0
-               ? 0 /* nothing to do */
-               : len < HASHLIB_GIL_MINSIZE
-                     ? hmac_update_state_cond_lock(self, buf, len)
-                     : hmac_update_state_with_lock(self, buf, len);
-}
-
 /*[clinic input]
 _hmac.HMAC.update
 
@@ -1047,9 +886,13 @@ static PyObject *
 _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj)
 /*[clinic end generated code: output=962134ada5e55985 input=7c0ea830efb03367]*/
 {
+    int rc = 0;
     Py_buffer msg;
     GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg);
-    int rc = hmac_update_state(self, msg.buf, msg.len);
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, msg.len,
+        rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len)
+    );
     PyBuffer_Release(&msg);
     return rc < 0 ? NULL : Py_None;
 }
@@ -1065,18 +908,18 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj)
  * Note: this function may raise a MemoryError.
  */
 static int
-hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest)
+hmac_digest_compute_locked(HMACObject *self, uint8_t *digest)
 {
     assert(digest != NULL);
     hacl_errno_t rc;
-    ENTER_HASHLIB(self);  // conditionally acquire a lock
+    HASHLIB_ACQUIRE_LOCK(self);
     rc = Hacl_Streaming_HMAC_digest(self->state, digest, self->digest_size);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     assert(
         rc == Hacl_Streaming_Types_Success ||
         rc == Hacl_Streaming_Types_OutOfMemory
     );
-    return _hacl_convert_errno(rc, NULL);
+    return _hacl_convert_errno(rc);
 }
 
 /*[clinic input]
@@ -1093,7 +936,7 @@ _hmac_HMAC_digest_impl(HMACObject *self)
 {
     assert(self->digest_size <= Py_hmac_hash_max_digest_size);
     uint8_t digest[Py_hmac_hash_max_digest_size];
-    if (hmac_digest_compute_cond_lock(self, digest) < 0) {
+    if (hmac_digest_compute_locked(self, digest) < 0) {
         return NULL;
     }
     return PyBytes_FromStringAndSize((const char *)digest, self->digest_size);
@@ -1116,7 +959,7 @@ _hmac_HMAC_hexdigest_impl(HMACObject *self)
 {
     assert(self->digest_size <= Py_hmac_hash_max_digest_size);
     uint8_t digest[Py_hmac_hash_max_digest_size];
-    if (hmac_digest_compute_cond_lock(self, digest) < 0) {
+    if (hmac_digest_compute_locked(self, digest) < 0) {
         return NULL;
     }
     return _Py_strhex((const char *)digest, self->digest_size);
@@ -1759,7 +1602,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state)
 #undef ECX_SSE3
 #undef EBX_AVX2
 
-#if HACL_CAN_COMPILE_SIMD128
+#if _Py_HACL_CAN_COMPILE_VEC128
     // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
     state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
 #else
@@ -1769,7 +1612,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state)
     state->can_run_simd128 = false;
 #endif
 
-#if HACL_CAN_COMPILE_SIMD256
+#if _Py_HACL_CAN_COMPILE_VEC256
     // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
     state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
 #else
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index bbbb4911568..1837de4735c 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -3148,7 +3148,7 @@ static PyObject *
 math_issubnormal_impl(PyObject *module, double x)
 /*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/
 {
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
     return PyBool_FromLong(issubnormal(x));
 #else
     return PyBool_FromLong(isfinite(x) && x && !isnormal(x));
diff --git a/Modules/md5module.c b/Modules/md5module.c
index 08dbcd2cbce..8b6dd4a8195 100644
--- a/Modules/md5module.c
+++ b/Modules/md5module.c
@@ -8,6 +8,7 @@
    Andrew Kuchling (amk@amk.ca)
    Greg Stein (gstein@lyra.org)
    Trevor Perrin (trevp@trevp.net)
+   Bénédikt Tran (10796600+picnixz@users.noreply.github.com)
 
    Copyright (C) 2005-2007   Gregory P. Smith (greg@krypto.org)
    Licensed to PSF under a Contributor Agreement.
@@ -21,34 +22,27 @@
 #endif
 
 #include "Python.h"
+#include "pycore_strhex.h" // _Py_strhex()
+
 #include "hashlib.h"
 
-/*[clinic input]
-module _md5
-class MD5Type "MD5object *" "&PyType_Type"
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
+#include "_hacl/Hacl_Hash_MD5.h"
 
 /* The MD5 block size and message digest sizes, in bytes */
 
 #define MD5_BLOCKSIZE    64
 #define MD5_DIGESTSIZE   16
 
-#include "_hacl/Hacl_Hash_MD5.h"
-
+// --- Module objects ---------------------------------------------------------
 
 typedef struct {
-    PyObject_HEAD
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;
+    HASHLIB_OBJECT_HEAD
     Hacl_Hash_MD5_state_t *hash_state;
 } MD5object;
 
 #define _MD5object_CAST(op)     ((MD5object *)(op))
 
-#include "clinic/md5module.c.h"
-
+// --- Module state -----------------------------------------------------------
 
 typedef struct {
     PyTypeObject* md5_type;
@@ -62,6 +56,18 @@ md5_get_state(PyObject *module)
     return (MD5State *)state;
 }
 
+// --- Module clinic configuration --------------------------------------------
+
+/*[clinic input]
+module _md5
+class MD5Type "MD5object *" "&PyType_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
+
+#include "clinic/md5module.c.h"
+
+// --- MD5 object interface ---------------------------------------------------
+
 static MD5object *
 newMD5object(MD5State * st)
 {
@@ -116,9 +122,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls)
         return NULL;
     }
 
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (newobj->hash_state == NULL) {
         Py_DECREF(newobj);
         return PyErr_NoMemory();
@@ -136,10 +142,10 @@ static PyObject *
 MD5Type_digest_impl(MD5object *self)
 /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
 {
-    unsigned char digest[MD5_DIGESTSIZE];
-    ENTER_HASHLIB(self);
+    uint8_t digest[MD5_DIGESTSIZE];
+    HASHLIB_ACQUIRE_LOCK(self);
     Hacl_Hash_MD5_digest(self->hash_state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
 }
 
@@ -153,20 +159,11 @@ static PyObject *
 MD5Type_hexdigest_impl(MD5object *self)
 /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
 {
-    unsigned char digest[MD5_DIGESTSIZE];
-    ENTER_HASHLIB(self);
+    uint8_t digest[MD5_DIGESTSIZE];
+    HASHLIB_ACQUIRE_LOCK(self);
     Hacl_Hash_MD5_digest(self->hash_state, digest);
-    LEAVE_HASHLIB(self);
-
-    const char *hexdigits = "0123456789abcdef";
-    char digest_hex[MD5_DIGESTSIZE * 2];
-    char *str = digest_hex;
-    for (size_t i=0; i < MD5_DIGESTSIZE; i++) {
-        unsigned char byte = digest[i];
-        *str++ = hexdigits[byte >> 4];
-        *str++ = hexdigits[byte & 0x0f];
-    }
-    return PyUnicode_FromStringAndSize(digest_hex, sizeof(digest_hex));
+    HASHLIB_RELEASE_LOCK(self);
+    return _Py_strhex((const char *)digest, MD5_DIGESTSIZE);
 }
 
 static void
@@ -177,6 +174,7 @@ update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len)
     * take more than 1 billion years to overflow the maximum admissible length
     * for MD5 (2^61 - 1).
     */
+    assert(len >= 0);
 #if PY_SSIZE_T_MAX > UINT32_MAX
     while (len > UINT32_MAX) {
         (void)Hacl_Hash_MD5_update(state, buf, UINT32_MAX);
@@ -202,22 +200,11 @@ MD5Type_update_impl(MD5object *self, PyObject *obj)
 /*[clinic end generated code: output=b0fed9a7ce7ad253 input=6e1efcd9ecf17032]*/
 {
     Py_buffer buf;
-
     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
-
-    if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        update(self->hash_state, buf.buf, buf.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        update(self->hash_state, buf.buf, buf.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, buf.len,
+        update(self->hash_state, buf.buf, buf.len)
+    );
     PyBuffer_Release(&buf);
     Py_RETURN_NONE;
 }
@@ -319,16 +306,12 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity,
     }
 
     if (string) {
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            update(new->hash_state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update(new->hash_state, buf.buf, buf.len);
-        }
+        /* Do not use new->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            update(new->hash_state, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 7dc5ef39a56..b570f81b7cf 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -685,7 +685,8 @@ static void
 reset_remotedebug_data(PyThreadState *tstate)
 {
     tstate->remote_debugger_support.debugger_pending_call = 0;
-    memset(tstate->remote_debugger_support.debugger_script_path, 0, MAX_SCRIPT_PATH_SIZE);
+    memset(tstate->remote_debugger_support.debugger_script_path, 0,
+           Py_MAX_SCRIPT_PATH_SIZE);
 }
 
 
diff --git a/Modules/sha1module.c b/Modules/sha1module.c
index a746bf74f8d..faa9dcccc57 100644
--- a/Modules/sha1module.c
+++ b/Modules/sha1module.c
@@ -8,13 +8,13 @@
    Andrew Kuchling (amk@amk.ca)
    Greg Stein (gstein@lyra.org)
    Trevor Perrin (trevp@trevp.net)
+   Bénédikt Tran (10796600+picnixz@users.noreply.github.com)
 
    Copyright (C) 2005-2007   Gregory P. Smith (greg@krypto.org)
    Licensed to PSF under a Contributor Agreement.
 
 */
 
-/* SHA1 objects */
 #ifndef Py_BUILD_CORE_BUILTIN
 #  define Py_BUILD_CORE_MODULE 1
 #endif
@@ -24,32 +24,23 @@
 #include "pycore_strhex.h"        // _Py_strhex()
 #include "pycore_typeobject.h"    // _PyType_GetModuleState()
 
-/*[clinic input]
-module _sha1
-class SHA1Type "SHA1object *" "&PyType_Type"
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/
+#include "_hacl/Hacl_Hash_SHA1.h"
 
 /* The SHA1 block size and message digest sizes, in bytes */
 
 #define SHA1_BLOCKSIZE    64
 #define SHA1_DIGESTSIZE   20
 
-#include "_hacl/Hacl_Hash_SHA1.h"
+// --- Module objects ---------------------------------------------------------
 
 typedef struct {
-    PyObject_HEAD
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;
-    PyThread_type_lock lock;
+    HASHLIB_OBJECT_HEAD
     Hacl_Hash_SHA1_state_t *hash_state;
 } SHA1object;
 
 #define _SHA1object_CAST(op)    ((SHA1object *)(op))
 
-#include "clinic/sha1module.c.h"
-
+// --- Module state -----------------------------------------------------------
 
 typedef struct {
     PyTypeObject* sha1_type;
@@ -63,6 +54,18 @@ sha1_get_state(PyObject *module)
     return (SHA1State *)state;
 }
 
+// --- Module clinic configuration --------------------------------------------
+
+/*[clinic input]
+module _sha1
+class SHA1Type "SHA1object *" "&PyType_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/
+
+#include "clinic/sha1module.c.h"
+
+// --- SHA-1 object interface -------------------------------------------------
+
 static SHA1object *
 newSHA1object(SHA1State *st)
 {
@@ -121,9 +124,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls)
         return NULL;
     }
 
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     newobj->hash_state = Hacl_Hash_SHA1_copy(self->hash_state);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (newobj->hash_state == NULL) {
         Py_DECREF(newobj);
         return PyErr_NoMemory();
@@ -142,9 +145,9 @@ SHA1Type_digest_impl(SHA1object *self)
 /*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/
 {
     unsigned char digest[SHA1_DIGESTSIZE];
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     Hacl_Hash_SHA1_digest(self->hash_state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE);
 }
 
@@ -159,9 +162,9 @@ SHA1Type_hexdigest_impl(SHA1object *self)
 /*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/
 {
     unsigned char digest[SHA1_DIGESTSIZE];
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     Hacl_Hash_SHA1_digest(self->hash_state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE);
 }
 
@@ -198,22 +201,11 @@ SHA1Type_update_impl(SHA1object *self, PyObject *obj)
 /*[clinic end generated code: output=cdc8e0e106dbec5f input=aad8e07812edbba3]*/
 {
     Py_buffer buf;
-
     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
-
-    if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        update(self->hash_state, buf.buf, buf.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        update(self->hash_state, buf.buf, buf.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, buf.len,
+        update(self->hash_state, buf.buf, buf.len)
+    );
     PyBuffer_Release(&buf);
     Py_RETURN_NONE;
 }
@@ -314,16 +306,12 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity,
         return PyErr_NoMemory();
     }
     if (string) {
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            update(new->hash_state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update(new->hash_state, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            update(new->hash_state, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
diff --git a/Modules/sha2module.c b/Modules/sha2module.c
index 72931910c5d..36300ba899f 100644
--- a/Modules/sha2module.c
+++ b/Modules/sha2module.c
@@ -9,32 +9,25 @@
    Greg Stein (gstein@lyra.org)
    Trevor Perrin (trevp@trevp.net)
    Jonathan Protzenko (jonathan@protzenko.fr)
+   Bénédikt Tran (10796600+picnixz@users.noreply.github.com)
 
    Copyright (C) 2005-2007   Gregory P. Smith (greg@krypto.org)
    Licensed to PSF under a Contributor Agreement.
 
 */
 
-/* SHA objects */
 #ifndef Py_BUILD_CORE_BUILTIN
 #  define Py_BUILD_CORE_MODULE 1
 #endif
 
 #include "Python.h"
-#include "pycore_bitutils.h"      // _Py_bswap32()
 #include "pycore_moduleobject.h"  // _PyModule_GetState()
 #include "pycore_typeobject.h"    // _PyType_GetModuleState()
 #include "pycore_strhex.h"        // _Py_strhex()
 
 #include "hashlib.h"
 
-/*[clinic input]
-module _sha2
-class SHA256Type "SHA256object *" "&PyType_Type"
-class SHA512Type "SHA512object *" "&PyType_Type"
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/
-
+#include "_hacl/Hacl_Hash_SHA2.h"
 
 /* The SHA block sizes and maximum message digest sizes, in bytes */
 
@@ -43,34 +36,26 @@ class SHA512Type "SHA512object *" "&PyType_Type"
 #define SHA512_BLOCKSIZE   128
 #define SHA512_DIGESTSIZE  64
 
-/* Our SHA2 implementations defer to the HACL* verified library. */
-
-#include "_hacl/Hacl_Hash_SHA2.h"
+// --- Module objects ---------------------------------------------------------
 
 // TODO: Get rid of int digestsize in favor of Hacl state info?
 
 typedef struct {
-    PyObject_HEAD
+    HASHLIB_OBJECT_HEAD
     int digestsize;
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;
     Hacl_Hash_SHA2_state_t_256 *state;
 } SHA256object;
 
 typedef struct {
-    PyObject_HEAD
+    HASHLIB_OBJECT_HEAD
     int digestsize;
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;
     Hacl_Hash_SHA2_state_t_512 *state;
 } SHA512object;
 
 #define _SHA256object_CAST(op)  ((SHA256object *)(op))
 #define _SHA512object_CAST(op)  ((SHA512object *)(op))
 
-#include "clinic/sha2module.c.h"
+// --- Module state -----------------------------------------------------------
 
 /* We shall use run-time type information in the remainder of this module to
  * tell apart SHA2-224 and SHA2-256 */
@@ -89,6 +74,19 @@ sha2_get_state(PyObject *module)
     return (sha2_state *)state;
 }
 
+// --- Module clinic configuration --------------------------------------------
+
+/*[clinic input]
+module _sha2
+class SHA256Type "SHA256object *" "&PyType_Type"
+class SHA512Type "SHA512object *" "&PyType_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/
+
+#include "clinic/sha2module.c.h"
+
+// --- SHA-2 object interface -------------------------------------------------
+
 static int
 SHA256copy(SHA256object *src, SHA256object *dest)
 {
@@ -272,9 +270,9 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls)
         }
     }
 
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     rc = SHA256copy(self, newobj);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (rc < 0) {
         Py_DECREF(newobj);
         return NULL;
@@ -309,9 +307,9 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls)
         }
     }
 
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     rc = SHA512copy(self, newobj);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (rc < 0) {
         Py_DECREF(newobj);
         return NULL;
@@ -331,11 +329,11 @@ SHA256Type_digest_impl(SHA256object *self)
 {
     uint8_t digest[SHA256_DIGESTSIZE];
     assert(self->digestsize <= SHA256_DIGESTSIZE);
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     // HACL* performs copies under the hood so that self->state remains valid
     // after this call.
     Hacl_Hash_SHA2_digest_256(self->state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return PyBytes_FromStringAndSize((const char *)digest, self->digestsize);
 }
 
@@ -351,11 +349,11 @@ SHA512Type_digest_impl(SHA512object *self)
 {
     uint8_t digest[SHA512_DIGESTSIZE];
     assert(self->digestsize <= SHA512_DIGESTSIZE);
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     // HACL* performs copies under the hood so that self->state remains valid
     // after this call.
     Hacl_Hash_SHA2_digest_512(self->state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return PyBytes_FromStringAndSize((const char *)digest, self->digestsize);
 }
 
@@ -371,9 +369,9 @@ SHA256Type_hexdigest_impl(SHA256object *self)
 {
     uint8_t digest[SHA256_DIGESTSIZE];
     assert(self->digestsize <= SHA256_DIGESTSIZE);
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     Hacl_Hash_SHA2_digest_256(self->state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return _Py_strhex((const char *)digest, self->digestsize);
 }
 
@@ -389,9 +387,9 @@ SHA512Type_hexdigest_impl(SHA512object *self)
 {
     uint8_t digest[SHA512_DIGESTSIZE];
     assert(self->digestsize <= SHA512_DIGESTSIZE);
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     Hacl_Hash_SHA2_digest_512(self->state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return _Py_strhex((const char *)digest, self->digestsize);
 }
 
@@ -409,22 +407,11 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj)
 /*[clinic end generated code: output=dc58a580cf8905a5 input=b2d449d5b30f0f5a]*/
 {
     Py_buffer buf;
-
     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
-
-    if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        update_256(self->state, buf.buf, buf.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        update_256(self->state, buf.buf, buf.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, buf.len,
+        update_256(self->state, buf.buf, buf.len)
+    );
     PyBuffer_Release(&buf);
     Py_RETURN_NONE;
 }
@@ -443,22 +430,11 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj)
 /*[clinic end generated code: output=9af211766c0b7365 input=ded2b46656566283]*/
 {
     Py_buffer buf;
-
     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
-
-    if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        update_512(self->state, buf.buf, buf.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        update_512(self->state, buf.buf, buf.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, buf.len,
+        update_512(self->state, buf.buf, buf.len)
+    );
     PyBuffer_Release(&buf);
     Py_RETURN_NONE;
 }
@@ -638,16 +614,12 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity,
         return PyErr_NoMemory();
     }
     if (string) {
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            update_256(new->state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update_256(new->state, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            update_256(new->state, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
@@ -700,16 +672,12 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity,
         return PyErr_NoMemory();
     }
     if (string) {
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            update_256(new->state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update_256(new->state, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            update_256(new->state, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
@@ -763,16 +731,12 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity,
         return PyErr_NoMemory();
     }
     if (string) {
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            update_512(new->state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update_512(new->state, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            update_512(new->state, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
@@ -826,16 +790,12 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity,
         return PyErr_NoMemory();
     }
     if (string) {
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            update_512(new->state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            update_512(new->state, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            update_512(new->state, buf.buf, buf.len)
+        );
         PyBuffer_Release(&buf);
     }
 
diff --git a/Modules/sha3module.c b/Modules/sha3module.c
index cfbf0cbcc04..5764556bb68 100644
--- a/Modules/sha3module.c
+++ b/Modules/sha3module.c
@@ -9,6 +9,7 @@
  *  Greg Stein (gstein@lyra.org)
  *  Trevor Perrin (trevp@trevp.net)
  *  Gregory P. Smith (greg@krypto.org)
+ *  Bénédikt Tran (10796600+picnixz@users.noreply.github.com)
  *
  * Copyright (C) 2012-2022  Christian Heimes (christian@python.org)
  * Licensed to PSF under a Contributor Agreement.
@@ -24,8 +25,23 @@
 #include "pycore_typeobject.h"    // _PyType_GetModuleState()
 #include "hashlib.h"
 
+#include "_hacl/Hacl_Hash_SHA3.h"
+
+/*
+ * Assert that 'LEN' can be safely cast to uint32_t.
+ *
+ * The 'LEN' parameter should be convertible to Py_ssize_t.
+ */
+#if !defined(NDEBUG) && (PY_SSIZE_T_MAX > UINT32_MAX)
+#define CHECK_HACL_UINT32_T_LENGTH(LEN) assert((LEN) < (Py_ssize_t)UINT32_MAX)
+#else
+#define CHECK_HACL_UINT32_T_LENGTH(LEN)
+#endif
+
 #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */
 
+// --- Module state -----------------------------------------------------------
+
 typedef struct {
     PyTypeObject *sha3_224_type;
     PyTypeObject *sha3_256_type;
@@ -43,33 +59,34 @@ sha3_get_state(PyObject *module)
     return (SHA3State *)state;
 }
 
-/*[clinic input]
-module _sha3
-class _sha3.sha3_224 "SHA3object *" "&SHA3_224typ"
-class _sha3.sha3_256 "SHA3object *" "&SHA3_256typ"
-class _sha3.sha3_384 "SHA3object *" "&SHA3_384typ"
-class _sha3.sha3_512 "SHA3object *" "&SHA3_512typ"
-class _sha3.shake_128 "SHA3object *" "&SHAKE128type"
-class _sha3.shake_256 "SHA3object *" "&SHAKE256type"
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b8a53680f370285a]*/
+// --- Module objects ---------------------------------------------------------
 
 /* The structure for storing SHA3 info */
 
-#include "_hacl/Hacl_Hash_SHA3.h"
-
 typedef struct {
-    PyObject_HEAD
-    // Prevents undefined behavior via multiple threads entering the C API.
-    bool use_mutex;
-    PyMutex mutex;
+    HASHLIB_OBJECT_HEAD
     Hacl_Hash_SHA3_state_t *hash_state;
 } SHA3object;
 
 #define _SHA3object_CAST(op)    ((SHA3object *)(op))
 
+// --- Module clinic configuration --------------------------------------------
+
+/*[clinic input]
+module _sha3
+class _sha3.sha3_224 "SHA3object *" "&PyType_Type"
+class _sha3.sha3_256 "SHA3object *" "&PyType_Type"
+class _sha3.sha3_384 "SHA3object *" "&PyType_Type"
+class _sha3.sha3_512 "SHA3object *" "&PyType_Type"
+class _sha3.shake_128 "SHA3object *" "&PyType_Type"
+class _sha3.shake_256 "SHA3object *" "&PyType_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ccd22550c7fb99bf]*/
+
 #include "clinic/sha3module.c.h"
 
+// --- SHA-3 object interface -------------------------------------------------
+
 static SHA3object *
 newSHA3object(PyTypeObject *type)
 {
@@ -163,16 +180,12 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity,
 
     if (data) {
         GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error);
-        if (buf.len >= HASHLIB_GIL_MINSIZE) {
-            /* We do not initialize self->lock here as this is the constructor
-             * where it is not yet possible to have concurrent access. */
-            Py_BEGIN_ALLOW_THREADS
-            sha3_update(self->hash_state, buf.buf, buf.len);
-            Py_END_ALLOW_THREADS
-        }
-        else {
-            sha3_update(self->hash_state, buf.buf, buf.len);
-        }
+        /* Do not use self->mutex here as this is the constructor
+         * where it is not yet possible to have concurrent access. */
+        HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(
+            buf.len,
+            sha3_update(self->hash_state, buf.buf, buf.len)
+        );
     }
 
     PyBuffer_Release(&buf);
@@ -226,21 +239,22 @@ SHA3_traverse(PyObject *self, visitproc visit, void *arg)
 /*[clinic input]
 _sha3.sha3_224.copy
 
+    cls: defining_class
+
 Return a copy of the hash object.
 [clinic start generated code]*/
 
 static PyObject *
-_sha3_sha3_224_copy_impl(SHA3object *self)
-/*[clinic end generated code: output=6c537411ecdcda4c input=93a44aaebea51ba8]*/
+_sha3_sha3_224_copy_impl(SHA3object *self, PyTypeObject *cls)
+/*[clinic end generated code: output=13958b44c244013e input=7134b4dc0a2fbcac]*/
 {
     SHA3object *newobj;
-
-    if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) {
+    if ((newobj = newSHA3object(cls)) == NULL) {
         return NULL;
     }
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     newobj->hash_state = Hacl_Hash_SHA3_copy(self->hash_state);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     if (newobj->hash_state == NULL) {
         Py_DECREF(newobj);
         return PyErr_NoMemory();
@@ -262,9 +276,9 @@ _sha3_sha3_224_digest_impl(SHA3object *self)
     unsigned char digest[SHA3_MAX_DIGESTSIZE];
     // This function errors out if the algorithm is SHAKE. Here, we know this
     // not to be the case, and therefore do not perform error checking.
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     (void)Hacl_Hash_SHA3_digest(self->hash_state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return PyBytes_FromStringAndSize((const char *)digest,
         Hacl_Hash_SHA3_hash_len(self->hash_state));
 }
@@ -281,9 +295,9 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self)
 /*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/
 {
     unsigned char digest[SHA3_MAX_DIGESTSIZE];
-    ENTER_HASHLIB(self);
+    HASHLIB_ACQUIRE_LOCK(self);
     (void)Hacl_Hash_SHA3_digest(self->hash_state, digest);
-    LEAVE_HASHLIB(self);
+    HASHLIB_RELEASE_LOCK(self);
     return _Py_strhex((const char *)digest,
         Hacl_Hash_SHA3_hash_len(self->hash_state));
 }
@@ -303,22 +317,11 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data)
 /*[clinic end generated code: output=390b7abf7c9795a5 input=a887f54dcc4ae227]*/
 {
     Py_buffer buf;
-
     GET_BUFFER_VIEW_OR_ERROUT(data, &buf);
-
-    if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
-        self->use_mutex = true;
-    }
-    if (self->use_mutex) {
-        Py_BEGIN_ALLOW_THREADS
-        PyMutex_Lock(&self->mutex);
-        sha3_update(self->hash_state, buf.buf, buf.len);
-        PyMutex_Unlock(&self->mutex);
-        Py_END_ALLOW_THREADS
-    } else {
-        sha3_update(self->hash_state, buf.buf, buf.len);
-    }
-
+    HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(
+        self, buf.len,
+        sha3_update(self->hash_state, buf.buf, buf.len)
+    );
     PyBuffer_Release(&buf);
     Py_RETURN_NONE;
 }
@@ -472,69 +475,98 @@ SHA3_TYPE_SPEC(sha3_384_spec, "sha3_384", sha3_384_slots);
 SHA3_TYPE_SLOTS(sha3_512_slots, sha3_512__doc__, SHA3_methods, SHA3_getseters);
 SHA3_TYPE_SPEC(sha3_512_spec, "sha3_512", sha3_512_slots);
 
-static PyObject *
-_SHAKE_digest(PyObject *op, unsigned long digestlen, int hex)
+static int
+sha3_shake_check_digest_length(Py_ssize_t length)
 {
-    unsigned char *digest = NULL;
-    PyObject *result = NULL;
-    SHA3object *self = _SHA3object_CAST(op);
-
-    if (digestlen >= (1 << 29)) {
-        PyErr_SetString(PyExc_ValueError, "length is too large");
-        return NULL;
-    }
-    digest = (unsigned char*)PyMem_Malloc(digestlen);
-    if (digest == NULL) {
-        return PyErr_NoMemory();
-    }
-
-    /* Get the raw (binary) digest value. The HACL functions errors out if:
-     * - the algorithm is not shake -- not the case here
-     * - the output length is zero -- we follow the existing behavior and return
-     *   an empty digest, without raising an error */
-    if (digestlen > 0) {
-        (void)Hacl_Hash_SHA3_squeeze(self->hash_state, digest, digestlen);
-    }
-    if (hex) {
-        result = _Py_strhex((const char *)digest, digestlen);
+    if (length < 0) {
+        PyErr_SetString(PyExc_ValueError, "negative digest length");
+        return -1;
     }
-    else {
-        result = PyBytes_FromStringAndSize((const char *)digest, digestlen);
+    if ((size_t)length >= (1 << 29)) {
+        /*
+         * Raise OverflowError to match the semantics of OpenSSL SHAKE
+         * when the digest length exceeds the range of a 'Py_ssize_t';
+         * the exception message will however be different in this case.
+         */
+        PyErr_SetString(PyExc_OverflowError, "digest length is too large");
+        return -1;
     }
-    PyMem_Free(digest);
-    return result;
+    return 0;
 }
 
 
 /*[clinic input]
 _sha3.shake_128.digest
 
-    length: unsigned_long
+    length: Py_ssize_t
 
 Return the digest value as a bytes object.
 [clinic start generated code]*/
 
 static PyObject *
-_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length)
-/*[clinic end generated code: output=2313605e2f87bb8f input=93d6d6ff32904f18]*/
+_sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length)
+/*[clinic end generated code: output=6c53fb71a6cff0a0 input=be03ade4b31dd54c]*/
 {
-    return _SHAKE_digest((PyObject *)self, length, 0);
+    if (sha3_shake_check_digest_length(length) < 0) {
+        return NULL;
+    }
+
+    /*
+     * Hacl_Hash_SHA3_squeeze() fails if the algorithm is not SHAKE,
+     * or if the length is 0. In the latter case, we follow OpenSSL's
+     * behavior and return an empty digest, without raising an error.
+     */
+    if (length == 0) {
+        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
+    }
+
+    CHECK_HACL_UINT32_T_LENGTH(length);
+    PyObject *digest = PyBytes_FromStringAndSize(NULL, length);
+    if (digest == NULL) {
+        /* Allocation failed; the exception is already set. */
+        return NULL;
+    }
+    uint8_t *buffer = (uint8_t *)PyBytes_AS_STRING(digest);
+    HASHLIB_ACQUIRE_LOCK(self);
+    (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length);
+    HASHLIB_RELEASE_LOCK(self);
+    return digest;
 }
 
 
 /*[clinic input]
 _sha3.shake_128.hexdigest
 
-    length: unsigned_long
+    length: Py_ssize_t
 
 Return the digest value as a string of hexadecimal digits.
 [clinic start generated code]*/
 
 static PyObject *
-_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length)
-/*[clinic end generated code: output=bf8e2f1e490944a8 input=562d74e7060b56ab]*/
+_sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length)
+/*[clinic end generated code: output=a27412d404f64512 input=0d84d05d7a8ccd37]*/
 {
-    return _SHAKE_digest((PyObject *)self, length, 1);
+    if (sha3_shake_check_digest_length(length) < 0) {
+        return NULL;
+    }
+
+    /* See _sha3_shake_128_digest_impl() for the fast path rationale. */
+    if (length == 0) {
+        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
+    }
+
+    CHECK_HACL_UINT32_T_LENGTH(length);
+    uint8_t *buffer = PyMem_Malloc(length);
+    if (buffer == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    HASHLIB_ACQUIRE_LOCK(self);
+    (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length);
+    HASHLIB_RELEASE_LOCK(self);
+    PyObject *digest = _Py_strhex((const char *)buffer, length);
+    PyMem_Free(buffer);
+    return digest;
 }
 
 static PyObject *
diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c
index 85c72779bac..f3ad01854de 100644
--- a/Modules/socketmodule.c
+++ b/Modules/socketmodule.c
@@ -4592,55 +4592,62 @@ sock_send_impl(PySocketSockObject *s, void *data)
     return (ctx->result >= 0);
 }
 
-/* s.send(data [,flags]) method */
+/*[clinic input]
+_socket.socket.send
+    self as s: self(type="PySocketSockObject *")
+    data as pbuf: Py_buffer
+    flags: int = 0
+    /
+
+Send a data string to the socket.
+
+For the optional flags argument, see the Unix manual.
+Return the number of bytes sent; this may be less than len(data) if the network is busy.
+[clinic start generated code]*/
 
 static PyObject *
-sock_send(PyObject *self, PyObject *args)
-{
-    PySocketSockObject *s = _PySocketSockObject_CAST(self);
+_socket_socket_send_impl(PySocketSockObject *s, Py_buffer *pbuf, int flags)
+/*[clinic end generated code: output=3ddf83f17d0c875b input=befe7d7790ccb035]*/
 
-    int flags = 0;
-    Py_buffer pbuf;
+{
     struct sock_send ctx;
 
-    if (!PyArg_ParseTuple(args, "y*|i:send", &pbuf, &flags))
-        return NULL;
-
     if (!IS_SELECTABLE(s)) {
-        PyBuffer_Release(&pbuf);
         return select_error();
     }
-    ctx.buf = pbuf.buf;
-    ctx.len = pbuf.len;
+    ctx.buf = pbuf->buf;
+    ctx.len = pbuf->len;
     ctx.flags = flags;
     if (sock_call(s, 1, sock_send_impl, &ctx) < 0) {
-        PyBuffer_Release(&pbuf);
         return NULL;
     }
-    PyBuffer_Release(&pbuf);
 
     return PyLong_FromSsize_t(ctx.result);
 }
 
-PyDoc_STRVAR(send_doc,
-"send(data[, flags]) -> count\n\
-\n\
-Send a data string to the socket.  For the optional flags\n\
-argument, see the Unix manual.  Return the number of bytes\n\
-sent; this may be less than len(data) if the network is busy.");
 
+/*[clinic input]
+_socket.socket.sendall
+    self as s: self(type="PySocketSockObject *")
+    data as pbuf: Py_buffer
+    flags: int = 0
+    /
 
-/* s.sendall(data [,flags]) method */
+Send a data string to the socket.
+
+For the optional flags argument, see the Unix manual.
+This calls send() repeatedly until all data is sent.
+If an error occurs, it's impossible to tell how much data has been sent.
+[clinic start generated code]*/
 
 static PyObject *
-sock_sendall(PyObject *self, PyObject *args)
-{
-    PySocketSockObject *s = _PySocketSockObject_CAST(self);
+_socket_socket_sendall_impl(PySocketSockObject *s, Py_buffer *pbuf,
+                            int flags)
+/*[clinic end generated code: output=ec92861424d3faa8 input=732b15b9ca64dce6]*/
 
+{
     char *buf;
     Py_ssize_t len, n;
-    int flags = 0;
-    Py_buffer pbuf;
     struct sock_send ctx;
     int has_timeout = (s->sock_timeout > 0);
     PyTime_t timeout = s->sock_timeout;
@@ -4648,13 +4655,10 @@ sock_sendall(PyObject *self, PyObject *args)
     int deadline_initialized = 0;
     PyObject *res = NULL;
 
-    if (!PyArg_ParseTuple(args, "y*|i:sendall", &pbuf, &flags))
-        return NULL;
-    buf = pbuf.buf;
-    len = pbuf.len;
+    buf = pbuf->buf;
+    len = pbuf->len;
 
     if (!IS_SELECTABLE(s)) {
-        PyBuffer_Release(&pbuf);
         return select_error();
     }
 
@@ -4692,23 +4696,13 @@ sock_sendall(PyObject *self, PyObject *args)
         if (PyErr_CheckSignals())
             goto done;
     } while (len > 0);
-    PyBuffer_Release(&pbuf);
 
     res = Py_NewRef(Py_None);
 
 done:
-    PyBuffer_Release(&pbuf);
     return res;
 }
 
-PyDoc_STRVAR(sendall_doc,
-"sendall(data[, flags])\n\
-\n\
-Send a data string to the socket.  For the optional flags\n\
-argument, see the Unix manual.  This calls send() repeatedly\n\
-until all data is sent.  If an error occurs, it's impossible\n\
-to tell how much data has been sent.");
-
 
 #ifdef HAVE_SENDTO
 struct sock_sendto {
@@ -4858,10 +4852,8 @@ sock_sendmsg_iovec(PySocketSockObject *s, PyObject *data_arg,
         }
     }
     for (; ndatabufs < ndataparts; ndatabufs++) {
-        if (!PyArg_Parse(PySequence_Fast_GET_ITEM(data_fast, ndatabufs),
-                         "y*;sendmsg() argument 1 must be an iterable of "
-                         "bytes-like objects",
-                         &databufs[ndatabufs]))
+        if (PyObject_GetBuffer(PySequence_Fast_GET_ITEM(data_fast, ndatabufs),
+            &databufs[ndatabufs], PyBUF_SIMPLE) < 0)
             goto finally;
         iovs[ndatabufs].iov_base = databufs[ndatabufs].buf;
         iovs[ndatabufs].iov_len = databufs[ndatabufs].len;
@@ -4883,13 +4875,39 @@ sock_sendmsg_impl(PySocketSockObject *s, void *data)
     return (ctx->result >= 0);
 }
 
-/* s.sendmsg(buffers[, ancdata[, flags[, address]]]) method */
+/*[clinic input]
+_socket.socket.sendmsg
+    self as s: self(type="PySocketSockObject *")
+    buffers as data_arg: object
+    ancdata as cmsg_arg: object = NULL
+    flags: int = 0
+    address as addr_arg: object = NULL
+    /
+
+Send normal and ancillary data to the socket.
+
+It gathering the non-ancillary data from a series of buffers
+and concatenating it into a single message.
+The buffers argument specifies the non-ancillary
+data as an iterable of bytes-like objects (e.g. bytes objects).
+The ancdata argument specifies the ancillary data (control messages)
+as an iterable of zero or more tuples (cmsg_level, cmsg_type,
+cmsg_data), where cmsg_level and cmsg_type are integers specifying the
+protocol level and protocol-specific type respectively, and cmsg_data
+is a bytes-like object holding the associated data.  The flags
+argument defaults to 0 and has the same meaning as for send().  If
+address is supplied and not None, it sets a destination address for
+the message.  The return value is the number of bytes of non-ancillary
+data sent.
+[clinic start generated code]*/
 
 static PyObject *
-sock_sendmsg(PyObject *self, PyObject *args)
-{
-    PySocketSockObject *s = _PySocketSockObject_CAST(self);
+_socket_socket_sendmsg_impl(PySocketSockObject *s, PyObject *data_arg,
+                            PyObject *cmsg_arg, int flags,
+                            PyObject *addr_arg)
+/*[clinic end generated code: output=3b4cb1110644ce39 input=479c13d90bd2f88b]*/
 
+{
     Py_ssize_t i, ndatabufs = 0, ncmsgs, ncmsgbufs = 0;
     Py_buffer *databufs = NULL;
     sock_addr_t addrbuf;
@@ -4901,16 +4919,10 @@ sock_sendmsg(PyObject *self, PyObject *args)
     } *cmsgs = NULL;
     void *controlbuf = NULL;
     size_t controllen, controllen_last;
-    int addrlen, flags = 0;
-    PyObject *data_arg, *cmsg_arg = NULL, *addr_arg = NULL,
-        *cmsg_fast = NULL, *retval = NULL;
+    int addrlen;
+    PyObject *cmsg_fast = NULL, *retval = NULL;
     struct sock_sendmsg ctx;
 
-    if (!PyArg_ParseTuple(args, "O|OiO:sendmsg",
-                          &data_arg, &cmsg_arg, &flags, &addr_arg)) {
-        return NULL;
-    }
-
     memset(&msg, 0, sizeof(msg));
 
     /* Parse destination address. */
@@ -5072,22 +5084,6 @@ finally:
     return retval;
 }
 
-PyDoc_STRVAR(sendmsg_doc,
-"sendmsg(buffers[, ancdata[, flags[, address]]]) -> count\n\
-\n\
-Send normal and ancillary data to the socket, gathering the\n\
-non-ancillary data from a series of buffers and concatenating it into\n\
-a single message.  The buffers argument specifies the non-ancillary\n\
-data as an iterable of bytes-like objects (e.g. bytes objects).\n\
-The ancdata argument specifies the ancillary data (control messages)\n\
-as an iterable of zero or more tuples (cmsg_level, cmsg_type,\n\
-cmsg_data), where cmsg_level and cmsg_type are integers specifying the\n\
-protocol level and protocol-specific type respectively, and cmsg_data\n\
-is a bytes-like object holding the associated data.  The flags\n\
-argument defaults to 0 and has the same meaning as for send().  If\n\
-address is supplied and not None, it sets a destination address for\n\
-the message.  The return value is the number of bytes of non-ancillary\n\
-data sent.");
 #endif    /* CMSG_LEN */
 
 #ifdef HAVE_SOCKADDR_ALG
@@ -5424,8 +5420,8 @@ static PyMethodDef sock_methods[] = {
         recvfrom_into_doc
     },
 #endif
-    {"send", sock_send, METH_VARARGS, send_doc},
-    {"sendall", sock_sendall, METH_VARARGS, sendall_doc},
+    _SOCKET_SOCKET_SEND_METHODDEF
+    _SOCKET_SOCKET_SENDALL_METHODDEF
 #ifdef HAVE_SENDTO
     {"sendto", sock_sendto, METH_VARARGS, sendto_doc},
 #endif
@@ -5445,7 +5441,7 @@ static PyMethodDef sock_methods[] = {
 #ifdef CMSG_LEN
     {"recvmsg", sock_recvmsg, METH_VARARGS, recvmsg_doc},
     {"recvmsg_into", sock_recvmsg_into, METH_VARARGS, recvmsg_into_doc},
-    {"sendmsg", sock_sendmsg, METH_VARARGS, sendmsg_doc},
+    _SOCKET_SOCKET_SENDMSG_METHODDEF
 #endif
 #ifdef HAVE_SOCKADDR_ALG
     {
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 34b50ef97d5..91772bc9d19 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -1999,7 +1999,6 @@ _PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts,
                              const char **p_errmsg)
 {
     const char *errmsg = NULL;
-    assert(counts->locals.hidden.total == 0);
     if (counts->numfree > 0) {  // It's a closure.
         errmsg = "closures not supported";
     }
@@ -3370,7 +3369,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx)
         }
         memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *));
         _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc);
-        _PyMem_FreeDelayed(tlbc);
+        _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *));
         tlbc = new_tlbc;
     }
     char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co));
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index fd8ccf56324..6b7b150f0e2 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr)
 {
 #ifdef Py_GIL_DISABLED
     if (use_qsbr) {
-        _PyMem_FreeDelayed(keys);
+        _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys));
         return;
     }
 #endif
@@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr)
     assert(values->embedded == 0);
 #ifdef Py_GIL_DISABLED
     if (use_qsbr) {
-        _PyMem_FreeDelayed(values);
+        _PyMem_FreeDelayed(values, values_size_from_count(values->capacity));
         return;
     }
 #endif
@@ -3858,7 +3858,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
         }
     }
 
-    Py_ssize_t orig_size = other->ma_keys->dk_nentries;
+    Py_ssize_t orig_size = other->ma_used;
     Py_ssize_t pos = 0;
     Py_hash_t hash;
     PyObject *key, *value;
@@ -3892,7 +3892,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
         if (err != 0)
             return -1;
 
-        if (orig_size != other->ma_keys->dk_nentries) {
+        if (orig_size != other->ma_used) {
             PyErr_SetString(PyExc_RuntimeError,
                     "dict mutated during update");
             return -1;
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index f87b0e5d8f1..f8dd10a346d 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -1256,7 +1256,7 @@ _PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func)
         return -1;
     }
     // Check the builtins.
-    PyObject *builtinsns = PyFunction_GET_BUILTINS(func);
+    PyObject *builtinsns = _PyFunction_GET_BUILTINS(func);
     if (builtinsns != NULL && !PyDict_Check(builtinsns)) {
         _PyErr_Format(tstate, PyExc_TypeError,
                       "unsupported builtins %R", builtinsns);
diff --git a/Objects/genobject.c b/Objects/genobject.c
index da1462deaaa..d0cb75d2d17 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -704,7 +704,8 @@ static PyObject *
 gen_get_name(PyObject *self, void *Py_UNUSED(ignored))
 {
     PyGenObject *op = _PyGen_CAST(self);
-    return Py_NewRef(op->gi_name);
+    PyObject *name = FT_ATOMIC_LOAD_PTR_ACQUIRE(op->gi_name);
+    return Py_NewRef(name);
 }
 
 static int
@@ -718,7 +719,11 @@ gen_set_name(PyObject *self, PyObject *value, void *Py_UNUSED(ignored))
                         "__name__ must be set to a string object");
         return -1;
     }
-    Py_XSETREF(op->gi_name, Py_NewRef(value));
+    Py_BEGIN_CRITICAL_SECTION(self);
+    // gh-133931: To prevent use-after-free from other threads that reference
+    // the gi_name.
+    _PyObject_XSetRefDelayed(&op->gi_name, Py_NewRef(value));
+    Py_END_CRITICAL_SECTION();
     return 0;
 }
 
@@ -726,7 +731,8 @@ static PyObject *
 gen_get_qualname(PyObject *self, void *Py_UNUSED(ignored))
 {
     PyGenObject *op = _PyGen_CAST(self);
-    return Py_NewRef(op->gi_qualname);
+    PyObject *qualname = FT_ATOMIC_LOAD_PTR_ACQUIRE(op->gi_qualname);
+    return Py_NewRef(qualname);
 }
 
 static int
@@ -740,7 +746,11 @@ gen_set_qualname(PyObject *self, PyObject *value, void *Py_UNUSED(ignored))
                         "__qualname__ must be set to a string object");
         return -1;
     }
-    Py_XSETREF(op->gi_qualname, Py_NewRef(value));
+    Py_BEGIN_CRITICAL_SECTION(self);
+    // gh-133931: To prevent use-after-free from other threads that reference
+    // the gi_qualname.
+    _PyObject_XSetRefDelayed(&op->gi_qualname, Py_NewRef(value));
+    Py_END_CRITICAL_SECTION();
     return 0;
 }
 
diff --git a/Objects/listobject.c b/Objects/listobject.c
index c5895645a2d..1b36f4c25ab 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr)
 #ifdef Py_GIL_DISABLED
     _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item);
     if (use_qsbr) {
-        _PyMem_FreeDelayed(array);
+        size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *);
+        _PyMem_FreeDelayed(array, size);
     }
     else {
         PyMem_Free(array);
@@ -1684,10 +1685,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
 /* Avoid malloc for small temp arrays. */
 #define MERGESTATE_TEMP_SIZE 256
 
-/* The largest value of minrun. This must be a power of 2, and >= 1, so that
- * the compute_minrun() algorithm guarantees to return a result no larger than
- * this,
- */
+/* The largest value of minrun. This must be a power of 2, and >= 1 */
 #define MAX_MINRUN 64
 #if ((MAX_MINRUN) < 1) || ((MAX_MINRUN) & ((MAX_MINRUN) - 1))
 #error "MAX_MINRUN must be a power of 2, and >= 1"
@@ -1748,6 +1746,11 @@ struct s_MergeState {
      * of tuples. It may be set to safe_object_compare, but the idea is that hopefully
      * we can assume more, and use one of the special-case compares. */
     int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *);
+
+    /* Variables used for minrun computation. The "ideal" minrun length is
+     * the infinite precision listlen / 2**e. See listsort.txt.
+     */
+     Py_ssize_t mr_current, mr_e, mr_mask;
 };
 
 /* binarysort is the best method for sorting small arrays: it does few
@@ -2209,6 +2212,14 @@ merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc,
     ms->min_gallop = MIN_GALLOP;
     ms->listlen = list_size;
     ms->basekeys = lo->keys;
+
+    /* State for generating minrun values. See listsort.txt. */
+    ms->mr_e = 0;
+    while (list_size >> ms->mr_e >= MAX_MINRUN) {
+        ++ms->mr_e;
+    }
+    ms->mr_mask = (1 << ms->mr_e) - 1;
+    ms->mr_current = 0;
 }
 
 /* Free all the temp memory owned by the MergeState.  This must be called
@@ -2686,27 +2697,15 @@ merge_force_collapse(MergeState *ms)
     return 0;
 }
 
-/* Compute a good value for the minimum run length; natural runs shorter
- * than this are boosted artificially via binary insertion.
- *
- * If n < MAX_MINRUN return n (it's too small to bother with fancy stuff).
- * Else if n is an exact power of 2, return MAX_MINRUN / 2.
- * Else return an int k, MAX_MINRUN / 2 <= k <= MAX_MINRUN, such that n/k is
- * close to, but strictly less than, an exact power of 2.
- *
- * See listsort.txt for more info.
- */
-static Py_ssize_t
-merge_compute_minrun(Py_ssize_t n)
+/* Return the next minrun value to use. See listsort.txt. */
+Py_LOCAL_INLINE(Py_ssize_t)
+minrun_next(MergeState *ms)
 {
-    Py_ssize_t r = 0;           /* becomes 1 if any 1 bits are shifted off */
-
-    assert(n >= 0);
-    while (n >= MAX_MINRUN) {
-        r |= n & 1;
-        n >>= 1;
-    }
-    return n + r;
+    ms->mr_current += ms->listlen;
+    assert(ms->mr_current >= 0); /* no overflow */
+    Py_ssize_t result = ms->mr_current >> ms->mr_e;
+    ms->mr_current &= ms->mr_mask;
+    return result;
 }
 
 /* Here we define custom comparison functions to optimize for the cases one commonly
@@ -3074,7 +3073,6 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
     /* March over the array once, left to right, finding natural runs,
      * and extending short natural runs to minrun elements.
      */
-    minrun = merge_compute_minrun(nremaining);
     do {
         Py_ssize_t n;
 
@@ -3083,6 +3081,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
         if (n < 0)
             goto fail;
         /* If short, extend to min(minrun, nremaining). */
+        minrun = minrun_next(&ms);
         if (n < minrun) {
             const Py_ssize_t force = nremaining <= minrun ?
                               nremaining : minrun;
diff --git a/Objects/listsort.txt b/Objects/listsort.txt
index f387d9c116e..5b2fc7d50a2 100644
--- a/Objects/listsort.txt
+++ b/Objects/listsort.txt
@@ -270,8 +270,8 @@ result.  This has two primary good effects:
 
 Computing minrun
 ----------------
-If N < MAX_MINRUN, minrun is N.  IOW, binary insertion sort is used for the 
-whole array then; it's hard to beat that given the overheads of trying 
+If N < MAX_MINRUN, minrun is N.  IOW, binary insertion sort is used for the
+whole array then; it's hard to beat that given the overheads of trying
 something fancier (see note BINSORT).
 
 When N is a power of 2, testing on random data showed that minrun values of
@@ -288,7 +288,6 @@ that 32 isn't a good choice for the general case!  Consider N=2112:
 
 >>> divmod(2112, 32)
 (66, 0)
->>>
 
 If the data is randomly ordered, we're very likely to end up with 66 runs
 each of length 32.  The first 64 of these trigger a sequence of perfectly
@@ -301,22 +300,94 @@ to get 64 elements into place).
 If we take minrun=33 in this case, then we're very likely to end up with 64
 runs each of length 33, and then all merges are perfectly balanced.  Better!
 
-What we want to avoid is picking minrun such that in
+The original code used a cheap heuristic to pick a minrun that avoided the
+very worst cases of imbalance for the final merge, but "pretty bad" cases
+still existed.
 
-    q, r = divmod(N, minrun)
+In 2025, Stefan Pochmann found a much better approach, based on letting minrun
+vary a bit from one run to the next. Under his scheme, at _all_ levels of the
+merge tree:
 
-q is a power of 2 and r>0 (then the last merge only gets r elements into
-place, and r < minrun is small compared to N), or q a little larger than a
-power of 2 regardless of r (then we've got a case similar to "2112", again
-leaving too little work for the last merge to do).
+- The number of runs is a power of 2.
+- At most two different run lengths appear.
+- When two do appear, the smaller is one less than the larger.
+- The lengths of run pairs merged never differ by more than one.
 
-Instead we pick a minrun in range(MAX_MINRUN / 2, MAX_MINRUN + 1) such that 
-N/minrun is exactly a power of 2, or if that isn't possible, is close to, but 
-strictly less than, a power of 2.  This is easier to do than it may sound: 
-take the first log2(MAX_MINRUN) bits of N, and add 1 if any of the remaining 
-bits are set. In fact, that rule covers every case in this section, including 
-small N and exact powers of 2; merge_compute_minrun() is a deceptively simple 
-function.
+So, in all respects, as perfectly balanced as possible.
+
+For the 2112 case, that also keeps minrun at 33, but we were lucky there
+that 2112 is 33 times a power of 2. The new approach doesn't rely on luck.
+
+For example, with 315 random elements, the old scheme uses fixed minrun=40 and
+produces runs of length 40, except for the last. The new scheme produces a
+mix of lengths 39 and 40:
+
+old:  40 40 40 40 40 40 40 35
+new:  39 39 40 39 39 40 39 40
+
+Both schemes produce eight runs, a power of 2. That's good for a balanced
+merge tree. But the new scheme allows merges where left and right length
+never differ by more than 1:
+
+39 39 40 39 39 40 39 40
+  78   79     79   79
+    157         158
+          315
+
+(This shows merges downward, e.g., two runs of length 39 are merged and
+become a run of length 78.)
+
+With larger lists, the old scheme can get even more unbalanced. For example,
+with 32769 elements (that's 2**15 + 1), it uses minrun=33 and produces 993
+runs (of length 33). That's not even a power of 2. The new scheme instead
+produces 1024 runs, all with length 32 except for the last one with length 33.
+
+How does it work? Ideally, all runs would be exactly equally long. For the
+above example, each run would have 315/8 = 39.375 elements. Which of course
+doesn't work. But we can get close:
+
+For the first run, we'd like 39.375 elements. Since that's impossible, we
+instead use 39 (the floor) and remember the current leftover fraction 0.375.
+For the second run, we add 0.375 + 39.375 = 39.75. Again impossible, so we
+instead use 39 and remember 0.75. For the third run, we add 0.75 + 39.375 =
+40.125. This time we get 40 and remember 0.125. And so on. Here's a Python
+generator doing that:
+
+def gen_minruns_with_floats(n):
+    mr = n
+    while mr >= MAX_MINRUN:
+        mr /= 2
+
+    mr_current = 0
+    while True:
+        mr_current += mr
+        yield int(mr_current)
+        mr_current %= 1
+
+But while all arithmetic here can be done exactly using binary floating point,
+floats have less precision than a Py_ssize_t, and mixing floats with ints is
+needlessly expensive anyway.
+
+So here's an integer version, where the internal numbers are scaled up by
+2**e, or rather not divided by 2**e. Instead, only each yielded minrun gets
+divided (by right-shifting). For example instead of adding 39.375 and
+reducing modulo 1, it just adds 315 and reduces modulo 8. And always divides
+by 8 to get each actual minrun value:
+
+def gen_minruns_simpler(n):
+    e = 0
+    while (n >> e) >= MAX_MINRUN:
+        e += 1
+    mask = (1 << e) - 1
+
+    mr_current = 0
+    while True:
+        mr_current += n
+        yield mr_current >> e
+        mr_current &= mask
+
+See note MINRUN CODE for a full implementation and a driver that exhaustively
+verifies the claims above for all list lengths through 2 million.
 
 
 The Merge Pattern
@@ -820,3 +891,75 @@ partially mitigated by pre-scanning the data to determine whether the data is
 homogeneous with respect to type.  If so, it is sometimes possible to
 substitute faster type-specific comparisons for the slower, generic
 PyObject_RichCompareBool.
+
+MINRUN CODE
+from itertools import accumulate
+try:
+    from itertools import batched
+except ImportError:
+    from itertools import islice
+    def batched(xs, k):
+        it = iter(xs)
+        while chunk := tuple(islice(it, k)):
+            yield chunk
+
+MAX_MINRUN = 64
+
+def gen_minruns(n):
+    # In listobject.c, initialization is done in merge_init(), and
+    # the body of the loop in minrun_next().
+    mr_e = 0
+    while (n >> mr_e) >= MAX_MINRUN:
+        mr_e += 1
+    mr_mask = (1 << mr_e) - 1
+
+    mr_current = 0
+    while True:
+        mr_current += n
+        yield mr_current >> mr_e
+        mr_current &= mr_mask
+
+def chew(n, show=False):
+    if n < 1:
+        return
+
+    sizes = []
+    tot = 0
+    for size in gen_minruns(n):
+        sizes.append(size)
+        tot += size
+        if tot >= n:
+            break
+    assert tot == n
+    print(n, len(sizes))
+
+    small, large = MAX_MINRUN // 2, MAX_MINRUN
+    while len(sizes) > 1:
+        assert not len(sizes) & 1
+        assert len(sizes).bit_count() == 1 # i.e., power of 2
+        assert sum(sizes) == n
+        assert min(sizes) >= min(n, small)
+        assert max(sizes) <= large
+
+        d = set(sizes)
+        assert len(d) <= 2
+        if len(d) == 2:
+            lo, hi = sorted(d)
+            assert lo + 1 == hi
+
+        mr = n / len(sizes)
+        for i, s in enumerate(accumulate(sizes, initial=0)):
+            assert int(mr * i) == s
+
+        newsizes = []
+        for a, b in batched(sizes, 2):
+            assert abs(a - b) <= 1
+            newsizes.append(a + b)
+        sizes = newsizes
+        small = large
+        large *= 2
+
+    assert sizes[0] == n
+
+for n in range(2_000_001):
+    chew(n)
+\ No newline at end of file
diff --git a/Objects/longobject.c b/Objects/longobject.c
index dfa02851cd8..557bb6e1dd9 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -10,6 +10,7 @@
 #include "pycore_long.h"          // _Py_SmallInts
 #include "pycore_object.h"        // _PyObject_Init()
 #include "pycore_runtime.h"       // _PY_NSMALLPOSINTS
+#include "pycore_stackref.h"
 #include "pycore_structseq.h"     // _PyStructSequence_FiniBuiltin()
 #include "pycore_unicodeobject.h" // _PyUnicode_Equal()
 
@@ -316,6 +317,33 @@ _PyLong_FromSTwoDigits(stwodigits x)
     return (PyLongObject*)_PyLong_FromLarge(x);
 }
 
+/* Create a new medium int object from a medium int.
+ * Do not raise. Return NULL if not medium or can't allocate. */
+static inline _PyStackRef
+medium_from_stwodigits(stwodigits x)
+{
+    if (IS_SMALL_INT(x)) {
+        return PyStackRef_FromPyObjectBorrow(get_small_int((sdigit)x));
+    }
+    assert(x != 0);
+    if(!is_medium_int(x)) {
+        return PyStackRef_NULL;
+    }
+    PyLongObject *v = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints);
+    if (v == NULL) {
+        v = PyObject_Malloc(sizeof(PyLongObject));
+        if (v == NULL) {
+            return PyStackRef_NULL;
+        }
+        _PyObject_Init((PyObject*)v, &PyLong_Type);
+    }
+    digit abs_x = x < 0 ? (digit)(-x) : (digit)x;
+    _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1);
+    v->long_value.ob_digit[0] = abs_x;
+    return PyStackRef_FromPyObjectStealMortal((PyObject *)v);
+}
+
+
 /* If a freshly-allocated int is already shared, it must
    be a small integer, so negating it must go to PyLong_FromLong */
 Py_LOCAL_INLINE(void)
@@ -3771,12 +3799,12 @@ long_add(PyLongObject *a, PyLongObject *b)
     return z;
 }
 
-PyObject *
+_PyStackRef
 _PyCompactLong_Add(PyLongObject *a, PyLongObject *b)
 {
     assert(_PyLong_BothAreCompact(a, b));
-    stwodigits z = medium_value(a) + medium_value(b);
-    return (PyObject *)_PyLong_FromSTwoDigits(z);
+    stwodigits v = medium_value(a) + medium_value(b);
+    return medium_from_stwodigits(v);
 }
 
 static PyObject *
@@ -3816,11 +3844,12 @@ long_sub(PyLongObject *a, PyLongObject *b)
     return z;
 }
 
-PyObject *
+_PyStackRef
 _PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b)
 {
     assert(_PyLong_BothAreCompact(a, b));
-    return (PyObject *)_PyLong_FromSTwoDigits(medium_value(a) - medium_value(b));
+    stwodigits v = medium_value(a) - medium_value(b);
+    return medium_from_stwodigits(v);
 }
 
 static PyObject *
@@ -4264,12 +4293,14 @@ long_mul(PyLongObject *a, PyLongObject *b)
     return z;
 }
 
-PyObject *
+/* This function returns NULL if the result is not compact,
+ * or if it fails to allocate, but never raises */
+_PyStackRef
 _PyCompactLong_Multiply(PyLongObject *a, PyLongObject *b)
 {
     assert(_PyLong_BothAreCompact(a, b));
     stwodigits v = medium_value(a) * medium_value(b);
-    return (PyObject *)_PyLong_FromSTwoDigits(v);
+    return medium_from_stwodigits(v);
 }
 
 static PyObject *
diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c
index caebe6bf543..0fc2bcea4cb 100644
--- a/Objects/namespaceobject.c
+++ b/Objects/namespaceobject.c
@@ -124,9 +124,10 @@ namespace_repr(PyObject *ns)
         if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) {
             PyObject *value, *item;
 
-            value = PyDict_GetItemWithError(d, key);
-            if (value != NULL) {
+            int has_key = PyDict_GetItemRef(d, key, &value);
+            if (has_key == 1) {
                 item = PyUnicode_FromFormat("%U=%R", key, value);
+                Py_DECREF(value);
                 if (item == NULL) {
                     loop_error = 1;
                 }
@@ -135,7 +136,7 @@ namespace_repr(PyObject *ns)
                     Py_DECREF(item);
                 }
             }
-            else if (PyErr_Occurred()) {
+            else if (has_key < 0) {
                 loop_error = 1;
             }
         }
diff --git a/Objects/object.c b/Objects/object.c
index eff3a986212..1223983753a 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1131,11 +1131,14 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op)
     res = PyObject_RichCompare(v, w, op);
     if (res == NULL)
         return -1;
-    if (PyBool_Check(res))
+    if (PyBool_Check(res)) {
         ok = (res == Py_True);
-    else
+        assert(_Py_IsImmortal(res));
+    }
+    else {
         ok = PyObject_IsTrue(res);
-    Py_DECREF(res);
+        Py_DECREF(res);
+    }
     return ok;
 }
 
@@ -2084,9 +2087,25 @@ _dir_locals(void)
     PyObject *names;
     PyObject *locals;
 
-    locals = _PyEval_GetFrameLocals();
-    if (locals == NULL)
+    if (_PyEval_GetFrame() != NULL) {
+        locals = _PyEval_GetFrameLocals();
+    }
+    else {
+        PyThreadState *tstate = _PyThreadState_GET();
+        locals = _PyEval_GetGlobalsFromRunningMain(tstate);
+        if (locals == NULL) {
+            if (!_PyErr_Occurred(tstate)) {
+                locals = _PyEval_GetFrameLocals();
+                assert(_PyErr_Occurred(tstate));
+            }
+        }
+        else {
+            Py_INCREF(locals);
+        }
+    }
+    if (locals == NULL) {
         return NULL;
+    }
 
     names = PyMapping_Keys(locals);
     Py_DECREF(locals);
@@ -3018,57 +3037,28 @@ finally:
 
 /* Trashcan support. */
 
-#ifndef Py_GIL_DISABLED
-/* We need to store a pointer in the refcount field of
- * an object. It is important that we never store 0 (NULL).
-* It is also important to not make the object appear immortal,
-* or it might be untracked by the cycle GC. */
-static uintptr_t
-pointer_to_safe_refcount(void *ptr)
-{
-    uintptr_t full = (uintptr_t)ptr;
-    assert((full & 3) == 0);
-#if SIZEOF_VOID_P > 4
-    uint32_t refcnt = (uint32_t)full;
-    if (refcnt >= (uint32_t)_Py_IMMORTAL_MINIMUM_REFCNT) {
-        full = full - ((uintptr_t)_Py_IMMORTAL_MINIMUM_REFCNT) + 1;
-    }
-    return full + 2;
-#else
-    // Make the top two bits 0, so it appears mortal.
-    return (full >> 2) + 1;
-#endif
-}
-
-static void *
-safe_refcount_to_pointer(uintptr_t refcnt)
-{
-#if SIZEOF_VOID_P > 4
-    if (refcnt & 1) {
-        refcnt += _Py_IMMORTAL_MINIMUM_REFCNT - 1;
-    }
-    return (void *)(refcnt - 2);
-#else
-    return (void *)((refcnt -1) << 2);
-#endif
-}
-#endif
-
 /* Add op to the gcstate->trash_delete_later list.  Called when the current
- * call-stack depth gets large.  op must be a currently untracked gc'ed
- * object, with refcount 0.  Py_DECREF must already have been called on it.
+ * call-stack depth gets large.  op must be a gc'ed object, with refcount 0.
+ *  Py_DECREF must already have been called on it.
  */
 void
 _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op)
 {
     _PyObject_ASSERT(op, Py_REFCNT(op) == 0);
+    PyTypeObject *tp = Py_TYPE(op);
+    assert(tp->tp_flags & Py_TPFLAGS_HAVE_GC);
+    int tracked = 0;
+    if (tp->tp_is_gc == NULL || tp->tp_is_gc(op)) {
+        tracked = _PyObject_GC_IS_TRACKED(op);
+        if (tracked) {
+            _PyObject_GC_UNTRACK(op);
+        }
+    }
+    uintptr_t tagged_ptr = ((uintptr_t)tstate->delete_later) | tracked;
 #ifdef Py_GIL_DISABLED
-    op->ob_tid = (uintptr_t)tstate->delete_later;
+    op->ob_tid = tagged_ptr;
 #else
-    /* Store the delete_later pointer in the refcnt field. */
-    uintptr_t refcnt = pointer_to_safe_refcount(tstate->delete_later);
-    *((uintptr_t*)op) = refcnt;
-    assert(!_Py_IsImmortal(op));
+    _Py_AS_GC(op)->_gc_next = tagged_ptr;
 #endif
     tstate->delete_later = op;
 }
@@ -3083,17 +3073,17 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate)
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
 #ifdef Py_GIL_DISABLED
-        tstate->delete_later = (PyObject*) op->ob_tid;
+        uintptr_t tagged_ptr = op->ob_tid;
         op->ob_tid = 0;
         _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED);
 #else
-        /* Get the delete_later pointer from the refcnt field.
-         * See _PyTrash_thread_deposit_object(). */
-        uintptr_t refcnt = *((uintptr_t*)op);
-        tstate->delete_later = safe_refcount_to_pointer(refcnt);
-        op->ob_refcnt = 0;
+        uintptr_t tagged_ptr = _Py_AS_GC(op)->_gc_next;
+        _Py_AS_GC(op)->_gc_next = 0;
 #endif
-
+        tstate->delete_later = (PyObject *)(tagged_ptr & ~1);
+        if (tagged_ptr & 1) {
+            _PyObject_GC_TRACK(op);
+        }
         /* Call the deallocator directly.  This used to try to
          * fool Py_DECREF into calling it indirectly, but
          * Py_DECREF was already called on this object, and in
@@ -3167,10 +3157,11 @@ void
 _Py_Dealloc(PyObject *op)
 {
     PyTypeObject *type = Py_TYPE(op);
+    unsigned long gc_flag = type->tp_flags & Py_TPFLAGS_HAVE_GC;
     destructor dealloc = type->tp_dealloc;
     PyThreadState *tstate = _PyThreadState_GET();
     intptr_t margin = _Py_RecursionLimit_GetMargin(tstate);
-    if (margin < 2) {
+    if (margin < 2 && gc_flag) {
         _PyTrash_thread_deposit_object(tstate, (PyObject *)op);
         return;
     }
@@ -3216,7 +3207,7 @@ _Py_Dealloc(PyObject *op)
     Py_XDECREF(old_exc);
     Py_DECREF(type);
 #endif
-    if (tstate->delete_later && margin >= 4) {
+    if (tstate->delete_later && margin >= 4 && gc_flag) {
         _PyTrash_thread_destroy_chain(tstate);
     }
 }
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index d3931aab623..deb7fd957e5 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page)
 
 }
 
+#ifdef Py_GIL_DISABLED
+
+// If we are deferring collection of more than this amount of memory for
+// mimalloc pages, advance the write sequence.  Advancing allows these
+// pages to be re-used in a different thread or for a different size class.
+#define QSBR_PAGE_MEM_LIMIT 4096*20
+
+// Return true if the global write sequence should be advanced for a mimalloc
+// page that is deferred from collection.
+static bool
+should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
+{
+    size_t bsize = mi_page_block_size(page);
+    size_t page_size = page->capacity*bsize;
+    if (page_size > QSBR_PAGE_MEM_LIMIT) {
+        qsbr->deferred_page_memory = 0;
+        return true;
+    }
+    qsbr->deferred_page_memory += page_size;
+    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
+        qsbr->deferred_page_memory = 0;
+        return true;
+    }
+    return false;
+}
+#endif
+
 static bool
 _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
 {
@@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
 
         _PyMem_mi_page_clear_qsbr(page);
         page->retire_expire = 0;
-        page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
+
+        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
+            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
+        }
+        else {
+            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
+        }
+
         llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
         return false;
     }
@@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
     }
 }
 
+
+#ifdef Py_GIL_DISABLED
+
+// For deferred advance on free: the number of deferred items before advancing
+// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
+// want to process a chunk before it overflows.
+#define QSBR_DEFERRED_LIMIT 127
+
+// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
+// helps limit memory usage due to QSBR delaying frees too long.
+#define QSBR_FREE_MEM_LIMIT 1024*1024
+
+// Return true if the global write sequence should be advanced for a deferred
+// memory free.
+static bool
+should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
+{
+    if (size > QSBR_FREE_MEM_LIMIT) {
+        qsbr->deferred_count = 0;
+        qsbr->deferred_memory = 0;
+        qsbr->should_process = true;
+        return true;
+    }
+    qsbr->deferred_count++;
+    qsbr->deferred_memory += size;
+    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
+            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
+        qsbr->deferred_count = 0;
+        qsbr->deferred_memory = 0;
+        qsbr->should_process = true;
+        return true;
+    }
+    return false;
+}
+#endif
+
 static void
-free_delayed(uintptr_t ptr)
+free_delayed(uintptr_t ptr, size_t size)
 {
 #ifndef Py_GIL_DISABLED
     free_work_item(ptr, NULL, NULL);
@@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr)
     }
 
     assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
-    uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
+    uint64_t seq;
+    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
+        seq = _Py_qsbr_advance(tstate->qsbr->shared);
+    }
+    else {
+        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
+    }
     buf->array[buf->wr_idx].ptr = ptr;
     buf->array[buf->wr_idx].qsbr_goal = seq;
     buf->wr_idx++;
 
     if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+        // Normally the processing of delayed items is done from the eval
+        // breaker.  Processing here is a safety measure to ensure too much
+        // work does not accumulate.
         _PyMem_ProcessDelayed((PyThreadState *)tstate);
     }
 #endif
 }
 
 void
-_PyMem_FreeDelayed(void *ptr)
+_PyMem_FreeDelayed(void *ptr, size_t size)
 {
     assert(!((uintptr_t)ptr & 0x01));
     if (ptr != NULL) {
-        free_delayed((uintptr_t)ptr);
+        free_delayed((uintptr_t)ptr, size);
     }
 }
 
@@ -1226,7 +1305,25 @@ _PyObject_XDecRefDelayed(PyObject *ptr)
 {
     assert(!((uintptr_t)ptr & 0x01));
     if (ptr != NULL) {
-        free_delayed(((uintptr_t)ptr)|0x01);
+        // We use 0 as the size since we don't have an easy way to know the
+        // actual size.  If we are freeing many objects, the write sequence
+        // will be advanced due to QSBR_DEFERRED_LIMIT.
+        free_delayed(((uintptr_t)ptr)|0x01, 0);
+    }
+}
+#endif
+
+#ifdef Py_GIL_DISABLED
+void
+_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
+{
+    // Swap in `value` with a release store, then hand the previous
+    // referent (if any) to the delayed-decref path.
+    PyObject *prev = *ptr;
+    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
+    // Nothing to release for NULL; immortal objects are skipped as well.
+    if (prev != NULL && !_Py_IsImmortal(prev)) {
+        _PyObject_XDecRefDelayed(prev);
     }
 }
 #endif
@@ -1302,6 +1399,8 @@ _PyMem_ProcessDelayed(PyThreadState *tstate)
     PyInterpreterState *interp = tstate->interp;
     _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
 
+    tstate_impl->qsbr->should_process = false;
+
     // Process thread-local work
     process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index db923c16477..6e7471cb594 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -3967,13 +3967,9 @@ _PyObject_SetDict(PyObject *obj, PyObject *value)
         return -1;
     }
     Py_BEGIN_CRITICAL_SECTION(obj);
-    PyObject *olddict = *dictptr;
-    FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value));
-#ifdef Py_GIL_DISABLED
-    _PyObject_XDecRefDelayed(olddict);
-#else
-    Py_XDECREF(olddict);
-#endif
+    // gh-133980: Release the old __dict__ through the delayed-decref helper
+    // so other threads still referencing it cannot hit a use-after-free.
+    _PyObject_XSetRefDelayed(dictptr, Py_NewRef(value));
     Py_END_CRITICAL_SECTION();
     return 0;
 }
@@ -10024,6 +10020,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds)
     /* If staticbase is NULL now, it is a really weird type.
        In the spirit of backwards compatibility (?), just shut up. */
     if (staticbase && staticbase->tp_new != type->tp_new) {
+        if (staticbase->tp_new == NULL) {
+            PyErr_Format(PyExc_TypeError,
+                         "cannot create '%s' instances", subtype->tp_name);
+            return NULL;
+        }
         PyErr_Format(PyExc_TypeError,
                      "%s.__new__(%s) is not safe, use %s.__new__()",
                      type->tp_name,
diff --git a/PCbuild/build.bat b/PCbuild/build.bat
index 2f358991e48..60235704886 100644
--- a/PCbuild/build.bat
+++ b/PCbuild/build.bat
@@ -33,7 +33,7 @@ echo.  -k  Attempt to kill any running Pythons before building (usually done
 echo.      automatically by the pythoncore project)
 echo.  --pgo          Build with Profile-Guided Optimization.  This flag
 echo.                 overrides -c and -d
-echo.  --disable-gil  Enable experimental support for running without the GIL.
+echo.  --disable-gil  Enable support for running without the GIL.
 echo.  --test-marker  Enable the test marker within the build.
 echo.  --regen        Regenerate all opcodes, grammar and tokens.
 echo.  --experimental-jit          Enable the experimental just-in-time compiler.
diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py
index 8c1155c74a6..a78aa6a2304 100755
--- a/PCbuild/get_external.py
+++ b/PCbuild/get_external.py
@@ -5,8 +5,9 @@ import os
 import pathlib
 import sys
 import time
+import urllib.error
+import urllib.request
 import zipfile
-from urllib.request import urlretrieve
 
 
 def retrieve_with_retries(download_location, output_path, reporthook,
@@ -14,12 +15,12 @@ def retrieve_with_retries(download_location, output_path, reporthook,
     """Download a file with exponential backoff retry and save to disk."""
     for attempt in range(max_retries + 1):
         try:
-            resp = urlretrieve(
+            resp = urllib.request.urlretrieve(
                 download_location,
                 output_path,
                 reporthook=reporthook,
             )
-        except ConnectionError as ex:
+        except (urllib.error.URLError, ConnectionError) as ex:
             if attempt == max_retries:
                 msg = f"Download from {download_location} failed."
                 raise OSError(msg) from ex
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 32a8f2dbad3..b911c938563 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -419,8 +419,12 @@
     <ClCompile Include="..\Modules\_abc.c" />
     <ClCompile Include="..\Modules\_bisectmodule.c" />
     <ClCompile Include="..\Modules\blake2module.c">
-      <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">
+        _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions)
+      </PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">
+        _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions)
+      </PreprocessorDefinitions>
     </ClCompile>
     <ClCompile Include="..\Modules\_codecsmodule.c" />
     <ClCompile Include="..\Modules\_collectionsmodule.c" />
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 4d10bccf0a5..0a078dd5941 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -1421,7 +1421,8 @@ f_string_middle:
                     return MAKE_TOKEN(
                         _PyTokenizer_syntaxerror(
                             tok,
-                            "f-string: newlines are not allowed in format specifiers for single quoted f-strings"
+                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
+                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                         )
                     );
                 }
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index e08c63924ca..51d7297ec24 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -957,6 +957,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals,
                   PyObject *locals)
 /*[clinic end generated code: output=0a0824aa70093116 input=7c7bce5299a89062]*/
 {
+    PyThreadState *tstate = _PyThreadState_GET();
     PyObject *result = NULL, *source_copy;
     const char *str;
 
@@ -970,35 +971,46 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals,
             : "globals must be a dict");
         return NULL;
     }
-    if (globals == Py_None) {
+
+    int fromframe = 0;
+    if (globals != Py_None) {
+        Py_INCREF(globals);
+    }
+    else if (_PyEval_GetFrame() != NULL) {
+        fromframe = 1;
         globals = PyEval_GetGlobals();
-        if (locals == Py_None) {
-            locals = _PyEval_GetFrameLocals();
-            if (locals == NULL)
-                return NULL;
-        }
-        else {
-            Py_INCREF(locals);
-        }
+        assert(globals != NULL);
+        Py_INCREF(globals);
     }
-    else if (locals == Py_None)
-        locals = Py_NewRef(globals);
     else {
-        Py_INCREF(locals);
+        globals = _PyEval_GetGlobalsFromRunningMain(tstate);
+        if (globals == NULL) {
+            if (!_PyErr_Occurred(tstate)) {
+                PyErr_SetString(PyExc_TypeError,
+                    "eval must be given globals and locals "
+                    "when called without a frame");
+            }
+            return NULL;
+        }
+        Py_INCREF(globals);
     }
 
-    if (globals == NULL || locals == NULL) {
-        PyErr_SetString(PyExc_TypeError,
-            "eval must be given globals and locals "
-            "when called without a frame");
-        goto error;
+    if (locals != Py_None) {
+        Py_INCREF(locals);
     }
-
-    int r = PyDict_Contains(globals, &_Py_ID(__builtins__));
-    if (r == 0) {
-        r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins());
+    else if (fromframe) {
+        locals = _PyEval_GetFrameLocals();
+        if (locals == NULL) {
+            assert(PyErr_Occurred());
+            Py_DECREF(globals);
+            return NULL;
+        }
+    }
+    else {
+        locals = Py_NewRef(globals);
     }
-    if (r < 0) {
+
+    if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) {
         goto error;
     }
 
@@ -1039,6 +1051,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals,
     }
 
   error:
+    Py_XDECREF(globals);
     Py_XDECREF(locals);
     return result;
 }
@@ -1069,29 +1082,44 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals,
                   PyObject *locals, PyObject *closure)
 /*[clinic end generated code: output=7579eb4e7646743d input=25e989b6d87a3a21]*/
 {
+    PyThreadState *tstate = _PyThreadState_GET();
     PyObject *v;
 
-    if (globals == Py_None) {
+    int fromframe = 0;
+    if (globals != Py_None) {
+        Py_INCREF(globals);
+    }
+    else if (_PyEval_GetFrame() != NULL) {
+        fromframe = 1;
         globals = PyEval_GetGlobals();
-        if (locals == Py_None) {
-            locals = _PyEval_GetFrameLocals();
-            if (locals == NULL)
-                return NULL;
-        }
-        else {
-            Py_INCREF(locals);
+        assert(globals != NULL);
+        Py_INCREF(globals);
+    }
+    else {
+        globals = _PyEval_GetGlobalsFromRunningMain(tstate);
+        if (globals == NULL) {
+            if (!_PyErr_Occurred(tstate)) {
+                PyErr_SetString(PyExc_SystemError,
+                                "globals and locals cannot be NULL");
+            }
+            return NULL;  // locals is still borrowed: skip 'error' decrefs
         }
-        if (!globals || !locals) {
-            PyErr_SetString(PyExc_SystemError,
-                            "globals and locals cannot be NULL");
+        Py_INCREF(globals);
+    }
+
+    if (locals != Py_None) {
+        Py_INCREF(locals);
+    }
+    else if (fromframe) {
+        locals = _PyEval_GetFrameLocals();
+        if (locals == NULL) {
+            assert(PyErr_Occurred());
+            Py_DECREF(globals);
             return NULL;
         }
     }
-    else if (locals == Py_None) {
-        locals = Py_NewRef(globals);
-    }
     else {
-        Py_INCREF(locals);
+        locals = Py_NewRef(globals);
     }
 
     if (!PyDict_Check(globals)) {
@@ -1105,11 +1133,8 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals,
             Py_TYPE(locals)->tp_name);
         goto error;
     }
-    int r = PyDict_Contains(globals, &_Py_ID(__builtins__));
-    if (r == 0) {
-        r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins());
-    }
-    if (r < 0) {
+
+    if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) {
         goto error;
     }
 
@@ -1186,11 +1211,13 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals,
     }
     if (v == NULL)
         goto error;
+    Py_DECREF(globals);
     Py_DECREF(locals);
     Py_DECREF(v);
     Py_RETURN_NONE;
 
   error:
+    Py_XDECREF(globals);
     Py_XDECREF(locals);
     return NULL;
 }
@@ -1240,10 +1267,21 @@ static PyObject *
 builtin_globals_impl(PyObject *module)
 /*[clinic end generated code: output=e5dd1527067b94d2 input=9327576f92bb48ba]*/
 {
-    PyObject *d;
-
-    d = PyEval_GetGlobals();
-    return Py_XNewRef(d);
+    PyObject *globals;
+    if (_PyEval_GetFrame() != NULL) {
+        globals = PyEval_GetGlobals();
+        assert(globals != NULL);
+        return Py_NewRef(globals);
+    }
+    PyThreadState *tstate = _PyThreadState_GET();
+    globals = _PyEval_GetGlobalsFromRunningMain(tstate);
+    if (globals == NULL) {
+        if (_PyErr_Occurred(tstate)) {
+            return NULL;
+        }
+        Py_RETURN_NONE;
+    }
+    return Py_NewRef(globals);
 }
 
 
@@ -1887,7 +1925,21 @@ static PyObject *
 builtin_locals_impl(PyObject *module)
 /*[clinic end generated code: output=b46c94015ce11448 input=7874018d478d5c4b]*/
 {
-    return _PyEval_GetFrameLocals();
+    PyObject *locals;
+    if (_PyEval_GetFrame() != NULL) {
+        locals = _PyEval_GetFrameLocals();
+        assert(locals != NULL || PyErr_Occurred());
+        return locals;
+    }
+    PyThreadState *tstate = _PyThreadState_GET();
+    locals = _PyEval_GetGlobalsFromRunningMain(tstate);
+    if (locals == NULL) {
+        if (_PyErr_Occurred(tstate)) {
+            return NULL;
+        }
+        Py_RETURN_NONE;
+    }
+    return Py_NewRef(locals);
 }
 
 
@@ -2623,7 +2675,22 @@ builtin_vars(PyObject *self, PyObject *args)
     if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v))
         return NULL;
     if (v == NULL) {
-        d = _PyEval_GetFrameLocals();
+        if (_PyEval_GetFrame() != NULL) {
+            d = _PyEval_GetFrameLocals();
+        }
+        else {
+            PyThreadState *tstate = _PyThreadState_GET();
+            d = _PyEval_GetGlobalsFromRunningMain(tstate);
+            if (d == NULL) {
+                if (!_PyErr_Occurred(tstate)) {
+                    d = _PyEval_GetFrameLocals();
+                    assert(_PyErr_Occurred(tstate));
+                }
+            }
+            else {
+                Py_INCREF(d);
+            }
+        }
     }
     else {
         if (PyObject_GetOptionalAttr(v, &_Py_ID(__dict__), &d) == 0) {
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 971e97a5784..1a5a9ff13a2 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -344,6 +344,27 @@ dummy_func(
             PyStackRef_XCLOSE(value);
         }
 
+        op(_POP_TOP_NOP, (value --)) {
+            assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) ||
+                _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value))));
+            DEAD(value);
+        }
+
+        op(_POP_TOP_INT, (value --)) {
+            assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
+            PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc);
+        }
+
+        op(_POP_TOP_FLOAT, (value --)) {
+            assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
+            PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc);
+        }
+
+        op(_POP_TOP_UNICODE, (value --)) {
+            assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
+            PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc);
+        }
+
         tier2 op(_POP_TWO, (nos, tos --)) {
             PyStackRef_CLOSE(tos);
             PyStackRef_CLOSE(nos);
@@ -569,12 +590,24 @@ dummy_func(
 
         op(_GUARD_NOS_INT, (left, unused -- left, unused)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-            EXIT_IF(!PyLong_CheckExact(left_o));
+            EXIT_IF(!_PyLong_CheckExactAndCompact(left_o));
         }
 
         op(_GUARD_TOS_INT, (value -- value)) {
             PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-            EXIT_IF(!PyLong_CheckExact(value_o));
+            EXIT_IF(!_PyLong_CheckExactAndCompact(value_o));
+        }
+
+        op(_GUARD_NOS_OVERFLOWED, (left, unused -- left, unused)) {
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            assert(Py_TYPE(left_o) == &PyLong_Type);
+            EXIT_IF(!_PyLong_IsCompact((PyLongObject *)left_o));
+        }
+
+        op(_GUARD_TOS_OVERFLOWED, (value -- value)) {
+            PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
+            assert(Py_TYPE(value_o) == &PyLong_Type);
+            EXIT_IF(!_PyLong_IsCompact((PyLongObject *)value_o));
         }
 
         pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
@@ -582,15 +615,14 @@ dummy_func(
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(PyLong_CheckExact(left_o));
             assert(PyLong_CheckExact(right_o));
-            DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+            assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
 
             STAT_INC(BINARY_OP, hit);
-            PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
+            res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
+            EXIT_IF(PyStackRef_IsNull(res));
             PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
             INPUTS_DEAD();
-            ERROR_IF(res_o == NULL);
-            res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
         pure op(_BINARY_OP_ADD_INT, (left, right -- res)) {
@@ -598,15 +630,14 @@ dummy_func(
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(PyLong_CheckExact(left_o));
             assert(PyLong_CheckExact(right_o));
-            DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+            assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
 
             STAT_INC(BINARY_OP, hit);
-            PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
+            res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
+            EXIT_IF(PyStackRef_IsNull(res));
             PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
             INPUTS_DEAD();
-            ERROR_IF(res_o == NULL);
-            res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
         pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
@@ -614,21 +645,22 @@ dummy_func(
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(PyLong_CheckExact(left_o));
             assert(PyLong_CheckExact(right_o));
-            DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+            assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
 
             STAT_INC(BINARY_OP, hit);
-            PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
+            res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
+            EXIT_IF(PyStackRef_IsNull(res));
             PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
             INPUTS_DEAD();
-            ERROR_IF(res_o == NULL);
-            res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
         macro(BINARY_OP_MULTIPLY_INT) =
             _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_MULTIPLY_INT;
+
         macro(BINARY_OP_ADD_INT) =
             _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_ADD_INT;
+
         macro(BINARY_OP_SUBTRACT_INT) =
             _GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT;
 
@@ -687,6 +719,52 @@ dummy_func(
             ERROR_IF(PyStackRef_IsNull(res));
         }
 
+
+        pure op(_BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) {
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
+
+            STAT_INC(BINARY_OP, hit);
+            double dres =
+                ((PyFloatObject *)left_o)->ob_fval *
+                ((PyFloatObject *)right_o)->ob_fval;
+            res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres));
+            INPUTS_DEAD();
+            ERROR_IF(PyStackRef_IsNull(res));
+        }
+
+        pure op(_BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) {
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
+
+            STAT_INC(BINARY_OP, hit);
+            double dres =
+                ((PyFloatObject *)left_o)->ob_fval +
+                ((PyFloatObject *)right_o)->ob_fval;
+            res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres));
+            INPUTS_DEAD();
+            ERROR_IF(PyStackRef_IsNull(res));
+        }
+
+        pure op(_BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS, (left, right -- res)) {
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
+
+            STAT_INC(BINARY_OP, hit);
+            double dres =
+                ((PyFloatObject *)left_o)->ob_fval -
+                ((PyFloatObject *)right_o)->ob_fval;
+            res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres));
+            INPUTS_DEAD();
+            ERROR_IF(PyStackRef_IsNull(res));
+        }
+
         macro(BINARY_OP_MULTIPLY_FLOAT) =
             _GUARD_TOS_FLOAT + _GUARD_NOS_FLOAT + unused/5 + _BINARY_OP_MULTIPLY_FLOAT;
         macro(BINARY_OP_ADD_FLOAT) =
@@ -772,7 +850,7 @@ dummy_func(
             DEOPT_IF(!res);
         }
 
-        pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
+       op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5);
@@ -2691,8 +2769,8 @@ dummy_func(
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
 
-            DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left_o));
-            DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right_o));
+            assert(_PyLong_IsCompact((PyLongObject *)left_o));
+            assert(_PyLong_IsCompact((PyLongObject *)right_o));
             STAT_INC(COMPARE_OP, hit);
             assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 &&
                    _PyLong_DigitCount((PyLongObject *)right_o) <= 1);
@@ -4946,8 +5024,7 @@ dummy_func(
             res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
-        pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
-            assert(oparg > 0);
+        pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
             top = PyStackRef_DUP(bottom);
         }
 
@@ -4980,12 +5057,11 @@ dummy_func(
 
         macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP;
 
-        pure inst(SWAP, (bottom, unused[oparg-2], top --
+        pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
                     bottom, unused[oparg-2], top)) {
             _PyStackRef temp = bottom;
             bottom = top;
             top = temp;
-            assert(oparg >= 2);
         }
 
         inst(INSTRUMENTED_LINE, ( -- )) {
diff --git a/Python/ceval.c b/Python/ceval.c
index 4cfe4bb88f4..d1de4875656 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -8,7 +8,7 @@
 #include "pycore_backoff.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 #include "pycore_cell.h"          // PyCell_GetRef()
-#include "pycore_ceval.h"
+#include "pycore_ceval.h"         // SPECIAL___ENTER__
 #include "pycore_code.h"
 #include "pycore_dict.h"
 #include "pycore_emscripten_signal.h"  // _Py_CHECK_EMSCRIPTEN_SIGNALS
@@ -2746,10 +2746,9 @@ _PyEval_GetFrameLocals(void)
     return locals;
 }
 
-PyObject *
-PyEval_GetGlobals(void)
+static PyObject *
+_PyEval_GetGlobals(PyThreadState *tstate)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
     _PyInterpreterFrame *current_frame = _PyThreadState_GetFrame(tstate);
     if (current_frame == NULL) {
         return NULL;
@@ -2757,6 +2756,120 @@ PyEval_GetGlobals(void)
     return current_frame->f_globals;
 }
 
+PyObject *
+PyEval_GetGlobals(void)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    return _PyEval_GetGlobals(tstate);
+}
+
+PyObject *
+_PyEval_GetGlobalsFromRunningMain(PyThreadState *tstate)
+{
+    if (!_PyInterpreterState_IsRunningMain(tstate->interp)) {
+        return NULL;
+    }
+    PyObject *mod = _Py_GetMainModule(tstate);
+    if (_Py_CheckMainModule(mod) < 0) {
+        Py_XDECREF(mod);
+        return NULL;
+    }
+    PyObject *globals = PyModule_GetDict(mod);  // borrowed
+    Py_DECREF(mod);
+    return globals;
+}
+
+static PyObject *
+get_globals_builtins(PyObject *globals)
+{
+    PyObject *builtins = NULL;
+    if (PyDict_Check(globals)) {
+        if (PyDict_GetItemRef(globals, &_Py_ID(__builtins__), &builtins) < 0) {
+            return NULL;
+        }
+    }
+    else {
+        if (PyMapping_GetOptionalItem(
+                        globals, &_Py_ID(__builtins__), &builtins) < 0)
+        {
+            return NULL;
+        }
+    }
+    return builtins;
+}
+
+// Store `builtins` under "__builtins__" in `globals`, taking the dict path
+// when `globals` is a dict.  Returns 0 on success and -1 on failure.
+static int
+set_globals_builtins(PyObject *globals, PyObject *builtins)
+{
+    PyObject *key = &_Py_ID(__builtins__);
+    int rc;
+    if (PyDict_Check(globals)) {
+        rc = PyDict_SetItem(globals, key, builtins);
+    }
+    else {
+        rc = PyObject_SetItem(globals, key, builtins);
+    }
+    return rc < 0 ? -1 : 0;
+}
+
+int
+_PyEval_EnsureBuiltins(PyThreadState *tstate, PyObject *globals,
+                       PyObject **p_builtins)
+{
+    PyObject *builtins = get_globals_builtins(globals);
+    if (builtins == NULL) {
+        if (_PyErr_Occurred(tstate)) {
+            return -1;
+        }
+        builtins = PyEval_GetBuiltins();  // borrowed
+        if (builtins == NULL) {
+            assert(_PyErr_Occurred(tstate));
+            return -1;
+        }
+        Py_INCREF(builtins);
+        if (set_globals_builtins(globals, builtins) < 0) {
+            Py_DECREF(builtins);
+            return -1;
+        }
+    }
+    if (p_builtins != NULL) {
+        *p_builtins = builtins;
+    }
+    else {
+        Py_DECREF(builtins);
+    }
+    return 0;
+}
+
+int
+_PyEval_EnsureBuiltinsWithModule(PyThreadState *tstate, PyObject *globals,
+                                 PyObject **p_builtins)
+{
+    PyObject *builtins = get_globals_builtins(globals);
+    if (builtins == NULL) {
+        if (_PyErr_Occurred(tstate)) {
+            return -1;
+        }
+        builtins = PyImport_ImportModuleLevel("builtins", NULL, NULL, NULL, 0);
+        if (builtins == NULL) {
+            return -1;
+        }
+        if (set_globals_builtins(globals, builtins) < 0) {
+            Py_DECREF(builtins);
+            return -1;
+        }
+    }
+    if (p_builtins != NULL) {
+        *p_builtins = builtins;
+    }
+    else {
+        Py_DECREF(builtins);
+    }
+    return 0;
+}
+
 PyObject*
 PyEval_GetFrameLocals(void)
 {
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index 6d2383ac7c1..aa68371ac8f 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -1220,7 +1220,7 @@ static inline int run_remote_debugger_source(PyObject *source)
 // that would be an easy target for a ROP gadget.
 static inline void run_remote_debugger_script(PyObject *path)
 {
-    if (0 != PySys_Audit("remote_debugger_script", "O", path)) {
+    if (0 != PySys_Audit("cpython.remote_debugger_script", "O", path)) {
         PyErr_FormatUnraisable(
             "Audit hook failed for remote debugger script %U", path);
         return;
@@ -1387,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate)
         _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT);
         _Py_brc_merge_refcounts(tstate);
     }
+    /* Process deferred memory frees held by QSBR */
+    if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) {
+        _PyMem_ProcessDelayed(tstate);
+    }
 #endif
 
     /* GC scheduled to run */
diff --git a/Python/codegen.c b/Python/codegen.c
index 0023d72cd5e..27fe8e1957b 100644
--- a/Python/codegen.c
+++ b/Python/codegen.c
@@ -28,6 +28,7 @@
 #include "pycore_pystate.h"       // _Py_GetConfig()
 #include "pycore_symtable.h"      // PySTEntryObject
 #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString
+#include "pycore_ceval.h"         // SPECIAL___ENTER__
 
 #define NEED_OPCODE_METADATA
 #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed
diff --git a/Python/crossinterp.c b/Python/crossinterp.c
index 39c7ea69890..16a23f0351c 100644
--- a/Python/crossinterp.c
+++ b/Python/crossinterp.c
@@ -7,9 +7,12 @@
 #include "pycore_ceval.h"         // _Py_simple_func
 #include "pycore_crossinterp.h"   // _PyXIData_t
 #include "pycore_function.h"      // _PyFunction_VerifyStateless()
+#include "pycore_global_strings.h"  // _Py_ID()
+#include "pycore_import.h"        // _PyImport_SetModule()
 #include "pycore_initconfig.h"    // _PyStatus_OK()
 #include "pycore_namespace.h"     // _PyNamespace_New()
 #include "pycore_pythonrun.h"     // _Py_SourceAsString()
+#include "pycore_runtime.h"       // _PyRuntime
 #include "pycore_setobject.h"     // _PySet_NextEntry()
 #include "pycore_typeobject.h"    // _PyStaticType_InitBuiltin()
 
@@ -22,6 +25,7 @@ _Py_GetMainfile(char *buffer, size_t maxlen)
     PyThreadState *tstate = _PyThreadState_GET();
     PyObject *module = _Py_GetMainModule(tstate);
     if (_Py_CheckMainModule(module) < 0) {
+        Py_XDECREF(module);
         return -1;
     }
     Py_ssize_t size = _PyModule_GetFilenameUTF8(module, buffer, maxlen);
@@ -31,27 +35,6 @@ _Py_GetMainfile(char *buffer, size_t maxlen)
 
 
 static PyObject *
-import_get_module(PyThreadState *tstate, const char *modname)
-{
-    PyObject *module = NULL;
-    if (strcmp(modname, "__main__") == 0) {
-        module = _Py_GetMainModule(tstate);
-        if (_Py_CheckMainModule(module) < 0) {
-            assert(_PyErr_Occurred(tstate));
-            return NULL;
-        }
-    }
-    else {
-        module = PyImport_ImportModule(modname);
-        if (module == NULL) {
-            return NULL;
-        }
-    }
-    return module;
-}
-
-
-static PyObject *
 runpy_run_path(const char *filename, const char *modname)
 {
     PyObject *run_path = PyImport_ImportModuleAttrString("runpy", "run_path");
@@ -81,97 +64,181 @@ set_exc_with_cause(PyObject *exctype, const char *msg)
 }
 
 
-static PyObject *
-pyerr_get_message(PyObject *exc)
+/****************************/
+/* module duplication utils */
+/****************************/
+
+struct sync_module_result {
+    PyObject *module;
+    PyObject *loaded;
+    PyObject *failed;
+};
+
+struct sync_module {
+    const char *filename;
+    char _filename[MAXPATHLEN+1];
+    struct sync_module_result cached;
+};
+
+static void
+sync_module_clear(struct sync_module *data)
 {
-    assert(!PyErr_Occurred());
-    PyObject *args = PyException_GetArgs(exc);
-    if (args == NULL || args == Py_None || PyObject_Size(args) < 1) {
-        return NULL;
-    }
-    if (PyUnicode_Check(args)) {
-        return args;
-    }
-    PyObject *msg = PySequence_GetItem(args, 0);
-    Py_DECREF(args);
-    if (msg == NULL) {
-        PyErr_Clear();
-        return NULL;
-    }
-    if (!PyUnicode_Check(msg)) {
-        Py_DECREF(msg);
-        return NULL;
-    }
-    return msg;
+    data->filename = NULL;
+    Py_CLEAR(data->cached.module);
+    Py_CLEAR(data->cached.loaded);
+    Py_CLEAR(data->cached.failed);
 }
 
-#define MAX_MODNAME (255)
-#define MAX_ATTRNAME (255)
+static void
+sync_module_capture_exc(PyThreadState *tstate, struct sync_module *data)
+{
+    assert(_PyErr_Occurred(tstate));
+    PyObject *context = data->cached.failed;
+    PyObject *exc = _PyErr_GetRaisedException(tstate);
+    _PyErr_SetRaisedException(tstate, Py_NewRef(exc));
+    if (context != NULL) {
+        PyException_SetContext(exc, context);
+    }
+    data->cached.failed = exc;
+}
 
-struct attributeerror_info {
-    char modname[MAX_MODNAME+1];
-    char attrname[MAX_ATTRNAME+1];
-};
 
 static int
-_parse_attributeerror(PyObject *exc, struct attributeerror_info *info)
+ensure_isolated_main(PyThreadState *tstate, struct sync_module *main)
 {
-    assert(exc != NULL);
-    assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError));
-    int res = -1;
+    // Load the module from the original file (or from a cache).
 
-    PyObject *msgobj = pyerr_get_message(exc);
-    if (msgobj == NULL) {
+    // First try the local cache.
+    if (main->cached.failed != NULL) {
+        // We'll deal with this in apply_isolated_main().
+        assert(main->cached.module == NULL);
+        assert(main->cached.loaded == NULL);
+        return 0;
+    }
+    else if (main->cached.loaded != NULL) {
+        assert(main->cached.module != NULL);
+        return 0;
+    }
+    assert(main->cached.module == NULL);
+
+    if (main->filename == NULL) {
+        _PyErr_SetString(tstate, PyExc_NotImplementedError, "");
         return -1;
     }
-    const char *err = PyUnicode_AsUTF8(msgobj);
 
-    if (strncmp(err, "module '", 8) != 0) {
-        goto finally;
+    // It wasn't in the local cache so we'll need to populate it.
+    PyObject *mod = _Py_GetMainModule(tstate);
+    if (_Py_CheckMainModule(mod) < 0) {
+        // This is probably unrecoverable, so don't bother caching the error.
+        assert(_PyErr_Occurred(tstate));
+        Py_XDECREF(mod);
+        return -1;
     }
-    err += 8;
+    PyObject *loaded = NULL;
 
-    const char *matched = strchr(err, '\'');
-    if (matched == NULL) {
-        goto finally;
+    // Try the per-interpreter cache for the loaded module.
+    // XXX Store it in sys.modules?
+    PyObject *interpns = PyInterpreterState_GetDict(tstate->interp);
+    assert(interpns != NULL);
+    PyObject *key = PyUnicode_FromString("CACHED_MODULE_NS___main__");
+    if (key == NULL) {
+        // It's probably unrecoverable, so don't bother caching the error.
+        Py_DECREF(mod);
+        return -1;
     }
-    Py_ssize_t len = matched - err;
-    if (len > MAX_MODNAME) {
-        goto finally;
+    else if (PyDict_GetItemRef(interpns, key, &loaded) < 0) {
+        // It's probably unrecoverable, so don't bother caching the error.
+        Py_DECREF(mod);
+        Py_DECREF(key);
+        return -1;
     }
-    (void)strncpy(info->modname, err, len);
-    info->modname[len] = '\0';
-    err = matched;
+    else if (loaded == NULL) {
+        // It wasn't already loaded from file.
+        loaded = PyModule_NewObject(&_Py_ID(__main__));
+        if (loaded == NULL) {
+            goto error;
+        }
+        PyObject *ns = _PyModule_GetDict(loaded);
 
-    if (strncmp(err, "' has no attribute '", 20) != 0) {
-        goto finally;
-    }
-    err += 20;
+        // We don't want to trigger "if __name__ == '__main__':",
+        // so we use a bogus module name.
+        PyObject *loaded_ns =
+                    runpy_run_path(main->filename, "<fake __main__>");
+        if (loaded_ns == NULL) {
+            goto error;
+        }
+        int res = PyDict_Update(ns, loaded_ns);
+        Py_DECREF(loaded_ns);
+        if (res < 0) {
+            goto error;
+        }
 
-    matched = strchr(err, '\'');
-    if (matched == NULL) {
-        goto finally;
-    }
-    len = matched - err;
-    if (len > MAX_ATTRNAME) {
-        goto finally;
+        // Set the per-interpreter cache entry.
+        if (PyDict_SetItem(interpns, key, loaded) < 0) {
+            goto error;
+        }
     }
-    (void)strncpy(info->attrname, err, len);
-    info->attrname[len] = '\0';
-    err = matched + 1;
 
-    if (strlen(err) > 0) {
-        goto finally;
+    Py_DECREF(key);
+    main->cached = (struct sync_module_result){
+       .module = mod,
+       .loaded = loaded,
+    };
+    return 0;
+
+error:
+    sync_module_capture_exc(tstate, main);
+    Py_XDECREF(loaded);
+    Py_DECREF(mod);
+    Py_XDECREF(key);
+    return -1;
+}
+
+#ifndef NDEBUG
+static int
+main_mod_matches(PyObject *expected)
+{
+    PyObject *mod = PyImport_GetModule(&_Py_ID(__main__));
+    Py_XDECREF(mod);
+    return mod == expected;
+}
+#endif
+
+static int
+apply_isolated_main(PyThreadState *tstate, struct sync_module *main)
+{
+    assert((main->cached.module == NULL) == (main->cached.loaded == NULL));
+    if (main->cached.failed != NULL) {
+        // Loading failed earlier: re-raise it; the cache keeps its own ref.
+        assert(main->cached.loaded == NULL);
+        _PyErr_SetRaisedException(tstate, Py_NewRef(main->cached.failed));
+        return -1;
     }
-    res = 0;
+    assert(main->cached.loaded != NULL);
 
-finally:
-    Py_DECREF(msgobj);
-    return res;
+    assert(main_mod_matches(main->cached.module));
+    if (_PyImport_SetModule(&_Py_ID(__main__), main->cached.loaded) < 0) {
+        sync_module_capture_exc(tstate, main);
+        return -1;
+    }
+    return 0;
 }
 
-#undef MAX_MODNAME
-#undef MAX_ATTRNAME
+static void
+restore_main(PyThreadState *tstate, struct sync_module *main)
+{
+    assert(main->cached.failed == NULL);
+    assert(main->cached.module != NULL);
+    assert(main->cached.loaded != NULL);
+    PyObject *exc = _PyErr_GetRaisedException(tstate);
+    assert(main_mod_matches(main->cached.loaded));
+    int res = _PyImport_SetModule(&_Py_ID(__main__), main->cached.module);
+    assert(res == 0);
+    if (res < 0) {
+        PyErr_FormatUnraisable("Exception ignored while restoring __main__");
+    }
+    _PyErr_SetRaisedException(tstate, exc);
+}
 
 
 /**************/
@@ -518,28 +585,6 @@ _PyPickle_Dumps(struct _pickle_context *ctx, PyObject *obj)
 }
 
 
-struct sync_module_result {
-    PyObject *module;
-    PyObject *loaded;
-    PyObject *failed;
-};
-
-struct sync_module {
-    const char *filename;
-    char _filename[MAXPATHLEN+1];
-    struct sync_module_result cached;
-};
-
-static void
-sync_module_clear(struct sync_module *data)
-{
-    data->filename = NULL;
-    Py_CLEAR(data->cached.module);
-    Py_CLEAR(data->cached.loaded);
-    Py_CLEAR(data->cached.failed);
-}
-
-
 struct _unpickle_context {
     PyThreadState *tstate;
     // We only special-case the __main__ module,
@@ -553,142 +598,88 @@ _unpickle_context_clear(struct _unpickle_context *ctx)
     sync_module_clear(&ctx->main);
 }
 
-static struct sync_module_result
-_unpickle_context_get_module(struct _unpickle_context *ctx,
-                             const char *modname)
+static int
+check_missing___main___attr(PyObject *exc)
 {
-    if (strcmp(modname, "__main__") == 0) {
-        return ctx->main.cached;
+    assert(!PyErr_Occurred());
+    if (!PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)) {
+        return 0;
     }
-    else {
-        return (struct sync_module_result){
-            .failed = PyExc_NotImplementedError,
-        };
+    // Get the error message (exc.args itself if a str, else its first item).
+    PyObject *args = PyException_GetArgs(exc);
+    if (args == NULL || args == Py_None || PyObject_Size(args) < 1) {
+        PyErr_Clear();  // This is a best-effort check; never leave an error.
+        Py_XDECREF(args);  // We own the reference, so don't leak it.
+        return 0;
     }
+    PyObject *msgobj = args;
+    if (!PyUnicode_Check(msgobj)) {
+        msgobj = PySequence_GetItem(args, 0);
+        Py_DECREF(args);
+    }
+    const char *err = (msgobj != NULL && PyUnicode_Check(msgobj))
+                        ? PyUnicode_AsUTF8(msgobj) : NULL;
+    if (err == NULL) {  // lookup failed, not a str, or not UTF-8 encodable
+        PyErr_Clear();
+        Py_XDECREF(msgobj);
+        return 0;
+    }
+    int cmp = strncmp(err, "module '__main__' has no attribute '", 36);
+    Py_DECREF(msgobj);
+    return cmp == 0;  // 1 only for a missing __main__ attribute
 }
 
-static struct sync_module_result
-_unpickle_context_set_module(struct _unpickle_context *ctx,
-                             const char *modname)
+static PyObject *
+_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled)
 {
-    struct sync_module_result res = {0};
-    struct sync_module_result *cached = NULL;
-    const char *filename = NULL;
-    const char *run_modname = modname;
-    if (strcmp(modname, "__main__") == 0) {
-        cached = &ctx->main.cached;
-        filename = ctx->main.filename;
-        // We don't want to trigger "if __name__ == '__main__':".
-        run_modname = "<fake __main__>";
-    }
-    else {
-        res.failed = PyExc_NotImplementedError;
-        goto finally;
-    }
+    PyThreadState *tstate = ctx->tstate;
 
-    res.module = import_get_module(ctx->tstate, modname);
-    if (res.module == NULL) {
-        res.failed = _PyErr_GetRaisedException(ctx->tstate);
-        assert(res.failed != NULL);
-        goto finally;
+    PyObject *exc = NULL;
+    PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads");
+    if (loads == NULL) {
+        return NULL;
     }
 
-    if (filename == NULL) {
-        Py_CLEAR(res.module);
-        res.failed = PyExc_NotImplementedError;
+    // Make an initial attempt to unpickle.
+    PyObject *obj = PyObject_CallOneArg(loads, pickled);
+    if (obj != NULL) {
         goto finally;
     }
-    res.loaded = runpy_run_path(filename, run_modname);
-    if (res.loaded == NULL) {
-        Py_CLEAR(res.module);
-        res.failed = _PyErr_GetRaisedException(ctx->tstate);
-        assert(res.failed != NULL);
+    assert(_PyErr_Occurred(tstate));
+    if (ctx == NULL) {
         goto finally;
     }
-
-finally:
-    if (cached != NULL) {
-        assert(cached->module == NULL);
-        assert(cached->loaded == NULL);
-        assert(cached->failed == NULL);
-        *cached = res;
-    }
-    return res;
-}
-
-
-static int
-_handle_unpickle_missing_attr(struct _unpickle_context *ctx, PyObject *exc)
-{
-    // The caller must check if an exception is set or not when -1 is returned.
-    assert(!_PyErr_Occurred(ctx->tstate));
-    assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError));
-    struct attributeerror_info info;
-    if (_parse_attributeerror(exc, &info) < 0) {
-        return -1;
+    exc = _PyErr_GetRaisedException(tstate);
+    if (!check_missing___main___attr(exc)) {
+        goto finally;
     }
 
-    // Get the module.
-    struct sync_module_result mod = _unpickle_context_get_module(ctx, info.modname);
-    if (mod.failed != NULL) {
-        // It must have failed previously.
-        return -1;
-    }
-    if (mod.module == NULL) {
-        mod = _unpickle_context_set_module(ctx, info.modname);
-        if (mod.failed != NULL) {
-            return -1;
-        }
-        assert(mod.module != NULL);
+    // Temporarily swap in a fake __main__ loaded from the original
+    // file and cached.  Note that functions will use the cached ns
+    // for __globals__, not the actual module.
+    if (ensure_isolated_main(tstate, &ctx->main) < 0) {
+        goto finally;
     }
-
-    // Bail out if it is unexpectedly set already.
-    if (PyObject_HasAttrString(mod.module, info.attrname)) {
-        return -1;
+    if (apply_isolated_main(tstate, &ctx->main) < 0) {
+        goto finally;
     }
 
-    // Try setting the attribute.
-    PyObject *value = NULL;
-    if (PyDict_GetItemStringRef(mod.loaded, info.attrname, &value) <= 0) {
-        return -1;
-    }
-    assert(value != NULL);
-    int res = PyObject_SetAttrString(mod.module, info.attrname, value);
-    Py_DECREF(value);
-    if (res < 0) {
-        return -1;
+    // Try to unpickle once more.
+    obj = PyObject_CallOneArg(loads, pickled);
+    restore_main(tstate, &ctx->main);
+    if (obj == NULL) {
+        goto finally;
     }
+    Py_CLEAR(exc);
 
-    return 0;
-}
-
-static PyObject *
-_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled)
-{
-    PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads");
-    if (loads == NULL) {
-        return NULL;
-    }
-    PyObject *obj = PyObject_CallOneArg(loads, pickled);
-    if (ctx != NULL) {
-        while (obj == NULL) {
-            assert(_PyErr_Occurred(ctx->tstate));
-            if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
-                // We leave other failures unhandled.
-                break;
-            }
-            // Try setting the attr if not set.
-            PyObject *exc = _PyErr_GetRaisedException(ctx->tstate);
-            if (_handle_unpickle_missing_attr(ctx, exc) < 0) {
-                // Any resulting exceptions are ignored
-                // in favor of the original.
-                _PyErr_SetRaisedException(ctx->tstate, exc);
-                break;
-            }
-            Py_CLEAR(exc);
-            // Retry with the attribute set.
-            obj = PyObject_CallOneArg(loads, pickled);
+finally:
+    if (exc != NULL) {
+        if (_PyErr_Occurred(tstate)) {
+            sync_module_capture_exc(tstate, &ctx->main);
         }
+        // We restore the original exception.
+        // It might make sense to chain it (__context__).
+        _PyErr_SetRaisedException(tstate, exc);
     }
     Py_DECREF(loads);
     return obj;
@@ -2619,11 +2610,14 @@ _PyXI_Enter(_PyXI_session *session,
             PyInterpreterState *interp, PyObject *nsupdates,
             _PyXI_session_result *result)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+#ifndef NDEBUG
+    PyThreadState *tstate = _PyThreadState_GET();  // Only used for asserts
+#endif
 
     // Convert the attrs for cross-interpreter use.
     _PyXI_namespace *sharedns = NULL;
     if (nsupdates != NULL) {
+        assert(PyDict_Check(nsupdates));
         Py_ssize_t len = PyDict_Size(nsupdates);
         if (len < 0) {
             if (result != NULL) {
@@ -2661,7 +2655,9 @@ _PyXI_Enter(_PyXI_session *session,
     _enter_session(session, interp);
     _PyXI_failure override = XI_FAILURE_INIT;
     override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION;
+#ifndef NDEBUG
     tstate = _PyThreadState_GET();
+#endif
 
     // Ensure this thread owns __main__.
     if (_PyInterpreterState_SetRunningMain(interp) < 0) {
@@ -2697,7 +2693,9 @@ error:
 
     // Exit the session.
     _exit_session(session);
+#ifndef NDEBUG
     tstate = _PyThreadState_GET();
+#endif
 
     if (sharedns != NULL) {
         _destroy_sharedns(sharedns);
@@ -2883,6 +2881,7 @@ _ensure_main_ns(_PyXI_session *session, _PyXI_failure *failure)
     // Cache __main__.__dict__.
     PyObject *main_mod = _Py_GetMainModule(tstate);
     if (_Py_CheckMainModule(main_mod) < 0) {
+        Py_XDECREF(main_mod);
         if (failure != NULL) {
             *failure = (_PyXI_failure){
                 .code = _PyXI_ERR_MAIN_NS_FAILURE,
diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h
index 6d0b93eb82a..c3c76ae8d9a 100644
--- a/Python/crossinterp_data_lookup.h
+++ b/Python/crossinterp_data_lookup.h
@@ -722,16 +722,26 @@ _PyFunction_FromXIData(_PyXIData_t *xidata)
         return NULL;
     }
     // Create a new function.
+    // For stateless functions (no globals) we use __main__ as __globals__,
+    // just like we do for builtins like exec().
     assert(PyCode_Check(code));
-    PyObject *globals = PyDict_New();
+    PyThreadState *tstate = _PyThreadState_GET();
+    PyObject *globals = _PyEval_GetGlobalsFromRunningMain(tstate);  // borrowed
     if (globals == NULL) {
-        Py_DECREF(code);
-        return NULL;
+        if (_PyErr_Occurred(tstate)) {
+            Py_DECREF(code);
+            return NULL;
+        }
+        globals = PyDict_New();
+        if (globals == NULL) {
+            Py_DECREF(code);
+            return NULL;
+        }
     }
-    PyThreadState *tstate = _PyThreadState_GET();
-    if (PyDict_SetItem(globals, &_Py_ID(__builtins__),
-                       tstate->interp->builtins) < 0)
-    {
+    else {
+        Py_INCREF(globals);
+    }
+    if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) {
         Py_DECREF(code);
         Py_DECREF(globals);
         return NULL;
diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c
index 975c28eec10..75b98a04723 100644
--- a/Python/emscripten_trampoline.c
+++ b/Python/emscripten_trampoline.c
@@ -80,7 +80,7 @@ function getPyEMCountArgsPtr() {
         // To differentiate, we check if the platform is 'MacIntel' (common for Macs and newer iPads)
         // AND if the device has multi-touch capabilities (navigator.maxTouchPoints > 1)
         (navigator.platform === 'MacIntel' && typeof navigator.maxTouchPoints !== 'undefined' && navigator.maxTouchPoints > 1)
-    )
+    );
     if (isIOS) {
         return 0;
     }
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index dbfb2391bf0..46fc164a5b3 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -539,6 +539,46 @@
             break;
         }
 
+        case _POP_TOP_NOP: {
+            _PyStackRef value;
+            value = stack_pointer[-1];
+            assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) ||
+                   _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value))));
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_INT: {
+            _PyStackRef value;
+            value = stack_pointer[-1];
+            assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
+            PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc);
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_FLOAT: {
+            _PyStackRef value;
+            value = stack_pointer[-1];
+            assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
+            PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc);
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_UNICODE: {
+            _PyStackRef value;
+            value = stack_pointer[-1];
+            assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
+            PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc);
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
         case _POP_TWO: {
             _PyStackRef tos;
             _PyStackRef nos;
@@ -852,7 +892,7 @@
             _PyStackRef left;
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-            if (!PyLong_CheckExact(left_o)) {
+            if (!_PyLong_CheckExactAndCompact(left_o)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
@@ -863,7 +903,31 @@
             _PyStackRef value;
             value = stack_pointer[-1];
             PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-            if (!PyLong_CheckExact(value_o)) {
+            if (!_PyLong_CheckExactAndCompact(value_o)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            break;
+        }
+
+        case _GUARD_NOS_OVERFLOWED: {
+            _PyStackRef left;
+            left = stack_pointer[-2];
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            assert(Py_TYPE(left_o) == &PyLong_Type);
+            if (!_PyLong_IsCompact((PyLongObject *)left_o)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            break;
+        }
+
+        case _GUARD_TOS_OVERFLOWED: {
+            _PyStackRef value;
+            value = stack_pointer[-1];
+            PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
+            assert(Py_TYPE(value_o) == &PyLong_Type);
+            if (!_PyLong_IsCompact((PyLongObject *)value_o)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
@@ -880,20 +944,15 @@
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(PyLong_CheckExact(left_o));
             assert(PyLong_CheckExact(right_o));
-            if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) {
+            assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+            STAT_INC(BINARY_OP, hit);
+            res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
+            if (PyStackRef_IsNull(res)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            STAT_INC(BINARY_OP, hit);
-            PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
             PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
-            if (res_o == NULL) {
-                stack_pointer += -2;
-                assert(WITHIN_STACK_BOUNDS());
-                JUMP_TO_ERROR();
-            }
-            res = PyStackRef_FromPyObjectSteal(res_o);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -910,20 +969,15 @@
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(PyLong_CheckExact(left_o));
             assert(PyLong_CheckExact(right_o));
-            if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) {
+            assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+            STAT_INC(BINARY_OP, hit);
+            res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
+            if (PyStackRef_IsNull(res)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            STAT_INC(BINARY_OP, hit);
-            PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
             PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
-            if (res_o == NULL) {
-                stack_pointer += -2;
-                assert(WITHIN_STACK_BOUNDS());
-                JUMP_TO_ERROR();
-            }
-            res = PyStackRef_FromPyObjectSteal(res_o);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -940,20 +994,15 @@
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
             assert(PyLong_CheckExact(left_o));
             assert(PyLong_CheckExact(right_o));
-            if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) {
+            assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+            STAT_INC(BINARY_OP, hit);
+            res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
+            if (PyStackRef_IsNull(res)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            STAT_INC(BINARY_OP, hit);
-            PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
             PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
-            if (res_o == NULL) {
-                stack_pointer += -2;
-                assert(WITHIN_STACK_BOUNDS());
-                JUMP_TO_ERROR();
-            }
-            res = PyStackRef_FromPyObjectSteal(res_o);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -1063,6 +1112,87 @@
             break;
         }
 
+        case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: {
+            _PyStackRef right;
+            _PyStackRef left;
+            _PyStackRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
+            STAT_INC(BINARY_OP, hit);
+            double dres =
+            ((PyFloatObject *)left_o)->ob_fval *
+            ((PyFloatObject *)right_o)->ob_fval;
+            res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres));
+            if (PyStackRef_IsNull(res)) {
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                JUMP_TO_ERROR();
+            }
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: {
+            _PyStackRef right;
+            _PyStackRef left;
+            _PyStackRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
+            STAT_INC(BINARY_OP, hit);
+            double dres =
+            ((PyFloatObject *)left_o)->ob_fval +
+            ((PyFloatObject *)right_o)->ob_fval;
+            res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres));
+            if (PyStackRef_IsNull(res)) {
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                JUMP_TO_ERROR();
+            }
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: {
+            _PyStackRef right;
+            _PyStackRef left;
+            _PyStackRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+            PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
+            STAT_INC(BINARY_OP, hit);
+            double dres =
+            ((PyFloatObject *)left_o)->ob_fval -
+            ((PyFloatObject *)right_o)->ob_fval;
+            res = PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(dres));
+            if (PyStackRef_IsNull(res)) {
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                JUMP_TO_ERROR();
+            }
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
         case _BINARY_OP_ADD_UNICODE: {
             _PyStackRef right;
             _PyStackRef left;
@@ -3726,14 +3856,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
-            if (!_PyLong_IsCompact((PyLongObject *)left_o)) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
-            }
-            if (!_PyLong_IsCompact((PyLongObject *)right_o)) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
-            }
+            assert(_PyLong_IsCompact((PyLongObject *)left_o));
+            assert(_PyLong_IsCompact((PyLongObject *)right_o));
             STAT_INC(COMPARE_OP, hit);
             assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 &&
                    _PyLong_DigitCount((PyLongObject *)right_o) <= 1);
@@ -6763,12 +6887,44 @@
             break;
         }
 
+        case _COPY_1: {
+            _PyStackRef bottom;
+            _PyStackRef top;
+            bottom = stack_pointer[-1];
+            top = PyStackRef_DUP(bottom);
+            stack_pointer[0] = top;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _COPY_2: {
+            _PyStackRef bottom;
+            _PyStackRef top;
+            bottom = stack_pointer[-2];
+            top = PyStackRef_DUP(bottom);
+            stack_pointer[0] = top;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _COPY_3: {
+            _PyStackRef bottom;
+            _PyStackRef top;
+            bottom = stack_pointer[-3];
+            top = PyStackRef_DUP(bottom);
+            stack_pointer[0] = top;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
         case _COPY: {
             _PyStackRef bottom;
             _PyStackRef top;
             oparg = CURRENT_OPARG();
             bottom = stack_pointer[-1 - (oparg-1)];
-            assert(oparg > 0);
             top = PyStackRef_DUP(bottom);
             stack_pointer[0] = top;
             stack_pointer += 1;
@@ -6808,6 +6964,32 @@
             break;
         }
 
+        case _SWAP_2: {
+            _PyStackRef top;
+            _PyStackRef bottom;
+            top = stack_pointer[-1];
+            bottom = stack_pointer[-2];
+            _PyStackRef temp = bottom;
+            bottom = top;
+            top = temp;
+            stack_pointer[-2] = bottom;
+            stack_pointer[-1] = top;
+            break;
+        }
+
+        case _SWAP_3: {
+            _PyStackRef top;
+            _PyStackRef bottom;
+            top = stack_pointer[-1];
+            bottom = stack_pointer[-3];
+            _PyStackRef temp = bottom;
+            bottom = top;
+            top = temp;
+            stack_pointer[-3] = bottom;
+            stack_pointer[-1] = top;
+            break;
+        }
+
         case _SWAP: {
             _PyStackRef top;
             _PyStackRef bottom;
@@ -6817,7 +6999,6 @@
             _PyStackRef temp = bottom;
             bottom = top;
             top = temp;
-            assert(oparg >= 2);
             stack_pointer[-2 - (oparg-2)] = bottom;
             stack_pointer[-1] = top;
             break;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 2cf027c539b..8f7932f0033 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -158,7 +158,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -168,7 +168,7 @@
             {
                 left = stack_pointer[-2];
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-                if (!PyLong_CheckExact(left_o)) {
+                if (!_PyLong_CheckExactAndCompact(left_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -182,19 +182,16 @@
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
                 assert(PyLong_CheckExact(left_o));
                 assert(PyLong_CheckExact(right_o));
-                if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) {
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
                 }
-                STAT_INC(BINARY_OP, hit);
-                PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
                 PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
                 PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
-                if (res_o == NULL) {
-                    JUMP_TO_LABEL(pop_2_error);
-                }
-                res = PyStackRef_FromPyObjectSteal(res_o);
             }
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -486,7 +483,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -496,7 +493,7 @@
             {
                 left = stack_pointer[-2];
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-                if (!PyLong_CheckExact(left_o)) {
+                if (!_PyLong_CheckExactAndCompact(left_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -510,19 +507,16 @@
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
                 assert(PyLong_CheckExact(left_o));
                 assert(PyLong_CheckExact(right_o));
-                if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) {
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
                 }
-                STAT_INC(BINARY_OP, hit);
-                PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
                 PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
                 PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
-                if (res_o == NULL) {
-                    JUMP_TO_LABEL(pop_2_error);
-                }
-                res = PyStackRef_FromPyObjectSteal(res_o);
             }
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -700,7 +694,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -862,7 +856,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -940,7 +934,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -1070,7 +1064,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -1080,7 +1074,7 @@
             {
                 left = stack_pointer[-2];
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-                if (!PyLong_CheckExact(left_o)) {
+                if (!_PyLong_CheckExactAndCompact(left_o)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
@@ -1094,19 +1088,16 @@
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
                 assert(PyLong_CheckExact(left_o));
                 assert(PyLong_CheckExact(right_o));
-                if (!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)) {
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res)) {
                     UPDATE_MISS_STATS(BINARY_OP);
                     assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
                     JUMP_TO_PREDICTED(BINARY_OP);
                 }
-                STAT_INC(BINARY_OP, hit);
-                PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
                 PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
                 PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
-                if (res_o == NULL) {
-                    JUMP_TO_LABEL(pop_2_error);
-                }
-                res = PyStackRef_FromPyObjectSteal(res_o);
             }
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -4902,7 +4893,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(COMPARE_OP);
                     assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP));
                     JUMP_TO_PREDICTED(COMPARE_OP);
@@ -4912,7 +4903,7 @@
             {
                 left = stack_pointer[-2];
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
-                if (!PyLong_CheckExact(left_o)) {
+                if (!_PyLong_CheckExactAndCompact(left_o)) {
                     UPDATE_MISS_STATS(COMPARE_OP);
                     assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP));
                     JUMP_TO_PREDICTED(COMPARE_OP);
@@ -4924,16 +4915,8 @@
                 right = value;
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
-                if (!_PyLong_IsCompact((PyLongObject *)left_o)) {
-                    UPDATE_MISS_STATS(COMPARE_OP);
-                    assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP));
-                    JUMP_TO_PREDICTED(COMPARE_OP);
-                }
-                if (!_PyLong_IsCompact((PyLongObject *)right_o)) {
-                    UPDATE_MISS_STATS(COMPARE_OP);
-                    assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP));
-                    JUMP_TO_PREDICTED(COMPARE_OP);
-                }
+                assert(_PyLong_IsCompact((PyLongObject *)left_o));
+                assert(_PyLong_IsCompact((PyLongObject *)right_o));
                 STAT_INC(COMPARE_OP, hit);
                 assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 &&
                    _PyLong_DigitCount((PyLongObject *)right_o) <= 1);
@@ -5228,7 +5211,6 @@
             _PyStackRef bottom;
             _PyStackRef top;
             bottom = stack_pointer[-1 - (oparg-1)];
-            assert(oparg > 0);
             top = PyStackRef_DUP(bottom);
             stack_pointer[0] = top;
             stack_pointer += 1;
@@ -11491,7 +11473,7 @@
             {
                 value = stack_pointer[-1];
                 PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
-                if (!PyLong_CheckExact(value_o)) {
+                if (!_PyLong_CheckExactAndCompact(value_o)) {
                     UPDATE_MISS_STATS(STORE_SUBSCR);
                     assert(_PyOpcode_Deopt[opcode] == (STORE_SUBSCR));
                     JUMP_TO_PREDICTED(STORE_SUBSCR);
@@ -11568,7 +11550,6 @@
             _PyStackRef temp = bottom;
             bottom = top;
             top = temp;
-            assert(oparg >= 2);
             stack_pointer[-2 - (oparg-2)] = bottom;
             stack_pointer[-1] = top;
             DISPATCH();
diff --git a/Python/getversion.c b/Python/getversion.c
index 226b2f999a6..8d8bc6ea700 100644
--- a/Python/getversion.c
+++ b/Python/getversion.c
@@ -15,7 +15,7 @@ void _Py_InitVersion(void)
     }
     initialized = 1;
 #ifdef Py_GIL_DISABLED
-    const char *buildinfo_format = "%.80s experimental free-threading build (%.80s) %.80s";
+    const char *buildinfo_format = "%.80s free-threading build (%.80s) %.80s";
 #else
     const char *buildinfo_format = "%.80s (%.80s) %.80s";
 #endif
diff --git a/Python/import.c b/Python/import.c
index 184dede335d..73b94d0dd2a 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -3960,25 +3960,28 @@ PyImport_Import(PyObject *module_name)
     }
 
     /* Get the builtins from current globals */
-    globals = PyEval_GetGlobals();
+    globals = PyEval_GetGlobals();  // borrowed
     if (globals != NULL) {
         Py_INCREF(globals);
+        // XXX Use _PyEval_EnsureBuiltins()?
         builtins = PyObject_GetItem(globals, &_Py_ID(__builtins__));
         if (builtins == NULL) {
             // XXX Fall back to interp->builtins or sys.modules['builtins']?
             goto err;
         }
     }
+    else if (_PyErr_Occurred(tstate)) {
+        goto err;
+    }
     else {
         /* No globals -- use standard builtins, and fake globals */
-        builtins = PyImport_ImportModuleLevel("builtins",
-                                              NULL, NULL, NULL, 0);
-        if (builtins == NULL) {
+        globals = PyDict_New();
+        if (globals == NULL) {
             goto err;
         }
-        globals = Py_BuildValue("{OO}", &_Py_ID(__builtins__), builtins);
-        if (globals == NULL)
+        if (_PyEval_EnsureBuiltinsWithModule(tstate, globals, &builtins) < 0) {
             goto err;
+        }
     }
 
     /* Get the __import__ function from the builtins */
diff --git a/Python/lock.c b/Python/lock.c
index b125ad0c9e3..ea6ac00bfec 100644
--- a/Python/lock.c
+++ b/Python/lock.c
@@ -58,7 +58,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
             return PY_LOCK_ACQUIRED;
         }
     }
-    else if (timeout == 0) {
+    if (timeout == 0) {
         return PY_LOCK_FAILURE;
     }
 
diff --git a/Python/optimizer.c b/Python/optimizer.c
index dde3dd8ebe7..8d01d605ef4 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1292,8 +1292,8 @@ uop_optimize(
     for (int pc = 0; pc < length; pc++) {
         int opcode = buffer[pc].opcode;
         int oparg = buffer[pc].oparg;
-        if (oparg < _PyUop_Replication[opcode]) {
-            buffer[pc].opcode = opcode + oparg + 1;
+        if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) {
+            buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start;
             assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
         }
         else if (is_terminator(&buffer[pc])) {
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 6a7df233819..fab6fef5ccd 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -26,6 +26,8 @@
 #include "pycore_function.h"
 #include "pycore_uop_ids.h"
 #include "pycore_range.h"
+#include "pycore_unicodeobject.h"
+#include "pycore_ceval.h"
 
 #include <stdarg.h>
 #include <stdbool.h>
@@ -103,6 +105,10 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop)
     if ((int)index >= dict->ma_keys->dk_nentries) {
         return NULL;
     }
+    PyDictKeysObject *keys = dict->ma_keys;
+    if (keys->dk_version != inst->operand0) {
+        return NULL;
+    }
     PyObject *res = entries[index].me_value;
     if (res == NULL) {
         return NULL;
@@ -317,7 +323,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
 #define sym_is_not_null _Py_uop_sym_is_not_null
 #define sym_is_const _Py_uop_sym_is_const
+#define sym_is_safe_const _Py_uop_sym_is_safe_const
 #define sym_get_const _Py_uop_sym_get_const
+#define sym_new_const_steal _Py_uop_sym_new_const_steal
+#define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref
 #define sym_new_unknown _Py_uop_sym_new_unknown
 #define sym_new_not_null _Py_uop_sym_new_not_null
 #define sym_new_type _Py_uop_sym_new_type
@@ -333,6 +342,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
 #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
 #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
+#define sym_set_compact_int(SYM) _Py_uop_sym_set_compact_int(ctx, SYM)
 #define sym_is_bottom _Py_uop_sym_is_bottom
 #define sym_truthiness _Py_uop_sym_truthiness
 #define frame_new _Py_uop_frame_new
@@ -340,15 +350,19 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_new_tuple _Py_uop_sym_new_tuple
 #define sym_tuple_getitem _Py_uop_sym_tuple_getitem
 #define sym_tuple_length _Py_uop_sym_tuple_length
-#define sym_is_immortal _Py_uop_sym_is_immortal
+#define sym_is_immortal _Py_uop_symbol_is_immortal
+#define sym_is_compact_int _Py_uop_sym_is_compact_int
+#define sym_new_compact_int _Py_uop_sym_new_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
 
+#define JUMP_TO_LABEL(label) goto label;
+
 static int
 optimize_to_bool(
     _PyUOpInstruction *this_instr,
     JitOptContext *ctx,
-    JitOptSymbol *value,
-    JitOptSymbol **result_ptr)
+    JitOptRef value,
+    JitOptRef *result_ptr)
 {
     if (sym_matches_type(value, &PyBool_Type)) {
         REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -375,7 +389,7 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
     }
 }
 
-static JitOptSymbol *
+static JitOptRef
 lookup_attr(JitOptContext *ctx, _PyUOpInstruction *this_instr,
             PyTypeObject *type, PyObject *name, uint16_t immortal,
             uint16_t mortal)
@@ -440,6 +454,13 @@ get_code_with_logging(_PyUOpInstruction *op)
     return co;
 }
 
+// TODO (gh-134584) generate most of this table automatically
+const uint16_t op_without_decref_inputs[MAX_UOP_ID + 1] = {  // uop id -> its __NO_DECREF_INPUTS variant
+    [_BINARY_OP_MULTIPLY_FLOAT] = _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS,
+    [_BINARY_OP_ADD_FLOAT] = _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS,
+    [_BINARY_OP_SUBTRACT_FLOAT] = _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS,
+};  // uops not listed above are left zero-initialized
+
 /* 1 for success, 0 for not ready, cannot error at the moment. */
 static int
 optimize_uops(
@@ -477,7 +498,7 @@ optimize_uops(
 
         int oparg = this_instr->oparg;
         opcode = this_instr->opcode;
-        JitOptSymbol **stack_pointer = ctx->frame->stack_pointer;
+        JitOptRef *stack_pointer = ctx->frame->stack_pointer;
 
 #ifdef Py_DEBUG
         if (get_lltrace() >= 3) {
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index c2469547d77..3182e8b3b70 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -27,13 +27,16 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
 #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
 #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
+#define sym_set_compact_int(SYM) _Py_uop_sym_set_compact_int(ctx, SYM)
 #define sym_is_bottom _Py_uop_sym_is_bottom
 #define frame_new _Py_uop_frame_new
 #define frame_pop _Py_uop_frame_pop
 #define sym_new_tuple _Py_uop_sym_new_tuple
 #define sym_tuple_getitem _Py_uop_sym_tuple_getitem
 #define sym_tuple_length _Py_uop_sym_tuple_length
-#define sym_is_immortal _Py_uop_sym_is_immortal
+#define sym_is_immortal _Py_uop_symbol_is_immortal
+#define sym_new_compact_int _Py_uop_sym_new_compact_int
+#define sym_is_compact_int _Py_uop_sym_is_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
 
 extern int
@@ -87,12 +90,12 @@ dummy_func(void) {
     }
 
     op(_LOAD_FAST_BORROW, (-- value)) {
-        value = GETLOCAL(oparg);
+        value = PyJitRef_Borrow(GETLOCAL(oparg));
     }
 
     op(_LOAD_FAST_AND_CLEAR, (-- value)) {
         value = GETLOCAL(oparg);
-        JitOptSymbol *temp = sym_new_null(ctx);
+        JitOptRef temp = sym_new_null(ctx);
         GETLOCAL(oparg) = temp;
     }
 
@@ -105,17 +108,27 @@ dummy_func(void) {
     }
 
     op(_GUARD_TOS_INT, (value -- value)) {
-        if (sym_matches_type(value, &PyLong_Type)) {
+        if (sym_is_compact_int(value)) {  // already known compact: the guard becomes a _NOP
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(value, &PyLong_Type);
+        else {
+            if (sym_get_type(value) == &PyLong_Type) {  // int type known, compactness not yet
+                REPLACE_OP(this_instr, _GUARD_TOS_OVERFLOWED, 0, 0);
+            }
+            sym_set_compact_int(value);  // past the guard, value is recorded as a compact int
+        }
     }
 
     op(_GUARD_NOS_INT, (left, unused -- left, unused)) {
-        if (sym_matches_type(left, &PyLong_Type)) {
+        if (sym_is_compact_int(left)) {  // already known compact: the guard becomes a _NOP
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(left, &PyLong_Type);
+        else {
+            if (sym_get_type(left) == &PyLong_Type) {  // int type known, compactness not yet
+                REPLACE_OP(this_instr, _GUARD_NOS_OVERFLOWED, 0, 0);
+            }
+            sym_set_compact_int(left);  // past the guard, left is recorded as a compact int
+        }
     }
 
     op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) {
@@ -168,6 +181,7 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP, (lhs, rhs -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs);
         bool lhs_int = sym_matches_type(lhs, &PyLong_Type);
         bool rhs_int = sym_matches_type(rhs, &PyLong_Type);
         bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);
@@ -222,95 +236,54 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_ADD_INT, (left, right -- res)) {
-        res = sym_new_type(ctx, &PyLong_Type);
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_compact_int(ctx);
     }
 
     op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
-        res = sym_new_type(ctx, &PyLong_Type);
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_compact_int(ctx);
     }
 
     op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
-        res = sym_new_type(ctx, &PyLong_Type);
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_compact_int(ctx);
     }
 
     op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-            assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyFloat_FromDouble(
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) +
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-            // TODO gh-115506:
-            // replace opcode with constant propagated one and update tests!
-        }
-        else {
-            res = sym_new_type(ctx, &PyFloat_Type);
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyFloat_Type);
+        // TODO (gh-134584): Refactor this to use another uop
+        if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
+            REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
         }
     }
 
     op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-            assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyFloat_FromDouble(
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) -
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-            // TODO gh-115506:
-            // replace opcode with constant propagated one and update tests!
-        }
-        else {
-            res = sym_new_type(ctx, &PyFloat_Type);
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyFloat_Type);
+        // TODO (gh-134584): Refactor this to use another uop
+        if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
+            REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
         }
     }
 
     op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-            assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyFloat_FromDouble(
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) *
-                PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-            // TODO gh-115506:
-            // replace opcode with constant propagated one and update tests!
-        }
-        else {
-            res = sym_new_type(ctx, &PyFloat_Type);
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyFloat_Type);
+        // TODO (gh-134584): Refactor this to use another uop
+        if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
+            REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
         }
     }
 
     op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
-        if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-            assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
-            assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
-            PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right));
-            if (temp == NULL) {
-                goto error;
-            }
-            res = sym_new_const(ctx, temp);
-            Py_DECREF(temp);
-        }
-        else {
-            res = sym_new_type(ctx, &PyUnicode_Type);
-        }
+        REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
+        res = sym_new_type(ctx, &PyUnicode_Type);
     }
 
     op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) {
-        JitOptSymbol *res;
+        JitOptRef res;
         if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
             assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
             assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
@@ -329,7 +302,7 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem  -- new_frame)) {
-        new_frame = NULL;
+        new_frame = PyJitRef_NULL;
         ctx->done = true;
     }
 
@@ -418,10 +391,26 @@ dummy_func(void) {
     }
 
     op(_UNARY_NOT, (value -- res)) {
+        REPLACE_OPCODE_IF_EVALUATES_PURE(value);
         sym_set_type(value, &PyBool_Type);
         res = sym_new_truthiness(ctx, value, false);
     }
 
+    op(_UNARY_NEGATIVE, (value -- res)) {
+        if (sym_is_compact_int(value)) {
+            res = sym_new_compact_int(ctx);  // compact int in -> compact int out
+        }
+        else {
+            PyTypeObject *type = sym_get_type(value);
+            if (type == &PyLong_Type || type == &PyFloat_Type) {
+                res = sym_new_type(ctx, type);  // known int/float type carries over to the result
+            }
+            else {
+                res = sym_new_not_null(ctx);  // otherwise only non-NULL is recorded
+            }
+        }
+    }
+
     op(_UNARY_INVERT, (value -- res)) {
         if (sym_matches_type(value, &PyLong_Type)) {
             res = sym_new_type(ctx, &PyLong_Type);
@@ -488,7 +477,7 @@ dummy_func(void) {
     op(_LOAD_CONST, (-- value)) {
         PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg);
         REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val);
-        value = sym_new_const(ctx, val);
+        value = PyJitRef_Borrow(sym_new_const(ctx, val));
     }
 
     op(_LOAD_SMALL_INT, (-- value)) {
@@ -496,7 +485,7 @@ dummy_func(void) {
         assert(val);
         assert(_Py_IsImmortal(val));
         REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val);
-        value = sym_new_const(ctx, val);
+        value = PyJitRef_Borrow(sym_new_const(ctx, val));
     }
 
     op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
@@ -504,7 +493,7 @@ dummy_func(void) {
     }
 
     op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
-        value = sym_new_const(ctx, ptr);
+        value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
     }
 
     op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) {
@@ -512,19 +501,37 @@ dummy_func(void) {
     }
 
     op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) {
-        value = sym_new_const(ctx, ptr);
+        value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
     }
 
     op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused -- value)) {
-        value = sym_new_const(ctx, ptr);
+        value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
     }
 
     op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused -- value)) {
-        value = sym_new_const(ctx, ptr);
+        value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
     }
 
     op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) {
-        value = sym_new_const(ctx, ptr);
+        value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
+    }
+
+    op(_POP_TOP, (value -- )) {
+        PyTypeObject *typ = sym_get_type(value);
+        if (PyJitRef_IsBorrowed(value) ||
+            sym_is_immortal(PyJitRef_Unwrap(value)) ||
+            sym_is_null(value)) {
+            REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0);  // borrowed, immortal or null value
+        }
+        else if (typ == &PyLong_Type) {
+            REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0);  // otherwise pick the pop for the known type
+        }
+        else if (typ == &PyFloat_Type) {
+            REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0);
+        }
+        else if (typ == &PyUnicode_Type) {
+            REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0);
+        }  // any other case: the instruction is left as _POP_TOP
+    }
 
     op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
@@ -533,7 +540,7 @@ dummy_func(void) {
     }
 
     op(_SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) {
-        JitOptSymbol *temp = bottom;
+        JitOptRef temp = bottom;
         bottom = top;
         top = temp;
         assert(oparg >= 2);
@@ -547,7 +554,7 @@ dummy_func(void) {
     op(_LOAD_ATTR_MODULE, (dict_version/2, index/1, owner -- attr)) {
         (void)dict_version;
         (void)index;
-        attr = NULL;
+        attr = PyJitRef_NULL;
         if (sym_is_const(ctx, owner)) {
             PyModuleObject *mod = (PyModuleObject *)sym_get_const(ctx, owner);
             if (PyModule_CheckExact(mod)) {
@@ -557,11 +564,17 @@ dummy_func(void) {
                     PyDict_Watch(GLOBALS_WATCHER_ID, dict);
                     _Py_BloomFilter_Add(dependencies, dict);
                     PyObject *res = convert_global_to_const(this_instr, dict, true);
-                    attr = sym_new_const(ctx, res);
+                    if (res == NULL) {
+                        attr = sym_new_not_null(ctx);
+                    }
+                    else {
+                        attr = sym_new_const(ctx, res);
+                    }
+
                 }
             }
         }
-        if (attr == NULL) {
+        if (PyJitRef_IsNull(attr)) {
             /* No conversion made. We don't know what `attr` is. */
             attr = sym_new_not_null(ctx);
         }
@@ -654,7 +667,7 @@ dummy_func(void) {
 
     op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) {
         (void)fget;
-        new_frame = NULL;
+        new_frame = PyJitRef_NULL;
         ctx->done = true;
     }
 
@@ -672,6 +685,16 @@ dummy_func(void) {
         sym_set_type(callable, &PyFunction_Type);
     }
 
+    op(_CHECK_METHOD_VERSION, (func_version/2, callable, null, unused[oparg] -- callable, null, unused[oparg])) {
+        if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) {
+            PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable);
+            assert(PyMethod_Check(method));  // callable is a known constant method object
+            REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
+            this_instr->operand1 = (uintptr_t)method->im_func;  // operand1 carries the method's im_func
+        }
+        sym_set_type(callable, &PyMethod_Type);  // recorded whether or not the op was replaced
+    }
+
     op(_CHECK_FUNCTION_EXACT_ARGS, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
         assert(sym_matches_type(callable, &PyFunction_Type));
         if (sym_is_const(ctx, callable)) {
@@ -702,7 +725,7 @@ dummy_func(void) {
         }
 
 
-        assert(self_or_null != NULL);
+        assert(!PyJitRef_IsNull(self_or_null));
         assert(args != NULL);
         if (sym_is_not_null(self_or_null)) {
             // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM
@@ -711,9 +734,9 @@ dummy_func(void) {
         }
 
         if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-            new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, args, argcount);
+            new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args, argcount));
         } else {
-            new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0);
+            new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
         }
     }
 
@@ -732,11 +755,11 @@ dummy_func(void) {
             break;
         }
 
-        new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0);
+        new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
     }
 
     op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) {
-        new_frame = NULL;
+        new_frame = PyJitRef_NULL;
         ctx->done = true;
     }
 
@@ -748,12 +771,14 @@ dummy_func(void) {
     }
 
     op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) {
-        init_frame = NULL;
+        init_frame = PyJitRef_NULL;
         ctx->done = true;
     }
 
     op(_RETURN_VALUE, (retval -- res)) {
-        JitOptSymbol *temp = retval;
+        // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe
+        // in bytecodes.c
+        JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval));
         DEAD(retval);
         SAVE_STACK();
         ctx->frame->stack_pointer = stack_pointer;
@@ -815,13 +840,13 @@ dummy_func(void) {
     }
 
     op(_FOR_ITER_GEN_FRAME, (unused, unused -- unused, unused, gen_frame)) {
-        gen_frame = NULL;
+        gen_frame = PyJitRef_NULL;
         /* We are about to hit the end of the trace */
         ctx->done = true;
     }
 
     op(_SEND_GEN_FRAME, (unused, unused -- unused, gen_frame)) {
-        gen_frame = NULL;
+        gen_frame = PyJitRef_NULL;
         // We are about to hit the end of the trace:
         ctx->done = true;
     }
@@ -841,7 +866,7 @@ dummy_func(void) {
     op(_PUSH_FRAME, (new_frame -- )) {
         SYNC_SP();
         ctx->frame->stack_pointer = stack_pointer;
-        ctx->frame = (_Py_UOpsAbstractFrame *)new_frame;
+        ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
         ctx->curr_frame_depth++;
         stack_pointer = ctx->frame->stack_pointer;
         co = get_code(this_instr);
@@ -1153,8 +1178,21 @@ dummy_func(void) {
         sym_set_const(callable, (PyObject *)&PyUnicode_Type);
     }
 
-    op(_CALL_LEN, (unused, unused, unused -- res)) {
+    op(_CALL_LEN, (callable, null, arg -- res)) {
         res = sym_new_type(ctx, &PyLong_Type);
+        int tuple_length = sym_tuple_length(arg);
+        if (tuple_length >= 0) {
+            PyObject *temp = PyLong_FromLong(tuple_length);
+            if (temp == NULL) {
+                goto error;
+            }
+            if (_Py_IsImmortal(temp)) {
+                REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW,
+                           0, (uintptr_t)temp);
+            }
+            res = sym_new_const(ctx, temp);
+            Py_DECREF(temp);
+        }
     }
 
     op(_GET_LEN, (obj -- obj, len)) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index d9313be0bb0..8d30df3aa7d 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -26,7 +26,7 @@
         /* _MONITOR_RESUME is not a viable micro-op for tier 2 */
 
         case _LOAD_FAST_CHECK: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = GETLOCAL(oparg);
             if (sym_is_null(value)) {
                 ctx->done = true;
@@ -38,7 +38,7 @@
         }
 
         case _LOAD_FAST: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = GETLOCAL(oparg);
             stack_pointer[0] = value;
             stack_pointer += 1;
@@ -47,8 +47,8 @@
         }
 
         case _LOAD_FAST_BORROW: {
-            JitOptSymbol *value;
-            value = GETLOCAL(oparg);
+            JitOptRef value;
+            value = PyJitRef_Borrow(GETLOCAL(oparg));
             stack_pointer[0] = value;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
@@ -56,9 +56,9 @@
         }
 
         case _LOAD_FAST_AND_CLEAR: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = GETLOCAL(oparg);
-            JitOptSymbol *temp = sym_new_null(ctx);
+            JitOptRef temp = sym_new_null(ctx);
             GETLOCAL(oparg) = temp;
             stack_pointer[0] = value;
             stack_pointer += 1;
@@ -67,10 +67,10 @@
         }
 
         case _LOAD_CONST: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg);
             REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val);
-            value = sym_new_const(ctx, val);
+            value = PyJitRef_Borrow(sym_new_const(ctx, val));
             stack_pointer[0] = value;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
@@ -78,12 +78,12 @@
         }
 
         case _LOAD_SMALL_INT: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *val = PyLong_FromLong(oparg);
             assert(val);
             assert(_Py_IsImmortal(val));
             REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val);
-            value = sym_new_const(ctx, val);
+            value = PyJitRef_Borrow(sym_new_const(ctx, val));
             stack_pointer[0] = value;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
@@ -91,7 +91,7 @@
         }
 
         case _STORE_FAST: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = stack_pointer[-1];
             GETLOCAL(oparg) = value;
             stack_pointer += -1;
@@ -100,6 +100,47 @@
         }
 
         case _POP_TOP: {
+            JitOptRef value;
+            value = stack_pointer[-1];
+            PyTypeObject *typ = sym_get_type(value);
+            if (PyJitRef_IsBorrowed(value) ||
+                sym_is_immortal(PyJitRef_Unwrap(value)) ||
+                sym_is_null(value)) {
+                REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0);
+            }
+            else if (typ == &PyLong_Type) {
+                REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0);
+            }
+            else if (typ == &PyFloat_Type) {
+                REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0);
+            }
+            else if (typ == &PyUnicode_Type) {
+                REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0);
+            }
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_NOP: {
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_INT: {
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_FLOAT: {
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _POP_TOP_UNICODE: {
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             break;
@@ -112,7 +153,7 @@
         }
 
         case _PUSH_NULL: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -133,7 +174,7 @@
         }
 
         case _END_SEND: {
-            JitOptSymbol *val;
+            JitOptRef val;
             val = sym_new_not_null(ctx);
             stack_pointer[-2] = val;
             stack_pointer += -1;
@@ -142,16 +183,44 @@
         }
 
         case _UNARY_NEGATIVE: {
-            JitOptSymbol *res;
-            res = sym_new_not_null(ctx);
+            JitOptRef value;
+            JitOptRef res;
+            value = stack_pointer[-1];
+            if (sym_is_compact_int(value)) {
+                res = sym_new_compact_int(ctx);
+            }
+            else {
+                PyTypeObject *type = sym_get_type(value);
+                if (type == &PyLong_Type || type == &PyFloat_Type) {
+                    res = sym_new_type(ctx, type);
+                }
+                else {
+                    res = sym_new_not_null(ctx);
+                }
+            }
             stack_pointer[-1] = res;
             break;
         }
 
         case _UNARY_NOT: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
+            if (
+                sym_is_safe_const(ctx, value)
+            ) {
+                JitOptRef value_sym = value;
+                _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                assert(PyStackRef_BoolCheck(value));
+                res_stackref = PyStackRef_IsFalse(value)
+                ? PyStackRef_True : PyStackRef_False;
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-1] = res;
+                break;
+            }
             sym_set_type(value, &PyBool_Type);
             res = sym_new_truthiness(ctx, value, false);
             stack_pointer[-1] = res;
@@ -159,8 +228,8 @@
         }
 
         case _TO_BOOL: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
             int already_bool = optimize_to_bool(this_instr, ctx, value, &res);
             if (!already_bool) {
@@ -171,7 +240,7 @@
         }
 
         case _TO_BOOL_BOOL: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = stack_pointer[-1];
             int already_bool = optimize_to_bool(this_instr, ctx, value, &value);
             if (!already_bool) {
@@ -183,8 +252,8 @@
         }
 
         case _TO_BOOL_INT: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
             int already_bool = optimize_to_bool(this_instr, ctx, value, &res);
             if (!already_bool) {
@@ -196,7 +265,7 @@
         }
 
         case _GUARD_NOS_LIST: {
-            JitOptSymbol *nos;
+            JitOptRef nos;
             nos = stack_pointer[-2];
             if (sym_matches_type(nos, &PyList_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -206,7 +275,7 @@
         }
 
         case _GUARD_TOS_LIST: {
-            JitOptSymbol *tos;
+            JitOptRef tos;
             tos = stack_pointer[-1];
             if (sym_matches_type(tos, &PyList_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -220,8 +289,8 @@
         }
 
         case _TO_BOOL_LIST: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
             int already_bool = optimize_to_bool(this_instr, ctx, value, &res);
             if (!already_bool) {
@@ -232,8 +301,8 @@
         }
 
         case _TO_BOOL_NONE: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
             int already_bool = optimize_to_bool(this_instr, ctx, value, &res);
             if (!already_bool) {
@@ -245,7 +314,7 @@
         }
 
         case _GUARD_NOS_UNICODE: {
-            JitOptSymbol *nos;
+            JitOptRef nos;
             nos = stack_pointer[-2];
             if (sym_matches_type(nos, &PyUnicode_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -255,7 +324,7 @@
         }
 
         case _GUARD_TOS_UNICODE: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = stack_pointer[-1];
             if (sym_matches_type(value, &PyUnicode_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -265,8 +334,8 @@
         }
 
         case _TO_BOOL_STR: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
             int already_bool = optimize_to_bool(this_instr, ctx, value, &res);
             if (!already_bool) {
@@ -277,7 +346,7 @@
         }
 
         case _REPLACE_WITH_TRUE: {
-            JitOptSymbol *res;
+            JitOptRef res;
             REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True);
             res = sym_new_const(ctx, Py_True);
             stack_pointer[-1] = res;
@@ -285,8 +354,8 @@
         }
 
         case _UNARY_INVERT: {
-            JitOptSymbol *value;
-            JitOptSymbol *res;
+            JitOptRef value;
+            JitOptRef res;
             value = stack_pointer[-1];
             if (sym_matches_type(value, &PyLong_Type)) {
                 res = sym_new_type(ctx, &PyLong_Type);
@@ -299,28 +368,80 @@
         }
 
         case _GUARD_NOS_INT: {
-            JitOptSymbol *left;
+            JitOptRef left;
             left = stack_pointer[-2];
-            if (sym_matches_type(left, &PyLong_Type)) {
+            if (sym_is_compact_int(left)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
             }
-            sym_set_type(left, &PyLong_Type);
+            else {
+                if (sym_get_type(left) == &PyLong_Type) {
+                    REPLACE_OP(this_instr, _GUARD_NOS_OVERFLOWED, 0, 0);
+                }
+                sym_set_compact_int(left);
+            }
             break;
         }
 
         case _GUARD_TOS_INT: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = stack_pointer[-1];
-            if (sym_matches_type(value, &PyLong_Type)) {
+            if (sym_is_compact_int(value)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
             }
-            sym_set_type(value, &PyLong_Type);
+            else {
+                if (sym_get_type(value) == &PyLong_Type) {
+                    REPLACE_OP(this_instr, _GUARD_TOS_OVERFLOWED, 0, 0);
+                }
+                sym_set_compact_int(value);
+            }
+            break;
+        }
+
+        case _GUARD_NOS_OVERFLOWED: {
+            break;
+        }
+
+        case _GUARD_TOS_OVERFLOWED: {
             break;
         }
 
         case _BINARY_OP_MULTIPLY_INT: {
-            JitOptSymbol *res;
-            res = sym_new_type(ctx, &PyLong_Type);
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res_stackref )) {
+                    ctx->done = true;
+                    break;
+                }
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                break;
+            }
+            res = sym_new_compact_int(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -328,8 +449,42 @@
         }
 
         case _BINARY_OP_ADD_INT: {
-            JitOptSymbol *res;
-            res = sym_new_type(ctx, &PyLong_Type);
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res_stackref )) {
+                    ctx->done = true;
+                    break;
+                }
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                break;
+            }
+            res = sym_new_compact_int(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -337,8 +492,42 @@
         }
 
         case _BINARY_OP_SUBTRACT_INT: {
-            JitOptSymbol *res;
-            res = sym_new_type(ctx, &PyLong_Type);
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
+            right = stack_pointer[-1];
+            left = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
+                assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
+                STAT_INC(BINARY_OP, hit);
+                res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
+                if (PyStackRef_IsNull(res_stackref )) {
+                    ctx->done = true;
+                    break;
+                }
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                break;
+            }
+            res = sym_new_compact_int(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -346,7 +535,7 @@
         }
 
         case _GUARD_NOS_FLOAT: {
-            JitOptSymbol *left;
+            JitOptRef left;
             left = stack_pointer[-2];
             if (sym_matches_type(left, &PyFloat_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -356,7 +545,7 @@
         }
 
         case _GUARD_TOS_FLOAT: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = stack_pointer[-1];
             if (sym_matches_type(value, &PyFloat_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -366,125 +555,215 @@
         }
 
         case _BINARY_OP_MULTIPLY_FLOAT: {
-            JitOptSymbol *right;
-            JitOptSymbol *left;
-            JitOptSymbol *res;
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-                assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyFloat_FromDouble(
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) *
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                double dres =
+                ((PyFloatObject *)left_o)->ob_fval *
+                ((PyFloatObject *)right_o)->ob_fval;
+                res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres);
+                if (PyStackRef_IsNull(res_stackref )) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
+                break;
             }
-            else {
-                res = sym_new_type(ctx, &PyFloat_Type);
-                stack_pointer += -1;
+            res = sym_new_type(ctx, &PyFloat_Type);
+            if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
+                REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
             }
-            stack_pointer[-1] = res;
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         case _BINARY_OP_ADD_FLOAT: {
-            JitOptSymbol *right;
-            JitOptSymbol *left;
-            JitOptSymbol *res;
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-                assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyFloat_FromDouble(
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) +
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                double dres =
+                ((PyFloatObject *)left_o)->ob_fval +
+                ((PyFloatObject *)right_o)->ob_fval;
+                res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres);
+                if (PyStackRef_IsNull(res_stackref )) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
+                break;
             }
-            else {
-                res = sym_new_type(ctx, &PyFloat_Type);
-                stack_pointer += -1;
+            res = sym_new_type(ctx, &PyFloat_Type);
+            if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
+                REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
             }
-            stack_pointer[-1] = res;
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         case _BINARY_OP_SUBTRACT_FLOAT: {
-            JitOptSymbol *right;
-            JitOptSymbol *left;
-            JitOptSymbol *res;
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyFloat_CheckExact(sym_get_const(ctx, left)));
-                assert(PyFloat_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyFloat_FromDouble(
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) -
-                    PyFloat_AS_DOUBLE(sym_get_const(ctx, right)));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                double dres =
+                ((PyFloatObject *)left_o)->ob_fval -
+                ((PyFloatObject *)right_o)->ob_fval;
+                res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres);
+                if (PyStackRef_IsNull(res_stackref )) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
+                break;
             }
-            else {
-                res = sym_new_type(ctx, &PyFloat_Type);
-                stack_pointer += -1;
+            res = sym_new_type(ctx, &PyFloat_Type);
+            if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) {
+                REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0);
             }
-            stack_pointer[-1] = res;
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: {
+            JitOptRef res;
+            res = sym_new_not_null(ctx);
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: {
+            JitOptRef res;
+            res = sym_new_not_null(ctx);
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            break;
+        }
+
+        case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: {
+            JitOptRef res;
+            res = sym_new_not_null(ctx);
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         case _BINARY_OP_ADD_UNICODE: {
-            JitOptSymbol *right;
-            JitOptSymbol *left;
-            JitOptSymbol *res;
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
-                assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
-                assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
-                PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right));
-                if (temp == NULL) {
+            if (
+                sym_is_safe_const(ctx, left) &&
+                sym_is_safe_const(ctx, right)
+            ) {
+                JitOptRef left_sym = left;
+                JitOptRef right_sym = right;
+                _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
+                _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
+                PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyUnicode_CheckExact(left_o));
+                assert(PyUnicode_CheckExact(right_o));
+                STAT_INC(BINARY_OP, hit);
+                PyObject *res_o = PyUnicode_Concat(left_o, right_o);
+                PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc);
+                PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
+                if (res_o == NULL) {
                     goto error;
                 }
-                res = sym_new_const(ctx, temp);
+                res_stackref = PyStackRef_FromPyObjectSteal(res_o);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
                 stack_pointer[-2] = res;
                 stack_pointer += -1;
                 assert(WITHIN_STACK_BOUNDS());
-                Py_DECREF(temp);
-            }
-            else {
-                res = sym_new_type(ctx, &PyUnicode_Type);
-                stack_pointer += -1;
+                break;
             }
-            stack_pointer[-1] = res;
+            res = sym_new_type(ctx, &PyUnicode_Type);
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         case _BINARY_OP_INPLACE_ADD_UNICODE: {
-            JitOptSymbol *right;
-            JitOptSymbol *left;
+            JitOptRef right;
+            JitOptRef left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            JitOptSymbol *res;
+            JitOptRef res;
             if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
                 assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
                 assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
@@ -509,7 +788,7 @@
         }
 
         case _BINARY_OP_EXTEND: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -518,8 +797,8 @@
         }
 
         case _BINARY_SLICE: {
-            JitOptSymbol *container;
-            JitOptSymbol *res;
+            JitOptRef container;
+            JitOptRef res;
             container = stack_pointer[-3];
             PyTypeObject *type = sym_get_type(container);
             if (type == &PyUnicode_Type ||
@@ -544,7 +823,7 @@
         }
 
         case _BINARY_OP_SUBSCR_LIST_INT: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -553,7 +832,7 @@
         }
 
         case _BINARY_OP_SUBSCR_LIST_SLICE: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -562,7 +841,7 @@
         }
 
         case _BINARY_OP_SUBSCR_STR_INT: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_type(ctx, &PyUnicode_Type);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -571,7 +850,7 @@
         }
 
         case _GUARD_NOS_TUPLE: {
-            JitOptSymbol *nos;
+            JitOptRef nos;
             nos = stack_pointer[-2];
             if (sym_matches_type(nos, &PyTuple_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -581,7 +860,7 @@
         }
 
         case _GUARD_TOS_TUPLE: {
-            JitOptSymbol *tos;
+            JitOptRef tos;
             tos = stack_pointer[-1];
             if (sym_matches_type(tos, &PyTuple_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -591,9 +870,9 @@
         }
 
         case _BINARY_OP_SUBSCR_TUPLE_INT: {
-            JitOptSymbol *sub_st;
-            JitOptSymbol *tuple_st;
-            JitOptSymbol *res;
+            JitOptRef sub_st;
+            JitOptRef tuple_st;
+            JitOptRef res;
             sub_st = stack_pointer[-1];
             tuple_st = stack_pointer[-2];
             assert(sym_matches_type(tuple_st, &PyTuple_Type));
@@ -620,7 +899,7 @@
         }
 
         case _GUARD_NOS_DICT: {
-            JitOptSymbol *nos;
+            JitOptRef nos;
             nos = stack_pointer[-2];
             if (sym_matches_type(nos, &PyDict_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -630,7 +909,7 @@
         }
 
         case _GUARD_TOS_DICT: {
-            JitOptSymbol *tos;
+            JitOptRef tos;
             tos = stack_pointer[-1];
             if (sym_matches_type(tos, &PyDict_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -640,7 +919,7 @@
         }
 
         case _BINARY_OP_SUBSCR_DICT: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -649,7 +928,7 @@
         }
 
         case _BINARY_OP_SUBSCR_CHECK_FUNC: {
-            JitOptSymbol *getitem;
+            JitOptRef getitem;
             getitem = sym_new_not_null(ctx);
             stack_pointer[0] = getitem;
             stack_pointer += 1;
@@ -658,8 +937,8 @@
         }
 
         case _BINARY_OP_SUBSCR_INIT_CALL: {
-            JitOptSymbol *new_frame;
-            new_frame = NULL;
+            JitOptRef new_frame;
+            new_frame = PyJitRef_NULL;
             ctx->done = true;
             stack_pointer[-3] = new_frame;
             stack_pointer += -2;
@@ -704,14 +983,14 @@
         }
 
         case _CALL_INTRINSIC_1: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-1] = res;
             break;
         }
 
         case _CALL_INTRINSIC_2: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -720,10 +999,10 @@
         }
 
         case _RETURN_VALUE: {
-            JitOptSymbol *retval;
-            JitOptSymbol *res;
+            JitOptRef retval;
+            JitOptRef res;
             retval = stack_pointer[-1];
-            JitOptSymbol *temp = retval;
+            JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval));
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             ctx->frame->stack_pointer = stack_pointer;
@@ -747,14 +1026,14 @@
         }
 
         case _GET_AITER: {
-            JitOptSymbol *iter;
+            JitOptRef iter;
             iter = sym_new_not_null(ctx);
             stack_pointer[-1] = iter;
             break;
         }
 
         case _GET_ANEXT: {
-            JitOptSymbol *awaitable;
+            JitOptRef awaitable;
             awaitable = sym_new_not_null(ctx);
             stack_pointer[0] = awaitable;
             stack_pointer += 1;
@@ -763,7 +1042,7 @@
         }
 
         case _GET_AWAITABLE: {
-            JitOptSymbol *iter;
+            JitOptRef iter;
             iter = sym_new_not_null(ctx);
             stack_pointer[-1] = iter;
             break;
@@ -772,15 +1051,15 @@
         /* _SEND is not a viable micro-op for tier 2 */
 
         case _SEND_GEN_FRAME: {
-            JitOptSymbol *gen_frame;
-            gen_frame = NULL;
+            JitOptRef gen_frame;
+            gen_frame = PyJitRef_NULL;
             ctx->done = true;
             stack_pointer[-1] = gen_frame;
             break;
         }
 
         case _YIELD_VALUE: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = sym_new_unknown(ctx);
             stack_pointer[-1] = value;
             break;
@@ -793,7 +1072,7 @@
         }
 
         case _LOAD_COMMON_CONSTANT: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = sym_new_not_null(ctx);
             stack_pointer[0] = value;
             stack_pointer += 1;
@@ -802,7 +1081,7 @@
         }
 
         case _LOAD_BUILD_CLASS: {
-            JitOptSymbol *bc;
+            JitOptRef bc;
             bc = sym_new_not_null(ctx);
             stack_pointer[0] = bc;
             stack_pointer += 1;
@@ -821,8 +1100,8 @@
         }
 
         case _UNPACK_SEQUENCE: {
-            JitOptSymbol **values;
-            JitOptSymbol **top;
+            JitOptRef *values;
+            JitOptRef *top;
             values = &stack_pointer[-1];
             top = &stack_pointer[-1 + oparg];
             (void)top;
@@ -835,9 +1114,9 @@
         }
 
         case _UNPACK_SEQUENCE_TWO_TUPLE: {
-            JitOptSymbol *seq;
-            JitOptSymbol *val1;
-            JitOptSymbol *val0;
+            JitOptRef seq;
+            JitOptRef val1;
+            JitOptRef val0;
             seq = stack_pointer[-1];
             val0 = sym_tuple_getitem(ctx, seq, 0);
             val1 = sym_tuple_getitem(ctx, seq, 1);
@@ -849,8 +1128,8 @@
         }
 
         case _UNPACK_SEQUENCE_TUPLE: {
-            JitOptSymbol *seq;
-            JitOptSymbol **values;
+            JitOptRef seq;
+            JitOptRef *values;
             seq = stack_pointer[-1];
             values = &stack_pointer[-1];
             for (int i = 0; i < oparg; i++) {
@@ -862,7 +1141,7 @@
         }
 
         case _UNPACK_SEQUENCE_LIST: {
-            JitOptSymbol **values;
+            JitOptRef *values;
             values = &stack_pointer[-1];
             for (int _i = oparg; --_i >= 0;) {
                 values[_i] = sym_new_not_null(ctx);
@@ -873,8 +1152,8 @@
         }
 
         case _UNPACK_EX: {
-            JitOptSymbol **values;
-            JitOptSymbol **top;
+            JitOptRef *values;
+            JitOptRef *top;
             values = &stack_pointer[-1];
             top = &stack_pointer[(oparg & 0xFF) + (oparg >> 8)];
             (void)top;
@@ -910,7 +1189,7 @@
         }
 
         case _LOAD_LOCALS: {
-            JitOptSymbol *locals;
+            JitOptRef locals;
             locals = sym_new_not_null(ctx);
             stack_pointer[0] = locals;
             stack_pointer += 1;
@@ -921,7 +1200,7 @@
         /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 */
 
         case _LOAD_NAME: {
-            JitOptSymbol *v;
+            JitOptRef v;
             v = sym_new_not_null(ctx);
             stack_pointer[0] = v;
             stack_pointer += 1;
@@ -930,7 +1209,7 @@
         }
 
         case _LOAD_GLOBAL: {
-            JitOptSymbol **res;
+            JitOptRef *res;
             res = &stack_pointer[0];
             res[0] = sym_new_not_null(ctx);
             stack_pointer += 1;
@@ -939,7 +1218,7 @@
         }
 
         case _PUSH_NULL_CONDITIONAL: {
-            JitOptSymbol **null;
+            JitOptRef *null;
             null = &stack_pointer[0];
             if (oparg & 1) {
                 REPLACE_OP(this_instr, _PUSH_NULL, 0, 0);
@@ -958,7 +1237,7 @@
         }
 
         case _LOAD_GLOBAL_MODULE: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -967,7 +1246,7 @@
         }
 
         case _LOAD_GLOBAL_BUILTINS: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -988,14 +1267,14 @@
         }
 
         case _LOAD_FROM_DICT_OR_DEREF: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = sym_new_not_null(ctx);
             stack_pointer[-1] = value;
             break;
         }
 
         case _LOAD_DEREF: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = sym_new_not_null(ctx);
             stack_pointer[0] = value;
             stack_pointer += 1;
@@ -1014,7 +1293,7 @@
         }
 
         case _BUILD_STRING: {
-            JitOptSymbol *str;
+            JitOptRef str;
             str = sym_new_type(ctx, &PyUnicode_Type);
             stack_pointer[-oparg] = str;
             stack_pointer += 1 - oparg;
@@ -1023,7 +1302,7 @@
         }
 
         case _BUILD_INTERPOLATION: {
-            JitOptSymbol *interpolation;
+            JitOptRef interpolation;
             interpolation = sym_new_not_null(ctx);
             stack_pointer[-2 - (oparg & 1)] = interpolation;
             stack_pointer += -1 - (oparg & 1);
@@ -1032,7 +1311,7 @@
         }
 
         case _BUILD_TEMPLATE: {
-            JitOptSymbol *template;
+            JitOptRef template;
             template = sym_new_not_null(ctx);
             stack_pointer[-2] = template;
             stack_pointer += -1;
@@ -1041,8 +1320,8 @@
         }
 
         case _BUILD_TUPLE: {
-            JitOptSymbol **values;
-            JitOptSymbol *tup;
+            JitOptRef *values;
+            JitOptRef tup;
             values = &stack_pointer[-oparg];
             tup = sym_new_tuple(ctx, oparg, values);
             stack_pointer[-oparg] = tup;
@@ -1052,7 +1331,7 @@
         }
 
         case _BUILD_LIST: {
-            JitOptSymbol *list;
+            JitOptRef list;
             list = sym_new_type(ctx, &PyList_Type);
             stack_pointer[-oparg] = list;
             stack_pointer += 1 - oparg;
@@ -1073,7 +1352,7 @@
         }
 
         case _BUILD_SET: {
-            JitOptSymbol *set;
+            JitOptRef set;
             set = sym_new_type(ctx, &PySet_Type);
             stack_pointer[-oparg] = set;
             stack_pointer += 1 - oparg;
@@ -1082,7 +1361,7 @@
         }
 
         case _BUILD_MAP: {
-            JitOptSymbol *map;
+            JitOptRef map;
             map = sym_new_type(ctx, &PyDict_Type);
             stack_pointer[-oparg*2] = map;
             stack_pointer += 1 - oparg*2;
@@ -1113,7 +1392,7 @@
         }
 
         case _LOAD_SUPER_ATTR_ATTR: {
-            JitOptSymbol *attr_st;
+            JitOptRef attr_st;
             attr_st = sym_new_not_null(ctx);
             stack_pointer[-3] = attr_st;
             stack_pointer += -2;
@@ -1122,8 +1401,8 @@
         }
 
         case _LOAD_SUPER_ATTR_METHOD: {
-            JitOptSymbol *attr;
-            JitOptSymbol *self_or_null;
+            JitOptRef attr;
+            JitOptRef self_or_null;
             attr = sym_new_not_null(ctx);
             self_or_null = sym_new_not_null(ctx);
             stack_pointer[-3] = attr;
@@ -1134,9 +1413,9 @@
         }
 
         case _LOAD_ATTR: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
-            JitOptSymbol **self_or_null;
+            JitOptRef owner;
+            JitOptRef attr;
+            JitOptRef *self_or_null;
             owner = stack_pointer[-1];
             self_or_null = &stack_pointer[0];
             (void)owner;
@@ -1151,7 +1430,7 @@
         }
 
         case _GUARD_TYPE_VERSION: {
-            JitOptSymbol *owner;
+            JitOptRef owner;
             owner = stack_pointer[-1];
             uint32_t type_version = (uint32_t)this_instr->operand0;
             assert(type_version);
@@ -1178,7 +1457,7 @@
         }
 
         case _LOAD_ATTR_INSTANCE_VALUE: {
-            JitOptSymbol *attr;
+            JitOptRef attr;
             uint16_t offset = (uint16_t)this_instr->operand0;
             attr = sym_new_not_null(ctx);
             (void)offset;
@@ -1187,14 +1466,14 @@
         }
 
         case _LOAD_ATTR_MODULE: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
+            JitOptRef owner;
+            JitOptRef attr;
             owner = stack_pointer[-1];
             uint32_t dict_version = (uint32_t)this_instr->operand0;
             uint16_t index = (uint16_t)this_instr->operand0;
             (void)dict_version;
             (void)index;
-            attr = NULL;
+            attr = PyJitRef_NULL;
             if (sym_is_const(ctx, owner)) {
                 PyModuleObject *mod = (PyModuleObject *)sym_get_const(ctx, owner);
                 if (PyModule_CheckExact(mod)) {
@@ -1205,11 +1484,16 @@
                         PyDict_Watch(GLOBALS_WATCHER_ID, dict);
                         _Py_BloomFilter_Add(dependencies, dict);
                         PyObject *res = convert_global_to_const(this_instr, dict, true);
-                        attr = sym_new_const(ctx, res);
+                        if (res == NULL) {
+                            attr = sym_new_not_null(ctx);
+                        }
+                        else {
+                            attr = sym_new_const(ctx, res);
+                        }
                     }
                 }
             }
-            if (attr == NULL) {
+            if (PyJitRef_IsNull(attr)) {
                 attr = sym_new_not_null(ctx);
             }
             stack_pointer[-1] = attr;
@@ -1217,7 +1501,7 @@
         }
 
         case _LOAD_ATTR_WITH_HINT: {
-            JitOptSymbol *attr;
+            JitOptRef attr;
             uint16_t hint = (uint16_t)this_instr->operand0;
             attr = sym_new_not_null(ctx);
             (void)hint;
@@ -1226,7 +1510,7 @@
         }
 
         case _LOAD_ATTR_SLOT: {
-            JitOptSymbol *attr;
+            JitOptRef attr;
             uint16_t index = (uint16_t)this_instr->operand0;
             attr = sym_new_not_null(ctx);
             (void)index;
@@ -1235,7 +1519,7 @@
         }
 
         case _CHECK_ATTR_CLASS: {
-            JitOptSymbol *owner;
+            JitOptRef owner;
             owner = stack_pointer[-1];
             uint32_t type_version = (uint32_t)this_instr->operand0;
             PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version);
@@ -1251,8 +1535,8 @@
         }
 
         case _LOAD_ATTR_CLASS: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
+            JitOptRef owner;
+            JitOptRef attr;
             owner = stack_pointer[-1];
             PyObject *descr = (PyObject *)this_instr->operand0;
             (void)descr;
@@ -1266,10 +1550,10 @@
         }
 
         case _LOAD_ATTR_PROPERTY_FRAME: {
-            JitOptSymbol *new_frame;
+            JitOptRef new_frame;
             PyObject *fget = (PyObject *)this_instr->operand0;
             (void)fget;
-            new_frame = NULL;
+            new_frame = PyJitRef_NULL;
             ctx->done = true;
             stack_pointer[-1] = new_frame;
             break;
@@ -1300,7 +1584,7 @@
         }
 
         case _COMPARE_OP: {
-            JitOptSymbol *res;
+            JitOptRef res;
             if (oparg & 16) {
                 res = sym_new_type(ctx, &PyBool_Type);
             }
@@ -1314,7 +1598,7 @@
         }
 
         case _COMPARE_OP_FLOAT: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_type(ctx, &PyBool_Type);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -1323,9 +1607,9 @@
         }
 
         case _COMPARE_OP_INT: {
-            JitOptSymbol *right;
-            JitOptSymbol *left;
-            JitOptSymbol *res;
+            JitOptRef right;
+            JitOptRef left;
+            JitOptRef res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
             if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
@@ -1355,7 +1639,7 @@
         }
 
         case _COMPARE_OP_STR: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_type(ctx, &PyBool_Type);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -1364,7 +1648,7 @@
         }
 
         case _IS_OP: {
-            JitOptSymbol *b;
+            JitOptRef b;
             b = sym_new_type(ctx, &PyBool_Type);
             stack_pointer[-2] = b;
             stack_pointer += -1;
@@ -1373,7 +1657,7 @@
         }
 
         case _CONTAINS_OP: {
-            JitOptSymbol *b;
+            JitOptRef b;
             b = sym_new_type(ctx, &PyBool_Type);
             stack_pointer[-2] = b;
             stack_pointer += -1;
@@ -1382,7 +1666,7 @@
         }
 
         case _GUARD_TOS_ANY_SET: {
-            JitOptSymbol *tos;
+            JitOptRef tos;
             tos = stack_pointer[-1];
             if (sym_matches_type(tos, &PySet_Type) ||
                 sym_matches_type(tos, &PyFrozenSet_Type))
@@ -1393,7 +1677,7 @@
         }
 
         case _CONTAINS_OP_SET: {
-            JitOptSymbol *b;
+            JitOptRef b;
             b = sym_new_type(ctx, &PyBool_Type);
             stack_pointer[-2] = b;
             stack_pointer += -1;
@@ -1402,7 +1686,7 @@
         }
 
         case _CONTAINS_OP_DICT: {
-            JitOptSymbol *b;
+            JitOptRef b;
             b = sym_new_type(ctx, &PyBool_Type);
             stack_pointer[-2] = b;
             stack_pointer += -1;
@@ -1411,8 +1695,8 @@
         }
 
         case _CHECK_EG_MATCH: {
-            JitOptSymbol *rest;
-            JitOptSymbol *match;
+            JitOptRef rest;
+            JitOptRef match;
             rest = sym_new_not_null(ctx);
             match = sym_new_not_null(ctx);
             stack_pointer[-2] = rest;
@@ -1421,14 +1705,14 @@
         }
 
         case _CHECK_EXC_MATCH: {
-            JitOptSymbol *b;
+            JitOptRef b;
             b = sym_new_not_null(ctx);
             stack_pointer[-1] = b;
             break;
         }
 
         case _IMPORT_NAME: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -1437,7 +1721,7 @@
         }
 
         case _IMPORT_FROM: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -1450,15 +1734,15 @@
         /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */
 
         case _IS_NONE: {
-            JitOptSymbol *b;
+            JitOptRef b;
             b = sym_new_not_null(ctx);
             stack_pointer[-1] = b;
             break;
         }
 
         case _GET_LEN: {
-            JitOptSymbol *obj;
-            JitOptSymbol *len;
+            JitOptRef obj;
+            JitOptRef len;
             obj = stack_pointer[-1];
             int tuple_length = sym_tuple_length(obj);
             if (tuple_length == -1) {
@@ -1487,7 +1771,7 @@
         }
 
         case _MATCH_CLASS: {
-            JitOptSymbol *attrs;
+            JitOptRef attrs;
             attrs = sym_new_not_null(ctx);
             stack_pointer[-3] = attrs;
             stack_pointer += -2;
@@ -1496,7 +1780,7 @@
         }
 
         case _MATCH_MAPPING: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -1505,7 +1789,7 @@
         }
 
         case _MATCH_SEQUENCE: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -1514,7 +1798,7 @@
         }
 
         case _MATCH_KEYS: {
-            JitOptSymbol *values_or_none;
+            JitOptRef values_or_none;
             values_or_none = sym_new_not_null(ctx);
             stack_pointer[0] = values_or_none;
             stack_pointer += 1;
@@ -1523,9 +1807,9 @@
         }
 
         case _GET_ITER: {
-            JitOptSymbol *iterable;
-            JitOptSymbol *iter;
-            JitOptSymbol *index_or_null;
+            JitOptRef iterable;
+            JitOptRef iter;
+            JitOptRef index_or_null;
             iterable = stack_pointer[-1];
             if (sym_matches_type(iterable, &PyTuple_Type) || sym_matches_type(iterable, &PyList_Type)) {
                 iter = iterable;
@@ -1543,7 +1827,7 @@
         }
 
         case _GET_YIELD_FROM_ITER: {
-            JitOptSymbol *iter;
+            JitOptRef iter;
             iter = sym_new_not_null(ctx);
             stack_pointer[-1] = iter;
             break;
@@ -1552,7 +1836,7 @@
         /* _FOR_ITER is not a viable micro-op for tier 2 */
 
         case _FOR_ITER_TIER_TWO: {
-            JitOptSymbol *next;
+            JitOptRef next;
             next = sym_new_not_null(ctx);
             stack_pointer[0] = next;
             stack_pointer += 1;
@@ -1575,7 +1859,7 @@
         /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 */
 
         case _ITER_NEXT_LIST_TIER_TWO: {
-            JitOptSymbol *next;
+            JitOptRef next;
             next = sym_new_not_null(ctx);
             stack_pointer[0] = next;
             stack_pointer += 1;
@@ -1584,7 +1868,7 @@
         }
 
         case _ITER_CHECK_TUPLE: {
-            JitOptSymbol *iter;
+            JitOptRef iter;
             iter = stack_pointer[-2];
             if (sym_matches_type(iter, &PyTuple_Type)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -1600,7 +1884,7 @@
         }
 
         case _ITER_NEXT_TUPLE: {
-            JitOptSymbol *next;
+            JitOptRef next;
             next = sym_new_not_null(ctx);
             stack_pointer[0] = next;
             stack_pointer += 1;
@@ -1619,7 +1903,7 @@
         }
 
         case _ITER_NEXT_RANGE: {
-            JitOptSymbol *next;
+            JitOptRef next;
             next = sym_new_type(ctx, &PyLong_Type);
             stack_pointer[0] = next;
             stack_pointer += 1;
@@ -1628,8 +1912,8 @@
         }
 
         case _FOR_ITER_GEN_FRAME: {
-            JitOptSymbol *gen_frame;
-            gen_frame = NULL;
+            JitOptRef gen_frame;
+            gen_frame = PyJitRef_NULL;
             ctx->done = true;
             stack_pointer[0] = gen_frame;
             stack_pointer += 1;
@@ -1638,8 +1922,8 @@
         }
 
         case _INSERT_NULL: {
-            JitOptSymbol *self;
-            JitOptSymbol **method_and_self;
+            JitOptRef self;
+            JitOptRef *method_and_self;
             self = stack_pointer[-1];
             method_and_self = &stack_pointer[-1];
             method_and_self[0] = sym_new_null(ctx);
@@ -1650,7 +1934,7 @@
         }
 
         case _LOAD_SPECIAL: {
-            JitOptSymbol **method_and_self;
+            JitOptRef *method_and_self;
             method_and_self = &stack_pointer[-2];
             method_and_self[0] = sym_new_not_null(ctx);
             method_and_self[1] = sym_new_unknown(ctx);
@@ -1658,7 +1942,7 @@
         }
 
         case _WITH_EXCEPT_START: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -1667,8 +1951,8 @@
         }
 
         case _PUSH_EXC_INFO: {
-            JitOptSymbol *prev_exc;
-            JitOptSymbol *new_exc;
+            JitOptRef prev_exc;
+            JitOptRef new_exc;
             prev_exc = sym_new_not_null(ctx);
             new_exc = sym_new_not_null(ctx);
             stack_pointer[-1] = prev_exc;
@@ -1687,9 +1971,9 @@
         }
 
         case _LOAD_ATTR_METHOD_WITH_VALUES: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
-            JitOptSymbol *self;
+            JitOptRef owner;
+            JitOptRef attr;
+            JitOptRef self;
             owner = stack_pointer[-1];
             PyObject *descr = (PyObject *)this_instr->operand0;
             (void)descr;
@@ -1707,9 +1991,9 @@
         }
 
         case _LOAD_ATTR_METHOD_NO_DICT: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
-            JitOptSymbol *self;
+            JitOptRef owner;
+            JitOptRef attr;
+            JitOptRef self;
             owner = stack_pointer[-1];
             PyObject *descr = (PyObject *)this_instr->operand0;
             (void)descr;
@@ -1727,8 +2011,8 @@
         }
 
         case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
+            JitOptRef owner;
+            JitOptRef attr;
             owner = stack_pointer[-1];
             PyObject *descr = (PyObject *)this_instr->operand0;
             (void)descr;
@@ -1742,8 +2026,8 @@
         }
 
         case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
+            JitOptRef owner;
+            JitOptRef attr;
             owner = stack_pointer[-1];
             PyObject *descr = (PyObject *)this_instr->operand0;
             (void)descr;
@@ -1761,9 +2045,9 @@
         }
 
         case _LOAD_ATTR_METHOD_LAZY_DICT: {
-            JitOptSymbol *owner;
-            JitOptSymbol *attr;
-            JitOptSymbol *self;
+            JitOptRef owner;
+            JitOptRef attr;
+            JitOptRef self;
             owner = stack_pointer[-1];
             PyObject *descr = (PyObject *)this_instr->operand0;
             (void)descr;
@@ -1781,9 +2065,9 @@
         }
 
         case _MAYBE_EXPAND_METHOD: {
-            JitOptSymbol **args;
-            JitOptSymbol *self_or_null;
-            JitOptSymbol *callable;
+            JitOptRef *args;
+            JitOptRef self_or_null;
+            JitOptRef callable;
             args = &stack_pointer[-oparg];
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
@@ -1800,7 +2084,7 @@
         /* _MONITOR_CALL is not a viable micro-op for tier 2 */
 
         case _PY_FRAME_GENERAL: {
-            JitOptSymbol *new_frame;
+            JitOptRef new_frame;
             PyCodeObject *co = NULL;
             assert((this_instr + 2)->opcode == _PUSH_FRAME);
             co = get_code_with_logging((this_instr + 2));
@@ -1808,7 +2092,7 @@
                 ctx->done = true;
                 break;
             }
-            new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0);
+            new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
             stack_pointer[-2 - oparg] = new_frame;
             stack_pointer += -1 - oparg;
             assert(WITHIN_STACK_BOUNDS());
@@ -1816,7 +2100,7 @@
         }
 
         case _CHECK_FUNCTION_VERSION: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-2 - oparg];
             uint32_t func_version = (uint32_t)this_instr->operand0;
             if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) {
@@ -1833,6 +2117,16 @@
         }
 
         case _CHECK_METHOD_VERSION: {
+            JitOptRef callable;
+            callable = stack_pointer[-2 - oparg];
+            uint32_t func_version = (uint32_t)this_instr->operand0;
+            if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyMethod_Type)) {
+                PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable);
+                assert(PyMethod_Check(method));
+                REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
+                this_instr->operand1 = (uintptr_t)method->im_func;
+            }
+            sym_set_type(callable, &PyMethod_Type);
             break;
         }
 
@@ -1845,7 +2139,7 @@
         }
 
         case _CALL_NON_PY_GENERAL: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -1854,8 +2148,8 @@
         }
 
         case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
-            JitOptSymbol *null;
-            JitOptSymbol *callable;
+            JitOptRef null;
+            JitOptRef callable;
             null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             sym_set_null(null);
@@ -1864,8 +2158,8 @@
         }
 
         case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: {
-            JitOptSymbol *self_or_null;
-            JitOptSymbol *callable;
+            JitOptRef self_or_null;
+            JitOptRef callable;
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             callable = sym_new_not_null(ctx);
@@ -1883,8 +2177,8 @@
         }
 
         case _CHECK_FUNCTION_EXACT_ARGS: {
-            JitOptSymbol *self_or_null;
-            JitOptSymbol *callable;
+            JitOptRef self_or_null;
+            JitOptRef callable;
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             assert(sym_matches_type(callable, &PyFunction_Type));
@@ -1911,9 +2205,9 @@
         }
 
         case _INIT_CALL_PY_EXACT_ARGS: {
-            JitOptSymbol **args;
-            JitOptSymbol *self_or_null;
-            JitOptSymbol *new_frame;
+            JitOptRef *args;
+            JitOptRef self_or_null;
+            JitOptRef new_frame;
             args = &stack_pointer[-oparg];
             self_or_null = stack_pointer[-1 - oparg];
             int argcount = oparg;
@@ -1924,16 +2218,16 @@
                 ctx->done = true;
                 break;
             }
-            assert(self_or_null != NULL);
+            assert(!PyJitRef_IsNull(self_or_null));
             assert(args != NULL);
             if (sym_is_not_null(self_or_null)) {
                 args--;
                 argcount++;
             }
             if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-                new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, args, argcount);
+                new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args, argcount));
             } else {
-                new_frame = (JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0);
+                new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
             }
             stack_pointer[-2 - oparg] = new_frame;
             stack_pointer += -1 - oparg;
@@ -1942,12 +2236,12 @@
         }
 
         case _PUSH_FRAME: {
-            JitOptSymbol *new_frame;
+            JitOptRef new_frame;
             new_frame = stack_pointer[-1];
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
             ctx->frame->stack_pointer = stack_pointer;
-            ctx->frame = (_Py_UOpsAbstractFrame *)new_frame;
+            ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame);
             ctx->curr_frame_depth++;
             stack_pointer = ctx->frame->stack_pointer;
             co = get_code(this_instr);
@@ -1974,7 +2268,7 @@
         }
 
         case _GUARD_NOS_NULL: {
-            JitOptSymbol *null;
+            JitOptRef null;
             null = stack_pointer[-2];
             if (sym_is_null(null)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -1984,7 +2278,7 @@
         }
 
         case _GUARD_NOS_NOT_NULL: {
-            JitOptSymbol *nos;
+            JitOptRef nos;
             nos = stack_pointer[-2];
             if (sym_is_not_null(nos)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -1994,7 +2288,7 @@
         }
 
         case _GUARD_THIRD_NULL: {
-            JitOptSymbol *null;
+            JitOptRef null;
             null = stack_pointer[-3];
             if (sym_is_null(null)) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -2004,7 +2298,7 @@
         }
 
         case _GUARD_CALLABLE_TYPE_1: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-3];
             if (sym_get_const(ctx, callable) == (PyObject *)&PyType_Type) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -2014,8 +2308,8 @@
         }
 
         case _CALL_TYPE_1: {
-            JitOptSymbol *arg;
-            JitOptSymbol *res;
+            JitOptRef arg;
+            JitOptRef res;
             arg = stack_pointer[-1];
             PyObject* type = (PyObject *)sym_get_type(arg);
             if (type) {
@@ -2033,7 +2327,7 @@
         }
 
         case _GUARD_CALLABLE_STR_1: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-3];
             if (sym_get_const(ctx, callable) == (PyObject *)&PyUnicode_Type) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -2043,8 +2337,8 @@
         }
 
         case _CALL_STR_1: {
-            JitOptSymbol *arg;
-            JitOptSymbol *res;
+            JitOptRef arg;
+            JitOptRef res;
             arg = stack_pointer[-1];
             if (sym_matches_type(arg, &PyUnicode_Type)) {
                 res = arg;
@@ -2059,7 +2353,7 @@
         }
 
         case _GUARD_CALLABLE_TUPLE_1: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-3];
             if (sym_get_const(ctx, callable) == (PyObject *)&PyTuple_Type) {
                 REPLACE_OP(this_instr, _NOP, 0, 0);
@@ -2069,8 +2363,8 @@
         }
 
         case _CALL_TUPLE_1: {
-            JitOptSymbol *arg;
-            JitOptSymbol *res;
+            JitOptRef arg;
+            JitOptRef res;
             arg = stack_pointer[-1];
             if (sym_matches_type(arg, &PyTuple_Type)) {
                 res = arg;
@@ -2085,9 +2379,9 @@
         }
 
         case _CHECK_AND_ALLOCATE_OBJECT: {
-            JitOptSymbol **args;
-            JitOptSymbol *self_or_null;
-            JitOptSymbol *callable;
+            JitOptRef *args;
+            JitOptRef self_or_null;
+            JitOptRef callable;
             args = &stack_pointer[-oparg];
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
@@ -2102,8 +2396,8 @@
         }
 
         case _CREATE_INIT_FRAME: {
-            JitOptSymbol *init_frame;
-            init_frame = NULL;
+            JitOptRef init_frame;
+            init_frame = PyJitRef_NULL;
             ctx->done = true;
             stack_pointer[-2 - oparg] = init_frame;
             stack_pointer += -1 - oparg;
@@ -2118,7 +2412,7 @@
         }
 
         case _CALL_BUILTIN_CLASS: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2127,7 +2421,7 @@
         }
 
         case _CALL_BUILTIN_O: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2136,7 +2430,7 @@
         }
 
         case _CALL_BUILTIN_FAST: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2145,7 +2439,7 @@
         }
 
         case _CALL_BUILTIN_FAST_WITH_KEYWORDS: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2154,7 +2448,7 @@
         }
 
         case _GUARD_CALLABLE_LEN: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-3];
             PyObject *len = _PyInterpreterState_GET()->callable_cache.len;
             if (sym_get_const(ctx, callable) == len) {
@@ -2165,8 +2459,27 @@
         }
 
         case _CALL_LEN: {
-            JitOptSymbol *res;
+            JitOptRef arg;
+            JitOptRef res;
+            arg = stack_pointer[-1];
             res = sym_new_type(ctx, &PyLong_Type);
+            int tuple_length = sym_tuple_length(arg);
+            if (tuple_length >= 0) {
+                PyObject *temp = PyLong_FromLong(tuple_length);
+                if (temp == NULL) {
+                    goto error;
+                }
+                if (_Py_IsImmortal(temp)) {
+                    REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW,
+                           0, (uintptr_t)temp);
+                }
+                res = sym_new_const(ctx, temp);
+                stack_pointer[-3] = res;
+                stack_pointer += -2;
+                assert(WITHIN_STACK_BOUNDS());
+                Py_DECREF(temp);
+                stack_pointer += 2;
+            }
             stack_pointer[-3] = res;
             stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
@@ -2174,7 +2487,7 @@
         }
 
         case _GUARD_CALLABLE_ISINSTANCE: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-4];
             PyObject *isinstance = _PyInterpreterState_GET()->callable_cache.isinstance;
             if (sym_get_const(ctx, callable) == isinstance) {
@@ -2185,9 +2498,9 @@
         }
 
         case _CALL_ISINSTANCE: {
-            JitOptSymbol *cls;
-            JitOptSymbol *instance;
-            JitOptSymbol *res;
+            JitOptRef cls;
+            JitOptRef instance;
+            JitOptRef res;
             cls = stack_pointer[-1];
             instance = stack_pointer[-2];
             res = sym_new_type(ctx, &PyBool_Type);
@@ -2208,7 +2521,7 @@
         }
 
         case _GUARD_CALLABLE_LIST_APPEND: {
-            JitOptSymbol *callable;
+            JitOptRef callable;
             callable = stack_pointer[-3];
             PyObject *list_append = _PyInterpreterState_GET()->callable_cache.list_append;
             if (sym_get_const(ctx, callable) == list_append) {
@@ -2225,7 +2538,7 @@
         }
 
         case _CALL_METHOD_DESCRIPTOR_O: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2234,7 +2547,7 @@
         }
 
         case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2243,7 +2556,7 @@
         }
 
         case _CALL_METHOD_DESCRIPTOR_NOARGS: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2252,7 +2565,7 @@
         }
 
         case _CALL_METHOD_DESCRIPTOR_FAST: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = res;
             stack_pointer += -1 - oparg;
@@ -2269,8 +2582,8 @@
         /* _DO_CALL_KW is not a viable micro-op for tier 2 */
 
         case _PY_FRAME_KW: {
-            JitOptSymbol *new_frame;
-            new_frame = NULL;
+            JitOptRef new_frame;
+            new_frame = PyJitRef_NULL;
             ctx->done = true;
             stack_pointer[-3 - oparg] = new_frame;
             stack_pointer += -2 - oparg;
@@ -2295,7 +2608,7 @@
         }
 
         case _CALL_KW_NON_PY: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-3 - oparg] = res;
             stack_pointer += -2 - oparg;
@@ -2310,14 +2623,14 @@
         /* _DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */
 
         case _MAKE_FUNCTION: {
-            JitOptSymbol *func;
+            JitOptRef func;
             func = sym_new_not_null(ctx);
             stack_pointer[-1] = func;
             break;
         }
 
         case _SET_FUNCTION_ATTRIBUTE: {
-            JitOptSymbol *func_out;
+            JitOptRef func_out;
             func_out = sym_new_not_null(ctx);
             stack_pointer[-2] = func_out;
             stack_pointer += -1;
@@ -2326,7 +2639,7 @@
         }
 
         case _RETURN_GENERATOR: {
-            JitOptSymbol *res;
+            JitOptRef res;
             ctx->frame->stack_pointer = stack_pointer;
             frame_pop(ctx);
             stack_pointer = ctx->frame->stack_pointer;
@@ -2348,7 +2661,7 @@
         }
 
         case _BUILD_SLICE: {
-            JitOptSymbol *slice;
+            JitOptRef slice;
             slice = sym_new_type(ctx, &PySlice_Type);
             stack_pointer[-oparg] = slice;
             stack_pointer += 1 - oparg;
@@ -2357,21 +2670,21 @@
         }
 
         case _CONVERT_VALUE: {
-            JitOptSymbol *result;
+            JitOptRef result;
             result = sym_new_not_null(ctx);
             stack_pointer[-1] = result;
             break;
         }
 
         case _FORMAT_SIMPLE: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-1] = res;
             break;
         }
 
         case _FORMAT_WITH_SPEC: {
-            JitOptSymbol *res;
+            JitOptRef res;
             res = sym_new_not_null(ctx);
             stack_pointer[-2] = res;
             stack_pointer += -1;
@@ -2380,8 +2693,8 @@
         }
 
         case _COPY: {
-            JitOptSymbol *bottom;
-            JitOptSymbol *top;
+            JitOptRef bottom;
+            JitOptRef top;
             bottom = stack_pointer[-1 - (oparg-1)];
             assert(oparg > 0);
             top = bottom;
@@ -2392,11 +2705,36 @@
         }
 
         case _BINARY_OP: {
-            JitOptSymbol *rhs;
-            JitOptSymbol *lhs;
-            JitOptSymbol *res;
+            JitOptRef rhs;
+            JitOptRef lhs;
+            JitOptRef res;
             rhs = stack_pointer[-1];
             lhs = stack_pointer[-2];
+            if (
+                sym_is_safe_const(ctx, lhs) &&
+                sym_is_safe_const(ctx, rhs)
+            ) {
+                JitOptRef lhs_sym = lhs;
+                JitOptRef rhs_sym = rhs;
+                _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym);
+                _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym);
+                _PyStackRef res_stackref;
+                /* Start of uop copied from bytecodes for constant evaluation */
+                PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs);
+                PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs);
+                assert(_PyEval_BinaryOps[oparg]);
+                stack_pointer[-2] = res;
+                stack_pointer += -1;
+                assert(WITHIN_STACK_BOUNDS());
+                PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o);
+                if (res_o == NULL) {
+                    JUMP_TO_LABEL(error);
+                }
+                res_stackref = PyStackRef_FromPyObjectSteal(res_o);
+                /* End of uop copied from bytecodes for constant evaluation */
+                res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
+                break;
+            }
             bool lhs_int = sym_matches_type(lhs, &PyLong_Type);
             bool rhs_int = sym_matches_type(rhs, &PyLong_Type);
             bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);
@@ -2437,11 +2775,11 @@
         }
 
         case _SWAP: {
-            JitOptSymbol *top;
-            JitOptSymbol *bottom;
+            JitOptRef top;
+            JitOptRef bottom;
             top = stack_pointer[-1];
             bottom = stack_pointer[-2 - (oparg-2)];
-            JitOptSymbol *temp = bottom;
+            JitOptRef temp = bottom;
             bottom = top;
             top = temp;
             assert(oparg >= 2);
@@ -2469,7 +2807,7 @@
         /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */
 
         case _GUARD_IS_TRUE_POP: {
-            JitOptSymbol *flag;
+            JitOptRef flag;
             flag = stack_pointer[-1];
             if (sym_is_const(ctx, flag)) {
                 PyObject *value = sym_get_const(ctx, flag);
@@ -2483,7 +2821,7 @@
         }
 
         case _GUARD_IS_FALSE_POP: {
-            JitOptSymbol *flag;
+            JitOptRef flag;
             flag = stack_pointer[-1];
             if (sym_is_const(ctx, flag)) {
                 PyObject *value = sym_get_const(ctx, flag);
@@ -2497,7 +2835,7 @@
         }
 
         case _GUARD_IS_NONE_POP: {
-            JitOptSymbol *val;
+            JitOptRef val;
             val = stack_pointer[-1];
             if (sym_is_const(ctx, val)) {
                 PyObject *value = sym_get_const(ctx, val);
@@ -2515,7 +2853,7 @@
         }
 
         case _GUARD_IS_NOT_NONE_POP: {
-            JitOptSymbol *val;
+            JitOptRef val;
             val = stack_pointer[-1];
             if (sym_is_const(ctx, val)) {
                 PyObject *value = sym_get_const(ctx, val);
@@ -2563,7 +2901,7 @@
         }
 
         case _LOAD_CONST_INLINE: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
             value = sym_new_const(ctx, ptr);
             stack_pointer[0] = value;
@@ -2573,7 +2911,7 @@
         }
 
         case _POP_TOP_LOAD_CONST_INLINE: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
             value = sym_new_const(ctx, ptr);
             stack_pointer[-1] = value;
@@ -2581,9 +2919,9 @@
         }
 
         case _LOAD_CONST_INLINE_BORROW: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
-            value = sym_new_const(ctx, ptr);
+            value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
             stack_pointer[0] = value;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
@@ -2609,15 +2947,15 @@
         }
 
         case _POP_TOP_LOAD_CONST_INLINE_BORROW: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
-            value = sym_new_const(ctx, ptr);
+            value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
             stack_pointer[-1] = value;
             break;
         }
 
         case _POP_TWO_LOAD_CONST_INLINE_BORROW: {
-            JitOptSymbol *value;
+            JitOptRef value;
             value = sym_new_not_null(ctx);
             stack_pointer[-2] = value;
             stack_pointer += -1;
@@ -2626,9 +2964,9 @@
         }
 
         case _POP_CALL_LOAD_CONST_INLINE_BORROW: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
-            value = sym_new_const(ctx, ptr);
+            value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
             stack_pointer[-2] = value;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
@@ -2636,9 +2974,9 @@
         }
 
         case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
-            value = sym_new_const(ctx, ptr);
+            value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
             stack_pointer[-3] = value;
             stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
@@ -2646,9 +2984,9 @@
         }
 
         case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: {
-            JitOptSymbol *value;
+            JitOptRef value;
             PyObject *ptr = (PyObject *)this_instr->operand0;
-            value = sym_new_const(ctx, ptr);
+            value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
             stack_pointer[-4] = value;
             stack_pointer += -3;
             assert(WITHIN_STACK_BOUNDS());
@@ -2656,8 +2994,8 @@
         }
 
         case _LOAD_CONST_UNDER_INLINE: {
-            JitOptSymbol *value;
-            JitOptSymbol *new;
+            JitOptRef value;
+            JitOptRef new;
             value = sym_new_not_null(ctx);
             new = sym_new_not_null(ctx);
             stack_pointer[-1] = value;
@@ -2668,8 +3006,8 @@
         }
 
         case _LOAD_CONST_UNDER_INLINE_BORROW: {
-            JitOptSymbol *value;
-            JitOptSymbol *new;
+            JitOptRef value;
+            JitOptRef new;
             value = sym_new_not_null(ctx);
             new = sym_new_not_null(ctx);
             stack_pointer[-1] = value;
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 25de5d83166..bd3ec615d08 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -30,17 +30,19 @@ we often skip in-between states for convenience:
    |     |
 NULL     |
 |        |                <- Anything below this level is an object.
-|        NON_NULL
-|        |      |         <- Anything below this level has a known type version.
-| TYPE_VERSION  |
-| |             |         <- Anything below this level has a known type.
-| KNOWN_CLASS   |
-| |         |   |         <- Anything below this level has a known truthiness.
-| |         |  TRUTHINESS
-| |         |  |
-| TUPLE     |  |
-|     |     |  |          <- Anything below this level is a known constant.
-|    KNOWN_VALUE
+|        NON_NULL-+
+|          |      |       <- Anything below this level has a known type version.
+|    TYPE_VERSION |
+|    |            |       <- Anything below this level has a known type.
+|    KNOWN_CLASS  |
+|    |  |  |   |  |
+|    |  | INT* |  |
+|    |  |  |   |  |       <- Anything below this level has a known truthiness.
+|    |  |  |   |  TRUTHINESS
+|    |  |  |   |  |
+| TUPLE |  |   |  |
+|    |  |  |   |  |       <- Anything below this level is a known constant.
+|    KNOWN_VALUE--+
 |    |                    <- Anything below this level is unreachable.
 BOTTOM
 
@@ -52,6 +54,8 @@ result of a truth test, which would allow us to narrow the symbol to KNOWN_VALUE
 the same symbol, that would be a contradiction, and the symbol would be set to
 BOTTOM (indicating that the code is unreachable).
 
+INT* is a limited-range int; currently this means a "compact" int.
+
 */
 
 #ifdef Py_DEBUG
@@ -88,6 +92,12 @@ out_of_space(JitOptContext *ctx)
     return &NO_SPACE_SYMBOL;
 }
 
+JitOptRef
+out_of_space_ref(JitOptContext *ctx)
+{
+    return PyJitRef_Wrap(out_of_space(ctx));
+}
+
 static JitOptSymbol *
 sym_new(JitOptContext *ctx)
 {
@@ -98,7 +108,7 @@ sym_new(JitOptContext *ctx)
         return NULL;
     }
     ctx->t_arena.ty_curr_number++;
-    self->tag = JIT_SYM_UNKNOWN_TAG;
+    self->tag = JIT_SYM_UNKNOWN_TAG;;
     return self;
 }
 
@@ -117,25 +127,28 @@ sym_set_bottom(JitOptContext *ctx, JitOptSymbol *sym)
 }
 
 bool
-_Py_uop_sym_is_bottom(JitOptSymbol *sym)
+_Py_uop_sym_is_bottom(JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     return sym->tag == JIT_SYM_BOTTOM_TAG;
 }
 
 bool
-_Py_uop_sym_is_not_null(JitOptSymbol *sym) {
+_Py_uop_sym_is_not_null(JitOptRef ref) {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     return sym->tag == JIT_SYM_NON_NULL_TAG || sym->tag > JIT_SYM_BOTTOM_TAG;
 }
 
 bool
-_Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym)
+_Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) {
         return true;
     }
     if (sym->tag == JIT_SYM_TRUTHINESS_TAG) {
         JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value;
-        int truthiness = _Py_uop_sym_truthiness(ctx, value);
+        int truthiness = _Py_uop_sym_truthiness(ctx, PyJitRef_Wrap(value));
         if (truthiness < 0) {
             return false;
         }
@@ -146,21 +159,22 @@ _Py_uop_sym_is_const(JitOptContext *ctx, JitOptSymbol *sym)
 }
 
 bool
-_Py_uop_sym_is_null(JitOptSymbol *sym)
+_Py_uop_sym_is_null(JitOptRef ref)
 {
-    return sym->tag == JIT_SYM_NULL_TAG;
+    return PyJitRef_Unwrap(ref)->tag == JIT_SYM_NULL_TAG;
 }
 
 
 PyObject *
-_Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym)
+_Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) {
         return sym->value.value;
     }
     if (sym->tag == JIT_SYM_TRUTHINESS_TAG) {
         JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value;
-        int truthiness = _Py_uop_sym_truthiness(ctx, value);
+        int truthiness = _Py_uop_sym_truthiness(ctx, PyJitRef_Wrap(value));
         if (truthiness < 0) {
             return NULL;
         }
@@ -171,9 +185,41 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptSymbol *sym)
     return NULL;
 }
 
+_PyStackRef
+_Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym)
+{
+    PyObject *const_val = _Py_uop_sym_get_const(ctx, sym);
+    if (const_val == NULL) {
+        return PyStackRef_NULL;
+    }
+    return PyStackRef_FromPyObjectBorrow(const_val);
+}
+
+/*
+ Indicates whether the symbol is a known constant that is safe to
+ constant-evaluate (without side effects).
+ */
+bool
+_Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym)
+{
+    PyObject *const_val = _Py_uop_sym_get_const(ctx, sym);
+    if (const_val == NULL) {
+        return false;
+    }
+    if (_PyLong_CheckExactAndCompact(const_val)) {
+        return true;
+    }
+    PyTypeObject *typ = Py_TYPE(const_val);
+    return (typ == &PyUnicode_Type) ||
+           (typ == &PyFloat_Type) ||
+           (typ == &PyTuple_Type) ||
+           (typ == &PyBool_Type);
+}
+
 void
-_Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ)
+_Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     JitSymType tag = sym->tag;
     switch(tag) {
         case JIT_SYM_NULL_TAG:
@@ -218,15 +264,21 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ)
                 sym_set_bottom(ctx, sym);
             }
             return;
+        case JIT_SYM_COMPACT_INT:
+            if (typ != &PyLong_Type) {
+                sym_set_bottom(ctx, sym);
+            }
+            return;
     }
 }
 
 bool
-_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version)
+_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int version)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     PyTypeObject *type = _PyType_LookupByVersion(version);
     if (type) {
-        _Py_uop_sym_set_type(ctx, sym, type);
+        _Py_uop_sym_set_type(ctx, ref, type);
     }
     JitSymType tag = sym->tag;
     switch(tag) {
@@ -274,13 +326,20 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int
                 return false;
             }
             return true;
+        case JIT_SYM_COMPACT_INT:
+            if (version != PyLong_Type.tp_version_tag) {
+                sym_set_bottom(ctx, sym);
+                return false;
+            }
+            return true;
     }
     Py_UNREACHABLE();
 }
 
 void
-_Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val)
+_Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     JitSymType tag = sym->tag;
     switch(tag) {
         case JIT_SYM_NULL_TAG:
@@ -301,10 +360,10 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val
             return;
         case JIT_SYM_TUPLE_TAG:
             if (PyTuple_CheckExact(const_val)) {
-                Py_ssize_t len = _Py_uop_sym_tuple_length(sym);
+                Py_ssize_t len = _Py_uop_sym_tuple_length(ref);
                 if (len == PyTuple_GET_SIZE(const_val)) {
                     for (Py_ssize_t i = 0; i < len; i++) {
-                        JitOptSymbol *sym_item = _Py_uop_sym_tuple_getitem(ctx, sym, i);
+                        JitOptRef sym_item = _Py_uop_sym_tuple_getitem(ctx, ref, i);
                         PyObject *item = PyTuple_GET_ITEM(const_val, i);
                         _Py_uop_sym_set_const(ctx, sym_item, item);
                     }
@@ -329,13 +388,14 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val
             return;
         case JIT_SYM_TRUTHINESS_TAG:
             if (!PyBool_Check(const_val) ||
-                (_Py_uop_sym_is_const(ctx, sym) &&
-                 _Py_uop_sym_get_const(ctx, sym) != const_val))
+                (_Py_uop_sym_is_const(ctx, ref) &&
+                 _Py_uop_sym_get_const(ctx, ref) != const_val))
             {
                 sym_set_bottom(ctx, sym);
                 return;
             }
-            JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value;
+            JitOptRef value = PyJitRef_Wrap(
+                allocation_base(ctx) + sym->truthiness.value);
             PyTypeObject *type = _Py_uop_sym_get_type(value);
             if (const_val == (sym->truthiness.invert ? Py_False : Py_True)) {
                 // value is truthy. This is only useful for bool:
@@ -356,12 +416,21 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val
             // TODO: More types (GH-130415)!
             make_const(sym, const_val);
             return;
+        case JIT_SYM_COMPACT_INT:
+            if (_PyLong_CheckExactAndCompact(const_val)) {
+                make_const(sym, const_val);
+            }
+            else {
+                sym_set_bottom(ctx, sym);
+            }
+            return;
     }
 }
 
 void
-_Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym)
+_Py_uop_sym_set_null(JitOptContext *ctx, JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     if (sym->tag == JIT_SYM_UNKNOWN_TAG) {
         sym->tag = JIT_SYM_NULL_TAG;
     }
@@ -371,8 +440,9 @@ _Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym)
 }
 
 void
-_Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym)
+_Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     if (sym->tag == JIT_SYM_UNKNOWN_TAG) {
         sym->tag = JIT_SYM_NON_NULL_TAG;
     }
@@ -381,66 +451,79 @@ _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym)
     }
 }
 
-
-JitOptSymbol *
+JitOptRef
 _Py_uop_sym_new_unknown(JitOptContext *ctx)
 {
     JitOptSymbol *res = sym_new(ctx);
     if (res == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
-    return res;
+    return PyJitRef_Wrap(res);
 }
 
-JitOptSymbol *
+JitOptRef
 _Py_uop_sym_new_not_null(JitOptContext *ctx)
 {
     JitOptSymbol *res = sym_new(ctx);
     if (res == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
     res->tag = JIT_SYM_NON_NULL_TAG;
-    return res;
+    return PyJitRef_Wrap(res);
 }
 
-JitOptSymbol *
+JitOptRef
 _Py_uop_sym_new_type(JitOptContext *ctx, PyTypeObject *typ)
 {
     JitOptSymbol *res = sym_new(ctx);
     if (res == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
-    _Py_uop_sym_set_type(ctx, res, typ);
-    return res;
+    JitOptRef ref = PyJitRef_Wrap(res);
+    _Py_uop_sym_set_type(ctx, ref, typ);
+    return ref;
 }
 
 // Adds a new reference to const_val, owned by the symbol.
-JitOptSymbol *
+JitOptRef
 _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val)
 {
     assert(const_val != NULL);
     JitOptSymbol *res = sym_new(ctx);
     if (res == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
-    _Py_uop_sym_set_const(ctx, res, const_val);
+    JitOptRef ref = PyJitRef_Wrap(res);
+    _Py_uop_sym_set_const(ctx, ref, const_val);
+    return ref;
+}
+
+JitOptRef
+_Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val)
+{
+    assert(const_val != NULL);
+    JitOptRef res = _Py_uop_sym_new_const(ctx, const_val);
+    // Decref once because sym_new_const increfs it.
+    Py_DECREF(const_val);
     return res;
 }
 
-JitOptSymbol *
+JitOptRef
 _Py_uop_sym_new_null(JitOptContext *ctx)
 {
     JitOptSymbol *null_sym = sym_new(ctx);
     if (null_sym == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
-    _Py_uop_sym_set_null(ctx, null_sym);
-    return null_sym;
+    JitOptRef ref = PyJitRef_Wrap(null_sym);
+    _Py_uop_sym_set_null(ctx, ref);
+    return ref;
 }
 
 PyTypeObject *
-_Py_uop_sym_get_type(JitOptSymbol *sym)
+_Py_uop_sym_get_type(JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     JitSymType tag = sym->tag;
     switch(tag) {
         case JIT_SYM_NULL_TAG:
@@ -458,13 +541,17 @@ _Py_uop_sym_get_type(JitOptSymbol *sym)
             return &PyTuple_Type;
         case JIT_SYM_TRUTHINESS_TAG:
             return &PyBool_Type;
+        case JIT_SYM_COMPACT_INT:
+            return &PyLong_Type;
+
     }
     Py_UNREACHABLE();
 }
 
 unsigned int
-_Py_uop_sym_get_type_version(JitOptSymbol *sym)
+_Py_uop_sym_get_type_version(JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     JitSymType tag = sym->tag;
     switch(tag) {
         case JIT_SYM_NULL_TAG:
@@ -482,38 +569,42 @@ _Py_uop_sym_get_type_version(JitOptSymbol *sym)
             return PyTuple_Type.tp_version_tag;
         case JIT_SYM_TRUTHINESS_TAG:
             return PyBool_Type.tp_version_tag;
+        case JIT_SYM_COMPACT_INT:
+            return PyLong_Type.tp_version_tag;
     }
     Py_UNREACHABLE();
 }
 
 bool
-_Py_uop_sym_has_type(JitOptSymbol *sym)
+_Py_uop_sym_has_type(JitOptRef sym)
 {
     return _Py_uop_sym_get_type(sym) != NULL;
 }
 
 bool
-_Py_uop_sym_matches_type(JitOptSymbol *sym, PyTypeObject *typ)
+_Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ)
 {
     assert(typ != NULL && PyType_Check(typ));
     return _Py_uop_sym_get_type(sym) == typ;
 }
 
 bool
-_Py_uop_sym_matches_type_version(JitOptSymbol *sym, unsigned int version)
+_Py_uop_sym_matches_type_version(JitOptRef sym, unsigned int version)
 {
     return _Py_uop_sym_get_type_version(sym) == version;
 }
 
 int
-_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym)
+_Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     switch(sym->tag) {
         case JIT_SYM_NULL_TAG:
         case JIT_SYM_TYPE_VERSION_TAG:
         case JIT_SYM_BOTTOM_TAG:
         case JIT_SYM_NON_NULL_TAG:
         case JIT_SYM_UNKNOWN_TAG:
+        case JIT_SYM_COMPACT_INT:
             return -1;
         case JIT_SYM_KNOWN_CLASS_TAG:
             /* TODO :
@@ -527,7 +618,8 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym)
         case JIT_SYM_TRUTHINESS_TAG:
             ;
             JitOptSymbol *value = allocation_base(ctx) + sym->truthiness.value;
-            int truthiness = _Py_uop_sym_truthiness(ctx, value);
+            int truthiness = _Py_uop_sym_truthiness(ctx,
+                                                    PyJitRef_Wrap(value));
             if (truthiness < 0) {
                 return truthiness;
             }
@@ -553,12 +645,12 @@ _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptSymbol *sym)
     return -1;
 }
 
-JitOptSymbol *
-_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args)
+JitOptRef
+_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef *args)
 {
     JitOptSymbol *res = sym_new(ctx);
     if (res == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
     if (size > MAX_SYMBOLIC_TUPLE_SIZE) {
         res->tag = JIT_SYM_KNOWN_CLASS_TAG;
@@ -568,15 +660,16 @@ _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args)
         res->tag = JIT_SYM_TUPLE_TAG;
         res->tuple.length = size;
         for (int i = 0; i < size; i++) {
-            res->tuple.items[i] = (uint16_t)(args[i] - allocation_base(ctx));
+            res->tuple.items[i] = (uint16_t)(PyJitRef_Unwrap(args[i]) - allocation_base(ctx));
         }
     }
-    return res;
+    return PyJitRef_Wrap(res);
 }
 
-JitOptSymbol *
-_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item)
+JitOptRef
+_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef ref, int item)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     assert(item >= 0);
     if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) {
         PyObject *tuple = sym->value.value;
@@ -585,14 +678,15 @@ _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item)
         }
     }
     else if (sym->tag == JIT_SYM_TUPLE_TAG && item < sym->tuple.length) {
-        return allocation_base(ctx) + sym->tuple.items[item];
+        return PyJitRef_Wrap(allocation_base(ctx) + sym->tuple.items[item]);
     }
     return _Py_uop_sym_new_not_null(ctx);
 }
 
 int
-_Py_uop_sym_tuple_length(JitOptSymbol *sym)
+_Py_uop_sym_tuple_length(JitOptRef ref)
 {
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
     if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) {
         PyObject *tuple = sym->value.value;
         if (PyTuple_CheckExact(tuple)) {
@@ -607,7 +701,7 @@ _Py_uop_sym_tuple_length(JitOptSymbol *sym)
 
 // Return true if known to be immortal.
 bool
-_Py_uop_sym_is_immortal(JitOptSymbol *sym)
+_Py_uop_symbol_is_immortal(JitOptSymbol *sym)
 {
     if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) {
         return _Py_IsImmortal(sym->value.value);
@@ -615,25 +709,84 @@ _Py_uop_sym_is_immortal(JitOptSymbol *sym)
     if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) {
         return sym->cls.type == &PyBool_Type;
     }
-    if (sym->tag == JIT_SYM_TRUTHINESS_TAG) {
-        return true;
-    }
     return false;
 }
 
-JitOptSymbol *
-_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy)
+// True for compact-int constants and for symbols tagged JIT_SYM_COMPACT_INT.
+bool
+_Py_uop_sym_is_compact_int(JitOptRef ref)
+{
+    JitOptSymbol *symbol = PyJitRef_Unwrap(ref);
+    return symbol->tag == JIT_SYM_KNOWN_VALUE_TAG
+        ? (bool)_PyLong_CheckExactAndCompact(symbol->value.value)
+        : symbol->tag == JIT_SYM_COMPACT_INT;
+}
+
+// JitOptRef wrapper: unwrap the reference and test the symbol itself.
+bool
+_Py_uop_sym_is_immortal(JitOptRef ref)
+{
+    return _Py_uop_symbol_is_immortal(PyJitRef_Unwrap(ref));
+}
+
+// Narrow the symbol to a compact int.  Tags that cannot be narrowed
+// that way turn the symbol into bottom.
+void
+_Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref)
+{
+    JitOptSymbol *symbol = PyJitRef_Unwrap(ref);
+    JitSymType tag = symbol->tag;
+    switch (tag) {
+        case JIT_SYM_BOTTOM_TAG:
+        case JIT_SYM_COMPACT_INT:
+            // Nothing left to narrow.
+            return;
+        case JIT_SYM_NON_NULL_TAG:
+        case JIT_SYM_UNKNOWN_TAG:
+            symbol->tag = JIT_SYM_COMPACT_INT;
+            return;
+        case JIT_SYM_KNOWN_CLASS_TAG:
+            if (symbol->cls.type != &PyLong_Type) {
+                break;
+            }
+            symbol->tag = JIT_SYM_COMPACT_INT;
+            return;
+        case JIT_SYM_TYPE_VERSION_TAG:
+            if (symbol->version.version != PyLong_Type.tp_version_tag) {
+                break;
+            }
+            symbol->tag = JIT_SYM_COMPACT_INT;
+            return;
+        case JIT_SYM_KNOWN_VALUE_TAG:
+            if (_PyLong_CheckExactAndCompact(symbol->value.value)) {
+                // A compact constant is kept unchanged.
+                return;
+            }
+            Py_CLEAR(symbol->value.value);
+            break;
+        case JIT_SYM_NULL_TAG:
+        case JIT_SYM_TUPLE_TAG:
+        case JIT_SYM_TRUTHINESS_TAG:
+            break;
+    }
+    // Every `break` above means the narrowing was impossible.
+    sym_set_bottom(ctx, symbol);
+}
+
+JitOptRef
+_Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef ref, bool truthy)
 {
+    JitOptSymbol *value = PyJitRef_Unwrap(ref);
     // It's clearer to invert this in the signature:
     bool invert = !truthy;
     if (value->tag == JIT_SYM_TRUTHINESS_TAG && value->truthiness.invert == invert) {
-        return value;
+        return ref;
     }
     JitOptSymbol *res = sym_new(ctx);
     if (res == NULL) {
-        return out_of_space(ctx);
+        return out_of_space_ref(ctx);
     }
-    int truthiness = _Py_uop_sym_truthiness(ctx, value);
+    int truthiness = _Py_uop_sym_truthiness(ctx, ref);
     if (truthiness < 0) {
         res->tag = JIT_SYM_TRUTHINESS_TAG;
         res->truthiness.invert = invert;
@@ -642,7 +795,18 @@ _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptSymbol *value, bool truthy)
     else {
         make_const(res, (truthiness ^ invert) ? Py_True : Py_False);
     }
-    return res;
+    return PyJitRef_Wrap(res);
+}
+
+JitOptRef
+_Py_uop_sym_new_compact_int(JitOptContext *ctx)
+{
+    JitOptSymbol *fresh = sym_new(ctx);
+    if (fresh != NULL) {
+        fresh->tag = JIT_SYM_COMPACT_INT;  // new symbol starts as compact int
+        return PyJitRef_Wrap(fresh);
+    }
+    return out_of_space_ref(ctx);  // sym_new() returned NULL
 }
 
 // 0 on success, -1 on error.
@@ -651,7 +815,7 @@ _Py_uop_frame_new(
     JitOptContext *ctx,
     PyCodeObject *co,
     int curr_stackentries,
-    JitOptSymbol **args,
+    JitOptRef *args,
     int arg_len)
 {
     assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
@@ -676,14 +840,14 @@ _Py_uop_frame_new(
     }
 
     for (int i = arg_len; i < co->co_nlocalsplus; i++) {
-        JitOptSymbol *local = _Py_uop_sym_new_unknown(ctx);
+        JitOptRef local = _Py_uop_sym_new_unknown(ctx);
         frame->locals[i] = local;
     }
 
 
     // Initialize the stack as well
     for (int i = 0; i < curr_stackentries; i++) {
-        JitOptSymbol *stackvar = _Py_uop_sym_new_unknown(ctx);
+        JitOptRef stackvar = _Py_uop_sym_new_unknown(ctx);
         frame->stack[i] = stackvar;
     }
 
@@ -709,12 +873,12 @@ _Py_uop_abstractcontext_fini(JitOptContext *ctx)
 void
 _Py_uop_abstractcontext_init(JitOptContext *ctx)
 {
-    static_assert(sizeof(JitOptSymbol) <= 2 * sizeof(uint64_t), "JitOptSymbol has grown");
+    static_assert(sizeof(JitOptSymbol) <= 3 * sizeof(uint64_t), "JitOptSymbol has grown");
     ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
     ctx->n_consumed = ctx->locals_and_stack;
 #ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
     for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
-        ctx->locals_and_stack[i] = NULL;
+        ctx->locals_and_stack[i] = PyJitRef_NULL;
     }
 #endif
 
@@ -764,47 +928,48 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
     _Py_uop_abstractcontext_init(ctx);
     PyObject *val_42 = NULL;
     PyObject *val_43 = NULL;
+    PyObject *val_big = NULL;
     PyObject *tuple = NULL;
 
     // Use a single 'sym' variable so copy-pasting tests is easier.
-    JitOptSymbol *sym = _Py_uop_sym_new_unknown(ctx);
-    if (sym == NULL) {
+    JitOptRef ref = _Py_uop_sym_new_unknown(ctx);
+    if (PyJitRef_IsNull(ref)) {
         goto fail;
     }
-    TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "top is NULL");
-    TEST_PREDICATE(!_Py_uop_sym_is_not_null(sym), "top is not NULL");
-    TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyLong_Type), "top matches a type");
-    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "top is a constant");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "top as constant is not NULL");
-    TEST_PREDICATE(!_Py_uop_sym_is_bottom(sym), "top is bottom");
-
-    sym = make_bottom(ctx);
-    if (sym == NULL) {
+    TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "top is NULL");
+    TEST_PREDICATE(!_Py_uop_sym_is_not_null(ref), "top is not NULL");
+    TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyLong_Type), "top matches a type");
+    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "top is a constant");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "top as constant is not NULL");
+    TEST_PREDICATE(!_Py_uop_sym_is_bottom(ref), "top is bottom");
+
+    ref = PyJitRef_Wrap(make_bottom(ctx));
+    if (PyJitRef_IsNull(ref)) {
         goto fail;
     }
-    TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "bottom is NULL is not false");
-    TEST_PREDICATE(!_Py_uop_sym_is_not_null(sym), "bottom is not NULL is not false");
-    TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyLong_Type), "bottom matches a type");
-    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "bottom is a constant is not false");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "bottom as constant is not NULL");
-    TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "bottom isn't bottom");
-
-    sym = _Py_uop_sym_new_type(ctx, &PyLong_Type);
-    if (sym == NULL) {
+    TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "bottom is NULL is not false");
+    TEST_PREDICATE(!_Py_uop_sym_is_not_null(ref), "bottom is not NULL is not false");
+    TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyLong_Type), "bottom matches a type");
+    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "bottom is a constant is not false");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "bottom as constant is not NULL");
+    TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "bottom isn't bottom");
+
+    ref = _Py_uop_sym_new_type(ctx, &PyLong_Type);
+    if (PyJitRef_IsNull(ref)) {
         goto fail;
     }
-    TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "int is NULL");
-    TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "int isn't not NULL");
-    TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "int isn't int");
-    TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyFloat_Type), "int matches float");
-    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, sym), "int is a constant");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "int as constant is not NULL");
+    TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "int is NULL");
+    TEST_PREDICATE(_Py_uop_sym_is_not_null(ref), "int isn't not NULL");
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "int isn't int");
+    TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyFloat_Type), "int matches float");
+    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, ref), "int is a constant");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "int as constant is not NULL");
 
-    _Py_uop_sym_set_type(ctx, sym, &PyLong_Type);  // Should be a no-op
-    TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "(int and int) isn't int");
+    _Py_uop_sym_set_type(ctx, ref, &PyLong_Type);  // Should be a no-op
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "(int and int) isn't int");
 
-    _Py_uop_sym_set_type(ctx, sym, &PyFloat_Type);  // Should make it bottom
-    TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(int and float) isn't bottom");
+    _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type);  // Should make it bottom
+    TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(int and float) isn't bottom");
 
     val_42 = PyLong_FromLong(42);
     assert(val_42 != NULL);
@@ -814,89 +979,118 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
     assert(val_43 != NULL);
     assert(_Py_IsImmortal(val_43));
 
-    sym = _Py_uop_sym_new_type(ctx, &PyLong_Type);
-    if (sym == NULL) {
+    ref = _Py_uop_sym_new_type(ctx, &PyLong_Type);
+    if (PyJitRef_IsNull(ref)) {
         goto fail;
     }
-    _Py_uop_sym_set_const(ctx, sym, val_42);
-    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 1, "bool(42) is not True");
-    TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "42 is NULL");
-    TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "42 isn't not NULL");
-    TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "42 isn't an int");
-    TEST_PREDICATE(!_Py_uop_sym_matches_type(sym, &PyFloat_Type), "42 matches float");
-    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym), "42 is not a constant");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) != NULL, "42 as constant is NULL");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == val_42, "42 as constant isn't 42");
-    TEST_PREDICATE(_Py_uop_sym_is_immortal(sym), "42 is not immortal");
-
-    _Py_uop_sym_set_type(ctx, sym, &PyLong_Type);  // Should be a no-op
-    TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "(42 and 42) isn't an int");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == val_42, "(42 and 42) as constant isn't 42");
-
-    _Py_uop_sym_set_type(ctx, sym, &PyFloat_Type);  // Should make it bottom
-    TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and float) isn't bottom");
-
-    sym = _Py_uop_sym_new_type(ctx, &PyBool_Type);
-    TEST_PREDICATE(_Py_uop_sym_is_immortal(sym), "a bool is not immortal");
-
-    sym = _Py_uop_sym_new_type(ctx, &PyLong_Type);
-    if (sym == NULL) {
+    _Py_uop_sym_set_const(ctx, ref, val_42);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 1, "bool(42) is not True");
+    TEST_PREDICATE(!_Py_uop_sym_is_null(ref), "42 is NULL");
+    TEST_PREDICATE(_Py_uop_sym_is_not_null(ref), "42 isn't not NULL");
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "42 isn't an int");
+    TEST_PREDICATE(!_Py_uop_sym_matches_type(ref, &PyFloat_Type), "42 matches float");
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref), "42 is not a constant");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) != NULL, "42 as constant is NULL");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == val_42, "42 as constant isn't 42");
+    TEST_PREDICATE(_Py_uop_sym_is_immortal(ref), "42 is not immortal");
+
+    _Py_uop_sym_set_type(ctx, ref, &PyLong_Type);  // Should be a no-op
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyLong_Type), "(42 and 42) isn't an int");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == val_42, "(42 and 42) as constant isn't 42");
+
+    _Py_uop_sym_set_type(ctx, ref, &PyFloat_Type);  // Should make it bottom
+    TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(42 and float) isn't bottom");
+
+    ref = _Py_uop_sym_new_type(ctx, &PyBool_Type);
+    TEST_PREDICATE(_Py_uop_sym_is_immortal(ref), "a bool is not immortal");
+
+    ref = _Py_uop_sym_new_type(ctx, &PyLong_Type);
+    if (PyJitRef_IsNull(ref)) {
         goto fail;
     }
-    _Py_uop_sym_set_const(ctx, sym, val_42);
-    _Py_uop_sym_set_const(ctx, sym, val_43);  // Should make it bottom
-    TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and 43) isn't bottom");
-
-
-    sym = _Py_uop_sym_new_const(ctx, Py_None);
-    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(None) is not False");
-    sym = _Py_uop_sym_new_const(ctx, Py_False);
-    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(False) is not False");
-    sym = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0));
-    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "bool(0) is not False");
-
-    JitOptSymbol *i1 = _Py_uop_sym_new_type(ctx, &PyFloat_Type);
-    JitOptSymbol *i2 = _Py_uop_sym_new_const(ctx, val_43);
-    JitOptSymbol *array[2] = { i1, i2 };
-    sym = _Py_uop_sym_new_tuple(ctx, 2, array);
+    _Py_uop_sym_set_const(ctx, ref, val_42);
+    _Py_uop_sym_set_const(ctx, ref, val_43);  // Should make it bottom
+    TEST_PREDICATE(_Py_uop_sym_is_bottom(ref), "(42 and 43) isn't bottom");
+
+
+    ref = _Py_uop_sym_new_const(ctx, Py_None);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(None) is not False");
+    ref = _Py_uop_sym_new_const(ctx, Py_False);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(False) is not False");
+    ref = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0));
+    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "bool(0) is not False");
+
+    JitOptRef i1 = _Py_uop_sym_new_type(ctx, &PyFloat_Type);
+    JitOptRef i2 = _Py_uop_sym_new_const(ctx, val_43);
+    JitOptRef array[2] = { i1, i2 };
+    ref = _Py_uop_sym_new_tuple(ctx, 2, array);
     TEST_PREDICATE(
-        _Py_uop_sym_matches_type(_Py_uop_sym_tuple_getitem(ctx, sym, 0), &PyFloat_Type),
+        _Py_uop_sym_matches_type(_Py_uop_sym_tuple_getitem(ctx, ref, 0), &PyFloat_Type),
         "tuple item does not match value used to create tuple"
     );
     TEST_PREDICATE(
-        _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43,
+        _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, ref, 1)) == val_43,
         "tuple item does not match value used to create tuple"
     );
     PyObject *pair[2] = { val_42, val_43 };
     tuple = _PyTuple_FromArray(pair, 2);
-    sym = _Py_uop_sym_new_const(ctx, tuple);
+    ref = _Py_uop_sym_new_const(ctx, tuple);
     TEST_PREDICATE(
-        _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, sym, 1)) == val_43,
+        _Py_uop_sym_get_const(ctx, _Py_uop_sym_tuple_getitem(ctx, ref, 1)) == val_43,
         "tuple item does not match value used to create tuple"
     );
-    sym = _Py_uop_sym_new_type(ctx, &PyTuple_Type);
+    ref = _Py_uop_sym_new_type(ctx, &PyTuple_Type);
     TEST_PREDICATE(
-        _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, sym, 42)),
+        _Py_uop_sym_is_not_null(_Py_uop_sym_tuple_getitem(ctx, ref, 42)),
         "Unknown tuple item is not narrowed to non-NULL"
     );
-    JitOptSymbol *value = _Py_uop_sym_new_type(ctx, &PyBool_Type);
-    sym = _Py_uop_sym_new_truthiness(ctx, value, false);
-    TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean");
-    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == -1, "truthiness is not unknown");
-    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym) == false, "truthiness is constant");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == NULL, "truthiness is not NULL");
+    JitOptRef value = _Py_uop_sym_new_type(ctx, &PyBool_Type);
+    ref = _Py_uop_sym_new_truthiness(ctx, value, false);
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyBool_Type), "truthiness is not boolean");
+    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == -1, "truthiness is not unknown");
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref) == false, "truthiness is constant");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == NULL, "truthiness is not NULL");
     TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == false, "value is constant");
     TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == NULL, "value is not NULL");
-    _Py_uop_sym_set_const(ctx, sym, Py_False);
-    TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyBool_Type), "truthiness is not boolean");
-    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, sym) == 0, "truthiness is not True");
-    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, sym) == true, "truthiness is not constant");
-    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, sym) == Py_False, "truthiness is not False");
+    _Py_uop_sym_set_const(ctx, ref, Py_False);
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref, &PyBool_Type), "truthiness is not boolean");
+    TEST_PREDICATE(_Py_uop_sym_truthiness(ctx, ref) == 0, "truthiness is not True");
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, ref) == true, "truthiness is not constant");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref) == Py_False, "truthiness is not False");
     TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == true, "value is not constant");
     TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == Py_True, "value is not True");
+
+
+    val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(66));
+    if (val_big == NULL) {
+        goto fail;
+    }
+
+    JitOptRef ref_42 = _Py_uop_sym_new_const(ctx, val_42);
+    JitOptRef ref_big = _Py_uop_sym_new_const(ctx, val_big);
+    JitOptRef ref_int = _Py_uop_sym_new_compact_int(ctx);
+    TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_42), "42 is not a compact int");
+    TEST_PREDICATE(!_Py_uop_sym_is_compact_int(ref_big), "(1 << 66) is a compact int");
+    TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int");
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact int is not an int");
+
+    _Py_uop_sym_set_type(ctx, ref_int, &PyLong_Type);  // Should have no effect
+    TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int after cast");
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact int is not an int after cast");
+
+    _Py_uop_sym_set_type(ctx, ref_int, &PyFloat_Type);  // Should make it bottom
+    TEST_PREDICATE(_Py_uop_sym_is_bottom(ref_int), "compact int cast to float isn't bottom");
+
+    ref_int = _Py_uop_sym_new_compact_int(ctx);
+    _Py_uop_sym_set_const(ctx, ref_int, val_43);
+    TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "43 is not a compact int");
+    TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "43 is not an int");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, ref_int) == val_43, "43 isn't 43");
+
     _Py_uop_abstractcontext_fini(ctx);
     Py_DECREF(val_42);
     Py_DECREF(val_43);
+    Py_DECREF(val_big);
     Py_DECREF(tuple);
     Py_RETURN_NONE;
 
@@ -904,6 +1098,7 @@ fail:
     _Py_uop_abstractcontext_fini(ctx);
     Py_XDECREF(val_42);
     Py_XDECREF(val_43);
+    Py_XDECREF(val_big);
     Py_DECREF(tuple);
     return NULL;
 }
diff --git a/Python/pystate.c b/Python/pystate.c
index 0544b15aad1..0d4c26f92ce 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -1142,6 +1142,7 @@ _Py_CheckMainModule(PyObject *module)
         PyObject *msg = PyUnicode_FromString("invalid __main__ module");
         if (msg != NULL) {
             (void)PyErr_SetImportError(msg, &_Py_ID(__main__), NULL);
+            Py_DECREF(msg);
         }
         return -1;
     }
diff --git a/Python/qsbr.c b/Python/qsbr.c
index bf34fb2523d..c992c285cb1 100644
--- a/Python/qsbr.c
+++ b/Python/qsbr.c
@@ -1,6 +1,6 @@
 /*
  * Implementation of safe memory reclamation scheme using
- * quiescent states.
+ * quiescent states.  See InternalDocs/qsbr.md.
  *
  * This is derived from the "GUS" safe memory reclamation technique
  * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs
@@ -41,10 +41,6 @@
 // Starting size of the array of qsbr thread states
 #define MIN_ARRAY_SIZE 8
 
-// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing
-// the write sequence.
-#define QSBR_DEFERRED_LIMIT 10
-
 // Allocate a QSBR thread state from the freelist
 static struct _qsbr_thread_state *
 qsbr_allocate(struct _qsbr_shared *shared)
@@ -117,13 +113,9 @@ _Py_qsbr_advance(struct _qsbr_shared *shared)
 }
 
 uint64_t
-_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr)
+_Py_qsbr_shared_next(struct _qsbr_shared *shared)
 {
-    if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) {
-        return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
-    }
-    qsbr->deferrals = 0;
-    return _Py_qsbr_advance(qsbr->shared);
+    return _Py_qsbr_shared_current(shared) + QSBR_INCR;
 }
 
 static uint64_t
diff --git a/Python/specialize.c b/Python/specialize.c
index 92f79d39d55..fe8d04cf344 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -118,6 +118,7 @@ _Py_GetSpecializationStats(void) {
     err += add_stat_dict(stats, LOAD_GLOBAL, "load_global");
     err += add_stat_dict(stats, STORE_SUBSCR, "store_subscr");
     err += add_stat_dict(stats, STORE_ATTR, "store_attr");
+    err += add_stat_dict(stats, JUMP_BACKWARD, "jump_backward");
     err += add_stat_dict(stats, CALL, "call");
     err += add_stat_dict(stats, CALL_KW, "call_kw");
     err += add_stat_dict(stats, BINARY_OP, "binary_op");
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index e5ae841d195..ae6cf306735 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2488,6 +2488,11 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script)
     if (PyUnicode_FSConverter(script, &path) == 0) {
         return NULL;
     }
+
+    if (PySys_Audit("sys.remote_exec", "iO", pid, script) < 0) {
+        Py_DECREF(path);  // owned since PyUnicode_FSConverter()
+        return NULL;
+    }
     debugger_script_path = PyBytes_AS_STRING(path);
 #ifdef MS_WINDOWS
     PyObject *unicode_path;
@@ -3602,6 +3607,18 @@ make_impl_info(PyObject *version_info)
         goto error;
 #endif
 
+    // PEP-734
+#if defined(__wasi__) || defined(__EMSCRIPTEN__)
+    // It is not enabled on WASM builds just yet
+    value = Py_False;
+#else
+    value = Py_True;
+#endif
+    res = PyDict_SetItemString(impl_info, "supports_isolated_interpreters", value);
+    if (res < 0) {
+        goto error;
+    }
+
     /* dict ready */
 
     ns = _PyNamespace_New(impl_info);
diff --git a/Tools/build/generate-build-details.py b/Tools/build/generate-build-details.py
index 87e262065ec..8cd23e2f54f 100644
--- a/Tools/build/generate-build-details.py
+++ b/Tools/build/generate-build-details.py
@@ -75,7 +75,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]:
     PY3LIBRARY = sysconfig.get_config_var('PY3LIBRARY')
     LIBPYTHON = sysconfig.get_config_var('LIBPYTHON')
     LIBPC = sysconfig.get_config_var('LIBPC')
-    INCLUDEDIR = sysconfig.get_config_var('INCLUDEDIR')
+    INCLUDEPY = sysconfig.get_config_var('INCLUDEPY')
 
     if os.name == 'posix':
         # On POSIX, LIBRARY is always the static library, while LDLIBRARY is the
@@ -123,7 +123,7 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]:
     if has_static_library:
         data['libpython']['static'] = os.path.join(LIBDIR, LIBRARY)
 
-    data['c_api']['headers'] = INCLUDEDIR
+    data['c_api']['headers'] = INCLUDEPY
     if LIBPC:
         data['c_api']['pkgconfig_path'] = LIBPC
 
diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py
index df52f8de762..968397728b2 100644
--- a/Tools/build/generate_sbom.py
+++ b/Tools/build/generate_sbom.py
@@ -172,7 +172,7 @@ def download_with_retries(download_location: str,
     for attempt in range(max_retries + 1):
         try:
             resp = urllib.request.urlopen(download_location)
-        except urllib.error.URLError as ex:
+        except (urllib.error.URLError, ConnectionError) as ex:
             if attempt == max_retries:
                 msg = f"Download from {download_location} failed."
                 raise OSError(msg) from ex
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index ca6d0301f35..6466d2615cd 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -180,7 +180,7 @@ class Uop:
     properties: Properties
     _size: int = -1
     implicitly_created: bool = False
-    replicated = 0
+    replicated = range(0)
     replicates: "Uop | None" = None
     # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
     instruction_size: int | None = None
@@ -596,6 +596,7 @@ NON_ESCAPING_FUNCTIONS = (
     "PyStackRef_IsNull",
     "PyStackRef_MakeHeapSafe",
     "PyStackRef_None",
+    "PyStackRef_RefcountOnObject",
     "PyStackRef_TYPE",
     "PyStackRef_True",
     "PyTuple_GET_ITEM",
@@ -687,6 +688,7 @@ NON_ESCAPING_FUNCTIONS = (
     "PyStackRef_IsValid",
     "PyStackRef_Wrap",
     "PyStackRef_Unwrap",
+    "_PyLong_CheckExactAndCompact",
 )
 
 
@@ -741,7 +743,7 @@ def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingC
                     continue
                 #if not tkn.text.startswith(("Py", "_Py", "monitor")):
                 #    continue
-                if tkn.text.startswith(("sym_", "optimize_")):
+                if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")):
                     # Optimize functions
                     continue
                 if tkn.text.endswith("Check"):
@@ -868,6 +870,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
         needs_prev=variable_used(op, "prev_instr"),
     )
 
+def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
+    """Scalarize the single oparg-sized array item, if there is exactly one."""
+    array_positions = [
+        pos for pos, item in enumerate(items) if "oparg" in item.size
+    ]
+    if len(array_positions) != 1:
+        # Zero or several oparg-sized arrays: leave the stack items untouched.
+        return items
+    (index,) = array_positions
+    array = items[index]
+    try:
+        # The size text is evaluated with oparg substituted as a literal.
+        count = int(eval(array.size.replace("oparg", str(oparg))))
+    except ValueError:
+        return items
+    scalars = [StackItem(f"{array.name}_{i}", "", array.peek, array.used) for i in range(count)]
+    return items[:index] + scalars + items[index + 1:]
+
+def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
+    stack.inputs = expand(stack.inputs, oparg)  # attribute is reassigned on the passed-in object
+    stack.outputs = expand(stack.outputs, oparg)  # same treatment for the outputs
+    return stack  # the same object that was passed in
 
 def make_uop(
     name: str,
@@ -887,20 +911,26 @@ def make_uop(
     )
     for anno in op.annotations:
         if anno.startswith("replicate"):
-            result.replicated = int(anno[10:-1])
+            text = anno[10:-1]
+            start, stop = text.split(":")
+            result.replicated = range(int(start), int(stop))
             break
     else:
         return result
-    for oparg in range(result.replicated):
+    for oparg in result.replicated:
         name_x = name + "_" + str(oparg)
         properties = compute_properties(op)
         properties.oparg = False
-        properties.const_oparg = oparg
+        stack = analyze_stack(op)
+        if not variable_used(op, "oparg"):
+            stack = scalarize_stack(stack, oparg)
+        else:
+            properties.const_oparg = oparg
         rep = Uop(
             name=name_x,
             context=op.context,
             annotations=op.annotations,
-            stack=analyze_stack(op),
+            stack=stack,
             caches=analyze_caches(inputs),
             local_stores=find_variable_stores(op),
             body=op.block,
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 10567204dcc..0bcdc5395dc 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None:
             assert name2 in analysis.instructions, f"{name2} doesn't match any instr"
             instr1 = analysis.instructions[name1]
             instr2 = analysis.instructions[name2]
-            assert (
-                len(instr1.parts) == 1
-            ), f"{name1} is not a good superinstruction part"
-            assert (
-                len(instr2.parts) == 1
-            ), f"{name2} is not a good superinstruction part"
-            expansions.append((instr1.parts[0].name, "OPARG_TOP", 0))
-            expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0))
+            for part in instr1.parts:
+                expansions.append((part.name, "OPARG_TOP", 0))
+            for part in instr2.parts:
+                expansions.append((part.name, "OPARG_BOTTOM", 0))
         elif not is_viable_expansion(inst):
             continue
         else:
diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py
index 75805dbd7f3..4556b6d5a74 100644
--- a/Tools/cases_generator/optimizer_generator.py
+++ b/Tools/cases_generator/optimizer_generator.py
@@ -12,6 +12,8 @@ from analyzer import (
     analyze_files,
     StackItem,
     analysis_error,
+    CodeSection,
+    Label,
 )
 from generators_common import (
     DEFAULT_INPUT,
@@ -19,6 +21,7 @@ from generators_common import (
     write_header,
     Emitter,
     TokenIterator,
+    always_true,
 )
 from cwriter import CWriter
 from typing import TextIO
@@ -72,9 +75,12 @@ def validate_uop(override: Uop, uop: Uop) -> None:
 
 def type_name(var: StackItem) -> str:
     if var.is_array():
-        return "JitOptSymbol **"
-    return "JitOptSymbol *"
+        return "JitOptRef *"
+    return "JitOptRef "
 
+def stackref_type_name(var: StackItem) -> str:
+    assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef."
+    return "_PyStackRef "
 
 def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
     variables = {"unused"}
@@ -135,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None:
 
 class OptimizerEmitter(Emitter):
 
+    def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack):
+        super().__init__(out, labels)
+        self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure
+        self.original_uop = original_uop
+        self.stack = stack
+
     def emit_save(self, storage: Storage) -> None:
         storage.flush(self.out)
 
@@ -145,6 +157,185 @@ class OptimizerEmitter(Emitter):
         self.out.emit(goto)
         self.out.emit(label)
 
+    def replace_opcode_if_evaluates_pure(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        assert isinstance(uop, Uop)
+        input_identifiers = []
+        for token in tkn_iter:
+            if token.kind == "IDENTIFIER":
+                input_identifiers.append(token)
+            if token.kind == "SEMI":
+                break
+
+        if len(input_identifiers) == 0:
+            raise analysis_error(
+                "To evaluate an operation as pure, it must have at least 1 input",
+                tkn
+            )
+        # Check that the input identifiers belong to the uop's
+        # input stack effect
+        uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs}
+        for input_tkn in input_identifiers:
+            if input_tkn.text not in uop_stack_effect_input_identifers:
+                raise analysis_error(f"{input_tkn.text} referenced in "
+                                     f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not "
+                                     f"exist in the base uop's input stack effects",
+                                     input_tkn)
+        input_identifiers_as_str = {tkn.text for tkn in input_identifiers}
+        used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str]
+        assert len(used_stack_inputs) > 0
+        emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy())
+        emitter.emit("if (\n")
+        for inp in used_stack_inputs[:-1]:
+            emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n")
+        emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n")
+        emitter.emit(') {\n')
+        # Declare variables, before they are shadowed.
+        for inp in used_stack_inputs:
+            if inp.used:
+                emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n")
+        # Shadow the symbolic variables with stackrefs.
+        for inp in used_stack_inputs:
+            if inp.is_array():
+                raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn)
+            if inp.used:
+                emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n")
+        # Rename all output variables to stackref variant.
+        for outp in self.original_uop.stack.outputs:
+            if outp.is_array():
+                raise analysis_error(
+                    "Array output StackRefs not supported for evaluating pure ops.",
+                    self.original_uop.body.open
+                )
+            emitter.emit(f"_PyStackRef {outp.name}_stackref;\n")
+
+
+        storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False)
+        # No reference management of outputs needed.
+        for var in storage.outputs:
+            var.in_local = True
+        emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n")
+        emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False)
+        self.out.start_line()
+        emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n")
+        # Finally, assign back the output stackrefs to symbolics.
+        for outp in self.original_uop.stack.outputs:
+            # All new stackrefs are created from new references.
+            # That's how the stackref contract works.
+            if not outp.peek:
+                emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n")
+            else:
+                emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n")
+        storage.flush(self.out)
+        emitter.emit("break;\n")
+        emitter.emit("}\n")
+        return True
+
+class OptimizerConstantEmitter(OptimizerEmitter):
+    def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack):
+        super().__init__(out, labels, original_uop, stack)
+        # Replace all outputs to point to their stackref versions.
+        overrides = {
+            outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs
+        }
+        self._replacers = {**self._replacers, **overrides}
+
+    def emit_to_with_replacement(
+        self,
+        out: CWriter,
+        tkn_iter: TokenIterator,
+        end: str,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None
+    ) -> Token:
+        parens = 0
+        for tkn in tkn_iter:
+            if tkn.kind == end and parens == 0:
+                return tkn
+            if tkn.kind == "LPAREN":
+                parens += 1
+            if tkn.kind == "RPAREN":
+                parens -= 1
+            if tkn.text in self._replacers:
+                self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst)
+            else:
+                out.emit(tkn)
+        raise analysis_error(f"Expecting {end}. Reached end of file", tkn)
+
+    def emit_stackref_override(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        self.out.emit(tkn)
+        self.out.emit("_stackref ")
+        return True
+
+    def deopt_if(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        self.out.start_line()
+        self.out.emit("if (")
+        lparen = next(tkn_iter)
+        assert lparen.kind == "LPAREN"
+        first_tkn = tkn_iter.peek()
+        self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst)
+        self.emit(") {\n")
+        next(tkn_iter)  # Semicolon
+        # We guarantee this will deopt in real-world code
+        # via constants analysis. So just bail.
+        self.emit("ctx->done = true;\n")
+        self.emit("break;\n")
+        self.emit("}\n")
+        return not always_true(first_tkn)
+
+    exit_if = deopt_if
+
+    def error_if(
+        self,
+        tkn: Token,
+        tkn_iter: TokenIterator,
+        uop: CodeSection,
+        storage: Storage,
+        inst: Instruction | None,
+    ) -> bool:
+        lparen = next(tkn_iter)
+        assert lparen.kind == "LPAREN"
+        first_tkn = tkn_iter.peek()
+        unconditional = always_true(first_tkn)
+        if unconditional:
+            next(tkn_iter)
+            next(tkn_iter)  # RPAREN
+            self.out.start_line()
+        else:
+            self.out.emit_at("if ", tkn)
+            self.emit(lparen)
+            self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst)
+            self.out.emit(") {\n")
+        next(tkn_iter)  # Semicolon
+        storage.clear_inputs("at ERROR_IF")
+
+        self.out.emit("goto error;\n")
+        if not unconditional:
+            self.out.emit("}\n")
+        return not unconditional
+
+
 def write_uop(
     override: Uop | None,
     uop: Uop,
@@ -175,13 +366,14 @@ def write_uop(
                         cast = f"uint{cache.size*16}_t"
                     out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n")
         if override:
-            emitter = OptimizerEmitter(out, {})
+            emitter = OptimizerEmitter(out, {}, uop, stack.copy())
             # No reference management of inputs needed.
             for var in storage.inputs:  # type: ignore[possibly-undefined]
                 var.in_local = False
             _, storage = emitter.emit_tokens(override, storage, None, False)
             out.start_line()
             storage.flush(out)
+            out.start_line()
         else:
             emit_default(out, uop, stack)
             out.start_line()
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index a6dac481875..c7fe0d162ac 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -379,9 +379,13 @@ class Parser(PLexer):
         while anno := self.expect(lx.ANNOTATION):
             if anno.text == "replicate":
                 self.require(lx.LPAREN)
-                times = self.require(lx.NUMBER)
+                stop = self.require(lx.NUMBER)
+                start_text = "0"
+                if self.expect(lx.COLON):
+                    start_text = stop.text
+                    stop = self.require(lx.NUMBER)
                 self.require(lx.RPAREN)
-                annotations.append(f"replicate({times.text})")
+                annotations.append(f"replicate({start_text}:{stop.text})")
             else:
                 annotations.append(anno.text)
         tkn = self.expect(lx.INST)
diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py
index 276f306dfff..fc3bc47286f 100644
--- a/Tools/cases_generator/tier2_generator.py
+++ b/Tools/cases_generator/tier2_generator.py
@@ -91,7 +91,7 @@ class Tier2Emitter(Emitter):
         self.emit("}\n")
         return not always_true(first_tkn)
 
-    def exit_if(  # type: ignore[override]
+    def exit_if(
         self,
         tkn: Token,
         tkn_iter: TokenIterator,
diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py
index 6f995e5c46b..1cc23837a72 100644
--- a/Tools/cases_generator/uop_metadata_generator.py
+++ b/Tools/cases_generator/uop_metadata_generator.py
@@ -24,7 +24,8 @@ DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_uop_metadata.h"
 
 def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
     out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
-    out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
+    out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n")
+    out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n")
     out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
     out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
     out.emit("#ifdef NEED_OPCODE_METADATA\n")
@@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
             out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
 
     out.emit("};\n\n")
-    out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
+    out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
     for uop in analysis.uops.values():
         if uop.replicated:
-            out.emit(f"[{uop.name}] = {uop.replicated},\n")
+            assert(uop.replicated.step == 1)
+            out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n")
 
     out.emit("};\n\n")
     out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")
diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py
new file mode 100644
index 00000000000..1077e4106fd
--- /dev/null
+++ b/Tools/jit/_optimizers.py
@@ -0,0 +1,319 @@
+"""Low-level optimization of textual assembly."""
+
+import dataclasses
+import pathlib
+import re
+import typing
+
+# Same as saying "not string.startswith('')":
+_RE_NEVER_MATCH = re.compile(r"(?!)")
+# Dictionary mapping branch instructions to their inverted branch instructions.
+# If a branch cannot be inverted, the value is None:
+_X86_BRANCHES = {
+    # https://www.felixcloutier.com/x86/jcc
+    "ja": "jna",
+    "jae": "jnae",
+    "jb": "jnb",
+    "jbe": "jnbe",
+    "jc": "jnc",
+    "jcxz": None,
+    "je": "jne",
+    "jecxz": None,
+    "jg": "jng",
+    "jge": "jnge",
+    "jl": "jnl",
+    "jle": "jnle",
+    "jo": "jno",
+    "jp": "jnp",
+    "jpe": "jpo",
+    "jrcxz": None,
+    "js": "jns",
+    "jz": "jnz",
+    # https://www.felixcloutier.com/x86/loop:loopcc
+    "loop": None,
+    "loope": None,
+    "loopne": None,
+    "loopnz": None,
+    "loopz": None,
+}
+# Update with all of the inverted branches, too:
+_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v}
+
+
+@dataclasses.dataclass
+class _Block:
+    label: str | None = None
+    # Non-instruction lines like labels, directives, and comments:
+    noninstructions: list[str] = dataclasses.field(default_factory=list)
+    # Instruction lines:
+    instructions: list[str] = dataclasses.field(default_factory=list)
+    # If this block ends in a jump, where to?
+    target: typing.Self | None = None
+    # The next block in the linked list:
+    link: typing.Self | None = None
+    # Whether control flow can fall through to the next block (`link`, above):
+    fallthrough: bool = True
+    # Whether this block can eventually reach the next uop (_JIT_CONTINUE):
+    hot: bool = False
+
+    def resolve(self) -> typing.Self:
+        """Find the first non-empty block reachable from this one."""
+        block = self
+        while block.link and not block.instructions:
+            block = block.link
+        return block
+
+
+@dataclasses.dataclass
+class Optimizer:
+    """Several passes of analysis and optimization for textual assembly."""
+
+    path: pathlib.Path
+    _: dataclasses.KW_ONLY
+    # prefix used to mangle symbols on some platforms:
+    prefix: str = ""
+    # The first block in the linked list:
+    _root: _Block = dataclasses.field(init=False, default_factory=_Block)
+    _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
+    # No groups:
+    _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile(
+        r"\s*(?:\.|#|//|$)"
+    )
+    # One group (label):
+    _re_label: typing.ClassVar[re.Pattern[str]] = re.compile(
+        r'\s*(?P<label>[\w."$?@]+):'
+    )
+    # Override everything that follows in subclasses:
+    _alignment: typing.ClassVar[int] = 1
+    _branches: typing.ClassVar[dict[str, str | None]] = {}
+    # Two groups (instruction and target):
+    _re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
+    # One group (target):
+    _re_jump: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
+    # No groups:
+    _re_return: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
+
+    def __post_init__(self) -> None:
+        # Split the code into a linked list of basic blocks. A basic block is an
+        # optional label, followed by zero or more non-instruction lines,
+        # followed by zero or more instruction lines (only the last of which may
+        # be a branch, jump, or return):
+        text = self._preprocess(self.path.read_text())
+        block = self._root
+        for line in text.splitlines():
+            # See if we need to start a new block:
+            if match := self._re_label.match(line):
+                # Label. New block:
+                block.link = block = self._lookup_label(match["label"])
+                block.noninstructions.append(line)
+                continue
+            if self._re_noninstructions.match(line):
+                if block.instructions:
+                    # Non-instruction lines. New block:
+                    block.link = block = _Block()
+                block.noninstructions.append(line)
+                continue
+            if block.target or not block.fallthrough:
+                # Current block ends with a branch, jump, or return. New block:
+                block.link = block = _Block()
+            block.instructions.append(line)
+            if match := self._re_branch.match(line):
+                # A block ending in a branch has a target and fallthrough:
+                block.target = self._lookup_label(match["target"])
+                assert block.fallthrough
+            elif match := self._re_jump.match(line):
+                # A block ending in a jump has a target and no fallthrough:
+                block.target = self._lookup_label(match["target"])
+                block.fallthrough = False
+            elif self._re_return.match(line):
+                # A block ending in a return has no target and no fallthrough:
+                assert not block.target
+                block.fallthrough = False
+
+    def _preprocess(self, text: str) -> str:
+        # Override this method to do preprocessing of the textual assembly:
+        return text
+
+    @classmethod
+    def _invert_branch(cls, line: str, target: str) -> str | None:
+        match = cls._re_branch.match(line)
+        assert match
+        inverted = cls._branches.get(match["instruction"])
+        if not inverted:
+            return None
+        (a, b), (c, d) = match.span("instruction"), match.span("target")
+        # Before:
+        #     je FOO
+        # After:
+        #     jne BAR
+        return "".join([line[:a], inverted, line[b:c], target, line[d:]])
+
+    @classmethod
+    def _update_jump(cls, line: str, target: str) -> str:
+        match = cls._re_jump.match(line)
+        assert match
+        a, b = match.span("target")
+        # Before:
+        #     jmp FOO
+        # After:
+        #     jmp BAR
+        return "".join([line[:a], target, line[b:]])
+
+    def _lookup_label(self, label: str) -> _Block:
+        if label not in self._labels:
+            self._labels[label] = _Block(label)
+        return self._labels[label]
+
+    def _blocks(self) -> typing.Generator[_Block, None, None]:
+        block: _Block | None = self._root
+        while block:
+            yield block
+            block = block.link
+
+    def _body(self) -> str:
+        lines = []
+        hot = True
+        for block in self._blocks():
+            if hot != block.hot:
+                hot = block.hot
+                # Make it easy to tell at a glance where cold code is:
+                lines.append(f"# JIT: {'HOT' if hot else 'COLD'} ".ljust(80, "#"))
+            lines.extend(block.noninstructions)
+            lines.extend(block.instructions)
+        return "\n".join(lines)
+
+    def _predecessors(self, block: _Block) -> typing.Generator[_Block, None, None]:
+        # This is inefficient, but it's never wrong:
+        for pre in self._blocks():
+            if pre.target is block or pre.fallthrough and pre.link is block:
+                yield pre
+
+    def _insert_continue_label(self) -> None:
+        # Find the block with the last instruction:
+        for end in reversed(list(self._blocks())):
+            if end.instructions:
+                break
+        # Before:
+        #    jmp FOO
+        # After:
+        #    jmp FOO
+        #    .balign 8
+        #    _JIT_CONTINUE:
+        # This lets the assembler encode _JIT_CONTINUE jumps at build time!
+        align = _Block()
+        align.noninstructions.append(f"\t.balign\t{self._alignment}")
+        continuation = self._lookup_label(f"{self.prefix}_JIT_CONTINUE")
+        assert continuation.label
+        continuation.noninstructions.append(f"{continuation.label}:")
+        end.link, align.link, continuation.link = align, continuation, end.link
+
+    def _mark_hot_blocks(self) -> None:
+        # Start with the last block, and perform a DFS to find all blocks that
+        # can eventually reach it:
+        todo = list(self._blocks())[-1:]
+        while todo:
+            block = todo.pop()
+            block.hot = True
+            todo.extend(pre for pre in self._predecessors(block) if not pre.hot)
+
+    def _invert_hot_branches(self) -> None:
+        for branch in self._blocks():
+            link = branch.link
+            if link is None:
+                continue
+            jump = link.resolve()
+            # Before:
+            #    je HOT
+            #    jmp COLD
+            # After:
+            #    jne COLD
+            #    jmp HOT
+            if (
+                # block ends with a branch to hot code...
+                branch.target
+                and branch.fallthrough
+                and branch.target.hot
+                # ...followed by a jump to cold code with no other predecessors:
+                and jump.target
+                and not jump.fallthrough
+                and not jump.target.hot
+                and len(jump.instructions) == 1
+                and list(self._predecessors(jump)) == [branch]
+            ):
+                assert jump.target.label
+                assert branch.target.label
+                inverted = self._invert_branch(
+                    branch.instructions[-1], jump.target.label
+                )
+                # Check to see if the branch can even be inverted:
+                if inverted is None:
+                    continue
+                branch.instructions[-1] = inverted
+                jump.instructions[-1] = self._update_jump(
+                    jump.instructions[-1], branch.target.label
+                )
+                branch.target, jump.target = jump.target, branch.target
+                jump.hot = True
+
+    def _remove_redundant_jumps(self) -> None:
+        # Zero-length jumps can be introduced by _insert_continue_label and
+        # _invert_hot_branches:
+        for block in self._blocks():
+            # Before:
+            #    jmp FOO
+            #    FOO:
+            # After:
+            #    FOO:
+            if (
+                block.target
+                and block.link
+                and block.target.resolve() is block.link.resolve()
+            ):
+                block.target = None
+                block.fallthrough = True
+                block.instructions.pop()
+
+    def run(self) -> None:
+        """Run this optimizer."""
+        self._insert_continue_label()
+        self._mark_hot_blocks()
+        self._invert_hot_branches()
+        self._remove_redundant_jumps()
+        self.path.write_text(self._body())
+
+
+class OptimizerAArch64(Optimizer):  # pylint: disable = too-few-public-methods
+    """aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""
+
+    # TODO: @diegorusso
+    _alignment = 8
+    # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
+    _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
+
+
+class OptimizerX86(Optimizer):  # pylint: disable = too-few-public-methods
+    """i686-pc-windows-msvc/x86_64-apple-darwin/x86_64-unknown-linux-gnu"""
+
+    _branches = _X86_BRANCHES
+    _re_branch = re.compile(
+        rf"\s*(?P<instruction>{'|'.join(_X86_BRANCHES)})\s+(?P<target>[\w.]+)"
+    )
+    # https://www.felixcloutier.com/x86/jmp
+    _re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
+    # https://www.felixcloutier.com/x86/ret
+    _re_return = re.compile(r"\s*ret\b")
+
+
+class OptimizerX8664Windows(OptimizerX86):  # pylint: disable = too-few-public-methods
+    """x86_64-pc-windows-msvc"""
+
+    def _preprocess(self, text: str) -> str:
+        text = super()._preprocess(text)
+        # Before:
+        #     rex64 jmpq *__imp__JIT_CONTINUE(%rip)
+        # After:
+        #     jmp _JIT_CONTINUE
+        far_indirect_jump = (
+            rf"rex64\s+jmpq\s+\*__imp_(?P<target>{self.prefix}_JIT_\w+)\(%rip\)"
+        )
+        return re.sub(far_indirect_jump, r"jmp\t\g<target>", text)
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
index 03b0ba647b0..1d82f5366f6 100644
--- a/Tools/jit/_stencils.py
+++ b/Tools/jit/_stencils.py
@@ -17,8 +17,6 @@ class HoleValue(enum.Enum):
 
     # The base address of the machine code for the current uop (exposed as _JIT_ENTRY):
     CODE = enum.auto()
-    # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE):
-    CONTINUE = enum.auto()
     # The base address of the read-only data for this uop:
     DATA = enum.auto()
     # The address of the current executor (exposed as _JIT_EXECUTOR):
@@ -97,7 +95,6 @@ _PATCH_FUNCS = {
 # Translate HoleValues to C expressions:
 _HOLE_EXPRS = {
     HoleValue.CODE: "(uintptr_t)code",
-    HoleValue.CONTINUE: "(uintptr_t)code + sizeof(code_body)",
     HoleValue.DATA: "(uintptr_t)data",
     HoleValue.EXECUTOR: "(uintptr_t)executor",
     # These should all have been turned into DATA values by process_relocations:
@@ -209,64 +206,6 @@ class Stencil:
             self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
         self.body.extend([0] * padding)
 
-    def add_nops(self, nop: bytes, alignment: int) -> None:
-        """Add NOPs until there is alignment. Fail if it is not possible."""
-        offset = len(self.body)
-        nop_size = len(nop)
-
-        # Calculate the gap to the next multiple of alignment.
-        gap = -offset % alignment
-        if gap:
-            if gap % nop_size == 0:
-                count = gap // nop_size
-                self.body.extend(nop * count)
-            else:
-                raise ValueError(
-                    f"Cannot add nops of size '{nop_size}' to a body with "
-                    f"offset '{offset}' to align with '{alignment}'"
-                )
-
-    def remove_jump(self) -> None:
-        """Remove a zero-length continuation jump, if it exists."""
-        hole = max(self.holes, key=lambda hole: hole.offset)
-        match hole:
-            case Hole(
-                offset=offset,
-                kind="IMAGE_REL_AMD64_REL32",
-                value=HoleValue.GOT,
-                symbol="_JIT_CONTINUE",
-                addend=-4,
-            ) as hole:
-                # jmp qword ptr [rip]
-                jump = b"\x48\xff\x25\x00\x00\x00\x00"
-                offset -= 3
-            case Hole(
-                offset=offset,
-                kind="IMAGE_REL_I386_REL32" | "R_X86_64_PLT32" | "X86_64_RELOC_BRANCH",
-                value=HoleValue.CONTINUE,
-                symbol=None,
-                addend=addend,
-            ) as hole if (
-                _signed(addend) == -4
-            ):
-                # jmp 5
-                jump = b"\xe9\x00\x00\x00\x00"
-                offset -= 1
-            case Hole(
-                offset=offset,
-                kind="R_AARCH64_JUMP26",
-                value=HoleValue.CONTINUE,
-                symbol=None,
-                addend=0,
-            ) as hole:
-                # b #4
-                jump = b"\x00\x00\x00\x14"
-            case _:
-                return
-        if self.body[offset:] == jump:
-            self.body = self.body[:offset]
-            self.holes.remove(hole)
-
 
 @dataclasses.dataclass
 class StencilGroup:
@@ -284,9 +223,7 @@ class StencilGroup:
     _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False)
     _trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
 
-    def process_relocations(
-        self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
-    ) -> None:
+    def process_relocations(self, known_symbols: dict[str, int]) -> None:
         """Fix up all GOT and internal relocations for this stencil group."""
         for hole in self.code.holes.copy():
             if (
@@ -306,8 +243,6 @@ class StencilGroup:
                 self._trampolines.add(ordinal)
                 hole.addend = ordinal
                 hole.symbol = None
-        self.code.remove_jump()
-        self.code.add_nops(nop=nop, alignment=alignment)
         self.data.pad(8)
         for stencil in [self.code, self.data]:
             for hole in stencil.holes:
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index b383e39da19..ed10329d25d 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -13,6 +13,7 @@ import typing
 import shlex
 
 import _llvm
+import _optimizers
 import _schema
 import _stencils
 import _writer
@@ -41,8 +42,8 @@ class _Target(typing.Generic[_S, _R]):
     triple: str
     condition: str
     _: dataclasses.KW_ONLY
-    alignment: int = 1
     args: typing.Sequence[str] = ()
+    optimizer: type[_optimizers.Optimizer] = _optimizers.Optimizer
     prefix: str = ""
     stable: bool = False
     debug: bool = False
@@ -121,8 +122,9 @@ class _Target(typing.Generic[_S, _R]):
     async def _compile(
         self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
     ) -> _stencils.StencilGroup:
+        s = tempdir / f"{opname}.s"
         o = tempdir / f"{opname}.o"
-        args = [
+        args_s = [
             f"--target={self.triple}",
             "-DPy_BUILD_CORE_MODULE",
             "-D_DEBUG" if self.debug else "-DNDEBUG",
@@ -136,7 +138,7 @@ class _Target(typing.Generic[_S, _R]):
             f"-I{CPYTHON / 'Python'}",
             f"-I{CPYTHON / 'Tools' / 'jit'}",
             "-O3",
-            "-c",
+            "-S",
             # Shorten full absolute file paths in the generated code (like the
             # __FILE__ macro and assert failure messages) for reproducibility:
             f"-ffile-prefix-map={CPYTHON}=.",
@@ -155,13 +157,16 @@ class _Target(typing.Generic[_S, _R]):
             "-fno-stack-protector",
             "-std=c11",
             "-o",
-            f"{o}",
+            f"{s}",
             f"{c}",
             *self.args,
             # Allow user-provided CFLAGS to override any defaults
             *shlex.split(self.cflags),
         ]
-        await _llvm.run("clang", args, echo=self.verbose)
+        await _llvm.run("clang", args_s, echo=self.verbose)
+        self.optimizer(s, prefix=self.prefix).run()
+        args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
+        await _llvm.run("clang", args_o, echo=self.verbose)
         return await self._parse(o)
 
     async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
@@ -190,11 +195,7 @@ class _Target(typing.Generic[_S, _R]):
                     tasks.append(group.create_task(coro, name=opname))
         stencil_groups = {task.get_name(): task.result() for task in tasks}
         for stencil_group in stencil_groups.values():
-            stencil_group.process_relocations(
-                known_symbols=self.known_symbols,
-                alignment=self.alignment,
-                nop=self._get_nop(),
-            )
+            stencil_group.process_relocations(self.known_symbols)
         return stencil_groups
 
     def build(
@@ -524,42 +525,43 @@ class _MachO(
 
 def get_target(host: str) -> _COFF | _ELF | _MachO:
     """Build a _Target for the given host "triple" and options."""
+    optimizer: type[_optimizers.Optimizer]
     target: _COFF | _ELF | _MachO
     if re.fullmatch(r"aarch64-apple-darwin.*", host):
         condition = "defined(__aarch64__) && defined(__APPLE__)"
-        target = _MachO(host, condition, alignment=8, prefix="_")
+        optimizer = _optimizers.OptimizerAArch64
+        target = _MachO(host, condition, optimizer=optimizer, prefix="_")
     elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
         args = ["-fms-runtime-lib=dll", "-fplt"]
         condition = "defined(_M_ARM64)"
-        target = _COFF(host, condition, alignment=8, args=args)
+        optimizer = _optimizers.OptimizerAArch64
+        target = _COFF(host, condition, args=args, optimizer=optimizer)
     elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
-        args = [
-            "-fpic",
-            # On aarch64 Linux, intrinsics were being emitted and this flag
-            # was required to disable them.
-            "-mno-outline-atomics",
-        ]
+        # -mno-outline-atomics: Keep out-of-line atomic helper calls from being emitted.
+        args = ["-fpic", "-mno-outline-atomics"]
         condition = "defined(__aarch64__) && defined(__linux__)"
-        target = _ELF(host, condition, alignment=8, args=args)
+        optimizer = _optimizers.OptimizerAArch64
+        target = _ELF(host, condition, args=args, optimizer=optimizer)
     elif re.fullmatch(r"i686-pc-windows-msvc", host):
-        args = [
-            "-DPy_NO_ENABLE_SHARED",
-            # __attribute__((preserve_none)) is not supported
-            "-Wno-ignored-attributes",
-        ]
+        # -Wno-ignored-attributes: preserve_none is not supported for this target.
+        args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"]
+        optimizer = _optimizers.OptimizerX86
         condition = "defined(_M_IX86)"
-        target = _COFF(host, condition, args=args, prefix="_")
+        target = _COFF(host, condition, args=args, optimizer=optimizer, prefix="_")
     elif re.fullmatch(r"x86_64-apple-darwin.*", host):
         condition = "defined(__x86_64__) && defined(__APPLE__)"
-        target = _MachO(host, condition, prefix="_")
+        optimizer = _optimizers.OptimizerX86
+        target = _MachO(host, condition, optimizer=optimizer, prefix="_")
     elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
         args = ["-fms-runtime-lib=dll"]
         condition = "defined(_M_X64)"
-        target = _COFF(host, condition, args=args)
+        optimizer = _optimizers.OptimizerX8664Windows
+        target = _COFF(host, condition, args=args, optimizer=optimizer)
     elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
         args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
         condition = "defined(__x86_64__) && defined(__linux__)"
-        target = _ELF(host, condition, args=args)
+        optimizer = _optimizers.OptimizerX86
+        target = _ELF(host, condition, args=args, optimizer=optimizer)
     else:
         raise ValueError(host)
     return target
diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py
index 0dcf6ef844a..afd010a5254 100755
--- a/Tools/patchcheck/patchcheck.py
+++ b/Tools/patchcheck/patchcheck.py
@@ -53,19 +53,43 @@ def get_git_branch():
 
 
 def get_git_upstream_remote():
-    """Get the remote name to use for upstream branches
+    """
+    Get the remote name to use for upstream branches
 
-    Uses "upstream" if it exists, "origin" otherwise
+    Consider the remotes whose "git remote -v" fetch line contains
+    "python/cpython" (ignoring case). If exactly one matches, return it;
+    otherwise return the first of "upstream", "origin" or "python" that
+    matches. Raise ValueError if no such remote is found.
     """
-    cmd = "git remote get-url upstream".split()
-    try:
-        subprocess.check_output(cmd,
-                                stderr=subprocess.DEVNULL,
-                                cwd=SRCDIR,
-                                encoding='UTF-8')
-    except subprocess.CalledProcessError:
-        return "origin"
-    return "upstream"
+    cmd = "git remote -v".split()
+    output = subprocess.check_output(
+        cmd,
+        stderr=subprocess.DEVNULL,
+        cwd=SRCDIR,
+        encoding="UTF-8"
+    )
+    # Match case-insensitively, but return each name exactly as git reports
+    # it: remote names are case-sensitive, so a lowercased copy may not exist.
+    fetch_lines = [ln for ln in output.split('\n') if ln.endswith("(fetch)")]
+    filtered_remotes = {
+        line.split("\t")[0].lower(): line.split("\t")[0]
+        for line in fetch_lines if "python/cpython" in line.lower()
+    }
+    if len(filtered_remotes) == 1:
+        [remote] = filtered_remotes.values()
+        return remote
+    for remote_name in ["upstream", "origin", "python"]:
+        if remote_name in filtered_remotes:
+            return filtered_remotes[remote_name]
+    remotes_found = "\n".join(fetch_lines)
+    raise ValueError(
+        f"Patchcheck was unable to find an unambiguous upstream remote, "
+        f"with URL matching 'https://github.com/python/cpython'. "
+        f"For help creating an upstream remote, see Dev Guide: "
+        f"https://devguide.python.org/getting-started/"
+        f"git-boot-camp/#cloning-a-forked-cpython-repository "
+        f"\nRemotes found: \n{remotes_found}"
+        )
 
 
 def get_git_remote_default_branch(remote_name):
diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt
index 5bf180bb30a..0beaab2d3e7 100644
--- a/Tools/requirements-dev.txt
+++ b/Tools/requirements-dev.txt
@@ -1,7 +1,7 @@
 # Requirements file for external linters and checks we run on
 # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI
-mypy==1.15
+mypy==1.16.1
 
 # needed for peg_generator:
-types-psutil==6.0.0.20240901
-types-setuptools==74.0.0.20240831
+types-psutil==7.0.0.20250601
+types-setuptools==80.9.0.20250529
diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py
index 849bd5de44e..c0d58aeaadd 100644
--- a/Tools/wasm/emscripten/__main__.py
+++ b/Tools/wasm/emscripten/__main__.py
@@ -167,11 +167,12 @@ def make_build_python(context, working_dir):
 @subdir(HOST_BUILD_DIR, clean_ok=True)
 def make_emscripten_libffi(context, working_dir):
     shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True)
-    with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file:
+    with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete_on_close=False) as tmp_file:
         with urlopen(
             "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz"
         ) as response:
             shutil.copyfileobj(response, tmp_file)
+        tmp_file.close()
         shutil.unpack_archive(tmp_file.name, working_dir)
     call(
         [EMSCRIPTEN_DIR / "make_libffi.sh"],
diff --git a/configure b/configure
index fef9f2d7da9..75ae1699a8e 100755
--- a/configure
+++ b/configure
@@ -1826,8 +1826,8 @@ Optional Features:
                           no)
   --enable-profiling      enable C-level code profiling with gprof (default is
                           no)
-  --disable-gil           enable experimental support for running without the
-                          GIL (default is no)
+  --disable-gil           enable support for running without the GIL (default
+                          is no)
   --enable-pystats        enable internal statistics gathering (default is no)
   --enable-optimizations  enable expensive, stable optimizations (PGO, etc.)
                           (default is no)
@@ -12979,13 +12979,6 @@ if test "$ac_cv_sizeof_off_t" -gt "$ac_cv_sizeof_long" -a \
 else
   have_largefile_support="no"
 fi
-case $ac_sys_system in #(
-  Emscripten) :
-    have_largefile_support="no"
- ;; #(
-  *) :
-     ;;
-esac
 if test "x$have_largefile_support" = xyes
 then :
 
@@ -15749,7 +15742,7 @@ fi
 printf "%s\n" "$ac_cv_ffi_complex_double_supported" >&6; }
 if test "$ac_cv_ffi_complex_double_supported" = "yes"; then
 
-printf "%s\n" "#define Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h
+printf "%s\n" "#define _Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h
 
 fi
 
@@ -23849,7 +23842,7 @@ fi
 
 
 
-ac_fn_check_decl "$LINENO" "MAXLOGNAME" "ac_cv_have_decl_MAXLOGNAME" "#include <sys/params.h>
+ac_fn_check_decl "$LINENO" "MAXLOGNAME" "ac_cv_have_decl_MAXLOGNAME" "#include <sys/param.h>
 " "$ac_c_undeclared_builtin_options" "CFLAGS"
 if test "x$ac_cv_have_decl_MAXLOGNAME" = xyes
 then :
@@ -29940,9 +29933,6 @@ printf "%s\n" "#define Py_REMOTE_DEBUG 1" >>confdefs.h
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 printf "%s\n" "yes" >&6; }
 else
-
-printf "%s\n" "#define Py_REMOTE_DEBUG 0" >>confdefs.h
-
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
 printf "%s\n" "no" >&6; }
 fi
@@ -32643,7 +32633,7 @@ then :
     LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"
 
 
-printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h
+printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC128 1" >>confdefs.h
 
 
     # macOS universal2 builds *support* the -msse etc flags because they're
@@ -32719,7 +32709,7 @@ then :
 
     LIBHACL_SIMD256_FLAGS="-mavx2"
 
-printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h
+printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h
 
 
     # macOS universal2 builds *support* the -mavx2 compiler flag because it's
diff --git a/configure.ac b/configure.ac
index cc37a636c52..4da1ba78b54 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1716,7 +1716,7 @@ ABI_THREAD=""
 # --disable-gil
 AC_MSG_CHECKING([for --disable-gil])
 AC_ARG_ENABLE([gil],
-  [AS_HELP_STRING([--disable-gil], [enable experimental support for running without the GIL (default is no)])],
+  [AS_HELP_STRING([--disable-gil], [enable support for running without the GIL (default is no)])],
   [AS_VAR_IF([enable_gil], [yes], [disable_gil=no], [disable_gil=yes])], [disable_gil=no]
 )
 AC_MSG_RESULT([$disable_gil])
@@ -3172,10 +3172,6 @@ if test "$ac_cv_sizeof_off_t" -gt "$ac_cv_sizeof_long" -a \
 else
   have_largefile_support="no"
 fi
-dnl LFS does not work with Emscripten 3.1
-AS_CASE([$ac_sys_system],
-  [Emscripten], [have_largefile_support="no"]
-)
 AS_VAR_IF([have_largefile_support], [yes], [
   AC_DEFINE([HAVE_LARGEFILE_SUPPORT], [1],
   [Defined to enable large file support when an off_t is bigger than a long
@@ -4167,7 +4163,7 @@ int main(void)
 [ac_cv_ffi_complex_double_supported=no])
 ])])
 if test "$ac_cv_ffi_complex_double_supported" = "yes"; then
-    AC_DEFINE([Py_FFI_SUPPORT_C_COMPLEX], [1],
+    AC_DEFINE([_Py_FFI_SUPPORT_C_COMPLEX], [1],
               [Defined if _Complex C type can be used with libffi.])
 fi
 
@@ -5542,7 +5538,7 @@ AC_CHECK_DECL([MAXLOGNAME],
               [AC_DEFINE([HAVE_MAXLOGNAME], [1],
                          [Define if you have the 'MAXLOGNAME' constant.])],
               [],
-              [@%:@include <sys/params.h>])
+              [@%:@include <sys/param.h>])
 
 AC_CHECK_DECLS([UT_NAMESIZE],
               [AC_DEFINE([HAVE_UT_NAMESIZE], [1],
@@ -7172,8 +7168,6 @@ if test "$with_remote_debug" = yes; then
     [Define if you want to enable remote debugging support.])
   AC_MSG_RESULT([yes])
 else
-  AC_DEFINE([Py_REMOTE_DEBUG], [0],
-    [Define if you want to enable remote debugging support.])
   AC_MSG_RESULT([no])
 fi
 
@@ -8032,7 +8026,8 @@ then
   AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[
     [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"]
 
-    AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations])
+    AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [
+      HACL* library can compile SIMD128 implementations])
 
     # macOS universal2 builds *support* the -msse etc flags because they're
     # available on x86_64. However, performance of the HACL SIMD128 implementation
@@ -8063,7 +8058,8 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \
 then
   AX_CHECK_COMPILE_FLAG([-mavx2],[
     [LIBHACL_SIMD256_FLAGS="-mavx2"]
-    AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations])
+    AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [
+      HACL* library can compile SIMD256 implementations])
 
     # macOS universal2 builds *support* the -mavx2 compiler flag because it's
     # available on x86_64; but the HACL SIMD256 build then fails because the
diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-strip b/iOS/Resources/bin/arm64-apple-ios-simulator-strip
new file mode 100755
index 00000000000..fd59d309b73
--- /dev/null
+++ b/iOS/Resources/bin/arm64-apple-ios-simulator-strip
@@ -0,0 +1,2 @@
+#!/bin/sh
+xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch arm64 "$@"
diff --git a/iOS/Resources/bin/arm64-apple-ios-strip b/iOS/Resources/bin/arm64-apple-ios-strip
new file mode 100755
index 00000000000..75e823a3d02
--- /dev/null
+++ b/iOS/Resources/bin/arm64-apple-ios-strip
@@ -0,0 +1,2 @@
+#!/bin/sh
+xcrun --sdk iphoneos${IOS_SDK_VERSION} strip -arch arm64 "$@"
diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-strip b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip
new file mode 100755
index 00000000000..c5cfb289291
--- /dev/null
+++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip
@@ -0,0 +1,2 @@
+#!/bin/sh
+xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch x86_64 "$@"
diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m
index dd6e76f9496..b502a6eb277 100644
--- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m
+++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m
@@ -15,6 +15,11 @@
     PyStatus status;
     PyPreConfig preconfig;
     PyConfig config;
+    PyObject *app_packages_path;
+    PyObject *method_args;
+    PyObject *result;
+    PyObject *site_module;
+    PyObject *site_addsitedir_attr;
     PyObject *sys_module;
     PyObject *sys_path_attr;
     NSArray *test_args;
@@ -111,29 +116,55 @@
         return;
     }
 
-    sys_module = PyImport_ImportModule("sys");
-    if (sys_module == NULL) {
-        XCTFail(@"Could not import sys module");
+    // Add app_packages as a site directory. This both adds to sys.path,
+    // and ensures that any .pth files in that directory will be executed.
+    site_module = PyImport_ImportModule("site");
+    if (site_module == NULL) {
+        XCTFail(@"Could not import site module");
         return;
     }
 
-    sys_path_attr = PyObject_GetAttrString(sys_module, "path");
-    if (sys_path_attr == NULL) {
-        XCTFail(@"Could not access sys.path");
+    site_addsitedir_attr = PyObject_GetAttrString(site_module, "addsitedir");
+    if (site_addsitedir_attr == NULL || !PyCallable_Check(site_addsitedir_attr)) {
+        XCTFail(@"Could not access site.addsitedir");
         return;
     }
 
-    // Add the app packages path
     path = [NSString stringWithFormat:@"%@/app_packages", resourcePath, nil];
     NSLog(@"App packages path: %@", path);
     wtmp_str = Py_DecodeLocale([path UTF8String], NULL);
-    failed = PyList_Insert(sys_path_attr, 0, PyUnicode_FromString([path UTF8String]));
-    if (failed) {
-        XCTFail(@"Unable to add app packages to sys.path");
+    app_packages_path = PyUnicode_FromWideChar(wtmp_str, wcslen(wtmp_str));
+    if (app_packages_path == NULL) {
+        XCTFail(@"Could not convert app_packages path to unicode");
         return;
     }
     PyMem_RawFree(wtmp_str);
 
+    method_args = Py_BuildValue("(O)", app_packages_path);
+    if (method_args == NULL) {
+        XCTFail(@"Could not create arguments for site.addsitedir");
+        return;
+    }
+
+    result = PyObject_CallObject(site_addsitedir_attr, method_args);
+    if (result == NULL) {
+        XCTFail(@"Could not add app_packages directory using site.addsitedir");
+        return;
+    }
+
+    // Add test code to sys.path
+    sys_module = PyImport_ImportModule("sys");
+    if (sys_module == NULL) {
+        XCTFail(@"Could not import sys module");
+        return;
+    }
+
+    sys_path_attr = PyObject_GetAttrString(sys_module, "path");
+    if (sys_path_attr == NULL) {
+        XCTFail(@"Could not access sys.path");
+        return;
+    }
+
     path = [NSString stringWithFormat:@"%@/app", resourcePath, nil];
     NSLog(@"App path: %@", path);
     wtmp_str = Py_DecodeLocale([path UTF8String], NULL);
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 65a2c55217c..d7c496fccc6 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -50,12 +50,6 @@
 /* Define if getpgrp() must be called as getpgrp(0). */
 #undef GETPGRP_HAVE_ARG
 
-/* HACL* library can compile SIMD128 implementations */
-#undef HACL_CAN_COMPILE_SIMD128
-
-/* HACL* library can compile SIMD256 implementations */
-#undef HACL_CAN_COMPILE_SIMD256
-
 /* Define if you have the 'accept' function. */
 #undef HAVE_ACCEPT
 
@@ -1736,9 +1730,6 @@
 /* Defined if Python is built as a shared library. */
 #undef Py_ENABLE_SHARED
 
-/* Defined if _Complex C type can be used with libffi. */
-#undef Py_FFI_SUPPORT_C_COMPLEX
-
 /* Define if you want to disable the GIL */
 #undef Py_GIL_DISABLED
 
@@ -2026,6 +2017,15 @@
 /* Maximum length in bytes of a thread name */
 #undef _PYTHREAD_NAME_MAXLEN
 
+/* Defined if _Complex C type can be used with libffi. */
+#undef _Py_FFI_SUPPORT_C_COMPLEX
+
+/* HACL* library can compile SIMD128 implementations */
+#undef _Py_HACL_CAN_COMPILE_VEC128
+
+/* HACL* library can compile SIMD256 implementations */
+#undef _Py_HACL_CAN_COMPILE_VEC256
+
 /* Define to force use of thread-safe errno, h_errno, and other functions */
 #undef _REENTRANT