diff options
32 files changed, 638 insertions, 139 deletions
diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 49dbc8d71cc..ab9f9c4539a 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -686,6 +686,12 @@ Building values ``p`` (:class:`bool`) [int] Convert a C :c:expr:`int` to a Python :class:`bool` object. + Be aware that this format requires an ``int`` argument. + Unlike most other contexts in C, variadic arguments are not coerced to + a suitable type automatically. + You can convert another type (for example, a pointer or a float) to a + suitable ``int`` value using ``(x) ? 1 : 0`` or ``!!x``. + .. versionadded:: 3.14 ``c`` (:class:`bytes` of length 1) [char] diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 17f126cc065..a03d88092db 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -955,7 +955,7 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: .. index:: single: + (plus); in argparse module -* ``'+'``. Just like ``'*'``, all command-line args present are gathered into a +* ``'+'``. Just like ``'*'``, all command-line arguments present are gathered into a list. Additionally, an error message will be generated if there wasn't at least one command-line argument present. 
For example:: diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index aa11ddb75e1..a9432401525 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -284,72 +284,76 @@ extern "C" { #define _POP_JUMP_IF_FALSE 500 #define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 502 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503 -#define _POP_TWO 504 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_FLOAT 502 +#define _POP_TOP_INT 503 +#define _POP_TOP_LOAD_CONST_INLINE 504 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_NOP 506 +#define _POP_TOP_UNICODE 507 +#define _POP_TWO 508 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 506 +#define _PUSH_FRAME 510 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 507 -#define _PY_FRAME_GENERAL 508 -#define _PY_FRAME_KW 509 -#define _QUICKEN_RESUME 510 -#define _REPLACE_WITH_TRUE 511 +#define _PUSH_NULL_CONDITIONAL 511 +#define _PY_FRAME_GENERAL 512 +#define _PY_FRAME_KW 513 +#define _QUICKEN_RESUME 514 +#define _REPLACE_WITH_TRUE 515 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 512 -#define _SEND 513 -#define _SEND_GEN_FRAME 514 +#define _SAVE_RETURN_OFFSET 516 +#define _SEND 517 +#define _SEND_GEN_FRAME 518 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 515 -#define _STORE_ATTR 516 -#define _STORE_ATTR_INSTANCE_VALUE 517 -#define _STORE_ATTR_SLOT 518 -#define _STORE_ATTR_WITH_HINT 519 +#define _START_EXECUTOR 519 +#define _STORE_ATTR 520 +#define _STORE_ATTR_INSTANCE_VALUE 521 +#define _STORE_ATTR_SLOT 522 +#define _STORE_ATTR_WITH_HINT 523 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 520 -#define 
_STORE_FAST_0 521 -#define _STORE_FAST_1 522 -#define _STORE_FAST_2 523 -#define _STORE_FAST_3 524 -#define _STORE_FAST_4 525 -#define _STORE_FAST_5 526 -#define _STORE_FAST_6 527 -#define _STORE_FAST_7 528 +#define _STORE_FAST 524 +#define _STORE_FAST_0 525 +#define _STORE_FAST_1 526 +#define _STORE_FAST_2 527 +#define _STORE_FAST_3 528 +#define _STORE_FAST_4 529 +#define _STORE_FAST_5 530 +#define _STORE_FAST_6 531 +#define _STORE_FAST_7 532 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 529 -#define _STORE_SUBSCR 530 -#define _STORE_SUBSCR_DICT 531 -#define _STORE_SUBSCR_LIST_INT 532 -#define _SWAP 533 -#define _SWAP_2 534 -#define _SWAP_3 535 -#define _TIER2_RESUME_CHECK 536 -#define _TO_BOOL 537 +#define _STORE_SLICE 533 +#define _STORE_SUBSCR 534 +#define _STORE_SUBSCR_DICT 535 +#define _STORE_SUBSCR_LIST_INT 536 +#define _SWAP 537 +#define _SWAP_2 538 +#define _SWAP_3 539 +#define _TIER2_RESUME_CHECK 540 +#define _TO_BOOL 541 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 538 +#define _TO_BOOL_LIST 542 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 539 +#define _TO_BOOL_STR 543 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 540 -#define _UNPACK_SEQUENCE_LIST 541 -#define _UNPACK_SEQUENCE_TUPLE 542 -#define _UNPACK_SEQUENCE_TWO_TUPLE 543 +#define _UNPACK_SEQUENCE 544 +#define _UNPACK_SEQUENCE_LIST 545 +#define _UNPACK_SEQUENCE_TUPLE 546 +#define _UNPACK_SEQUENCE_TWO_TUPLE 547 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 543 +#define MAX_UOP_ID 547 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 11345a00785..52cbc2fffe4 100644 --- 
a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -64,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TOP_NOP] = 0, + [_POP_TOP_INT] = 0, + [_POP_TOP_FLOAT] = 0, + [_POP_TOP_UNICODE] = 0, [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, @@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", + [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT", + [_POP_TOP_INT] = "_POP_TOP_INT", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TOP_NOP] = "_POP_TOP_NOP", + [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE", [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", @@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TOP_NOP: + return 1; + case _POP_TOP_INT: + return 1; + case _POP_TOP_FLOAT: + return 1; + case _POP_TOP_UNICODE: + return 1; case _POP_TWO: return 2; case _PUSH_NULL: diff --git a/InternalDocs/README.md b/InternalDocs/README.md index c20aa015c5b..6b1d9198264 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -42,8 +42,9 @@ Program Execution - [Exception Handling](exception_handling.md) +- [Quiescent-State Based Reclamation (QSBR)](qsbr.md) Modules --- -- [asyncio](asyncio.md)
\ No newline at end of file +- [asyncio](asyncio.md) diff --git a/InternalDocs/asyncio.md b/InternalDocs/asyncio.md index b60fe70478a..22159852ca5 100644 --- a/InternalDocs/asyncio.md +++ b/InternalDocs/asyncio.md @@ -2,10 +2,12 @@ asyncio ======= -This document describes the working and implementation details of C -implementation of the +This document describes the working and implementation details of the [`asyncio`](https://docs.python.org/3/library/asyncio.html) module. +**The following section describes the implementation details of the C implementation**. + +# Task management ## Pre-Python 3.14 implementation @@ -158,7 +160,8 @@ flowchart TD subgraph two["Thread deallocating"] A1{"thread's task list empty? <br> llist_empty(tstate->asyncio_tasks_head)"} A1 --> |true| B1["deallocate thread<br>free_threadstate(tstate)"] - A1 --> |false| C1["add tasks to interpreter's task list<br> llist_concat(&tstate->interp->asyncio_tasks_head,tstate->asyncio_tasks_head)"] + A1 --> |false| C1["add tasks to interpreter's task list<br> llist_concat(&tstate->interp->asyncio_tasks_head, + &tstate->asyncio_tasks_head)"] C1 --> B1 end @@ -205,6 +208,121 @@ In free-threading, it avoids contention on a global dictionary as threads can access the current task of their running loop without any locking. +--- + +**The following section describes the implementation details of the Python implementation**. + +# async generators + +This section describes the implementation details of async generators in `asyncio`. + +Since async generators are meant to be used from coroutines, +their finalization (execution of finally blocks) needs +to be done while the loop is running. +Most async generators are closed automatically +when they are fully iterated over and exhausted; however, +if the async generator is not fully iterated over, +it may not be closed properly, leading to the `finally` blocks not being executed. 
+ +Consider the following code: +```py +import asyncio + +async def agen(): + try: + yield 1 + finally: + await asyncio.sleep(1) + print("finally executed") + + +async def main(): + async for i in agen(): + break + +loop = asyncio.EventLoop() +loop.run_until_complete(main()) +``` + +The above code will not print "finally executed", because the +async generator `agen` is not fully iterated over +and it is not closed manually by awaiting `agen.aclose()`. + +To solve this, `asyncio` uses the `sys.set_asyncgen_hooks` function to +set hooks for finalizing async generators as described in +[PEP 525](https://peps.python.org/pep-0525/). + +- **firstiter hook**: When the async generator is iterated over for the first time, +the *firstiter hook* is called. The async generator is added to `loop._asyncgens` WeakSet +and the event loop tracks all active async generators. + +- **finalizer hook**: When the async generator is about to be finalized, +the *finalizer hook* is called. The event loop removes the async generator +from `loop._asyncgens` WeakSet, and schedules the finalization of the async +generator by creating a task calling `agen.aclose()`. This ensures that the +finally block is executed while the event loop is running. When the loop is +shutting down, the loop checks if there are active async generators and if so, +it similarly schedules the finalization of all active async generators by calling +`agen.aclose()` on each of them and waits for them to complete before shutting +down the loop. + +This ensures that the async generator's `finally` blocks are executed even +if the generator is not explicitly closed. 
+ +Consider the following example: + +```python +import asyncio + +async def agen(): + try: + yield 1 + yield 2 + finally: + print("executing finally block") + +async def main(): + async for item in agen(): + print(item) + break # not fully iterated + +asyncio.run(main()) +``` + +```mermaid +flowchart TD + subgraph one["Loop running"] + A["asyncio.run(main())"] --> B + B["set async generator hooks <br> sys.set_asyncgen_hooks()"] --> C + C["async for item in agen"] --> F + F{"first iteration?"} --> |true|D + F{"first iteration?"} --> |false|H + D["calls firstiter hook<br>loop._asyncgen_firstiter_hook(agen)"] --> E + E["add agen to WeakSet<br> loop._asyncgens.add(agen)"] --> H + H["item = await agen.\_\_anext\_\_()"] --> J + J{"StopAsyncIteration?"} --> |true|M + J{"StopAsyncIteration?"} --> |false|I + I["print(item)"] --> S + S{"continue iterating?"} --> |true|C + S{"continue iterating?"} --> |false|M + M{"agen is no longer referenced?"} --> |true|N + M{"agen is no longer referenced?"} --> |false|two + N["finalize agen<br>_PyGen_Finalize(agen)"] --> O + O["calls finalizer hook<br>loop._asyncgen_finalizer_hook(agen)"] --> P + P["remove agen from WeakSet<br>loop._asyncgens.discard(agen)"] --> Q + Q["schedule task to close it<br>self.create_task(agen.aclose())"] --> R + R["print('executing finally block')"] --> E1 + + end + + subgraph two["Loop shutting down"] + A1{"check for alive async generators?"} --> |true|B1 + B1["close all async generators <br> await asyncio.gather\(*\[ag.aclose\(\) for ag in loop._asyncgens\]"] --> R + A1{"check for alive async generators?"} --> |false|E1 + E1["loop.close()"] + end + +``` [^1]: https://github.com/python/cpython/issues/123089 -[^2]: https://github.com/python/cpython/issues/80788
\ No newline at end of file +[^2]: https://github.com/python/cpython/issues/80788 diff --git a/InternalDocs/qsbr.md b/InternalDocs/qsbr.md new file mode 100644 index 00000000000..1c4a79a7b44 --- /dev/null +++ b/InternalDocs/qsbr.md @@ -0,0 +1,129 @@ +# Quiescent-State Based Reclamation (QSBR) + +## Introduction + +When implementing lock-free data structures, a key challenge is determining +when it is safe to free memory that has been logically removed from a +structure. Freeing memory too early can lead to use-after-free bugs if another +thread is still accessing it. Freeing it too late results in excessive memory +consumption. + +Safe memory reclamation (SMR) schemes address this by delaying the free +operation until all concurrent read accesses are guaranteed to have completed. +Quiescent-State Based Reclamation (QSBR) is an SMR scheme used in Python's +free-threaded build to manage the lifecycle of shared memory. + +QSBR requires threads to periodically report that they are in a quiescent +state. A thread is in a quiescent state if it holds no references to shared +objects that might be reclaimed. Think of it as a checkpoint where a thread +signals, "I am not in the middle of any operation that relies on a shared +resource." In Python, the eval_breaker provides a natural and convenient place +for threads to report this state. + + +## Use in Free-Threaded Python + +While CPython's memory management is dominated by reference counting and a +tracing garbage collector, these mechanisms are not suitable for all data +structures. For example, the backing array of a list object is not individually +reference-counted but may have a shorter lifetime than the `PyListObject` that +contains it. We could delay reclamation until the next GC run, but we want +reclamation to be prompt and to run the GC less frequently in the free-threaded +build, as it requires pausing all threads. + +Many operations in the free-threaded build are protected by locks. 
However, for +performance-critical code, we want to allow reads to happen concurrently with +updates. For instance, we want to avoid locking during most list read accesses. +If a list is resized while another thread is reading it, QSBR provides the +mechanism to determine when it is safe to free the list's old backing array. + +Specific use cases for QSBR include: + +* Dictionary keys (`PyDictKeysObject`) and list arrays (`_PyListArray`): When a +dictionary or list that may be shared between threads is resized, we use QSBR +to delay freeing the old keys or array until it's safe. For dicts and lists +that are not shared, their storage can be freed immediately upon resize. + +* Mimalloc `mi_page_t`: Non-locking dictionary and list accesses require +cooperation from the memory allocator. If an object is freed and its memory is +reused, we must ensure the new object's reference count field is at the same +memory location. In practice, this means when a mimalloc page (`mi_page_t`) +becomes empty, we don't immediately allow it to be reused for allocations of a +different size class. QSBR is used to determine when it's safe to repurpose the +page or return its memory to the OS. + + +## Implementation Details + + +### Core Implementation + +The proposal to add QSBR to Python is contained in +[GitHub issue 115103](https://github.com/python/cpython/issues/115103). +Many details of that proposal have been copied here, so they can be kept +up-to-date with the actual implementation. + +Python's QSBR implementation is based on FreeBSD's "Global Unbounded +Sequences." [^1][^2][^3]. It relies on a few key counters: + +* Global Write Sequence (`wr_seq`): A per-interpreter counter, `wr_seq`, is started +at 1 and incremented by 2 each time it is advanced. This ensures its value is +always odd, which can be used to distinguish it from other state values. When +an object needs to be reclaimed, `wr_seq` is advanced, and the object is tagged +with this new sequence number. 
+ +* Per-Thread Read Sequence: Each thread has a local read sequence counter. When +a thread reaches a quiescent state (e.g., at the eval_breaker), it copies the +current global `wr_seq` to its local counter. + +* Global Read Sequence (`rd_seq`): This per-interpreter value stores the minimum +of all per-thread read sequence counters (excluding detached threads). It is +updated by a "polling" operation. + +To free an object, the following steps are taken: + +1. Advance the global `wr_seq`. + +2. Add the object's pointer to a deferred-free list, tagging it with the new + `wr_seq` value as its qsbr_goal. + +Periodically, a polling mechanism processes this deferred-free list: + +1. The minimum read sequence value across all active threads is calculated and + stored as the global `rd_seq`. + +2. For each item on the deferred-free list, if its qsbr_goal is less than or + equal to the new `rd_seq`, its memory is freed, and it is removed from the + list. Otherwise, it remains on the list for a future attempt. + + +### Deferred Advance Optimization + +To reduce memory contention from frequent updates to the global `wr_seq`, its +advancement is sometimes deferred. Instead of incrementing `wr_seq` on every +reclamation request, each thread tracks its number of deferrals locally. Once +the deferral count reaches a limit (QSBR_DEFERRED_LIMIT, currently 10), the +thread advances the global `wr_seq` and resets its local count. + +When an object is added to the deferred-free list, its qsbr_goal is set to +`wr_seq` + 2. By setting the goal to the next sequence value, we ensure it's safe +to defer the global counter advancement. This optimization improves runtime +speed but may increase peak memory usage by slightly delaying when memory can +be reclaimed. + + +## Limitations + +Determining the `rd_seq` requires scanning over all thread states. This operation +could become a bottleneck in applications with a very large number of threads +(e.g., >1,000). 
Future work may address this with more advanced mechanisms, +such as a tree-based structure or incremental scanning. For now, the +implementation prioritizes simplicity, with plans for refinement if +multi-threaded benchmarks reveal performance issues. + + +## References + +[^1]: https://youtu.be/ZXUIFj4nRjk?t=694 +[^2]: https://people.kernel.org/joelfernandes/gus-vs-rcu +[^3]: http://bxr.su/FreeBSD/sys/kern/subr_smr.c#44 diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py index 842599bd187..0589a0f437e 100644 --- a/Lib/_pyrepl/base_eventqueue.py +++ b/Lib/_pyrepl/base_eventqueue.py @@ -87,7 +87,7 @@ class BaseEventQueue: if isinstance(k, dict): self.keymap = k else: - self.insert(Event('key', k, self.flush_buf())) + self.insert(Event('key', k, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap elif self.buf and self.buf[0] == 27: # escape @@ -96,7 +96,7 @@ class BaseEventQueue: # the docstring in keymap.py trace('unrecognized escape sequence, propagating...') self.keymap = self.compiled_keymap - self.insert(Event('key', '\033', bytearray(b'\033'))) + self.insert(Event('key', '\033', b'\033')) for _c in self.flush_buf()[1:]: self.push(_c) @@ -106,5 +106,5 @@ class BaseEventQueue: except UnicodeError: return else: - self.insert(Event('key', decoded, self.flush_buf())) + self.insert(Event('key', decoded, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap diff --git a/Lib/netrc.py b/Lib/netrc.py index bd003e80a48..2f502c1d533 100644 --- a/Lib/netrc.py +++ b/Lib/netrc.py @@ -162,8 +162,8 @@ class netrc: fowner = _getpwuid(prop.st_uid) user = _getpwuid(current_user_id) raise NetrcParseError( - (f"~/.netrc file owner ({fowner}, {user}) does not match" - " current user")) + f"~/.netrc file owner ({fowner}) does not match" + f" current user ({user})") if (prop.st_mode & (stat.S_IRWXG | stat.S_IRWXO)): raise NetrcParseError( "~/.netrc access too permissive: access" diff --git a/Lib/test/test_capi/test_opt.py 
b/Lib/test/test_capi/test_opt.py index 84e864b44b9..e4c9a463855 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2392,6 +2392,64 @@ class TestUopsOptimization(unittest.TestCase): assert ex is not None """)) + def test_pop_top_specialize_none(self): + def testfunc(n): + for _ in range(n): + global_identity(None) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + + def test_pop_top_specialize_int(self): + def testfunc(n): + for _ in range(n): + global_identity(100000) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_INT", uops) + + def test_pop_top_specialize_float(self): + def testfunc(n): + for _ in range(n): + global_identity(1e6) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_FLOAT", uops) + + + def test_unary_negative_long_float_type(self): + def testfunc(n): + for _ in range(n): + a = 9397 + f = 9397.0 + x = -a + -a + y = -f + -f + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertNotIn("_GUARD_TOS_INT", uops) + self.assertNotIn("_GUARD_NOS_INT", uops) + self.assertNotIn("_GUARD_TOS_FLOAT", uops) + self.assertNotIn("_GUARD_NOS_FLOAT", uops) def global_identity(x): return x diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index 21d097dbb55..7529c853f9c 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -506,9 +506,12 @@ def _is_perf_version_at_least(major, minor): # The output of perf --version looks like "perf version 6.7-3" but # it can also be perf version "perf version 5.15.143", or even include # a commit hash in the version string, like "6.12.9.g242e6068fd5c" + # + # 
PermissionError is raised if perf does not exist on the Windows Subsystem + # for Linux, see #134987 try: output = subprocess.check_output(["perf", "--version"], text=True) - except (subprocess.CalledProcessError, FileNotFoundError): + except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): return False version = output.split()[2] version = version.split("-")[0] diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 125c2744698..13b55d0f0a2 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1284,12 +1284,6 @@ class ThreadTests(BaseTestCase): @cpython_only def test_finalize_daemon_thread_hang(self): - if support.check_sanitizer(thread=True, memory=True): - # the thread running `time.sleep(100)` below will still be alive - # at process exit - self.skipTest( - "https://github.com/python/cpython/issues/124878 - Known" - " race condition that TSAN identifies.") # gh-87135: tests that daemon threads hang during finalization script = textwrap.dedent(''' import os diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst new file mode 100644 index 00000000000..6a9d9c683f9 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst @@ -0,0 +1 @@ +Optimize ``_UNARY_NEGATIVE`` in JIT-compiled code. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst new file mode 100644 index 00000000000..715ac7dc925 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst @@ -0,0 +1 @@ +Specialize :opcode:`POP_TOP` in the JIT compiler by specializing for reference lifetime and type. This will also enable easier top of stack caching in the JIT compiler. 
diff --git a/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst new file mode 100644 index 00000000000..5b9d89caae7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst @@ -0,0 +1,3 @@ +:mod:`netrc`: improve the error message when the security check for the +ownership of the default configuration file ``~/.netrc`` fails. Patch by +Bénédikt Tran. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 0fe69ee8fac..90a7391ebb0 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -255,7 +255,8 @@ py_hashentry_table_new(void) { return NULL; } -/* Module state */ +// --- Module state ----------------------------------------------------------- + static PyModuleDef _hashlibmodule; typedef struct { @@ -277,6 +278,8 @@ get_hashlib_state(PyObject *module) return (_hashlibstate *)state; } +// --- Module objects --------------------------------------------------------- + typedef struct { HASHLIB_OBJECT_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ @@ -291,15 +294,17 @@ typedef struct { #define HMACobject_CAST(op) ((HMACobject *)(op)) -#include "clinic/_hashopenssl.c.h" +// --- Module clinic configuration -------------------------------------------- + /*[clinic input] module _hashlib -class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASH_type" -class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASHXOF_type" -class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMAC_type" +class _hashlib.HASH "HASHobject *" "&PyType_Type" +class _hashlib.HASHXOF "HASHobject *" "&PyType_Type" +class _hashlib.HMAC "HMACobject *" "&PyType_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=eb805ce4b90b1b31]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d 
input=6b5c9ce5c28bdc58]*/ +#include "clinic/_hashopenssl.c.h" /* LCOV_EXCL_START */ diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 295bca07650..6c4349ac06b 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -2,6 +2,7 @@ * Written in 2013 by Dmitry Chestnykh <dmitry@codingrobots.com> * Modified for CPython by Christian Heimes <christian@python.org> * Updated to use HACL* by Jonathan Protzenko <jonathan@protzenko.fr> + * Additional work by Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * * To the extent possible under law, the author have dedicated all * copyright and related and neighboring rights to this software to @@ -368,15 +369,18 @@ typedef struct { #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) -#include "clinic/blake2module.c.h" +// --- Module clinic configuration -------------------------------------------- /*[clinic input] module _blake2 -class _blake2.blake2b "Blake2Object *" "&PyBlake2_BLAKE2bType" -class _blake2.blake2s "Blake2Object *" "&PyBlake2_BLAKE2sType" +class _blake2.blake2b "Blake2Object *" "&PyType_Type" +class _blake2.blake2s "Blake2Object *" "&PyType_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b7526666bd18af83]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=86b0972b0c41b3d0]*/ + +#include "clinic/blake2module.c.h" +// --- BLAKE-2 object interface ----------------------------------------------- static Blake2Object * new_Blake2Object(PyTypeObject *type) diff --git a/Modules/md5module.c b/Modules/md5module.c index 5dac3a91f4f..8b6dd4a8195 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -8,6 +8,7 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. 
@@ -25,18 +26,14 @@ #include "hashlib.h" -/*[clinic input] -module _md5 -class MD5Type "MD5object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ +#include "_hacl/Hacl_Hash_MD5.h" /* The MD5 block size and message digest sizes, in bytes */ #define MD5_BLOCKSIZE 64 #define MD5_DIGESTSIZE 16 -#include "_hacl/Hacl_Hash_MD5.h" +// --- Module objects --------------------------------------------------------- typedef struct { HASHLIB_OBJECT_HEAD @@ -45,8 +42,7 @@ typedef struct { #define _MD5object_CAST(op) ((MD5object *)(op)) -#include "clinic/md5module.c.h" - +// --- Module state ----------------------------------------------------------- typedef struct { PyTypeObject* md5_type; @@ -60,6 +56,18 @@ md5_get_state(PyObject *module) return (MD5State *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _md5 +class MD5Type "MD5object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ + +#include "clinic/md5module.c.h" + +// --- MD5 object interface --------------------------------------------------- + static MD5object * newMD5object(MD5State * st) { diff --git a/Modules/sha1module.c b/Modules/sha1module.c index 3bc83b674f1..faa9dcccc57 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -8,13 +8,13 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. 
*/ -/* SHA1 objects */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif @@ -24,18 +24,14 @@ #include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" // _PyType_GetModuleState() -/*[clinic input] -module _sha1 -class SHA1Type "SHA1object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ +#include "_hacl/Hacl_Hash_SHA1.h" /* The SHA1 block size and message digest sizes, in bytes */ #define SHA1_BLOCKSIZE 64 #define SHA1_DIGESTSIZE 20 -#include "_hacl/Hacl_Hash_SHA1.h" +// --- Module objects --------------------------------------------------------- typedef struct { HASHLIB_OBJECT_HEAD @@ -44,8 +40,7 @@ typedef struct { #define _SHA1object_CAST(op) ((SHA1object *)(op)) -#include "clinic/sha1module.c.h" - +// --- Module state ----------------------------------------------------------- typedef struct { PyTypeObject* sha1_type; @@ -59,6 +54,18 @@ sha1_get_state(PyObject *module) return (SHA1State *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha1 +class SHA1Type "SHA1object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ + +#include "clinic/sha1module.c.h" + +// --- SHA-1 object interface configuration ----------------------------------- + static SHA1object * newSHA1object(SHA1State *st) { diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 6643b7e6b02..36300ba899f 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -9,32 +9,25 @@ Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) Jonathan Protzenko (jonathan@protzenko.fr) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. 
*/ -/* SHA objects */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif #include "Python.h" -#include "pycore_bitutils.h" // _Py_bswap32() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "pycore_strhex.h" // _Py_strhex() #include "hashlib.h" -/*[clinic input] -module _sha2 -class SHA256Type "SHA256object *" "&PyType_Type" -class SHA512Type "SHA512object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ - +#include "_hacl/Hacl_Hash_SHA2.h" /* The SHA block sizes and maximum message digest sizes, in bytes */ @@ -43,9 +36,7 @@ class SHA512Type "SHA512object *" "&PyType_Type" #define SHA512_BLOCKSIZE 128 #define SHA512_DIGESTSIZE 64 -/* Our SHA2 implementations defer to the HACL* verified library. */ - -#include "_hacl/Hacl_Hash_SHA2.h" +// --- Module objects --------------------------------------------------------- // TODO: Get rid of int digestsize in favor of Hacl state info? 
@@ -64,7 +55,7 @@ typedef struct { #define _SHA256object_CAST(op) ((SHA256object *)(op)) #define _SHA512object_CAST(op) ((SHA512object *)(op)) -#include "clinic/sha2module.c.h" +// --- Module state ----------------------------------------------------------- /* We shall use run-time type information in the remainder of this module to * tell apart SHA2-224 and SHA2-256 */ @@ -83,6 +74,19 @@ sha2_get_state(PyObject *module) return (sha2_state *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha2 +class SHA256Type "SHA256object *" "&PyType_Type" +class SHA512Type "SHA512object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ + +#include "clinic/sha2module.c.h" + +// --- SHA-2 object interface ------------------------------------------------- + static int SHA256copy(SHA256object *src, SHA256object *dest) { diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 68b23935276..face90a6094 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -9,6 +9,7 @@ * Greg Stein (gstein@lyra.org) * Trevor Perrin (trevp@trevp.net) * Gregory P. Smith (greg@krypto.org) + * Bénédikt Tran (10796600+picnixz@users.noreply.github.com) * * Copyright (C) 2012-2022 Christian Heimes (christian@python.org) * Licensed to PSF under a Contributor Agreement. @@ -24,6 +25,8 @@ #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "hashlib.h" +#include "_hacl/Hacl_Hash_SHA3.h" + /* * Assert that 'LEN' can be safely casted to uint32_t. 
* @@ -37,6 +40,8 @@ #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */ +// --- Module state ----------------------------------------------------------- + typedef struct { PyTypeObject *sha3_224_type; PyTypeObject *sha3_256_type; @@ -54,21 +59,10 @@ sha3_get_state(PyObject *module) return (SHA3State *)state; } -/*[clinic input] -module _sha3 -class _sha3.sha3_224 "SHA3object *" "&SHA3_224typ" -class _sha3.sha3_256 "SHA3object *" "&SHA3_256typ" -class _sha3.sha3_384 "SHA3object *" "&SHA3_384typ" -class _sha3.sha3_512 "SHA3object *" "&SHA3_512typ" -class _sha3.shake_128 "SHA3object *" "&SHAKE128type" -class _sha3.shake_256 "SHA3object *" "&SHAKE256type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b8a53680f370285a]*/ +// --- Module objects --------------------------------------------------------- /* The structure for storing SHA3 info */ -#include "_hacl/Hacl_Hash_SHA3.h" - typedef struct { HASHLIB_OBJECT_HEAD Hacl_Hash_SHA3_state_t *hash_state; @@ -76,8 +70,23 @@ typedef struct { #define _SHA3object_CAST(op) ((SHA3object *)(op)) +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha3 +class _sha3.sha3_224 "SHA3object *" "&PyType_Type" +class _sha3.sha3_256 "SHA3object *" "&PyType_Type" +class _sha3.sha3_384 "SHA3object *" "&PyType_Type" +class _sha3.sha3_512 "SHA3object *" "&PyType_Type" +class _sha3.shake_128 "SHA3object *" "&PyType_Type" +class _sha3.shake_256 "SHA3object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ccd22550c7fb99bf]*/ + #include "clinic/sha3module.c.h" +// --- SHA-3 object interface ------------------------------------------------- + static SHA3object * newSHA3object(PyTypeObject *type) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 307844d38cc..535e552e047 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,27 @@ 
dummy_func( PyStackRef_XCLOSE(value); } + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + tier2 op(_POP_TWO, (nos, tos --)) { PyStackRef_CLOSE(tos); PyStackRef_CLOSE(nos); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f506172550..46fc164a5b3 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,46 @@ break; } + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + 
PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { _PyStackRef tos; _PyStackRef nos; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b1a63e3d29..145a8c118d3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3f2e2e0351e..f8a0484bdc2 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness @@ -452,7 +452,13 @@ dummy_func(void) { res = sym_new_compact_int(ctx); } else { - res = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } } } @@ -534,7 +540,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); 
} op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { @@ -542,7 +548,7 @@ dummy_func(void) { } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { @@ -561,6 +567,24 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; @@ -803,7 +827,9 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - JitOptRef temp = retval; + // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe + // in bytecodes.c + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 91927180b35..10767ccdbd5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -100,6 +100,47 @@ } case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + 
REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_NOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -149,7 +190,13 @@ res = sym_new_compact_int(ctx); } else { - res = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } } stack_pointer[-1] = res; break; @@ -784,7 +831,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -2660,7 +2707,7 @@ case _LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2670,7 +2717,7 @@ case _POP_TOP_LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[-1] = value; break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 64cc1b9074f..c3d9e0e778b 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -668,9 +668,6 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym) if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { return sym->cls.type == &PyBool_Type; } - if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { - return true; - } return false; } diff 
--git a/Python/qsbr.c b/Python/qsbr.c index bf34fb2523d..afa03776c26 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -1,6 +1,6 @@ /* * Implementation of safe memory reclamation scheme using - * quiescent states. + * quiescent states. See InternalDocs/qsbr.md. * * This is derived from the "GUS" safe memory reclamation technique * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6ff0223d2ef..6466d2615cd 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -596,6 +596,7 @@ NON_ESCAPING_FUNCTIONS = ( "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 10567204dcc..0bcdc5395dc 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 276f306dfff..fc3bc47286f 100644 --- 
a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -91,7 +91,7 @@ class Tier2Emitter(Emitter): self.emit("}\n") return not always_true(first_tkn) - def exit_if( # type: ignore[override] + def exit_if( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 5bf180bb30a..0beaab2d3e7 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,7 +1,7 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI -mypy==1.15 +mypy==1.16.1 # needed for peg_generator: -types-psutil==6.0.0.20240901 -types-setuptools==74.0.0.20240831 +types-psutil==7.0.0.20250601 +types-setuptools==80.9.0.20250529 |