diff options
34 files changed, 247 insertions, 203 deletions
diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 63b78f67767..5fb8567ef8c 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -95,6 +95,13 @@ There are a few functions specific to Python functions. .. versionadded:: 3.12 + +.. c:function:: PyObject* PyFunction_GetKwDefaults(PyObject *op) + + Return the keyword-only argument default values of the function object *op*. This can be a + dictionary of arguments or ``NULL``. + + .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op) Return the closure associated with the function object *op*. This can be ``NULL`` @@ -123,6 +130,19 @@ There are a few functions specific to Python functions. Raises :exc:`SystemError` and returns ``-1`` on failure. +.. c:function:: PyObject *PyFunction_GET_CODE(PyObject *op) + PyObject *PyFunction_GET_GLOBALS(PyObject *op) + PyObject *PyFunction_GET_MODULE(PyObject *op) + PyObject *PyFunction_GET_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_KW_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_CLOSURE(PyObject *op) + PyObject *PyFunction_GET_ANNOTATIONS(PyObject *op) + + These functions are similar to their ``PyFunction_Get*`` counterparts, but + do not do type checking. Passing anything other than an instance of + :c:data:`PyFunction_Type` is undefined behavior. + + .. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback) Register *callback* as a function watcher for the current interpreter. diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 99cc823c0c3..144c5608e07 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -963,21 +963,45 @@ PyFunction_Check:PyObject*:o:0: PyFunction_GetAnnotations:PyObject*::0: PyFunction_GetAnnotations:PyObject*:op:0: +PyFunction_GET_ANNOTATIONS:PyObject*::0: +PyFunction_GET_ANNOTATIONS:PyObject*:op:0: + PyFunction_GetClosure:PyObject*::0: PyFunction_GetClosure:PyObject*:op:0: +PyFunction_GET_CLOSURE:PyObject*::0: +PyFunction_GET_CLOSURE:PyObject*:op:0: + PyFunction_GetCode:PyObject*::0: PyFunction_GetCode:PyObject*:op:0: +PyFunction_GET_CODE:PyObject*::0: +PyFunction_GET_CODE:PyObject*:op:0: + PyFunction_GetDefaults:PyObject*::0: PyFunction_GetDefaults:PyObject*:op:0: +PyFunction_GET_DEFAULTS:PyObject*::0: +PyFunction_GET_DEFAULTS:PyObject*:op:0: + +PyFunction_GetKwDefaults:PyObject*::0: +PyFunction_GetKwDefaults:PyObject*:op:0: + +PyFunction_GET_KW_DEFAULTS:PyObject*::0: +PyFunction_GET_KW_DEFAULTS:PyObject*:op:0: + PyFunction_GetGlobals:PyObject*::0: PyFunction_GetGlobals:PyObject*:op:0: +PyFunction_GET_GLOBALS:PyObject*::0: +PyFunction_GET_GLOBALS:PyObject*:op:0: + PyFunction_GetModule:PyObject*::0: PyFunction_GetModule:PyObject*:op:0: +PyFunction_GET_MODULE:PyObject*::0: +PyFunction_GET_MODULE:PyObject*:op:0: + PyFunction_New:PyObject*::+1: PyFunction_New:PyObject*:code:+1: PyFunction_New:PyObject*:globals:+1: diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 2ecce3dba5a..80bd1275973 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1839,15 +1839,15 @@ are always available. They are listed here in alphabetical order. ``range(start, stop, step)``. The *start* and *step* arguments default to ``None``. + Slice objects have read-only data attributes :attr:`!start`, + :attr:`!stop`, and :attr:`!step` which merely return the argument + values (or their default). They have no other explicit functionality; + however, they are used by NumPy and other third-party packages. + .. attribute:: slice.start .. attribute:: slice.stop .. attribute:: slice.step - Slice objects have read-only data attributes :attr:`!start`, - :attr:`!stop`, and :attr:`!step` which merely return the argument - values (or their default). They have no other explicit functionality; - however, they are used by NumPy and other third-party packages. - Slice objects are also generated when extended indexing syntax is used. For example: ``a[start:stop:step]`` or ``a[start:stop, i]``. See :func:`itertools.islice` for an alternate version that returns an diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 429b3cd1f00..17f39aaf5f5 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1023,7 +1023,7 @@ series of :term:`arguments <argument>`: : ["," `keywords_arguments`] : | `starred_and_keywords` ["," `keywords_arguments`] : | `keywords_arguments` - positional_arguments: positional_item ("," positional_item)* + positional_arguments: `positional_item` ("," `positional_item`)* positional_item: `assignment_expression` | "*" `expression` starred_and_keywords: ("*" `expression` | `keyword_item`) : ("," "*" `expression` | "," `keyword_item`)* diff --git a/Include/ceval.h b/Include/ceval.h index 32ab38972e5..e9df8684996 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -133,13 +133,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); #define FVS_MASK 0x4 #define FVS_HAVE_SPEC 0x4 -/* Special methods used by LOAD_SPECIAL */ -#define SPECIAL___ENTER__ 0 -#define SPECIAL___EXIT__ 1 -#define SPECIAL___AENTER__ 2 -#define SPECIAL___AEXIT__ 3 -#define SPECIAL_MAX 3 - #ifndef Py_LIMITED_API # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 18623cc8f1c..cc2defbdf77 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -366,6 +366,13 @@ extern int _PyRunRemoteDebugger(PyThreadState *tstate); PyAPI_FUNC(_PyStackRef) _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef *index_ptr); +/* Special methods used by LOAD_SPECIAL */ +#define SPECIAL___ENTER__ 0 +#define SPECIAL___EXIT__ 1 +#define SPECIAL___AENTER__ 2 +#define SPECIAL___AEXIT__ 3 +#define SPECIAL_MAX 3 + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e669a30b072..f3f2ae0a140 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. -extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Periodically process delayed free requests. extern void _PyMem_ProcessDelayed(PyThreadState *tstate); diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5..1f9b3fcf777 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -48,8 +48,21 @@ struct _qsbr_thread_state { // Thread state (or NULL) PyThreadState *tstate; - // Used to defer advancing write sequence a fixed number of times - int deferrals; + // Number of held items added by this thread since the last write sequence + // advance + int deferred_count; + + // Estimate for the amount of memory that is held by this thread since + // the last write sequence advance + size_t deferred_memory; + + // Amount of memory in mimalloc pages deferred from collection. When + // deferred, they are prevented from being used for a different size class + // and in a different thread. + size_t deferred_page_memory; + + // True if the deferred memory frees should be processed. + bool should_process; // Is this thread state allocated? bool allocated; @@ -109,11 +122,17 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal) extern uint64_t _Py_qsbr_advance(struct _qsbr_shared *shared); -// Batches requests to advance the write sequence. This advances the write -// sequence every N calls, which reduces overhead but increases time to -// reclamation. Returns the new goal. +// Return the next value for the write sequence (current plus the increment). extern uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr); +_Py_qsbr_shared_next(struct _qsbr_shared *shared); + +// Return true if deferred memory frees held by QSBR should be processed to +// determine if they can be safely freed. +static inline bool +_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr) +{ + return qsbr->should_process; +} // Have the read sequences advanced to the given goal? If this returns true, // it safe to reclaim any memory tagged with the goal (or earlier goal). diff --git a/Include/pymacro.h b/Include/pymacro.h index bfe660e8303..b2886ddac5d 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -73,14 +73,14 @@ # else # define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T # endif +# elif defined(_MSC_VER) +# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L # define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T # elif (defined(__GNUC__) || defined(__clang__)) # define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T -# elif defined(_MSC_VER) -# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T # else # define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T # endif diff --git a/Include/pyport.h b/Include/pyport.h index 3eac119bf8e..73f071c41a6 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,7 +49,8 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ +#if (defined(__GNUC__) || defined(__clang__)) && \ + (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ || (defined(__cplusplus) && __cplusplus >= 201103) # define _Py_NULL nullptr #else diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 5c3c4424934..c5860d53b1b 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1475,8 +1475,6 @@ class Logger(Filterer): level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels. There is no arbitrary limit to the depth of nesting. """ - _tls = threading.local() - def __init__(self, name, level=NOTSET): """ Initialize the logger with a name and an optional level. @@ -1673,19 +1671,14 @@ class Logger(Filterer): This method is used for unpickled records received from a socket, as well as those created locally. Logger-level filtering is applied. """ - if self._is_disabled(): + if self.disabled: return - - self._tls.in_progress = True - try: - maybe_record = self.filter(record) - if not maybe_record: - return - if isinstance(maybe_record, LogRecord): - record = maybe_record - self.callHandlers(record) - finally: - self._tls.in_progress = False + maybe_record = self.filter(record) + if not maybe_record: + return + if isinstance(maybe_record, LogRecord): + record = maybe_record + self.callHandlers(record) def addHandler(self, hdlr): """ @@ -1773,7 +1766,7 @@ class Logger(Filterer): """ Is this logger enabled for level 'level'? """ - if self._is_disabled(): + if self.disabled: return False try: @@ -1823,11 +1816,6 @@ class Logger(Filterer): if isinstance(item, Logger) and item.parent is self and _hierlevel(item) == 1 + _hierlevel(item.parent)) - def _is_disabled(self): - # We need to use getattr as it will only be set the first time a log - # message is recorded on any given thread - return self.disabled or getattr(self._tls, 'in_progress', False) - def __repr__(self): level = getLevelName(self.getEffectiveLevel()) return '<%s %s (%s)>' % (self.__class__.__name__, self.name, level) diff --git a/Lib/pprint.py b/Lib/pprint.py index 1e611481b51..92a2c543ac2 100644 --- a/Lib/pprint.py +++ b/Lib/pprint.py @@ -653,6 +653,40 @@ class PrettyPrinter: del context[objid] return "{%s}" % ", ".join(components), readable, recursive + if (issubclass(typ, list) and r is list.__repr__) or \ + (issubclass(typ, tuple) and r is tuple.__repr__): + if issubclass(typ, list): + if not object: + return "[]", True, False + format = "[%s]" + elif len(object) == 1: + format = "(%s,)" + else: + if not object: + return "()", True, False + format = "(%s)" + objid = id(object) + if maxlevels and level >= maxlevels: + return format % "...", False, objid in context + if objid in context: + return _recursion(object), False, True + context[objid] = 1 + readable = True + recursive = False + components = [] + append = components.append + level += 1 + for o in object: + orepr, oreadable, orecur = self.format( + o, context, maxlevels, level) + append(orepr) + if not oreadable: + readable = False + if orecur: + recursive = True + del context[objid] + return format % ", ".join(components), readable, recursive + if issubclass(typ, _collections.abc.MappingView) and r in self._view_reprs: objid = id(object) if maxlevels and level >= maxlevels: @@ -689,40 +723,6 @@ class PrettyPrinter: del context[objid] return typ.__name__ + '([%s])' % ", ".join(components), readable, recursive - if (issubclass(typ, list) and r is list.__repr__) or \ - (issubclass(typ, tuple) and r is tuple.__repr__): - if issubclass(typ, list): - if not object: - return "[]", True, False - format = "[%s]" - elif len(object) == 1: - format = "(%s,)" - else: - if not object: - return "()", True, False - format = "(%s)" - objid = id(object) - if maxlevels and level >= maxlevels: - return format % "...", False, objid in context - if objid in context: - return _recursion(object), False, True - context[objid] = 1 - readable = True - recursive = False - components = [] - append = components.append - level += 1 - for o in object: - orepr, oreadable, orecur = self.format( - o, context, maxlevels, level) - append(orepr) - if not oreadable: - readable = False - if orecur: - recursive = True - del context[objid] - return format % ", ".join(components), readable, recursive - rep = repr(object) return rep, (rep and not rep.startswith('<')), False diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index e672dfcbb46..3819965ed2c 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -4214,89 +4214,6 @@ class ConfigDictTest(BaseTest): handler = logging.getHandlerByName('custom') self.assertEqual(handler.custom_kwargs, custom_kwargs) - # See gh-91555 and gh-90321 - @support.requires_subprocess() - def test_deadlock_in_queue(self): - queue = multiprocessing.Queue() - handler = logging.handlers.QueueHandler(queue) - logger = multiprocessing.get_logger() - level = logger.level - try: - logger.setLevel(logging.DEBUG) - logger.addHandler(handler) - logger.debug("deadlock") - finally: - logger.setLevel(level) - logger.removeHandler(handler) - - def test_recursion_in_custom_handler(self): - class BadHandler(logging.Handler): - def __init__(self): - super().__init__() - def emit(self, record): - logger.debug("recurse") - logger = logging.getLogger("test_recursion_in_custom_handler") - logger.addHandler(BadHandler()) - logger.setLevel(logging.DEBUG) - logger.debug("boom") - - @threading_helper.requires_working_threading() - def test_thread_supression_noninterference(self): - lock = threading.Lock() - logger = logging.getLogger("test_thread_supression_noninterference") - - # Block on the first call, allow others through - # - # NOTE: We need to bypass the base class's lock, otherwise that will - # block multiple calls to the same handler itself. - class BlockOnceHandler(TestHandler): - def __init__(self, barrier): - super().__init__(support.Matcher()) - self.barrier = barrier - - def createLock(self): - self.lock = None - - def handle(self, record): - self.emit(record) - - def emit(self, record): - if self.barrier: - barrier = self.barrier - self.barrier = None - barrier.wait() - with lock: - pass - super().emit(record) - logger.info("blow up if not supressed") - - barrier = threading.Barrier(2) - handler = BlockOnceHandler(barrier) - logger.addHandler(handler) - logger.setLevel(logging.DEBUG) - - t1 = threading.Thread(target=logger.debug, args=("1",)) - with lock: - - # Ensure first thread is blocked in the handler, hence supressing logging... - t1.start() - barrier.wait() - - # ...but the second thread should still be able to log... - t2 = threading.Thread(target=logger.debug, args=("2",)) - t2.start() - t2.join(timeout=3) - - self.assertEqual(len(handler.buffer), 1) - self.assertTrue(handler.matches(levelno=logging.DEBUG, message='2')) - - # The first thread should still be blocked here - self.assertTrue(t1.is_alive()) - - # Now the lock has been released the first thread should complete - t1.join() - self.assertEqual(len(handler.buffer), 2) - self.assertTrue(handler.matches(levelno=logging.DEBUG, message='1')) class ManagerTest(BaseTest): def test_manager_loggerclass(self): diff --git a/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst new file mode 100644 index 00000000000..21a2c87d344 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst @@ -0,0 +1 @@ +Fix building with MSVC when passing option ``/std:clatest``. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst new file mode 100644 index 00000000000..a9501c13c95 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst @@ -0,0 +1,2 @@ +Limit excess memory usage in the :term:`free threading` build when a +large dictionary or list is resized and accessed by multiple threads. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst new file mode 100644 index 00000000000..ce29ddecefe --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst @@ -0,0 +1,2 @@ +Non-blocking mutex lock attempts now return immediately when the lock is busy +instead of briefly spinning in the :term:`free threading` build. diff --git a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst b/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst deleted file mode 100644 index e8f5ba56fcc..00000000000 --- a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst +++ /dev/null @@ -1,2 +0,0 @@ -Ignore log messages generated during handling of log messages, to avoid -deadlock or infinite recursion. diff --git a/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst new file mode 100644 index 00000000000..2bb15cb6d9c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst @@ -0,0 +1 @@ +Speed up :mod:`pprint` for :class:`list` and :class:`tuple`. diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index ee5e2b005e0..9c1f8615161 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -3614,8 +3614,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -3625,8 +3624,7 @@ module_clear(PyObject *mod) assert(state != NULL); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index de06b8b41fe..e5afe746f90 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1952,8 +1952,7 @@ static int module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1962,8 +1961,7 @@ module_clear(PyObject *mod) module_state *state = get_module_state(mod); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index b920c32474f..e7feaa7f186 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1706,8 +1706,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1715,8 +1714,7 @@ module_clear(PyObject *mod) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_opcode.c b/Modules/_opcode.c index c295f7b3152..ef271b6ef56 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -5,7 +5,7 @@ #include "Python.h" #include "compile.h" #include "opcode.h" -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL_MAX #include "pycore_code.h" #include "pycore_compile.h" #include "pycore_intrinsics.h" diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index bbbb4911568..1837de4735c 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3148,7 +3148,7 @@ static PyObject * math_issubnormal_impl(PyObject *module, double x) /*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/ { -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L return PyBool_FromLong(issubnormal(x)); #else return PyBool_FromLong(isfinite(x) && x && !isnormal(x)); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3f53d4cfeb2..91772bc9d19 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -3369,7 +3369,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 72901377cbb..6b7b150f0e2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2d..23d3472b6d4 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d4b8327cb73..deb7fd957e5 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page) } +#ifdef Py_GIL_DISABLED + +// If we are deferring collection of more than this amount of memory for +// mimalloc pages, advance the write sequence. Advancing allows these +// pages to be re-used in a different thread or for a different size class. +#define QSBR_PAGE_MEM_LIMIT 4096*20 + +// Return true if the global write sequence should be advanced for a mimalloc +// page that is deferred from collection. +static bool +should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) +{ + size_t bsize = mi_page_block_size(page); + size_t page_size = page->capacity*bsize; + if (page_size > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + qsbr->deferred_page_memory += page_size; + if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + return false; +} +#endif + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) _PyMem_mi_page_clear_qsbr(page); page->retire_expire = 0; - page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr); + + if (should_advance_qsbr_for_page(tstate->qsbr, page)) { + page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); + } + llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); return false; } @@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } + +#ifdef Py_GIL_DISABLED + +// For deferred advance on free: the number of deferred items before advancing +// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally +// want to process a chunk before it overflows. +#define QSBR_DEFERRED_LIMIT 127 + +// If the deferred memory exceeds 1 MiB, advance the write sequence. This +// helps limit memory usage due to QSBR delaying frees too long. +#define QSBR_FREE_MEM_LIMIT 1024*1024 + +// Return true if the global write sequence should be advanced for a deferred +// memory free. +static bool +should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size) +{ + if (size > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + qsbr->deferred_count++; + qsbr->deferred_memory += size; + if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT || + qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + return false; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + if (should_advance_qsbr_for_free(tstate->qsbr, size)) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_shared_next(tstate->qsbr->shared); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + // Normally the processing of delayed items is done from the eval + // breaker. Processing here is a safety measure to ensure too much + // work does not accumulate. _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1305,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + // We use 0 as the size since we don't have an easy way to know the + // actual size. If we are freeing many objects, the write sequence + // will be advanced due to QSBR_DEFERRED_LIMIT. + free_delayed(((uintptr_t)ptr)|0x01, 0); } } #endif @@ -1317,6 +1399,8 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate_impl->qsbr->should_process = false; + // Process thread-local work process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); diff --git a/Python/ceval.c b/Python/ceval.c index 3da6f61f5ac..d1de4875656 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,7 +8,7 @@ #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL___ENTER__ #include "pycore_code.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 57d8f68b000..aa68371ac8f 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1387,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate) _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } + /* Process deferred memory frees held by QSBR */ + if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { + _PyMem_ProcessDelayed(tstate); + } #endif /* GC scheduled to run */ diff --git a/Python/codegen.c b/Python/codegen.c index 0023d72cd5e..27fe8e1957b 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -28,6 +28,7 @@ #include "pycore_pystate.h" // _Py_GetConfig() #include "pycore_symtable.h" // PySTEntryObject #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString +#include "pycore_ceval.h" // SPECIAL___ENTER__ #define NEED_OPCODE_METADATA #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed diff --git a/Python/lock.c b/Python/lock.c index b125ad0c9e3..ea6ac00bfec 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -58,7 +58,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_ACQUIRED; } } - else if (timeout == 0) { + if (timeout == 0) { return PY_LOCK_FAILURE; } diff --git a/Python/qsbr.c b/Python/qsbr.c index afa03776c26..c992c285cb1 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -41,10 +41,6 @@ // Starting size of the array of qsbr thread states #define MIN_ARRAY_SIZE 8 -// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing -// the write sequence. -#define QSBR_DEFERRED_LIMIT 10 - // Allocate a QSBR thread state from the freelist static struct _qsbr_thread_state * qsbr_allocate(struct _qsbr_shared *shared) @@ -117,13 +113,9 @@ _Py_qsbr_advance(struct _qsbr_shared *shared) } uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr) +_Py_qsbr_shared_next(struct _qsbr_shared *shared) { - if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) { - return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR; - } - qsbr->deferrals = 0; - return _Py_qsbr_advance(qsbr->shared); + return _Py_qsbr_shared_current(shared) + QSBR_INCR; } static uint64_t diff --git a/configure b/configure index 8334e247c3e..50223d81cd9 100755 --- a/configure +++ b/configure @@ -29933,9 +29933,6 @@ printf "%s\n" "#define Py_REMOTE_DEBUG 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else - -printf "%s\n" "#define Py_REMOTE_DEBUG 0" >>confdefs.h - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi diff --git a/configure.ac b/configure.ac index 82fb2d8c492..f58e16c774f 100644 --- a/configure.ac +++ b/configure.ac @@ -7168,8 +7168,6 @@ if test "$with_remote_debug" = yes; then [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([yes]) else - AC_DEFINE([Py_REMOTE_DEBUG], [0], - [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([no]) fi |