about summary refs log tree commit diff stats homepage
diff options
context:
space:
mode:
-rw-r--r--Include/cpython/pystate.h7
-rw-r--r--Include/internal/pycore_critical_section.h242
-rw-r--r--Include/internal/pycore_lock.h20
-rw-r--r--Include/object.h8
-rw-r--r--Makefile.pre.in2
-rw-r--r--Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst3
-rw-r--r--Modules/Setup.stdlib.in2
-rw-r--r--Modules/_testinternalcapi.c3
-rw-r--r--Modules/_testinternalcapi/parts.h1
-rw-r--r--Modules/_testinternalcapi/test_critical_sections.c213
-rw-r--r--Objects/object.c2
-rw-r--r--PCbuild/_freeze_module.vcxproj3
-rw-r--r--PCbuild/_freeze_module.vcxproj.filters9
-rw-r--r--PCbuild/_testinternalcapi.vcxproj1
-rw-r--r--PCbuild/_testinternalcapi.vcxproj.filters3
-rw-r--r--PCbuild/pythoncore.vcxproj2
-rw-r--r--PCbuild/pythoncore.vcxproj.filters6
-rw-r--r--Python/critical_section.c100
-rw-r--r--Python/pystate.c10
19 files changed, 630 insertions, 7 deletions
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index ec99f90d669..4fad4c60abb 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -149,6 +149,13 @@ struct _ts {
struct _py_trashcan trash;
+ /* Tagged pointer to top-most critical section, or zero if there is no
+ * active critical section. Critical sections are only used in
+ * `--disable-gil` builds (i.e., when Py_NOGIL is defined to 1). In the
+ * default build, this field is always zero.
+ */
+ uintptr_t critical_section;
+
/* Called when a thread state is deleted normally, but not when it
* is destroyed after fork().
* Pain: to prevent rare but fatal shutdown errors (issue 18808),
diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h
new file mode 100644
index 00000000000..73c2e243f20
--- /dev/null
+++ b/Include/internal/pycore_critical_section.h
@@ -0,0 +1,242 @@
+#ifndef Py_INTERNAL_CRITICAL_SECTION_H
+#define Py_INTERNAL_CRITICAL_SECTION_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_lock.h" // PyMutex
+#include "pycore_pystate.h" // _PyThreadState_GET()
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Implementation of Python critical sections
+//
+// Conceptually, critical sections are a deadlock avoidance layer on top of
+// per-object locks. These helpers, in combination with those locks, replace
+// our usage of the global interpreter lock to provide thread-safety for
+// otherwise thread-unsafe objects, such as dict.
+//
+// NOTE: These APIs are no-ops in non-free-threaded builds.
+//
+// Straightforward per-object locking could introduce deadlocks that were not
+// present when running with the GIL. Threads may hold locks for multiple
+// objects simultaneously because Python operations can nest. If threads were
+// to acquire the same locks in different orders, they would deadlock.
+//
+// One way to avoid deadlocks is to allow threads to hold only the lock (or
+// locks) for a single operation at a time (typically a single lock, but some
+// operations involve two locks). When a thread begins a nested operation it
+// could suspend the locks for any outer operation: before beginning the nested
+// operation, the locks for the outer operation are released and when the
+// nested operation completes, the locks for the outer operation are
+// reacquired.
+//
+// To improve performance, this API uses a variation of the above scheme.
+// Instead of immediately suspending locks any time a nested operation begins,
+// locks are only suspended if the thread would block. This reduces the number
+// of lock acquisitions and releases for nested operations, while still
+// avoiding deadlocks.
+//
+// Additionally, the locks for any active operation are suspended around
+// other potentially blocking operations, such as I/O. This is because the
+// interaction between locks and blocking operations can lead to deadlocks in
+// the same way as the interaction between multiple locks.
+//
+// Each thread's critical sections and their corresponding locks are tracked in
+// a stack in `PyThreadState.critical_section`. When a thread calls
+// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
+// waiting to acquire a lock, the thread suspends all of its active critical
+// sections, temporarily releasing the associated locks. When the thread calls
+// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
+// critical section by reacquiring the associated lock or locks. See
+// `_PyCriticalSection_Resume()`.
+//
+// NOTE: Only the top-most critical section is guaranteed to be active.
+// Operations that need to lock two objects at once must use
+// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
+// to lock more than one object at once, because the inner critical section
+// may suspend the outer critical sections. This API does not provide a way
+// to lock more than two objects at once (though it could be added later
+// if actually needed).
+//
+// NOTE: Critical sections implicitly behave like reentrant locks because
+// attempting to acquire the same lock will suspend any outer (earlier)
+// critical sections. However, they are less efficient for this use case than
+// purposefully designed reentrant locks.
+//
+// Example usage:
+// Py_BEGIN_CRITICAL_SECTION(op);
+// ...
+// Py_END_CRITICAL_SECTION();
+//
+// To lock two objects at once:
+// Py_BEGIN_CRITICAL_SECTION2(op1, op2);
+// ...
+// Py_END_CRITICAL_SECTION2();
+
+
+// Tagged pointers to critical sections use the two least significant bits to
+// mark if the pointed-to critical section is inactive and whether it is a
+// _PyCriticalSection2 object.
+#define _Py_CRITICAL_SECTION_INACTIVE 0x1
+#define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2
+#define _Py_CRITICAL_SECTION_MASK 0x3
+
+#ifdef Py_NOGIL
+# define Py_BEGIN_CRITICAL_SECTION(op) \
+ { \
+ _PyCriticalSection _cs; \
+ _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex)
+
+# define Py_END_CRITICAL_SECTION() \
+ _PyCriticalSection_End(&_cs); \
+ }
+
+# define Py_BEGIN_CRITICAL_SECTION2(a, b) \
+ { \
+ _PyCriticalSection2 _cs2; \
+ _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex)
+
+# define Py_END_CRITICAL_SECTION2() \
+ _PyCriticalSection2_End(&_cs2); \
+ }
+#else /* !Py_NOGIL */
+// The critical section APIs are no-ops with the GIL.
+# define Py_BEGIN_CRITICAL_SECTION(op)
+# define Py_END_CRITICAL_SECTION()
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)
+# define Py_END_CRITICAL_SECTION2()
+#endif /* !Py_NOGIL */
+
+typedef struct {
+ // Tagged pointer to an outer active critical section (or 0).
+ // The two least-significant-bits indicate whether the pointed-to critical
+ // section is inactive and whether it is a _PyCriticalSection2 object.
+ uintptr_t prev;
+
+ // Mutex used to protect critical section
+ PyMutex *mutex;
+} _PyCriticalSection;
+
+// A critical section protected by two mutexes. Use
+// _PyCriticalSection2_Begin and _PyCriticalSection2_End.
+typedef struct {
+ _PyCriticalSection base;
+
+ PyMutex *mutex2;
+} _PyCriticalSection2;
+
+static inline int
+_PyCriticalSection_IsActive(uintptr_t tag)
+{
+ return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
+}
+
+// Resumes the top-most critical section.
+PyAPI_FUNC(void)
+_PyCriticalSection_Resume(PyThreadState *tstate);
+
+// (private) slow path for locking the mutex
+PyAPI_FUNC(void)
+_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);
+
+PyAPI_FUNC(void)
+_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+ int is_m1_locked);
+
+static inline void
+_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
+{
+ if (PyMutex_LockFast(&m->v)) {
+ PyThreadState *tstate = _PyThreadState_GET();
+ c->mutex = m;
+ c->prev = tstate->critical_section;
+ tstate->critical_section = (uintptr_t)c;
+ }
+ else {
+ _PyCriticalSection_BeginSlow(c, m);
+ }
+}
+
+// Removes the top-most critical section from the thread's stack of critical
+// sections. If the new top-most critical section is inactive, then it is
+// resumed.
+static inline void
+_PyCriticalSection_Pop(_PyCriticalSection *c)
+{
+ PyThreadState *tstate = _PyThreadState_GET();
+ uintptr_t prev = c->prev;
+ tstate->critical_section = prev;
+
+ if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
+ _PyCriticalSection_Resume(tstate);
+ }
+}
+
+static inline void
+_PyCriticalSection_End(_PyCriticalSection *c)
+{
+ PyMutex_Unlock(c->mutex);
+ _PyCriticalSection_Pop(c);
+}
+
+static inline void
+_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
+{
+ if (m1 == m2) {
+ // If the two mutex arguments are the same, treat this as a critical
+ // section with a single mutex.
+ c->mutex2 = NULL;
+ _PyCriticalSection_Begin(&c->base, m1);
+ return;
+ }
+
+ if ((uintptr_t)m2 < (uintptr_t)m1) {
+ // Sort the mutexes so that the lower address is locked first.
+ // The exact order does not matter, but we need to acquire the mutexes
+ // in a consistent order to avoid lock ordering deadlocks.
+ PyMutex *tmp = m1;
+ m1 = m2;
+ m2 = tmp;
+ }
+
+ if (PyMutex_LockFast(&m1->v)) {
+ if (PyMutex_LockFast(&m2->v)) {
+ PyThreadState *tstate = _PyThreadState_GET();
+ c->base.mutex = m1;
+ c->mutex2 = m2;
+ c->base.prev = tstate->critical_section;
+
+ uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
+ tstate->critical_section = p;
+ }
+ else {
+ _PyCriticalSection2_BeginSlow(c, m1, m2, 1);
+ }
+ }
+ else {
+ _PyCriticalSection2_BeginSlow(c, m1, m2, 0);
+ }
+}
+
+static inline void
+_PyCriticalSection2_End(_PyCriticalSection2 *c)
+{
+ if (c->mutex2) {
+ PyMutex_Unlock(c->mutex2);
+ }
+ PyMutex_Unlock(c->base.mutex);
+ _PyCriticalSection_Pop(&c->base);
+}
+
+PyAPI_FUNC(void)
+_PyCriticalSection_SuspendAll(PyThreadState *tstate);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */
diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h
index c4bb76a40e7..fe5e21fad22 100644
--- a/Include/internal/pycore_lock.h
+++ b/Include/internal/pycore_lock.h
@@ -32,9 +32,16 @@ extern "C" {
// PyMutex_Lock(&m);
// ...
// PyMutex_Unlock(&m);
-typedef struct _PyMutex {
- uint8_t v;
-} PyMutex;
+
+// NOTE: In Py_NOGIL builds, `struct _PyMutex` is defined in Include/object.h.
+// The Py_NOGIL builds need the definition in Include/object.h for the
+// `ob_mutex` field in PyObject. For the default (non-free-threaded) build,
+// we define the struct here to avoid exposing it in the public API.
+#ifndef Py_NOGIL
+struct _PyMutex { uint8_t v; };
+#endif
+
+typedef struct _PyMutex PyMutex;
#define _Py_UNLOCKED 0
#define _Py_LOCKED 1
@@ -46,6 +53,13 @@ PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
// (private) slow path for unlocking the mutex
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);
+static inline int
+PyMutex_LockFast(uint8_t *lock_bits)
+{
+ uint8_t expected = _Py_UNLOCKED;
+ return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
+}
+
// Locks the mutex.
//
// If the mutex is currently locked, the calling thread will be parked until
diff --git a/Include/object.h b/Include/object.h
index bdc07d9e8d7..f6693562d21 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -119,7 +119,7 @@ check by comparing the reference count field to the immortality reference count.
{ \
0, \
0, \
- 0, \
+ { 0 }, \
0, \
_Py_IMMORTAL_REFCNT_LOCAL, \
0, \
@@ -204,10 +204,14 @@ struct _object {
// Create a shared field from a refcnt and desired flags
#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
+// NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in
+// pycore_lock.h. See pycore_lock.h for more details.
+struct _PyMutex { uint8_t v; };
+
struct _object {
uintptr_t ob_tid; // thread id (or zero)
uint16_t _padding;
- uint8_t ob_mutex; // per-object lock
+ struct _PyMutex ob_mutex; // per-object lock
uint8_t ob_gc_bits; // gc-related state
uint32_t ob_ref_local; // local reference count
Py_ssize_t ob_ref_shared; // shared (atomic) reference count
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 365491de4e3..0629e77dc2d 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -409,6 +409,7 @@ PYTHON_OBJS= \
Python/codecs.o \
Python/compile.o \
Python/context.o \
+ Python/critical_section.o \
Python/crossinterp.o \
Python/dynamic_annotations.o \
Python/errors.o \
@@ -1802,6 +1803,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_complexobject.h \
$(srcdir)/Include/internal/pycore_condvar.h \
$(srcdir)/Include/internal/pycore_context.h \
+ $(srcdir)/Include/internal/pycore_critical_section.h \
$(srcdir)/Include/internal/pycore_crossinterp.h \
$(srcdir)/Include/internal/pycore_dict.h \
$(srcdir)/Include/internal/pycore_dict_state.h \
diff --git a/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst b/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst
new file mode 100644
index 00000000000..c2bd3ae36e6
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst
@@ -0,0 +1,3 @@
+Implement "Python Critical Sections" from :pep:`703`. These are macros to
+help replace the GIL with per-object locks in the ``--disable-gil`` build of
+CPython. The macros are no-ops in the default build.
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 9e1188304de..d144ad312ef 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -158,7 +158,7 @@
@MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c
@MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
@MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
-@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c
+@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/bytearray.c _testcapi/bytes.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c
@MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
@MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 7dba29a54d7..604a59e7e19 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1687,6 +1687,9 @@ module_exec(PyObject *module)
if (_PyTestInternalCapi_Init_Set(module) < 0) {
return 1;
}
+ if (_PyTestInternalCapi_Init_CriticalSection(module) < 0) {
+ return 1;
+ }
if (PyModule_Add(module, "SIZEOF_PYGC_HEAD",
PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) {
diff --git a/Modules/_testinternalcapi/parts.h b/Modules/_testinternalcapi/parts.h
index 3d2774e3f1b..49a1395f499 100644
--- a/Modules/_testinternalcapi/parts.h
+++ b/Modules/_testinternalcapi/parts.h
@@ -13,5 +13,6 @@
int _PyTestInternalCapi_Init_Lock(PyObject *module);
int _PyTestInternalCapi_Init_PyTime(PyObject *module);
int _PyTestInternalCapi_Init_Set(PyObject *module);
+int _PyTestInternalCapi_Init_CriticalSection(PyObject *module);
#endif // Py_TESTINTERNALCAPI_PARTS_H
diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c
new file mode 100644
index 00000000000..238f29c3c62
--- /dev/null
+++ b/Modules/_testinternalcapi/test_critical_sections.c
@@ -0,0 +1,213 @@
+/*
+ * C Extension module to test pycore_critical_section.h API.
+ */
+
+#include "parts.h"
+
+#include "pycore_critical_section.h"
+
+#ifdef Py_NOGIL
+#define assert_nogil assert
+#define assert_gil(x)
+#else
+#define assert_gil assert
+#define assert_nogil(x)
+#endif
+
+
+static PyObject *
+test_critical_sections(PyObject *self, PyObject *Py_UNUSED(args))
+{
+ PyObject *d1 = PyDict_New();
+ assert(d1 != NULL);
+
+ PyObject *d2 = PyDict_New();
+ assert(d2 != NULL);
+
+ // Beginning a critical section should lock the associated object and
+ // push the critical section onto the thread's stack (in Py_NOGIL builds).
+ Py_BEGIN_CRITICAL_SECTION(d1);
+ assert_nogil(PyMutex_IsLocked(&d1->ob_mutex));
+ assert_nogil(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section));
+ assert_gil(PyThreadState_GET()->critical_section == 0);
+ Py_END_CRITICAL_SECTION();
+ assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex));
+
+ assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex));
+ assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+ Py_BEGIN_CRITICAL_SECTION2(d1, d2);
+ assert_nogil(PyMutex_IsLocked(&d1->ob_mutex));
+ assert_nogil(PyMutex_IsLocked(&d2->ob_mutex));
+ Py_END_CRITICAL_SECTION2();
+ assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex));
+ assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+
+ // Passing the same object twice should work (and not deadlock).
+ assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+ Py_BEGIN_CRITICAL_SECTION2(d2, d2);
+ assert_nogil(PyMutex_IsLocked(&d2->ob_mutex));
+ Py_END_CRITICAL_SECTION2();
+ assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex));
+
+ Py_DECREF(d2);
+ Py_DECREF(d1);
+ Py_RETURN_NONE;
+}
+
+static void
+lock_unlock_object(PyObject *obj, int recurse_depth)
+{
+ Py_BEGIN_CRITICAL_SECTION(obj);
+ if (recurse_depth > 0) {
+ lock_unlock_object(obj, recurse_depth - 1);
+ }
+ Py_END_CRITICAL_SECTION();
+}
+
+static void
+lock_unlock_two_objects(PyObject *a, PyObject *b, int recurse_depth)
+{
+ Py_BEGIN_CRITICAL_SECTION2(a, b);
+ if (recurse_depth > 0) {
+ lock_unlock_two_objects(a, b, recurse_depth - 1);
+ }
+ Py_END_CRITICAL_SECTION2();
+}
+
+
+// Test that nested critical sections do not deadlock if they attempt to lock
+// the same object.
+static PyObject *
+test_critical_sections_nest(PyObject *self, PyObject *Py_UNUSED(args))
+{
+ PyObject *a = PyDict_New();
+ assert(a != NULL);
+ PyObject *b = PyDict_New();
+ assert(b != NULL);
+
+ // Locking an object recursively with this API should not deadlock.
+ assert_nogil(!PyMutex_IsLocked(&a->ob_mutex));
+ Py_BEGIN_CRITICAL_SECTION(a);
+ assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+ lock_unlock_object(a, 10);
+ assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+ Py_END_CRITICAL_SECTION();
+ assert_nogil(!PyMutex_IsLocked(&a->ob_mutex));
+
+ // Same test but with two objects.
+ Py_BEGIN_CRITICAL_SECTION2(b, a);
+ lock_unlock_two_objects(a, b, 10);
+ assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+ assert_nogil(PyMutex_IsLocked(&b->ob_mutex));
+ Py_END_CRITICAL_SECTION2();
+
+ Py_DECREF(b);
+ Py_DECREF(a);
+ Py_RETURN_NONE;
+}
+
+// Test that a critical section is suspended by a Py_BEGIN_ALLOW_THREADS and
+// resumed by a Py_END_ALLOW_THREADS.
+static PyObject *
+test_critical_sections_suspend(PyObject *self, PyObject *Py_UNUSED(args))
+{
+ PyObject *a = PyDict_New();
+ assert(a != NULL);
+
+ Py_BEGIN_CRITICAL_SECTION(a);
+ assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+
+ // Py_BEGIN_ALLOW_THREADS should suspend the active critical section
+ Py_BEGIN_ALLOW_THREADS
+ assert_nogil(!PyMutex_IsLocked(&a->ob_mutex));
+ Py_END_ALLOW_THREADS;
+
+ // After Py_END_ALLOW_THREADS the critical section should be resumed.
+ assert_nogil(PyMutex_IsLocked(&a->ob_mutex));
+ Py_END_CRITICAL_SECTION();
+
+ Py_DECREF(a);
+ Py_RETURN_NONE;
+}
+
+struct test_data {
+ PyObject *obj1;
+ PyObject *obj2;
+ PyObject *obj3;
+ Py_ssize_t countdown;
+ PyEvent done_event;
+};
+
+static void
+thread_critical_sections(void *arg)
+{
+ const Py_ssize_t NUM_ITERS = 200;
+ struct test_data *test_data = arg;
+ PyGILState_STATE gil = PyGILState_Ensure();
+
+ for (Py_ssize_t i = 0; i < NUM_ITERS; i++) {
+ Py_BEGIN_CRITICAL_SECTION(test_data->obj1);
+ Py_END_CRITICAL_SECTION();
+
+ Py_BEGIN_CRITICAL_SECTION(test_data->obj2);
+ lock_unlock_object(test_data->obj1, 1);
+ Py_END_CRITICAL_SECTION();
+
+ Py_BEGIN_CRITICAL_SECTION2(test_data->obj3, test_data->obj1);
+ lock_unlock_object(test_data->obj2, 2);
+ Py_END_CRITICAL_SECTION2();
+
+ Py_BEGIN_CRITICAL_SECTION(test_data->obj3);
+ Py_BEGIN_ALLOW_THREADS
+ Py_END_ALLOW_THREADS
+ Py_END_CRITICAL_SECTION();
+ }
+
+ PyGILState_Release(gil);
+ if (_Py_atomic_add_ssize(&test_data->countdown, -1) == 1) {
+ // last thread to finish sets done_event
+ _PyEvent_Notify(&test_data->done_event);
+ }
+}
+
+static PyObject *
+test_critical_sections_threads(PyObject *self, PyObject *Py_UNUSED(args))
+{
+ const Py_ssize_t NUM_THREADS = 4;
+ struct test_data test_data = {
+ .obj1 = PyDict_New(),
+ .obj2 = PyDict_New(),
+ .obj3 = PyDict_New(),
+ .countdown = NUM_THREADS,
+ };
+ assert(test_data.obj1 != NULL);
+ assert(test_data.obj2 != NULL);
+ assert(test_data.obj3 != NULL);
+
+ for (int i = 0; i < NUM_THREADS; i++) {
+ PyThread_start_new_thread(&thread_critical_sections, &test_data);
+ }
+ PyEvent_Wait(&test_data.done_event);
+
+ Py_DECREF(test_data.obj3);
+ Py_DECREF(test_data.obj2);
+ Py_DECREF(test_data.obj1);
+ Py_RETURN_NONE;
+}
+
+static PyMethodDef test_methods[] = {
+ {"test_critical_sections", test_critical_sections, METH_NOARGS},
+ {"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS},
+ {"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS},
+ {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
+ {NULL, NULL} /* sentinel */
+};
+
+int
+_PyTestInternalCapi_Init_CriticalSection(PyObject *mod)
+{
+ if (PyModule_AddFunctions(mod, test_methods) < 0) {
+ return -1;
+ }
+ return 0;
+}
diff --git a/Objects/object.c b/Objects/object.c
index b561da7fca3..b7662783a06 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2371,7 +2371,7 @@ new_reference(PyObject *op)
#else
op->ob_tid = _Py_ThreadId();
op->_padding = 0;
- op->ob_mutex = 0;
+ op->ob_mutex = (struct _PyMutex){ 0 };
op->ob_gc_bits = 0;
op->ob_ref_local = 1;
op->ob_ref_shared = 0;
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index e4ce3fefc91..a1c37e183f2 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -195,6 +195,7 @@
<ClCompile Include="..\Python\codecs.c" />
<ClCompile Include="..\Python\compile.c" />
<ClCompile Include="..\Python\context.c" />
+ <ClCompile Include="..\Python\critical_section.c" />
<ClCompile Include="..\Python\crossinterp.c" />
<ClCompile Include="..\Python\dtoa.c" />
<ClCompile Include="..\Python\dynamic_annotations.c" />
@@ -220,12 +221,14 @@
<ClCompile Include="..\Python\intrinsics.c" />
<ClCompile Include="..\Python\instrumentation.c" />
<ClCompile Include="..\Python\legacy_tracing.c" />
+ <ClCompile Include="..\Python\lock.c" />
<ClCompile Include="..\Python\marshal.c" />
<ClCompile Include="..\Python\modsupport.c" />
<ClCompile Include="..\Python\mysnprintf.c" />
<ClCompile Include="..\Python\mystrtoul.c" />
<ClCompile Include="..\Python\optimizer.c" />
<ClCompile Include="..\Python\optimizer_analysis.c" />
+ <ClCompile Include="..\Python\parking_lot.c" />
<ClCompile Include="..\Python\pathconfig.c" />
<ClCompile Include="..\Python\perf_trampoline.c" />
<ClCompile Include="..\Python\preconfig.c" />
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index 8e987cdf2b8..1c5a6d623f4 100644
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -103,6 +103,9 @@
<ClCompile Include="..\Python\context.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\Python\critical_section.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\Python\crossinterp.c">
<Filter>Source Files</Filter>
</ClCompile>
@@ -223,6 +226,9 @@
<ClCompile Include="..\Python\legacy_tracing.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\Python\lock.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\Objects\interpreteridobject.c">
<Filter>Source Files</Filter>
</ClCompile>
@@ -289,6 +295,9 @@
<ClCompile Include="..\Parser\parser.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\Python\parking_lot.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\Python\pathconfig.c">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/PCbuild/_testinternalcapi.vcxproj b/PCbuild/_testinternalcapi.vcxproj
index a729ab3877d..558f66ca95c 100644
--- a/PCbuild/_testinternalcapi.vcxproj
+++ b/PCbuild/_testinternalcapi.vcxproj
@@ -95,6 +95,7 @@
<ItemGroup>
<ClCompile Include="..\Modules\_testinternalcapi.c" />
<ClCompile Include="..\Modules\_testinternalcapi\pytime.c" />
+ <ClCompile Include="..\Modules\_testinternalcapi\test_critical_sections.c" />
<ClCompile Include="..\Modules\_testinternalcapi\test_lock.c" />
<ClCompile Include="..\Modules\_testinternalcapi\set.c" />
</ItemGroup>
diff --git a/PCbuild/_testinternalcapi.vcxproj.filters b/PCbuild/_testinternalcapi.vcxproj.filters
index 9c8a5d793ee..abfeeb39630 100644
--- a/PCbuild/_testinternalcapi.vcxproj.filters
+++ b/PCbuild/_testinternalcapi.vcxproj.filters
@@ -15,6 +15,9 @@
<ClCompile Include="..\Modules\_testinternalcapi\pytime.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\Modules\_testinternalcapi\test_critical_sections.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\Modules\_testinternalcapi\test_lock.c">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index eca2671b0cc..f6fbd0fccc3 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -216,6 +216,7 @@
<ClInclude Include="..\Include\internal\pycore_complexobject.h" />
<ClInclude Include="..\Include\internal\pycore_condvar.h" />
<ClInclude Include="..\Include\internal\pycore_context.h" />
+ <ClInclude Include="..\Include\internal\pycore_critical_section.h" />
<ClInclude Include="..\Include\internal\pycore_crossinterp.h" />
<ClInclude Include="..\Include\internal\pycore_descrobject.h" />
<ClInclude Include="..\Include\internal\pycore_dict.h" />
@@ -548,6 +549,7 @@
<ClCompile Include="..\Python\codecs.c" />
<ClCompile Include="..\Python\compile.c" />
<ClCompile Include="..\Python\context.c" />
+ <ClCompile Include="..\Python\critical_section.c" />
<ClCompile Include="..\Python\crossinterp.c" />
<ClCompile Include="..\Python\dynamic_annotations.c" />
<ClCompile Include="..\Python\dynload_win.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 447bb266d75..eb7ba0417c8 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -579,6 +579,9 @@
<ClInclude Include="..\Include\internal\pycore_context.h">
<Filter>Include\internal</Filter>
</ClInclude>
+ <ClInclude Include="..\Include\internal\pycore_critical_section.h">
+ <Filter>Include\internal</Filter>
+ </ClInclude>
<ClInclude Include="..\Include\internal\pycore_crossinterp.h">
<Filter>Include\internal</Filter>
</ClInclude>
@@ -1241,6 +1244,9 @@
<ClCompile Include="..\Python\compile.c">
<Filter>Python</Filter>
</ClCompile>
+ <ClCompile Include="..\Python\critical_section.c">
+ <Filter>Python</Filter>
+ </ClCompile>
<ClCompile Include="..\Python\crossinterp.c">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/Python/critical_section.c b/Python/critical_section.c
new file mode 100644
index 00000000000..2214d80eeb2
--- /dev/null
+++ b/Python/critical_section.c
@@ -0,0 +1,100 @@
+#include "Python.h"
+
+#include "pycore_lock.h"
+#include "pycore_critical_section.h"
+
+static_assert(_Alignof(_PyCriticalSection) >= 4,
+ "critical section must be aligned to at least 4 bytes");
+
+void
+_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m)
+{
+ PyThreadState *tstate = _PyThreadState_GET();
+ c->mutex = NULL;
+ c->prev = (uintptr_t)tstate->critical_section;
+ tstate->critical_section = (uintptr_t)c;
+
+ _PyMutex_LockSlow(m);
+ c->mutex = m;
+}
+
+void
+_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+ int is_m1_locked)
+{
+ PyThreadState *tstate = _PyThreadState_GET();
+ c->base.mutex = NULL;
+ c->mutex2 = NULL;
+ c->base.prev = tstate->critical_section;
+ tstate->critical_section = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
+
+ if (!is_m1_locked) {
+ PyMutex_Lock(m1);
+ }
+ PyMutex_Lock(m2);
+ c->base.mutex = m1;
+ c->mutex2 = m2;
+}
+
+static _PyCriticalSection *
+untag_critical_section(uintptr_t tag)
+{
+ return (_PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
+}
+
+// Release all locks held by critical sections. This is called by
+// _PyThreadState_Detach.
+void
+_PyCriticalSection_SuspendAll(PyThreadState *tstate)
+{
+ uintptr_t *tagptr = &tstate->critical_section;
+ while (_PyCriticalSection_IsActive(*tagptr)) {
+ _PyCriticalSection *c = untag_critical_section(*tagptr);
+
+ if (c->mutex) {
+ PyMutex_Unlock(c->mutex);
+ if ((*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) {
+ _PyCriticalSection2 *c2 = (_PyCriticalSection2 *)c;
+ if (c2->mutex2) {
+ PyMutex_Unlock(c2->mutex2);
+ }
+ }
+ }
+
+ *tagptr |= _Py_CRITICAL_SECTION_INACTIVE;
+ tagptr = &c->prev;
+ }
+}
+
+void
+_PyCriticalSection_Resume(PyThreadState *tstate)
+{
+ uintptr_t p = tstate->critical_section;
+ _PyCriticalSection *c = untag_critical_section(p);
+ assert(!_PyCriticalSection_IsActive(p));
+
+ PyMutex *m1 = c->mutex;
+ c->mutex = NULL;
+
+ PyMutex *m2 = NULL;
+ _PyCriticalSection2 *c2 = NULL;
+ if ((p & _Py_CRITICAL_SECTION_TWO_MUTEXES)) {
+ c2 = (_PyCriticalSection2 *)c;
+ m2 = c2->mutex2;
+ c2->mutex2 = NULL;
+ }
+
+ if (m1) {
+ PyMutex_Lock(m1);
+ }
+ if (m2) {
+ PyMutex_Lock(m2);
+ }
+
+ c->mutex = m1;
+ if (m2) {
+ c2->mutex2 = m2;
+ }
+
+ tstate->critical_section &= ~_Py_CRITICAL_SECTION_INACTIVE;
+}
diff --git a/Python/pystate.c b/Python/pystate.c
index b369a56d6d5..991d8d204a1 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -4,6 +4,7 @@
#include "Python.h"
#include "pycore_ceval.h"
#include "pycore_code.h" // stats
+#include "pycore_critical_section.h" // _PyCriticalSection_Resume()
#include "pycore_dtoa.h" // _dtoa_state_INIT()
#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init()
#include "pycore_frame.h"
@@ -1911,6 +1912,12 @@ _PyThreadState_Attach(PyThreadState *tstate)
Py_FatalError("thread attach failed");
}
+ // Resume previous critical section. This acquires the lock(s) from the
+ // top-most critical section.
+ if (tstate->critical_section != 0) {
+ _PyCriticalSection_Resume(tstate);
+ }
+
#if defined(Py_DEBUG)
errno = err;
#endif
@@ -1922,6 +1929,9 @@ _PyThreadState_Detach(PyThreadState *tstate)
// XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate));
assert(tstate->state == _Py_THREAD_ATTACHED);
assert(tstate == current_fast_get(&_PyRuntime));
+ if (tstate->critical_section != 0) {
+ _PyCriticalSection_SuspendAll(tstate);
+ }
tstate_set_detached(tstate);
tstate_deactivate(tstate);
current_fast_clear(&_PyRuntime);