author     Neil Schemenauer <nas-github@arctrix.com>   2025-01-15 11:27:28 -0800
committer  GitHub <noreply@github.com>                 2025-01-15 11:27:28 -0800
commit     080f444a58dae1bb76a87ce746a09fd499792cfb (patch)
tree       73ea5f42ef212db1359cfa08cac34511d444a2b3 /Python/gc_free_threading.c
parent     8d8b854824c4723d7c5924f1d5c6a397ea7214a5 (diff)
gh-128807: Add marking phase for free-threaded cyclic GC (gh-128808)
Diffstat (limited to 'Python/gc_free_threading.c')
-rw-r--r--  Python/gc_free_threading.c  336
1 file changed, 320 insertions(+), 16 deletions(-)
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index f7f44407494..d1023d93510 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -17,6 +17,17 @@
#include "pydtrace.h"
#include "pycore_uniqueid.h" // _PyObject_MergeThreadLocalRefcounts()
+
+// enable the "mark alive" pass of GC
+#define GC_ENABLE_MARK_ALIVE 1
+
+// include additional roots in "mark alive" pass
+#define GC_MARK_ALIVE_EXTRA_ROOTS 1
+
+// include Python thread stacks in the set of known roots
+#define GC_MARK_ALIVE_STACKS 1
+
+
#ifdef Py_GIL_DISABLED
typedef struct _gc_runtime_state GCState;
@@ -114,27 +125,65 @@ worklist_remove(struct worklist_iter *iter)
}
static inline int
+gc_has_bit(PyObject *op, uint8_t bit)
+{
+ return (op->ob_gc_bits & bit) != 0;
+}
+
+static inline void
+gc_set_bit(PyObject *op, uint8_t bit)
+{
+ op->ob_gc_bits |= bit;
+}
+
+static inline void
+gc_clear_bit(PyObject *op, uint8_t bit)
+{
+ op->ob_gc_bits &= ~bit;
+}
+
+static inline int
gc_is_frozen(PyObject *op)
{
- return (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0;
+ return gc_has_bit(op, _PyGC_BITS_FROZEN);
}
static inline int
gc_is_unreachable(PyObject *op)
{
- return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0;
+ return gc_has_bit(op, _PyGC_BITS_UNREACHABLE);
}
-static void
+static inline void
gc_set_unreachable(PyObject *op)
{
- op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE;
+ gc_set_bit(op, _PyGC_BITS_UNREACHABLE);
}
-static void
+static inline void
gc_clear_unreachable(PyObject *op)
{
- op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE;
+ gc_clear_bit(op, _PyGC_BITS_UNREACHABLE);
+}
+
+static inline int
+gc_is_alive(PyObject *op)
+{
+ return gc_has_bit(op, _PyGC_BITS_ALIVE);
+}
+
+#ifdef GC_ENABLE_MARK_ALIVE
+static inline void
+gc_set_alive(PyObject *op)
+{
+ gc_set_bit(op, _PyGC_BITS_ALIVE);
+}
+#endif
+
+static inline void
+gc_clear_alive(PyObject *op)
+{
+ gc_clear_bit(op, _PyGC_BITS_ALIVE);
}
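
The new helpers just wrap single-bit tests and updates of ob_gc_bits. A minimal, self-contained sketch of the same pattern (flag values invented here; the real ones live in pycore_gc.h), including the alive/unreachable exclusivity that the asserts added below rely on:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model: per-object flags packed into one byte, with test/set/clear
 * wrappers. Not the CPython layout; bit values are invented. */
#define FLAG_FROZEN       ((uint8_t)(1 << 0))
#define FLAG_UNREACHABLE  ((uint8_t)(1 << 1))
#define FLAG_ALIVE        ((uint8_t)(1 << 2))

typedef struct { uint8_t gc_bits; } toy_obj;

static int  has_bit(toy_obj *op, uint8_t bit)   { return (op->gc_bits & bit) != 0; }
static void set_bit(toy_obj *op, uint8_t bit)   { op->gc_bits |= bit; }
static void clear_bit(toy_obj *op, uint8_t bit) { op->gc_bits &= ~bit; }

int main(void)
{
    toy_obj o = { 0 };
    set_bit(&o, FLAG_ALIVE);
    /* An object marked alive is never also marked unreachable; the asserts
     * added in this commit check the same invariant on real objects. */
    assert(!(has_bit(&o, FLAG_ALIVE) && has_bit(&o, FLAG_UNREACHABLE)));
    clear_bit(&o, FLAG_ALIVE);
    printf("alive=%d frozen=%d\n", has_bit(&o, FLAG_ALIVE), has_bit(&o, FLAG_FROZEN));
    return 0;
}
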
// Initialize the `ob_tid` field to zero if the object is not already
@@ -143,6 +192,7 @@ static void
gc_maybe_init_refs(PyObject *op)
{
if (!gc_is_unreachable(op)) {
+ assert(!gc_is_alive(op));
gc_set_unreachable(op);
op->ob_tid = 0;
}
@@ -264,9 +314,13 @@ static void
gc_restore_refs(PyObject *op)
{
if (gc_is_unreachable(op)) {
+ assert(!gc_is_alive(op));
gc_restore_tid(op);
gc_clear_unreachable(op);
}
+ else {
+ gc_clear_alive(op);
+ }
}
// Given a mimalloc memory block return the PyObject stored in it or NULL if
@@ -392,6 +446,119 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
_Py_FOR_EACH_TSTATE_END(interp);
}
+// Untrack objects that can never create reference cycles.
+// Return true if the object was untracked.
+static bool
+gc_maybe_untrack(PyObject *op)
+{
+ // Currently we only check for tuples containing only non-GC objects. In
+ // theory we could check other immutable objects that contain references
+ // to non-GC objects.
+ if (PyTuple_CheckExact(op)) {
+ _PyTuple_MaybeUntrack(op);
+ if (!_PyObject_GC_IS_TRACKED(op)) {
+ return true;
+ }
+ }
+ return false;
+}
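
The effect of this untracking heuristic can be observed through the public C API. A small embedding sketch (the behavior shown is typical of current CPython builds; exactly when untracking happens is an implementation detail):

#include <Python.h>
#include <stdio.h>

/* Sketch: a tuple holding only non-GC values (small ints) starts out tracked,
 * but a collection may untrack it, so later passes never traverse it again. */
int main(void)
{
    Py_Initialize();
    PyObject *t = Py_BuildValue("(ii)", 1, 2);
    printf("tracked before collect: %d\n", PyObject_GC_IsTracked(t));
    PyGC_Collect();   /* untracking happens lazily, during a GC pass */
    printf("tracked after collect:  %d\n", PyObject_GC_IsTracked(t));
    Py_DECREF(t);
    return Py_FinalizeEx();
}
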
+
+#ifdef GC_ENABLE_MARK_ALIVE
+static int
+mark_alive_stack_push(PyObject *op, _PyObjectStack *stack)
+{
+ if (op == NULL) {
+ return 0;
+ }
+ if (!_PyObject_GC_IS_TRACKED(op)) {
+ return 0;
+ }
+ if (gc_is_alive(op)) {
+ return 0; // already visited this object
+ }
+ if (gc_maybe_untrack(op)) {
+ return 0; // was untracked, don't visit it
+ }
+
+ // Need to call tp_traverse on this object. Add to stack and mark it
+ // alive so we don't traverse it a second time.
+ gc_set_alive(op);
+ if (_PyObjectStack_Push(stack, op) < 0) {
+ return -1;
+ }
+ return 0;
+}
+
+static bool
+gc_clear_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
+ void *block, size_t block_size, void *args)
+{
+ PyObject *op = op_from_block(block, args, false);
+ if (op == NULL) {
+ return true;
+ }
+ if (gc_is_alive(op)) {
+ gc_clear_alive(op);
+ }
+ return true;
+}
+
+static void
+gc_abort_mark_alive(PyInterpreterState *interp,
+ struct collection_state *state,
+ _PyObjectStack *stack)
+{
+ // We failed to allocate memory for "stack" while doing the "mark
+ // alive" phase. In that case, free the object stack and make sure
+ // that no objects have the alive bit set.
+ _PyObjectStack_Clear(stack);
+ gc_visit_heaps(interp, &gc_clear_alive_bits, &state->base);
+}
+
+#ifdef GC_MARK_ALIVE_STACKS
+static int
+gc_visit_stackref_mark_alive(_PyObjectStack *stack, _PyStackRef stackref)
+{
+ // Note: we MUST check that it is deferred before checking the rest.
+ // Otherwise we might read into invalid memory due to non-deferred references
+ // being dead already.
+ if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) {
+ PyObject *op = PyStackRef_AsPyObjectBorrow(stackref);
+ if (mark_alive_stack_push(op, stack) < 0) {
+ return -1;
+ }
+ }
+ return 0;
+}
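
The ordering requirement in the comment above is the usual tagged-reference rule: consult the tag before ever dereferencing the pointer. A self-contained toy of that rule with an invented slot layout (not the real _PyStackRef representation):

#include <stdint.h>
#include <stdio.h>

/* Toy: a slot whose low bit says whether the stored pointer is "deferred"
 * (safe to inspect) or an ordinary borrowed pointer that may already be dead.
 * The tag must be checked before the pointer is touched. */
typedef struct { uintptr_t bits; } slot_t;

#define TAG_DEFERRED ((uintptr_t)1)

static int   slot_is_deferred(slot_t s) { return (s.bits & TAG_DEFERRED) != 0; }
static void *slot_pointer(slot_t s)     { return (void *)(s.bits & ~TAG_DEFERRED); }

static void visit_slot(slot_t s)
{
    /* Test the tag first; only then is it safe to look at the pointee. */
    if (slot_is_deferred(s) && slot_pointer(s) != NULL) {
        int *p = slot_pointer(s);
        printf("deferred value: %d\n", *p);
    }
    else {
        printf("skipped: not deferred (pointer may be dangling)\n");
    }
}

int main(void)
{
    int live = 42;
    slot_t deferred = { (uintptr_t)&live | TAG_DEFERRED };
    slot_t plain    = { (uintptr_t)&live };   /* pretend this one might be stale */
    visit_slot(deferred);
    visit_slot(plain);
    return 0;
}
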
+
+static int
+gc_visit_thread_stacks_mark_alive(PyInterpreterState *interp, _PyObjectStack *stack)
+{
+ _Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
+ for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) {
+ PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
+ if (executable == NULL || !PyCode_Check(executable)) {
+ continue;
+ }
+
+ PyCodeObject *co = (PyCodeObject *)executable;
+ int max_stack = co->co_nlocalsplus + co->co_stacksize;
+ if (gc_visit_stackref_mark_alive(stack, f->f_executable) < 0) {
+ return -1;
+ }
+ for (int i = 0; i < max_stack; i++) {
+ if (gc_visit_stackref_mark_alive(stack, f->localsplus[i]) < 0) {
+ return -1;
+ }
+ }
+ }
+ }
+ _Py_FOR_EACH_TSTATE_END(interp);
+ return 0;
+}
+#endif // GC_MARK_ALIVE_STACKS
+#endif // GC_ENABLE_MARK_ALIVE
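
The per-frame bound used above, co_nlocalsplus + co_stacksize, is the size of the localsplus array that holds locals, cells, and the evaluation stack. A small embedding sketch that prints those two numbers for a compiled function (field access assumes the non-limited C API):

#include <Python.h>
#include <stdio.h>

/* Sketch: print the localsplus count and evaluation-stack size for a small
 * function; their sum bounds the stack slots scanned per frame above. */
int main(void)
{
    Py_Initialize();
    PyObject *module_code = Py_CompileString(
        "def f(x, y):\n"
        "    t = (x, y)\n"
        "    return t\n",
        "<sketch>", Py_file_input);
    if (module_code != NULL) {
        /* The function's code object is stored in the module code's co_consts. */
        PyCodeObject *mod = (PyCodeObject *)module_code;
        for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(mod->co_consts); i++) {
            PyObject *c = PyTuple_GET_ITEM(mod->co_consts, i);
            if (PyCode_Check(c)) {
                PyCodeObject *co = (PyCodeObject *)c;
                printf("co_nlocalsplus=%d co_stacksize=%d\n",
                       co->co_nlocalsplus, co->co_stacksize);
            }
        }
        Py_DECREF(module_code);
    }
    return Py_FinalizeEx();
}
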
+
static void
queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
{
@@ -460,7 +627,8 @@ visit_decref(PyObject *op, void *arg)
{
if (_PyObject_GC_IS_TRACKED(op)
&& !_Py_IsImmortal(op)
- && !gc_is_frozen(op))
+ && !gc_is_frozen(op)
+ && !gc_is_alive(op))
{
// If update_refs hasn't reached this object yet, mark it
// as (tentatively) unreachable and initialize ob_tid to zero.
@@ -482,6 +650,10 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
+ if (gc_is_alive(op)) {
+ return true;
+ }
+
// Exclude immortal objects from garbage collection
if (_Py_IsImmortal(op)) {
op->ob_tid = 0;
@@ -497,14 +669,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
_PyObject_ASSERT(op, refcount >= 0);
if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
- // Untrack tuples and dicts as necessary in this pass, but not objects
- // with zero refcount, which we will want to collect.
- if (PyTuple_CheckExact(op)) {
- _PyTuple_MaybeUntrack(op);
- if (!_PyObject_GC_IS_TRACKED(op)) {
- gc_restore_refs(op);
- return true;
- }
+ if (gc_maybe_untrack(op)) {
+ gc_restore_refs(op);
+ return true;
}
}
@@ -554,6 +721,21 @@ mark_reachable(PyObject *op)
#ifdef GC_DEBUG
static bool
+validate_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
+ void *block, size_t block_size, void *args)
+{
+ PyObject *op = op_from_block(block, args, false);
+ if (op == NULL) {
+ return true;
+ }
+
+ _PyObject_ASSERT_WITH_MSG(op, !gc_is_alive(op),
+ "object should not be marked as alive yet");
+
+ return true;
+}
+
+static bool
validate_refcounts(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
{
@@ -586,6 +768,11 @@ validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
+ if (gc_is_alive(op)) {
+ _PyObject_ASSERT(op, !gc_is_unreachable(op));
+ return true;
+ }
+
_PyObject_ASSERT(op, gc_is_unreachable(op));
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
"refcount is too small");
@@ -602,6 +789,10 @@ mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
+ if (gc_is_alive(op)) {
+ return true;
+ }
+
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
"refcount is too small");
@@ -630,6 +821,7 @@ restore_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
}
gc_restore_tid(op);
gc_clear_unreachable(op);
+ gc_clear_alive(op);
return true;
}
@@ -679,6 +871,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
// object is reachable, restore `ob_tid`; we're done with these objects
gc_restore_tid(op);
+ gc_clear_alive(op);
state->long_lived_total++;
return true;
}
@@ -686,6 +879,89 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
static int
move_legacy_finalizer_reachable(struct collection_state *state);
+#ifdef GC_ENABLE_MARK_ALIVE
+static int
+propagate_alive_bits(_PyObjectStack *stack)
+{
+ for (;;) {
+ PyObject *op = _PyObjectStack_Pop(stack);
+ if (op == NULL) {
+ break;
+ }
+ assert(_PyObject_GC_IS_TRACKED(op));
+ assert(gc_is_alive(op));
+ traverseproc traverse = Py_TYPE(op)->tp_traverse;
+ if (traverse(op, (visitproc)&mark_alive_stack_push, stack) < 0) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+// Using tp_traverse, mark everything reachable from known root objects
+// (which must be non-garbage) as alive (_PyGC_BITS_ALIVE is set). In
+// most programs, this marks nearly all objects that are not actually
+// unreachable.
+//
+// Objects that are actually alive can be missed in this pass if they are
+// referenced only from an unknown root (e.g. an extension module global),
+// if a tp_traverse method is missing or inaccurate, or if they have been
+// untracked. Objects that are only reachable from such missed objects are
+// also missed.
+//
+// If gc.freeze() is used, this pass is disabled since it is unlikely to
+// help much. The next stages of cyclic GC will ignore objects with the
+// alive bit set.
+//
+// Returns -1 on failure (out of memory).
+static int
+mark_alive_from_roots(PyInterpreterState *interp,
+ struct collection_state *state)
+{
+#ifdef GC_DEBUG
+ // Check that no objects have the alive bit set
+ gc_visit_heaps(interp, &validate_alive_bits, &state->base);
+#endif
+ _PyObjectStack stack = { NULL };
+
+ #define STACK_PUSH(op) \
+ if (mark_alive_stack_push(op, &stack) < 0) { \
+ gc_abort_mark_alive(interp, state, &stack); \
+ return -1; \
+ }
+ STACK_PUSH(interp->sysdict);
+#ifdef GC_MARK_ALIVE_EXTRA_ROOTS
+ STACK_PUSH(interp->builtins);
+ STACK_PUSH(interp->dict);
+ struct types_state *types = &interp->types;
+ for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
+ STACK_PUSH(types->builtins.initialized[i].tp_dict);
+ STACK_PUSH(types->builtins.initialized[i].tp_subclasses);
+ }
+ for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
+ STACK_PUSH(types->for_extensions.initialized[i].tp_dict);
+ STACK_PUSH(types->for_extensions.initialized[i].tp_subclasses);
+ }
+#endif
+#ifdef GC_MARK_ALIVE_STACKS
+ if (gc_visit_thread_stacks_mark_alive(interp, &stack) < 0) {
+ gc_abort_mark_alive(interp, state, &stack);
+ return -1;
+ }
+#endif
+ #undef STACK_PUSH
+
+ // Use tp_traverse to find everything reachable from roots.
+ if (propagate_alive_bits(&stack) < 0) {
+ gc_abort_mark_alive(interp, state, &stack);
+ return -1;
+ }
+
+ return 0;
+}
+#endif // GC_ENABLE_MARK_ALIVE
+
+
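
A minimal, self-contained sketch of the pass described above: mark everything reachable from a root using an explicit stack, marking before pushing so nothing is traversed twice, and leave the expensive cycle-detection work only for unmarked objects. The node type and names are invented for illustration; this is not the CPython object model:

#include <stdio.h>

#define MAX_CHILDREN 4

typedef struct node {
    const char *name;
    int alive;                        /* stands in for _PyGC_BITS_ALIVE */
    struct node *children[MAX_CHILDREN];
} node_t;

static int
mark_push(node_t **stack, int *top, int cap, node_t *n)
{
    if (n == NULL || n->alive) {
        return 0;                     /* nothing to do / already visited */
    }
    if (*top == cap) {
        return -1;                    /* out of stack space, treated like OOM */
    }
    n->alive = 1;                     /* mark before pushing, like gc_set_alive() */
    stack[(*top)++] = n;
    return 0;
}

static int
propagate(node_t **stack, int *top, int cap)
{
    while (*top > 0) {
        node_t *n = stack[--(*top)];
        for (int i = 0; i < MAX_CHILDREN; i++) {   /* plays the role of tp_traverse */
            if (mark_push(stack, top, cap, n->children[i]) < 0) {
                return -1;
            }
        }
    }
    return 0;
}

int main(void)
{
    node_t a = {"a", 0, {NULL}}, b = {"b", 0, {NULL}};
    node_t c = {"c", 0, {NULL}}, d = {"d", 0, {NULL}};
    a.children[0] = &b;               /* a -> b -> c -> a: a live cycle */
    b.children[0] = &c;
    c.children[0] = &a;
    d.children[0] = &d;               /* an unreachable self-cycle */

    node_t *heap[] = { &a, &b, &c, &d };
    node_t *stack[16];
    int top = 0;

    if (mark_push(stack, &top, 16, &a) < 0 ||    /* &a plays the role of a known root */
        propagate(stack, &top, 16) < 0) {
        return 1;
    }
    for (size_t i = 0; i < sizeof(heap) / sizeof(heap[0]); i++) {
        printf("%s: %s\n", heap[i]->name,
               heap[i]->alive ? "alive, skipped by cycle detection"
                              : "still a candidate for cycle detection");
    }
    return 0;
}
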
static int
deduce_unreachable_heap(PyInterpreterState *interp,
struct collection_state *state)
@@ -1245,6 +1521,25 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
process_delayed_frees(interp, state);
+ #ifdef GC_ENABLE_MARK_ALIVE
+ // If gc.freeze() was used, it seems likely that doing this "mark alive"
+ // pass will not be a performance win. Typically the majority of alive
+ // objects will be marked as frozen and will be skipped anyhow, without
+ // doing this extra work. Doing this pass also defeats one of the
+ // purposes of using freeze: avoiding writes to objects that are frozen.
+ // So, we just skip this if gc.freeze() was used.
+ if (!state->gcstate->freeze_active) {
+ // Mark objects reachable from known roots as "alive". These will
+ // be ignored for the rest of the GC pass.
+ int err = mark_alive_from_roots(interp, state);
+ if (err < 0) {
+ _PyEval_StartTheWorld(interp);
+ PyErr_NoMemory();
+ return;
+ }
+ }
+ #endif
+
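
gc.freeze() can be exercised from the embedding API; a small sketch (with this change, a collection that runs while freeze_active is set skips the mark-alive pre-pass as well as the frozen objects themselves):

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyRun_SimpleString(
        "import gc\n"
        "gc.freeze()\n"
        "print('frozen objects:', gc.get_freeze_count())\n"
        "gc.collect()\n"      /* frozen objects are not examined by the collector */
        "gc.unfreeze()\n");
    return Py_FinalizeEx();
}
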
// Find unreachable objects
int err = deduce_unreachable_heap(interp, state);
if (err < 0) {
@@ -1253,6 +1548,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
return;
}
+#ifdef GC_DEBUG
+ // At this point, no object should have the alive bit set
+ gc_visit_heaps(interp, &validate_alive_bits, &state->base);
+#endif
+
// Print debugging information.
if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) {
PyObject *op;
@@ -1564,6 +1864,8 @@ _PyGC_Freeze(PyInterpreterState *interp)
{
struct visitor_args args;
_PyEval_StopTheWorld(interp);
+ GCState *gcstate = get_gc_state();
+ gcstate->freeze_active = true;
gc_visit_heaps(interp, &visit_freeze, &args);
_PyEval_StartTheWorld(interp);
}
@@ -1574,7 +1876,7 @@ visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area,
{
PyObject *op = op_from_block(block, args, true);
if (op != NULL) {
- op->ob_gc_bits &= ~_PyGC_BITS_FROZEN;
+ gc_clear_bit(op, _PyGC_BITS_FROZEN);
}
return true;
}
@@ -1584,6 +1886,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
{
struct visitor_args args;
_PyEval_StopTheWorld(interp);
+ GCState *gcstate = get_gc_state();
+ gcstate->freeze_active = false;
gc_visit_heaps(interp, &visit_unfreeze, &args);
_PyEval_StartTheWorld(interp);
}