diff options
Diffstat (limited to 'Python/gc_free_threading.c')
-rw-r--r-- | Python/gc_free_threading.c | 102 |
1 files changed, 29 insertions, 73 deletions
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index f2cd8498146..a6513a2c4ab 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -46,6 +46,7 @@ struct collection_state { GCState *gcstate; Py_ssize_t collected; Py_ssize_t uncollectable; + Py_ssize_t long_lived_total; struct worklist unreachable; struct worklist legacy_finalizers; struct worklist wrcb_to_call; @@ -443,7 +444,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, else { // object is reachable, restore `ob_tid`; we're done with these objects gc_restore_tid(op); - state->gcstate->long_lived_total++; + state->long_lived_total++; } return true; @@ -605,6 +606,8 @@ get_gc_state(void) void _PyGC_InitState(GCState *gcstate) { + // TODO: move to pycore_runtime_init.h once the incremental GC lands. + gcstate->generations[0].threshold = 2000; } @@ -885,62 +888,6 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, assert(!_PyErr_Occurred(tstate)); } - -/* Find the oldest generation (highest numbered) where the count - * exceeds the threshold. Objects in the that generation and - * generations younger than it will be collected. */ -static int -gc_select_generation(GCState *gcstate) -{ - for (int i = NUM_GENERATIONS-1; i >= 0; i--) { - if (gcstate->generations[i].count > gcstate->generations[i].threshold) { - /* Avoid quadratic performance degradation in number - of tracked objects (see also issue #4074): - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - - long_lived_pending / long_lived_total - - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - <constant number> of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). - - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008. His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html - */ - if (i == NUM_GENERATIONS - 1 - && gcstate->long_lived_pending < gcstate->long_lived_total / 4) - { - continue; - } - return i; - } - } - return -1; -} - static void cleanup_worklist(struct worklist *worklist) { @@ -952,6 +899,21 @@ cleanup_worklist(struct worklist *worklist) } } +static bool +gc_should_collect(GCState *gcstate) +{ + int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count); + int threshold = gcstate->generations[0].threshold; + if (count <= threshold || threshold == 0 || !gcstate->enabled) { + return false; + } + // Avoid quadratic behavior by scaling threshold to the number of live + // objects. A few tests rely on immediate scheduling of the GC so we ignore + // the scaled threshold if generations[1].threshold is set to zero. + return (count > gcstate->long_lived_total / 4 || + gcstate->generations[1].threshold == 0); +} + static void gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) { @@ -1029,15 +991,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) return 0; } - if (generation == GENERATION_AUTO) { - // Select the oldest generation that needs collecting. We will collect - // objects from that generation and all generations younger than it. - generation = gc_select_generation(gcstate); - if (generation < 0) { - // No generation needs to be collected. - _Py_atomic_store_int(&gcstate->collecting, 0); - return 0; - } + if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) { + // Don't collect if the threshold is not exceeded. + _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; } assert(generation >= 0 && generation < NUM_GENERATIONS); @@ -1082,6 +1039,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) m = state.collected; n = state.uncollectable; + gcstate->long_lived_total = state.long_lived_total; if (gcstate->debug & _PyGC_DEBUG_STATS) { double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); @@ -1523,12 +1481,10 @@ _PyObject_GC_Link(PyObject *op) { PyThreadState *tstate = _PyThreadState_GET(); GCState *gcstate = &tstate->interp->gc; - gcstate->generations[0].count++; /* number of allocated GC objects */ - if (gcstate->generations[0].count > gcstate->generations[0].threshold && - gcstate->enabled && - gcstate->generations[0].threshold && - !_Py_atomic_load_int_relaxed(&gcstate->collecting) && - !_PyErr_Occurred(tstate)) + gcstate->generations[0].count++; + + if (gc_should_collect(gcstate) && + !_Py_atomic_load_int_relaxed(&gcstate->collecting)) { _Py_ScheduleGC(tstate->interp); } @@ -1537,7 +1493,7 @@ _PyObject_GC_Link(PyObject *op) void _Py_RunGC(PyThreadState *tstate) { - gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); + gc_collect_main(tstate, 0, _Py_GC_REASON_HEAP); } static PyObject * |