
Commit 27f27a6

[3.14] gh-142531: Fix free-threaded GC performance regression (gh-142562) (gh-142617)
If there are many untracked tuples, the GC will run too often, resulting in poor performance. The fix is to include untracked tuples in the "long lived" object count. The number of frozen objects is also now included since the free-threaded GC must scan those too. (cherry picked from commit e38967e)
1 parent e014076 commit 27f27a6
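Why many untracked tuples made the GC run too often: the free-threaded collector schedules collections against the count of live ("long lived") objects, so leaving untracked tuples out of that count makes the trigger threshold far too small. A minimal Python model of that kind of heuristic is sketched below; the function name, parameters, and scaling factor are illustrative assumptions, not CPython's actual code.

# Illustrative model only -- names, parameters, and the scaling factor are
# assumptions; the real scheduling logic lives in Python/gc_free_threading.c.
def should_collect(allocs_since_gc, base_threshold, long_lived_total, scale=4):
    # A collection is triggered once enough new objects exist relative to
    # the live-object count recorded by the previous collection.
    if allocs_since_gc <= base_threshold:
        return False
    return allocs_since_gc > long_lived_total // scale

# If untracked tuples are excluded, long_lived_total stays artificially
# small, the second condition is almost always true, and the GC re-runs
# after nearly every base threshold's worth of allocations.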

File tree

4 files changed: +60 -6 lines

Lib/test/test_gc.py

Lines changed: 18 additions & 0 deletions
@@ -1157,6 +1157,24 @@ def test_something(self):
         assert_python_ok("-c", source)
 
 
+    @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+    def test_tuple_untrack_counts(self):
+        # This ensures that the free-threaded GC is counting untracked tuples
+        # in the "long_lived_total" count. This is required to avoid
+        # performance issues from running the GC too frequently. See
+        # GH-142531 as an example.
+        gc.collect()
+        count = _testinternalcapi.get_long_lived_total()
+        n = 20_000
+        tuples = [(x,) for x in range(n)]
+        gc.collect()
+        new_count = _testinternalcapi.get_long_lived_total()
+        self.assertFalse(gc.is_tracked(tuples[0]))
+        # Use n // 2 just in case some other objects were collected.
+        self.assertTrue(new_count - count > (n // 2))
+
+
 class IncrementalGCTests(unittest.TestCase):
     @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
     @requires_gil_enabled("Free threading does not support incremental GC")
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Fix a free-threaded GC performance regression. If there are many untracked
+tuples, the GC will run too often, resulting in poor performance. The fix
+is to include untracked tuples in the "long lived" object count. The number
+of frozen objects is also now included since the free-threaded GC must
+scan those too.

Modules/_testinternalcapi.c

Lines changed: 8 additions & 0 deletions
@@ -2250,6 +2250,13 @@ get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
     }
     return PyLong_FromVoidPtr(bc);
 }
+
+static PyObject *
+get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
+}
+
 #endif
 
 static PyObject *
@@ -2552,6 +2559,7 @@ static PyMethodDef module_functions[] = {
     {"py_thread_id", get_py_thread_id, METH_NOARGS},
     {"get_tlbc", get_tlbc, METH_O, NULL},
     {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
+    {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
 #endif
 #ifdef _Py_TIER2
     {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},

Python/gc_free_threading.c

Lines changed: 29 additions & 6 deletions
@@ -374,6 +374,19 @@ op_from_block(void *block, void *arg, bool include_frozen)
     return op;
 }
 
+// As above but returns untracked and frozen objects as well.
+static PyObject *
+op_from_block_all_gc(void *block, void *arg)
+{
+    struct visitor_args *a = arg;
+    if (block == NULL) {
+        return NULL;
+    }
+    PyObject *op = (PyObject *)((char*)block + a->offset);
+    assert(PyObject_IS_GC(op));
+    return op;
+}
+
 static int
 gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
                          struct visitor_args *arg)
@@ -1175,12 +1188,20 @@ static bool
 scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
                   void *block, size_t block_size, void *args)
 {
-    PyObject *op = op_from_block(block, args, false);
+    PyObject *op = op_from_block_all_gc(block, args);
     if (op == NULL) {
         return true;
     }
-
     struct collection_state *state = (struct collection_state *)args;
+    // The free-threaded GC cost is proportional to the number of objects in
+    // the mimalloc GC heap and so we should include the counts for untracked
+    // and frozen objects as well. This is especially important if many
+    // tuples have been untracked.
+    state->long_lived_total++;
+    if (!_PyObject_GC_IS_TRACKED(op) || gc_is_frozen(op)) {
+        return true;
+    }
+
     if (gc_is_unreachable(op)) {
         // Disable deferred refcounting for unreachable objects so that they
         // are collected immediately after finalization.
@@ -1198,6 +1219,9 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
     else {
         worklist_push(&state->unreachable, op);
     }
+    // It is possible this object will be resurrected but
+    // for now we assume it will be deallocated.
+    state->long_lived_total--;
     return true;
 }
 
@@ -1211,7 +1235,6 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
     // object is reachable, restore `ob_tid`; we're done with these objects
     gc_restore_tid(op);
     gc_clear_alive(op);
-    state->long_lived_total++;
     return true;
 }
 
@@ -1818,6 +1841,7 @@ handle_resurrected_objects(struct collection_state *state)
             _PyObject_ASSERT(op, Py_REFCNT(op) > 1);
             worklist_remove(&iter);
             merge_refcount(op, -1); // remove worklist reference
+            state->long_lived_total++;
         }
     }
 
@@ -2220,9 +2244,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
         }
     }
 
-    // Record the number of live GC objects
-    interp->gc.long_lived_total = state->long_lived_total;
-
     // Clear weakrefs and enqueue callbacks (but do not call them).
     clear_weakrefs(state);
     _PyEval_StartTheWorld(interp);
@@ -2240,6 +2261,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     err = handle_resurrected_objects(state);
     // Clear free lists in all threads
     _PyGC_ClearAllFreeLists(interp);
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
    _PyEval_StartTheWorld(interp);
 
     if (err < 0) {
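Taken together, the counting changes reduce to one rule: every block in the GC heap contributes to long_lived_total unless it is tracked, unfrozen, and unreachable, with resurrected objects added back afterwards. A Python sketch of that rule, with the C-level checks (_PyObject_GC_IS_TRACKED, gc_is_frozen, gc_is_unreachable) passed in as hypothetical stand-in callables:

def count_long_lived(gc_heap_objects, is_tracked, is_frozen, is_unreachable):
    # Net effect of the scan_heap_visitor changes above, as pseudocode.
    total = 0
    for op in gc_heap_objects:       # every object in the mimalloc GC heap
        total += 1                   # untracked and frozen objects now count
        if is_tracked(op) and not is_frozen(op) and is_unreachable(op):
            total -= 1               # assumed deallocated; re-added later if
                                     # the object turns out to be resurrected
    return total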
