Skip to content

Commit e38967e

Browse files
authored
gh-142531: Fix free-threaded GC performance regression (gh-142562)
If there are many untracked tuples, the GC will run too often, resulting in poor performance. The fix is to include untracked tuples in the "long lived" object count. The number of frozen objects is also now included since the free-threaded GC must scan those too.
1 parent af18572 commit e38967e

File tree

4 files changed

+61
-6
lines changed

4 files changed

+61
-6
lines changed

Lib/test/test_gc.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,24 @@ def test():
12311231
assert_python_ok("-c", code_inside_function)
12321232

12331233

1234+
@unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
def test_tuple_untrack_counts(self):
    # This ensures that the free-threaded GC is counting untracked tuples
    # in the "long_lived_total" count.  This is required to avoid
    # performance issues from running the GC too frequently.  See
    # GH-142531 as an example.
    gc.collect()
    count = _testinternalcapi.get_long_lived_total()
    n = 20_000
    # One-element tuples of ints are untracked by the tuple-untracking
    # optimization after a collection.
    tuples = [(x,) for x in range(n)]
    gc.collect()
    new_count = _testinternalcapi.get_long_lived_total()
    self.assertFalse(gc.is_tracked(tuples[0]))
    # Use n // 2 just in case some other objects were collected.
    # assertGreater (rather than assertTrue on a comparison) reports
    # both operands when the check fails.
    self.assertGreater(new_count - count, n // 2)
1250+
1251+
12341252
class IncrementalGCTests(unittest.TestCase):
12351253
@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
12361254
@requires_gil_enabled("Free threading does not support incremental GC")
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
Fix a free-threaded GC performance regression. If there are many untracked
tuples, the GC will run too often, resulting in poor performance. The fix
is to include untracked tuples in the "long lived" object count. The number
of frozen objects is also now included since the free-threaded GC must
scan those too.

Modules/_testinternalcapi.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2250,6 +2250,13 @@ get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj)
22502250
}
22512251
return PyLong_FromVoidPtr(bc);
22522252
}
2253+
2254+
/* Expose the interpreter's count of long-lived GC objects to Python
 * (test-only helper; value is maintained by the free-threaded GC). */
static PyObject *
get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyInterpreterState *interp = PyInterpreterState_Get();
    return PyLong_FromInt64(interp->gc.long_lived_total);
}
2259+
22532260
#endif
22542261

22552262
static PyObject *
@@ -2590,6 +2597,7 @@ static PyMethodDef module_functions[] = {
25902597
{"py_thread_id", get_py_thread_id, METH_NOARGS},
25912598
{"get_tlbc", get_tlbc, METH_O, NULL},
25922599
{"get_tlbc_id", get_tlbc_id, METH_O, NULL},
2600+
{"get_long_lived_total", get_long_lived_total, METH_NOARGS},
25932601
#endif
25942602
#ifdef _Py_TIER2
25952603
{"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},

Python/gc_free_threading.c

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,19 @@ op_from_block(void *block, void *arg, bool include_frozen)
375375
return op;
376376
}
377377

378+
// As above but returns untracked and frozen objects as well.
379+
static PyObject *
380+
op_from_block_all_gc(void *block, void *arg)
381+
{
382+
struct visitor_args *a = arg;
383+
if (block == NULL) {
384+
return NULL;
385+
}
386+
PyObject *op = (PyObject *)((char*)block + a->offset);
387+
assert(PyObject_IS_GC(op));
388+
return op;
389+
}
390+
378391
static int
379392
gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
380393
struct visitor_args *arg)
@@ -1186,12 +1199,20 @@ static bool
11861199
scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
11871200
void *block, size_t block_size, void *args)
11881201
{
1189-
PyObject *op = op_from_block(block, args, false);
1202+
PyObject *op = op_from_block_all_gc(block, args);
11901203
if (op == NULL) {
11911204
return true;
11921205
}
1193-
11941206
struct collection_state *state = (struct collection_state *)args;
1207+
// The free-threaded GC cost is proportional to the number of objects in
1208+
// the mimalloc GC heap and so we should include the counts for untracked
1209+
// and frozen objects as well. This is especially important if many
1210+
// tuples have been untracked.
1211+
state->long_lived_total++;
1212+
if (!_PyObject_GC_IS_TRACKED(op) || gc_is_frozen(op)) {
1213+
return true;
1214+
}
1215+
11951216
if (gc_is_unreachable(op)) {
11961217
// Disable deferred refcounting for unreachable objects so that they
11971218
// are collected immediately after finalization.
@@ -1209,6 +1230,9 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
12091230
else {
12101231
worklist_push(&state->unreachable, op);
12111232
}
1233+
// It is possible this object will be resurrected but
1234+
// for now we assume it will be deallocated.
1235+
state->long_lived_total--;
12121236
return true;
12131237
}
12141238

@@ -1222,7 +1246,6 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
12221246
// object is reachable, restore `ob_tid`; we're done with these objects
12231247
gc_restore_tid(op);
12241248
gc_clear_alive(op);
1225-
state->long_lived_total++;
12261249
return true;
12271250
}
12281251

@@ -1891,6 +1914,7 @@ handle_resurrected_objects(struct collection_state *state)
18911914
_PyObject_ASSERT(op, Py_REFCNT(op) > 1);
18921915
worklist_remove(&iter);
18931916
merge_refcount(op, -1); // remove worklist reference
1917+
state->long_lived_total++;
18941918
}
18951919
}
18961920
}
@@ -2303,9 +2327,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
23032327
}
23042328
}
23052329

2306-
// Record the number of live GC objects
2307-
interp->gc.long_lived_total = state->long_lived_total;
2308-
23092330
// Find weakref callbacks we will honor (but do not call them).
23102331
find_weakref_callbacks(state);
23112332
_PyEval_StartTheWorld(interp);
@@ -2326,8 +2347,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
23262347
if (err == 0) {
23272348
clear_weakrefs(state);
23282349
}
2350+
// Record the number of live GC objects
2351+
interp->gc.long_lived_total = state->long_lived_total;
23292352
_PyEval_StartTheWorld(interp);
23302353

2354+
23312355
if (err < 0) {
23322356
cleanup_worklist(&state->unreachable);
23332357
cleanup_worklist(&state->legacy_finalizers);

0 commit comments

Comments
 (0)