Skip to content

Commit a92f15b

Browse files
committed
pythongh-128679: Fix tracemalloc.stop() race conditions
tracemalloc_alloc(), tracemalloc_realloc(), PyTraceMalloc_Track(), PyTraceMalloc_Untrack() and _PyTraceMalloc_TraceRef() now check tracemalloc_config.tracing after calling TABLES_LOCK(). _PyTraceMalloc_Stop() now protects more code with TABLES_LOCK(), especially setting tracemalloc_config.tracing to 1. Add a test using PyTraceMalloc_Track() to test tracemalloc.stop() race condition.
1 parent 639e0f3 commit a92f15b

File tree

4 files changed

+181
-50
lines changed

4 files changed

+181
-50
lines changed

Lib/test/test_tracemalloc.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
from test.support.script_helper import (assert_python_ok, assert_python_failure,
88
interpreter_requires_environment)
99
from test import support
10-
from test.support import os_helper
1110
from test.support import force_not_colorized
11+
from test.support import os_helper
12+
from test.support import threading_helper
1213

1314
try:
1415
import _testcapi
@@ -952,7 +953,6 @@ def check_env_var_invalid(self, nframe):
952953
return
953954
self.fail(f"unexpected output: {stderr!a}")
954955

955-
956956
def test_env_var_invalid(self):
957957
for nframe in INVALID_NFRAME:
958958
with self.subTest(nframe=nframe):
@@ -1101,6 +1101,12 @@ def test_stop_untrack(self):
11011101
with self.assertRaises(RuntimeError):
11021102
self.untrack()
11031103

1104+
@unittest.skipIf(_testcapi is None, 'need _testcapi')
1105+
@threading_helper.requires_working_threading()
1106+
def test_tracemalloc_track_race(self):
1107+
# gh-128679: Test fix for tracemalloc.stop() race condition
1108+
_testcapi.tracemalloc_track_race()
1109+
11041110

11051111
if __name__ == "__main__":
11061112
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :func:`tracemalloc.stop` race condition. Fix :mod:`tracemalloc` to
2+
support calling :func:`tracemalloc.stop` in one thread, while another thread
3+
is tracing memory allocations. Patch by Victor Stinner.

Modules/_testcapimodule.c

+99
Original file line numberDiff line numberDiff line change
@@ -3386,6 +3386,104 @@ test_atexit(PyObject *self, PyObject *Py_UNUSED(args))
33863386
Py_RETURN_NONE;
33873387
}
33883388

3389+
3390+
static void
3391+
tracemalloc_track_race_thread(void *data)
3392+
{
3393+
PyTraceMalloc_Track(123, 10, 1);
3394+
3395+
PyThread_type_lock lock = (PyThread_type_lock)data;
3396+
PyThread_release_lock(lock);
3397+
}
3398+
3399+
// gh-128679: Test fix for tracemalloc.stop() race condition
3400+
static PyObject *
3401+
tracemalloc_track_race(PyObject *self, PyObject *args)
3402+
{
3403+
#define NTHREAD 50
3404+
PyObject *tracemalloc = NULL;
3405+
PyObject *stop = NULL;
3406+
PyThread_type_lock locks[NTHREAD];
3407+
memset(locks, 0, sizeof(locks));
3408+
3409+
// Call tracemalloc.start()
3410+
tracemalloc = PyImport_ImportModule("tracemalloc");
3411+
if (tracemalloc == NULL) {
3412+
goto error;
3413+
}
3414+
PyObject *start = PyObject_GetAttrString(tracemalloc, "start");
3415+
if (start == NULL) {
3416+
goto error;
3417+
}
3418+
PyObject *res = PyObject_CallNoArgs(start);
3419+
Py_DECREF(start);
3420+
if (res == NULL) {
3421+
goto error;
3422+
}
3423+
Py_DECREF(res);
3424+
3425+
stop = PyObject_GetAttrString(tracemalloc, "stop");
3426+
Py_CLEAR(tracemalloc);
3427+
if (stop == NULL) {
3428+
goto error;
3429+
}
3430+
3431+
// Start threads
3432+
for (size_t i = 0; i < NTHREAD; i++) {
3433+
PyThread_type_lock lock = PyThread_allocate_lock();
3434+
if (!lock) {
3435+
PyErr_NoMemory();
3436+
goto error;
3437+
}
3438+
locks[i] = lock;
3439+
PyThread_acquire_lock(lock, 1);
3440+
3441+
unsigned long thread;
3442+
thread = PyThread_start_new_thread(tracemalloc_track_race_thread,
3443+
(void*)lock);
3444+
if (thread == (unsigned long)-1) {
3445+
PyErr_SetString(PyExc_RuntimeError, "can't start new thread");
3446+
goto error;
3447+
}
3448+
}
3449+
3450+
// Call tracemalloc.stop() while threads are running
3451+
res = PyObject_CallNoArgs(stop);
3452+
Py_CLEAR(stop);
3453+
if (res == NULL) {
3454+
goto error;
3455+
}
3456+
Py_DECREF(res);
3457+
3458+
// Wait until threads complete with the GIL released
3459+
Py_BEGIN_ALLOW_THREADS
3460+
for (size_t i = 0; i < NTHREAD; i++) {
3461+
PyThread_type_lock lock = locks[i];
3462+
PyThread_acquire_lock(lock, 1);
3463+
PyThread_release_lock(lock);
3464+
}
3465+
Py_END_ALLOW_THREADS
3466+
3467+
// Free threads locks
3468+
for (size_t i=0; i < NTHREAD; i++) {
3469+
PyThread_type_lock lock = locks[i];
3470+
PyThread_free_lock(lock);
3471+
}
3472+
Py_RETURN_NONE;
3473+
3474+
error:
3475+
Py_CLEAR(tracemalloc);
3476+
Py_CLEAR(stop);
3477+
for (size_t i=0; i < NTHREAD; i++) {
3478+
PyThread_type_lock lock = locks[i];
3479+
if (lock) {
3480+
PyThread_free_lock(lock);
3481+
}
3482+
}
3483+
return NULL;
3484+
#undef NTHREAD
3485+
}
3486+
33893487
static PyMethodDef TestMethods[] = {
33903488
{"set_errno", set_errno, METH_VARARGS},
33913489
{"test_config", test_config, METH_NOARGS},
@@ -3532,6 +3630,7 @@ static PyMethodDef TestMethods[] = {
35323630
{"test_critical_sections", test_critical_sections, METH_NOARGS},
35333631
{"pyeval_getlocals", pyeval_getlocals, METH_NOARGS},
35343632
{"test_atexit", test_atexit, METH_NOARGS},
3633+
{"tracemalloc_track_race", tracemalloc_track_race, METH_NOARGS},
35353634
{NULL, NULL} /* sentinel */
35363635
};
35373636

Python/tracemalloc.c

+71-48
Original file line numberDiff line numberDiff line change
@@ -538,12 +538,16 @@ tracemalloc_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
538538
return NULL;
539539

540540
TABLES_LOCK();
541-
if (ADD_TRACE(ptr, nelem * elsize) < 0) {
542-
/* Failed to allocate a trace for the new memory block */
543-
TABLES_UNLOCK();
544-
alloc->free(alloc->ctx, ptr);
545-
return NULL;
541+
542+
if (tracemalloc_config.tracing) {
543+
if (ADD_TRACE(ptr, nelem * elsize) < 0) {
544+
/* Failed to allocate a trace for the new memory block */
545+
alloc->free(alloc->ctx, ptr);
546+
ptr = NULL;
547+
}
546548
}
549+
// else: gh-128679: tracemalloc.stop() was called by another thread
550+
547551
TABLES_UNLOCK();
548552
return ptr;
549553
}
@@ -559,11 +563,15 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size)
559563
if (ptr2 == NULL)
560564
return NULL;
561565

566+
TABLES_LOCK();
567+
if (!tracemalloc_config.tracing) {
568+
// gh-128679: tracemalloc.stop() was called by another thread
569+
goto done;
570+
}
571+
562572
if (ptr != NULL) {
563573
/* an existing memory block has been resized */
564574

565-
TABLES_LOCK();
566-
567575
/* tracemalloc_add_trace() updates the trace if there is already
568576
a trace at address ptr2 */
569577
if (ptr2 != ptr) {
@@ -576,45 +584,46 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size)
576584
memory block and so removed bytes.
577585
578586
This case is very unlikely: a hash entry has just been
579-
released, so the hash table should have at least one free entry.
587+
released, so the hash table should have at least one free
588+
entry.
580589
581590
The GIL and the table lock ensures that only one thread is
582591
allocating memory. */
583592
Py_FatalError("tracemalloc_realloc() failed to allocate a trace");
584593
}
585-
TABLES_UNLOCK();
586594
}
587595
else {
588596
/* new allocation */
589597

590-
TABLES_LOCK();
591598
if (ADD_TRACE(ptr2, new_size) < 0) {
592599
/* Failed to allocate a trace for the new memory block */
593-
TABLES_UNLOCK();
594600
alloc->free(alloc->ctx, ptr2);
595-
return NULL;
601+
ptr2 = NULL;
596602
}
597-
TABLES_UNLOCK();
598603
}
604+
605+
done:
606+
TABLES_UNLOCK();
599607
return ptr2;
600608
}
601609

602610

603611
static void
604612
tracemalloc_free(void *ctx, void *ptr)
605613
{
606-
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx;
607-
608614
if (ptr == NULL)
609615
return;
610616

611-
/* GIL cannot be locked in PyMem_RawFree() because it would introduce
612-
a deadlock in _PyThreadState_DeleteCurrent(). */
613-
617+
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx;
614618
alloc->free(alloc->ctx, ptr);
615619

616620
TABLES_LOCK();
617-
REMOVE_TRACE(ptr);
621+
622+
if (tracemalloc_config.tracing) {
623+
REMOVE_TRACE(ptr);
624+
}
625+
// else: gh-128679: tracemalloc.stop() was called by another thread
626+
618627
TABLES_UNLOCK();
619628
}
620629

@@ -779,17 +788,15 @@ tracemalloc_clear_filename(void *value)
779788

780789
/* reentrant flag must be set to call this function and GIL must be held */
781790
static void
782-
tracemalloc_clear_traces(void)
791+
tracemalloc_clear_traces_unlocked(void)
783792
{
784793
/* The GIL protects variables against concurrent access */
785794
assert(PyGILState_Check());
786795

787-
TABLES_LOCK();
788796
_Py_hashtable_clear(tracemalloc_traces);
789797
_Py_hashtable_clear(tracemalloc_domains);
790798
tracemalloc_traced_memory = 0;
791799
tracemalloc_peak_traced_memory = 0;
792-
TABLES_UNLOCK();
793800

794801
_Py_hashtable_clear(tracemalloc_tracebacks);
795802

@@ -963,6 +970,10 @@ _PyTraceMalloc_Stop(void)
963970
if (!tracemalloc_config.tracing)
964971
return;
965972

973+
// Lock to synchronize with tracemalloc_free() which checks
974+
// 'tracing' while holding the lock.
975+
TABLES_LOCK();
976+
966977
/* stop tracing Python memory allocations */
967978
tracemalloc_config.tracing = 0;
968979

@@ -973,11 +984,13 @@ _PyTraceMalloc_Stop(void)
973984
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
974985
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
975986

976-
tracemalloc_clear_traces();
987+
tracemalloc_clear_traces_unlocked();
977988

978989
/* release memory */
979990
raw_free(tracemalloc_traceback);
980991
tracemalloc_traceback = NULL;
992+
993+
TABLES_UNLOCK();
981994
}
982995

983996

@@ -1307,20 +1320,19 @@ int
13071320
PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
13081321
size_t size)
13091322
{
1310-
int res;
1311-
PyGILState_STATE gil_state;
1323+
PyGILState_STATE gil_state = PyGILState_Ensure();
1324+
TABLES_LOCK();
13121325

1313-
if (!tracemalloc_config.tracing) {
1314-
/* tracemalloc is not tracing: do nothing */
1315-
return -2;
1326+
int res;
1327+
if (tracemalloc_config.tracing) {
1328+
res = tracemalloc_add_trace(domain, ptr, size);
1329+
}
1330+
else {
1331+
// gh-128679: tracemalloc.stop() was called by another thread
1332+
res = -2;
13161333
}
13171334

1318-
gil_state = PyGILState_Ensure();
1319-
1320-
TABLES_LOCK();
1321-
res = tracemalloc_add_trace(domain, ptr, size);
13221335
TABLES_UNLOCK();
1323-
13241336
PyGILState_Release(gil_state);
13251337
return res;
13261338
}
@@ -1329,16 +1341,20 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
13291341
int
13301342
PyTraceMalloc_Untrack(unsigned int domain, uintptr_t ptr)
13311343
{
1332-
if (!tracemalloc_config.tracing) {
1344+
TABLES_LOCK();
1345+
1346+
int result;
1347+
if (tracemalloc_config.tracing) {
1348+
tracemalloc_remove_trace(domain, ptr);
1349+
result = 0;
1350+
}
1351+
else {
13331352
/* tracemalloc is not tracing: do nothing */
1334-
return -2;
1353+
result = -2;
13351354
}
13361355

1337-
TABLES_LOCK();
1338-
tracemalloc_remove_trace(domain, ptr);
13391356
TABLES_UNLOCK();
1340-
1341-
return 0;
1357+
return result;
13421358
}
13431359

13441360

@@ -1376,16 +1392,21 @@ _PyTraceMalloc_TraceRef(PyObject *op, PyRefTracerEvent event, void* Py_UNUSED(ig
13761392
int res = -1;
13771393

13781394
TABLES_LOCK();
1379-
trace_t *trace = _Py_hashtable_get(tracemalloc_traces, TO_PTR(ptr));
1380-
if (trace != NULL) {
1381-
/* update the traceback of the memory block */
1382-
traceback_t *traceback = traceback_new();
1383-
if (traceback != NULL) {
1384-
trace->traceback = traceback;
1385-
res = 0;
1395+
1396+
if (tracemalloc_config.tracing) {
1397+
trace_t *trace = _Py_hashtable_get(tracemalloc_traces, TO_PTR(ptr));
1398+
if (trace != NULL) {
1399+
/* update the traceback of the memory block */
1400+
traceback_t *traceback = traceback_new();
1401+
if (traceback != NULL) {
1402+
trace->traceback = traceback;
1403+
res = 0;
1404+
}
13861405
}
1406+
/* else: cannot track the object, its memory block size is unknown */
13871407
}
1388-
/* else: cannot track the object, its memory block size is unknown */
1408+
// else: gh-128679: tracemalloc.stop() was called by another thread
1409+
13891410
TABLES_UNLOCK();
13901411

13911412
return res;
@@ -1418,7 +1439,9 @@ _PyTraceMalloc_ClearTraces(void)
14181439
return;
14191440
}
14201441
set_reentrant(1);
1421-
tracemalloc_clear_traces();
1442+
TABLES_LOCK();
1443+
tracemalloc_clear_traces_unlocked();
1444+
TABLES_UNLOCK();
14221445
set_reentrant(0);
14231446
}
14241447

0 commit comments

Comments
 (0)