Skip to content

Commit 893034c

Browse files
authored
gh-132917: Use RSS + swap for estimate of process memory usage (gh-133464)
1 parent e4561da commit 893034c

File tree

4 files changed

+66
-57
lines changed

4 files changed

+66
-57
lines changed

Doc/library/gc.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ The :mod:`gc` module provides the following functions:
128128
starts. For each collection, all the objects in the young generation and some
129129
fraction of the old generation are collected.
130130

131-
In the free-threaded build, the increase in process resident set size (RSS)
132-
is also checked before running the collector. If the RSS has not increased
131+
In the free-threaded build, the increase in process memory usage is also
132+
checked before running the collector. If the memory usage has not increased
133133
by 10% since the last collection and the net number of object allocations
134134
has not exceeded 40 times *threshold0*, the collection is not run.
135135

Include/internal/pycore_interp_structs.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -246,14 +246,14 @@ struct _gc_runtime_state {
246246
/* True if gc.freeze() has been used. */
247247
int freeze_active;
248248

249-
/* Resident set size (RSS) of the process after last GC. */
250-
Py_ssize_t last_rss;
249+
/* Memory usage of the process (RSS + swap) after last GC. */
250+
Py_ssize_t last_mem;
251251

252252
/* This accumulates the new object count whenever collection is deferred
253253
due to the memory usage increase condition not being met. Reset on collection. */
254254
Py_ssize_t deferred_count;
255255

256-
/* Mutex held for gc_should_collect_rss(). */
256+
/* Mutex held for gc_should_collect_mem_usage(). */
257257
PyMutex mutex;
258258
#endif
259259
};
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
For the free-threaded build, check the process resident set size (RSS)
2-
increase before triggering a full automatic garbage collection. If the RSS
3-
has not increased 10% since the last collection then it is deferred.
1+
For the free-threaded build, check the process memory usage increase before
2+
triggering a full automatic garbage collection. If the memory used has not
3+
increased by 10% since the last collection, the collection is deferred.

Python/gc_free_threading.c

+58-49
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717

1818
#include "pydtrace.h"
1919

20-
// Platform-specific includes for get_current_rss().
20+
// Platform-specific includes for get_process_mem_usage().
2121
#ifdef _WIN32
2222
#include <windows.h>
2323
#include <psapi.h> // For GetProcessMemoryInfo
2424
#elif defined(__linux__)
2525
#include <unistd.h> // For sysconf, getpid
2626
#elif defined(__APPLE__)
2727
#include <mach/mach.h>
28+
#include <mach/task.h> // Required for TASK_VM_INFO
2829
#include <unistd.h> // For sysconf, getpid
2930
#elif defined(__FreeBSD__)
3031
#include <sys/types.h>
@@ -1901,13 +1902,14 @@ cleanup_worklist(struct worklist *worklist)
19011902
}
19021903
}
19031904

1904-
// Return the current resident set size (RSS) of the process, in units of KB.
1905-
// Returns -1 if this operation is not supported or on failure.
1905+
// Return the memory usage (typically RSS + swap) of the process, in units of
1906+
// KB. Returns -1 if this operation is not supported or on failure.
19061907
static Py_ssize_t
1907-
get_current_rss(void)
1908+
get_process_mem_usage(void)
19081909
{
19091910
#ifdef _WIN32
19101911
// Windows implementation using GetProcessMemoryInfo
1912+
// Returns WorkingSetSize + PagefileUsage
19111913
PROCESS_MEMORY_COUNTERS pmc;
19121914
HANDLE hProcess = GetCurrentProcess();
19131915
if (NULL == hProcess) {
@@ -1917,55 +1919,58 @@ get_current_rss(void)
19171919

19181920
// GetProcessMemoryInfo returns non-zero on success
19191921
if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) {
1920-
// pmc.WorkingSetSize is in bytes. Convert to KB.
1921-
return (Py_ssize_t)(pmc.WorkingSetSize / 1024);
1922+
// Values are in bytes, convert to KB.
1923+
return (Py_ssize_t)((pmc.WorkingSetSize + pmc.PagefileUsage) / 1024);
19221924
}
19231925
else {
19241926
return -1;
19251927
}
19261928

19271929
#elif __linux__
1928-
// Linux implementation using /proc/self/statm
1929-
long page_size_bytes = sysconf(_SC_PAGE_SIZE);
1930-
if (page_size_bytes <= 0) {
1931-
return -1;
1932-
}
1933-
1934-
FILE *fp = fopen("/proc/self/statm", "r");
1930+
// Linux, use smaps_rollup (Kernel >= 4.14) for RSS + Swap
1931+
FILE* fp = fopen("/proc/self/smaps_rollup", "r");
19351932
if (fp == NULL) {
19361933
return -1;
19371934
}
19381935

1939-
// Second number is resident size in pages
1940-
long rss_pages;
1941-
if (fscanf(fp, "%*d %ld", &rss_pages) != 1) {
1942-
fclose(fp);
1943-
return -1;
1936+
char line_buffer[256];
1937+
long long rss_kb = -1;
1938+
long long swap_kb = -1;
1939+
1940+
while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) {
1941+
if (rss_kb == -1 && strncmp(line_buffer, "Rss:", 4) == 0) {
1942+
sscanf(line_buffer + 4, "%lld", &rss_kb);
1943+
}
1944+
else if (swap_kb == -1 && strncmp(line_buffer, "Swap:", 5) == 0) {
1945+
sscanf(line_buffer + 5, "%lld", &swap_kb);
1946+
}
1947+
if (rss_kb != -1 && swap_kb != -1) {
1948+
break; // Found both
1949+
}
19441950
}
19451951
fclose(fp);
19461952

1947-
// Sanity check
1948-
if (rss_pages < 0 || rss_pages > 1000000000) {
1949-
return -1;
1953+
if (rss_kb != -1 && swap_kb != -1) {
1954+
return (Py_ssize_t)(rss_kb + swap_kb);
19501955
}
1951-
1952-
// Convert unit to KB
1953-
return (Py_ssize_t)rss_pages * (page_size_bytes / 1024);
1956+
return -1;
19541957

19551958
#elif defined(__APPLE__)
19561959
// --- MacOS (Darwin) ---
1957-
mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
1958-
mach_task_basic_info_data_t info;
1960+
// Returns phys_footprint (RAM + compressed memory)
1961+
task_vm_info_data_t vm_info;
1962+
mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
19591963
kern_return_t kerr;
19601964

1961-
kerr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &count);
1965+
kerr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count);
19621966
if (kerr != KERN_SUCCESS) {
19631967
return -1;
19641968
}
1965-
// info.resident_size is in bytes. Convert to KB.
1966-
return (Py_ssize_t)(info.resident_size / 1024);
1969+
// phys_footprint is in bytes. Convert to KB.
1970+
return (Py_ssize_t)(vm_info.phys_footprint / 1024);
19671971

19681972
#elif defined(__FreeBSD__)
1973+
// NOTE: Returns RSS only. Per-process swap usage isn't readily available
19691974
long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
19701975
if (page_size_kb <= 0) {
19711976
return -1;
@@ -2004,6 +2009,7 @@ get_current_rss(void)
20042009
return rss_kb;
20052010

20062011
#elif defined(__OpenBSD__)
2012+
// NOTE: Returns RSS only. Per-process swap usage isn't readily available
20072013
long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
20082014
if (page_size_kb <= 0) {
20092015
return -1;
@@ -2039,37 +2045,39 @@ get_current_rss(void)
20392045
}
20402046

20412047
static bool
2042-
gc_should_collect_rss(GCState *gcstate)
2048+
gc_should_collect_mem_usage(GCState *gcstate)
20432049
{
2044-
Py_ssize_t rss = get_current_rss();
2045-
if (rss < 0) {
2046-
// Reading RSS is not support or failed.
2050+
Py_ssize_t mem = get_process_mem_usage();
2051+
if (mem < 0) {
2052+
// Reading process memory usage is not supported or failed.
20472053
return true;
20482054
}
20492055
int threshold = gcstate->young.threshold;
20502056
Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed(&gcstate->deferred_count);
20512057
if (deferred > threshold * 40) {
2052-
// Too many new container objects since last GC, even though RSS
2058+
// Too many new container objects since last GC, even though memory use
20532059
// might not have increased much. This is intended to avoid resource
20542060
// exhaustion if some objects consume resources but don't result in a
2055-
// RSS increase. We use 40x as the factor here because older versions
2056-
// of Python would do full collections after roughly every 70,000 new
2057-
// container objects.
2061+
// memory usage increase. We use 40x as the factor here because older
2062+
// versions of Python would do full collections after roughly every
2063+
// 70,000 new container objects.
20582064
return true;
20592065
}
2060-
Py_ssize_t last_rss = gcstate->last_rss;
2061-
Py_ssize_t rss_threshold = Py_MAX(last_rss / 10, 128);
2062-
if ((rss - last_rss) > rss_threshold) {
2063-
// The RSS has increased too much, do a collection.
2066+
Py_ssize_t last_mem = gcstate->last_mem;
2067+
Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128);
2068+
if ((mem - last_mem) > mem_threshold) {
2069+
// The process memory usage has increased too much, do a collection.
20642070
return true;
20652071
}
20662072
else {
2067-
// The RSS has not increased enough, defer the collection and clear
2068-
// the young object count so we don't check RSS again on the next call
2069-
// to gc_should_collect().
2073+
// The memory usage has not increased enough, defer the collection and
2074+
// clear the young object count so we don't check memory usage again
2075+
// on the next call to gc_should_collect().
20702076
PyMutex_Lock(&gcstate->mutex);
2071-
gcstate->deferred_count += gcstate->young.count;
2072-
gcstate->young.count = 0;
2077+
_Py_atomic_store_ssize_relaxed(&gcstate->deferred_count,
2078+
gcstate->deferred_count +
2079+
gcstate->young.count);
2080+
_Py_atomic_store_int(&gcstate->young.count, 0);
20732081
PyMutex_Unlock(&gcstate->mutex);
20742082
return false;
20752083
}
@@ -2094,7 +2102,7 @@ gc_should_collect(GCState *gcstate)
20942102
// objects.
20952103
return false;
20962104
}
2097-
return gc_should_collect_rss(gcstate);
2105+
return gc_should_collect_mem_usage(gcstate);
20982106
}
20992107

21002108
static void
@@ -2237,8 +2245,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
22372245
// to be freed.
22382246
delete_garbage(state);
22392247

2240-
// Store the current RSS, possibly smaller now that we deleted garbage.
2241-
state->gcstate->last_rss = get_current_rss();
2248+
// Store the current memory usage, can be smaller now if breaking cycles
2249+
// freed some memory.
2250+
state->gcstate->last_mem = get_process_mem_usage();
22422251

22432252
// Append objects with legacy finalizers to the "gc.garbage" list.
22442253
handle_legacy_finalizers(state);

0 commit comments

Comments
 (0)