Skip to content

Commit 567b583

Browse files
committed
Merge: tracing/osnoise: Do not cancel timer if no kthread and add cpumask to identify user threads
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/5167
JIRA: https://issues.redhat.com/browse/RHEL-39968

rtla crashes the timerlat tracer if it is killed with SIGTERM and subsequently re-run:

    while true; do rtla timerlat top -u -q & PID=$!; sleep 5; kill -INT $PID; sleep 0.001; kill -TERM $PID; wait $PID; done

Fix two separate causes of the issue:

- Check for an active kthread before doing hrtimer_cancel() in both timerlat_fd_release() and tlat_var_reset(). Add the necessary locking to prevent a race between the kthread read and the timer cancel.
- Identify CPUs on which a user workload is running with a cpumask and prevent calling kthread_stop() on the threads of such CPUs.

Signed-off-by: Tomas Glozar <[email protected]>
Approved-by: Luis Claudio R. Goncalves <[email protected]>
Approved-by: Andrew Halaney <[email protected]>
Approved-by: CKI KWF Bot <[email protected]>
Merged-by: Rado Vrbovsky <[email protected]>
2 parents e964808 + 2c246e8 commit 567b583

File tree

1 file changed

+34
-16
lines changed

1 file changed

+34
-16
lines changed

kernel/trace/trace_osnoise.c

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,11 @@ static inline struct osnoise_variables *this_cpu_osn_var(void)
228228
return this_cpu_ptr(&per_cpu_osnoise_var);
229229
}
230230

231+
/*
232+
* Protect the interface.
233+
*/
234+
static struct mutex interface_lock;
235+
231236
#ifdef CONFIG_TIMERLAT_TRACER
232237
/*
233238
* Runtime information for the timer mode.
@@ -259,14 +264,20 @@ static inline void tlat_var_reset(void)
259264
{
260265
struct timerlat_variables *tlat_var;
261266
int cpu;
267+
268+
/* Synchronize with the timerlat interfaces */
269+
mutex_lock(&interface_lock);
262270
/*
263271
* So far, all the values are initialized as 0, so
264272
* zeroing the structure is perfect.
265273
*/
266274
for_each_cpu(cpu, cpu_online_mask) {
267275
tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
276+
if (tlat_var->kthread)
277+
hrtimer_cancel(&tlat_var->timer);
268278
memset(tlat_var, 0, sizeof(*tlat_var));
269279
}
280+
mutex_unlock(&interface_lock);
270281
}
271282
#else /* CONFIG_TIMERLAT_TRACER */
272283
#define tlat_var_reset() do {} while (0)
@@ -331,11 +342,6 @@ struct timerlat_sample {
331342
};
332343
#endif
333344

334-
/*
335-
* Protect the interface.
336-
*/
337-
static struct mutex interface_lock;
338-
339345
/*
340346
* Tracer data.
341347
*/
@@ -1612,6 +1618,7 @@ static int run_osnoise(void)
16121618

16131619
static struct cpumask osnoise_cpumask;
16141620
static struct cpumask save_cpumask;
1621+
static struct cpumask kthread_cpumask;
16151622

16161623
/*
16171624
* osnoise_sleep - sleep until the next period
@@ -1675,6 +1682,7 @@ static inline int osnoise_migration_pending(void)
16751682
*/
16761683
mutex_lock(&interface_lock);
16771684
this_cpu_osn_var()->kthread = NULL;
1685+
cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
16781686
mutex_unlock(&interface_lock);
16791687

16801688
return 1;
@@ -1945,11 +1953,16 @@ static void stop_kthread(unsigned int cpu)
19451953
{
19461954
struct task_struct *kthread;
19471955

1956+
mutex_lock(&interface_lock);
19481957
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
19491958
if (kthread) {
1950-
if (test_bit(OSN_WORKLOAD, &osnoise_options)) {
1959+
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1960+
mutex_unlock(&interface_lock);
1961+
1962+
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1963+
!WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
19511964
kthread_stop(kthread);
1952-
} else {
1965+
} else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
19531966
/*
19541967
* This is a user thread waiting on the timerlat_fd. We need
19551968
* to close all users, and the best way to guarantee this is
@@ -1958,16 +1971,15 @@ static void stop_kthread(unsigned int cpu)
19581971
kill_pid(kthread->thread_pid, SIGKILL, 1);
19591972
put_task_struct(kthread);
19601973
}
1961-
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
19621974
} else {
1975+
mutex_unlock(&interface_lock);
19631976
/* if no workload, just return */
19641977
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
19651978
/*
19661979
* This is set in the osnoise tracer case.
19671980
*/
19681981
per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
19691982
barrier();
1970-
return;
19711983
}
19721984
}
19731985
}
@@ -1982,12 +1994,8 @@ static void stop_per_cpu_kthreads(void)
19821994
{
19831995
int cpu;
19841996

1985-
cpus_read_lock();
1986-
1987-
for_each_online_cpu(cpu)
1997+
for_each_possible_cpu(cpu)
19881998
stop_kthread(cpu);
1989-
1990-
cpus_read_unlock();
19911999
}
19922000

19932001
/*
@@ -2021,6 +2029,7 @@ static int start_kthread(unsigned int cpu)
20212029
}
20222030

20232031
per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2032+
cpumask_set_cpu(cpu, &kthread_cpumask);
20242033

20252034
return 0;
20262035
}
@@ -2048,8 +2057,16 @@ static int start_per_cpu_kthreads(void)
20482057
*/
20492058
cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
20502059

2051-
for_each_possible_cpu(cpu)
2060+
for_each_possible_cpu(cpu) {
2061+
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2062+
struct task_struct *kthread;
2063+
2064+
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
2065+
if (!WARN_ON(!kthread))
2066+
kthread_stop(kthread);
2067+
}
20522068
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
2069+
}
20532070

20542071
for_each_cpu(cpu, current_mask) {
20552072
retval = start_kthread(cpu);
@@ -2579,7 +2596,8 @@ static int timerlat_fd_release(struct inode *inode, struct file *file)
25792596
osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
25802597
tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
25812598

2582-
hrtimer_cancel(&tlat_var->timer);
2599+
if (tlat_var->kthread)
2600+
hrtimer_cancel(&tlat_var->timer);
25832601
memset(tlat_var, 0, sizeof(*tlat_var));
25842602

25852603
osn_var->sampling = 0;

0 commit comments

Comments
 (0)