Skip to content

Commit 51bbb48

Browse files
committed
Merge: Scheduler updates for 9.7
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6398 JIRA: https://issues.redhat.com/browse/RHEL-78821 Proactive fixes and minor updates for scheduler related code. This includes needed commits up to v6.14-rc1. There are not as many since there are a few features upstream which we are not taking into rhel9 at this point. Signed-off-by: Phil Auld <[email protected]> Approved-by: Waiman Long <[email protected]> Approved-by: Herton R. Krzesinski <[email protected]> Approved-by: Tony Camuso <[email protected]> Approved-by: Juri Lelli <[email protected]> Approved-by: Rafael Aquini <[email protected]> Approved-by: CKI KWF Bot <[email protected]> Merged-by: Augusto Caringi <[email protected]>
2 parents 500cda7 + 04db326 commit 51bbb48

File tree

23 files changed

+375
-295
lines changed

23 files changed

+375
-295
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2321,7 +2321,9 @@
23212321
specified in the flag list (default: domain):
23222322

23232323
nohz
2324-
Disable the tick when a single task runs.
2324+
Disable the tick when a single task runs as well as
2325+
disabling other kernel noises like having RCU callbacks
2326+
offloaded. This is equivalent to the nohz_full parameter.
23252327

23262328
A residual 1Hz tick is offloaded to workqueues, which you
23272329
need to affine to housekeeping through the global

Documentation/scheduler/sched-bwc.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ Quota, period and burst are managed within the cpu subsystem via cgroupfs.
7474
to cgroup v1. For cgroup v2, see
7575
:ref:`Documentation/admin-guide/cgroup-v2.rst <cgroup-v2-cpu>`.
7676

77-
- cpu.cfs_quota_us: the total available run-time within a period (in
7877
- cpu.cfs_quota_us: run-time replenished within a period (in microseconds)
7978
- cpu.cfs_period_us: the length of a period (in microseconds)
8079
- cpu.stat: exports throttling statistics [explained further below]
@@ -135,7 +134,7 @@ cpu.stat:
135134
of the group have been throttled.
136135
- nr_bursts: Number of periods burst occurs.
137136
- burst_time: Cumulative wall-time (in nanoseconds) that any CPU has used
138-
above quota in respective periods
137+
above quota in respective periods.
139138

140139
This interface is read-only.
141140

@@ -238,7 +237,7 @@ Examples
238237
additionally, in case accumulation has been done.
239238

240239
With 50ms period, 20ms quota will be equivalent to 40% of 1 CPU.
241-
And 10ms burst will be equivalent to 20% of 1 CPU.
240+
And 10ms burst will be equivalent to 20% of 1 CPU::
242241

243242
# echo 20000 > cpu.cfs_quota_us /* quota = 20ms */
244243
# echo 50000 > cpu.cfs_period_us /* period = 50ms */

drivers/base/topology.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,35 @@ static ssize_t name##_read(struct file *file, struct kobject *kobj, \
2727
loff_t off, size_t count) \
2828
{ \
2929
struct device *dev = kobj_to_dev(kobj); \
30+
cpumask_var_t mask; \
31+
ssize_t n; \
3032
\
31-
return cpumap_print_bitmask_to_buf(buf, topology_##mask(dev->id), \
32-
off, count); \
33+
if (!alloc_cpumask_var(&mask, GFP_KERNEL)) \
34+
return -ENOMEM; \
35+
\
36+
cpumask_copy(mask, topology_##mask(dev->id)); \
37+
n = cpumap_print_bitmask_to_buf(buf, mask, off, count); \
38+
free_cpumask_var(mask); \
39+
\
40+
return n; \
3341
} \
3442
\
3543
static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
3644
struct bin_attribute *attr, char *buf, \
3745
loff_t off, size_t count) \
3846
{ \
3947
struct device *dev = kobj_to_dev(kobj); \
48+
cpumask_var_t mask; \
49+
ssize_t n; \
50+
\
51+
if (!alloc_cpumask_var(&mask, GFP_KERNEL)) \
52+
return -ENOMEM; \
53+
\
54+
cpumask_copy(mask, topology_##mask(dev->id)); \
55+
n = cpumap_print_list_to_buf(buf, mask, off, count); \
56+
free_cpumask_var(mask); \
4057
\
41-
return cpumap_print_list_to_buf(buf, topology_##mask(dev->id), \
42-
off, count); \
58+
return n; \
4359
}
4460

4561
define_id_show_func(physical_package_id, "%d");

include/linux/sched.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,8 @@ struct sched_dl_entity {
631631
*
632632
* @dl_overrun tells if the task asked to be informed about runtime
633633
* overruns.
634+
*
635+
* @dl_server tells if this is a server entity.
634636
*/
635637
unsigned int dl_throttled : 1;
636638
unsigned int dl_yielded : 1;

include/linux/sched/isolation.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,21 @@
77
#include <linux/tick.h>
88

99
enum hk_type {
10-
HK_TYPE_TIMER,
11-
HK_TYPE_RCU,
12-
HK_TYPE_MISC,
13-
HK_TYPE_SCHED,
14-
HK_TYPE_TICK,
1510
HK_TYPE_DOMAIN,
16-
HK_TYPE_WQ,
1711
HK_TYPE_MANAGED_IRQ,
18-
HK_TYPE_KTHREAD,
19-
HK_TYPE_MAX
12+
HK_TYPE_KERNEL_NOISE,
13+
HK_TYPE_MAX,
14+
15+
/*
16+
* The following housekeeping types are only set by the nohz_full
17+
* boot commandline option. So they can share the same value.
18+
*/
19+
HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE,
20+
HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE,
21+
HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE,
22+
HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE,
23+
HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE,
24+
HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE
2025
};
2126

2227
#ifdef CONFIG_CPU_ISOLATION

include/linux/uaccess.h

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
372372
return 0;
373373
}
374374

375+
/**
376+
* copy_struct_to_user: copy a struct to userspace
377+
* @dst: Destination address, in userspace. This buffer must be @usize
378+
* bytes long.
379+
* @usize: (Alleged) size of @dst struct.
380+
* @src: Source address, in kernel space.
381+
* @ksize: Size of @src struct.
382+
* @ignored_trailing: Set to %true if there was a non-zero byte in @src that
383+
* userspace cannot see because they are using a smaller struct.
384+
*
385+
* Copies a struct from kernel space to userspace, in a way that guarantees
386+
* backwards-compatibility for struct syscall arguments (as long as future
387+
* struct extensions are made such that all new fields are *appended* to the
388+
* old struct, and zeroed-out new fields have the same meaning as the old
389+
* struct).
390+
*
391+
* Some syscalls may wish to make sure that userspace knows about everything in
392+
* the struct, and if there is a non-zero value that userspace doesn't know
393+
* about, they want to return an error (such as -EMSGSIZE) or have some other
394+
* fallback (such as adding a "you're missing some information" flag). If
395+
* @ignored_trailing is non-%NULL, it will be set to %true if there was a
396+
* non-zero byte that could not be copied to userspace (ie. was past @usize).
397+
*
398+
* While unconditionally returning an error in this case is the simplest
399+
* solution, for maximum backward compatibility you should try to only return
400+
* -EMSGSIZE if the user explicitly requested the data that couldn't be copied.
401+
* Note that structure sizes can change due to header changes and simple
402+
* recompilations without code changes(!), so if you care about
403+
* @ignored_trailing you probably want to make sure that any new field data is
404+
* associated with a flag. Otherwise you might assume that a program knows
405+
* about data it does not.
406+
*
407+
* @ksize is just sizeof(*src), and @usize should've been passed by userspace.
408+
* The recommended usage is something like the following:
409+
*
410+
* SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize)
411+
* {
412+
* int err;
413+
* bool ignored_trailing;
414+
* struct foo karg = {};
415+
*
416+
* if (usize > PAGE_SIZE)
417+
* return -E2BIG;
418+
* if (usize < FOO_SIZE_VER0)
419+
* return -EINVAL;
420+
*
421+
* // ... modify karg somehow ...
422+
*
423+
* err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg),
424+
* &ignored_trailing);
425+
* if (err)
426+
* return err;
427+
* if (ignored_trailing)
428+
* return -EMSGSIZE;
429+
*
430+
* // ...
431+
* }
432+
*
433+
* There are three cases to consider:
434+
* * If @usize == @ksize, then it's copied verbatim.
435+
* * If @usize < @ksize, then the kernel is trying to pass userspace a newer
436+
* struct than it supports. Thus we only copy the interoperable portions
437+
* (@usize) and ignore the rest (but @ignored_trailing is set to %true if
438+
* any of the trailing (@ksize - @usize) bytes are non-zero).
439+
* * If @usize > @ksize, then the kernel is trying to pass userspace an older
440+
* struct than userspace supports. In order to make sure the
441+
* unknown-to-the-kernel fields don't contain garbage values, we zero the
442+
* trailing (@usize - @ksize) bytes.
443+
*
444+
* Returns (in all cases, some data may have been copied):
445+
* * -EFAULT: access to userspace failed.
446+
*/
447+
static __always_inline __must_check int
448+
copy_struct_to_user(void __user *dst, size_t usize, const void *src,
449+
size_t ksize, bool *ignored_trailing)
450+
{
451+
size_t size = min(ksize, usize);
452+
size_t rest = max(ksize, usize) - size;
453+
454+
/* Double check if ksize is larger than a known object size. */
455+
if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1)))
456+
return -E2BIG;
457+
458+
/* Deal with trailing bytes. */
459+
if (usize > ksize) {
460+
if (clear_user(dst + size, rest))
461+
return -EFAULT;
462+
}
463+
if (ignored_trailing)
464+
*ignored_trailing = ksize < usize &&
465+
memchr_inv(src + size, 0, rest) != NULL;
466+
/* Copy the interoperable parts of the struct. */
467+
if (copy_to_user(dst, src, size))
468+
return -EFAULT;
469+
return 0;
470+
}
471+
375472
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size);
376473

377474
long copy_from_kernel_nofault(void *dst, const void *src, size_t size);

include/linux/wait_bit.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
4949
extern int bit_wait(struct wait_bit_key *key, int mode);
5050
extern int bit_wait_io(struct wait_bit_key *key, int mode);
5151
extern int bit_wait_timeout(struct wait_bit_key *key, int mode);
52-
extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode);
5352

5453
/**
5554
* wait_on_bit - wait for a bit to be cleared

0 commit comments

Comments
 (0)