
Commit 5e74df2

Merge tag 'x86-urgent-2024-03-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:

 - Ensure that the encryption mask at boot is properly propagated on
   5-level page tables, otherwise the PGD entry is incorrectly set to
   non-encrypted, which causes system crashes during boot.

 - Undo the deferred 5-level page table setup as it cannot work with
   memory encryption enabled.

 - Prevent inconsistent XFD state on CPU hotplug, where the MSR is reset
   to the default value but the cached variable is not, so subsequent
   comparisons might yield the wrong result, which in turn prevents
   updating the MSR.

 - Register the local APIC address only once in the MPPARSE enumeration
   to prevent triggering the related WARN_ONs() in the APIC and topology
   code.

 - Handle the case where no APIC is found gracefully by registering a
   fake APIC in the topology code. That makes all related topology
   functions work correctly and does not affect the actual APIC driver
   code at all.

 - Don't evaluate logical IDs during early boot as the local APIC IDs are
   not yet enumerated and the invoked function returns an error code.
   Nothing requires the logical IDs before the final CPUID enumeration
   takes place, which happens after the APIC enumeration.

 - Cure the fallout of the per-CPU rework on UP which misplaced the
   copying of boot_cpu_data to per-CPU data, so that the final update to
   boot_cpu_data got lost, causing inconsistent state and boot crashes.

 - Use copy_from_kernel_nofault() in the kprobes setup as there is no
   guarantee that the address can be safely accessed.

 - Reorder struct members in struct saved_context to work around another
   kmemleak false positive.

 - Remove the buggy code which tries to update the E820 kexec table for
   setup_data as that is never passed to the kexec kernel.

 - Update the resource control documentation to use the proper units.

 - Fix a Kconfig warning observed with tinyconfig.

* tag 'x86-urgent-2024-03-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/boot/64: Move 5-level paging global variable assignments back
  x86/boot/64: Apply encryption mask to 5-level pagetable update
  x86/cpu: Add model number for another Intel Arrow Lake mobile processor
  x86/fpu: Keep xfd_state in sync with MSR_IA32_XFD
  Documentation/x86: Document that resctrl bandwidth control units are MiB
  x86/mpparse: Register APIC address only once
  x86/topology: Handle the !APIC case gracefully
  x86/topology: Don't evaluate logical IDs during early boot
  x86/cpu: Ensure that CPU info updates are propagated on UP
  kprobes/x86: Use copy_from_kernel_nofault() to read from unsafe address
  x86/pm: Work around false positive kmemleak report in msr_build_context()
  x86/kexec: Do not update E820 kexec table for setup_data
  x86/config: Fix warning for 'make ARCH=x86_64 tinyconfig'
2 parents b136f68 + 9843231 commit 5e74df2

15 files changed, +80 -89 lines changed

Documentation/arch/x86/resctrl.rst

+4-4
@@ -45,7 +45,7 @@ mount options are:
 	Enable code/data prioritization in L2 cache allocations.
 "mba_MBps":
 	Enable the MBA Software Controller(mba_sc) to specify MBA
-	bandwidth in MBps
+	bandwidth in MiBps
 "debug":
 	Make debug files accessible. Available debug files are annotated with
 	"Available only with debug option".
@@ -526,7 +526,7 @@ threads start using more cores in an rdtgroup, the actual bandwidth may
 increase or vary although user specified bandwidth percentage is same.
 
 In order to mitigate this and make the interface more user friendly,
-resctrl added support for specifying the bandwidth in MBps as well. The
+resctrl added support for specifying the bandwidth in MiBps as well. The
 kernel underneath would use a software feedback mechanism or a "Software
 Controller(mba_sc)" which reads the actual bandwidth using MBM counters
 and adjust the memory bandwidth percentages to ensure::
@@ -573,13 +573,13 @@ Memory b/w domain is L3 cache.
 
 	MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
 
-Memory bandwidth Allocation specified in MBps
+Memory bandwidth Allocation specified in MiBps
 ---------------------------------------------
 
 Memory bandwidth domain is L3 cache.
 ::
 
-	MB:<cache_id0>=bw_MBps0;<cache_id1>=bw_MBps1;...
+	MB:<cache_id0>=bw_MiBps0;<cache_id1>=bw_MiBps1;...
 
 Slow Memory Bandwidth Allocation (SMBA)
 ---------------------------------------
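Since this is purely a units fix, a quick self-contained check of what a MiB-based value means in bytes may help; this is our illustration, not kernel code, and 2048 is an arbitrary sample value:

#include <stdio.h>

int main(void)
{
	unsigned long mibps = 2048;             /* sample value written to the schemata file */
	double bytes = mibps * 1024.0 * 1024.0; /* MiB/s: 2^20 bytes per second */

	printf("%lu MiBps = %.0f bytes/s (~%.3f decimal GB/s)\n",
	       mibps, bytes, bytes / 1e9);
	return 0;
}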

arch/x86/configs/tiny.config

+1
@@ -1,5 +1,6 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
+# CONFIG_UNWINDER_ORC is not set
 CONFIG_UNWINDER_GUESS=y
 # CONFIG_UNWINDER_FRAME_POINTER is not set

arch/x86/include/asm/intel-family.h

+1
@@ -127,6 +127,7 @@
 
 #define INTEL_FAM6_ARROWLAKE_H		0xC5
 #define INTEL_FAM6_ARROWLAKE		0xC6
+#define INTEL_FAM6_ARROWLAKE_U		0xB5
 
 #define INTEL_FAM6_LUNARLAKE_M		0xBD
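For context, model defines like these are typically consumed through the CPU match tables from arch/x86/include/asm/cpu_device_id.h. A hedged illustration of such a table (this example is ours, not part of the merge):

#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

/* Illustrative only: match the newly added mobile model the same way
 * existing drivers match the other Arrow Lake variants. */
static const struct x86_cpu_id arrowlake_ids[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H,	NULL),
	X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE,	NULL),
	X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_U,	NULL),
	{ }
};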

arch/x86/include/asm/suspend_32.h

+5-5
@@ -12,11 +12,6 @@
 
 /* image of the saved processor state */
 struct saved_context {
-	/*
-	 * On x86_32, all segment registers except gs are saved at kernel
-	 * entry in pt_regs.
-	 */
-	u16 gs;
 	unsigned long cr0, cr2, cr3, cr4;
 	u64 misc_enable;
 	struct saved_msrs saved_msrs;
@@ -27,6 +22,11 @@ struct saved_context {
 	unsigned long tr;
 	unsigned long safety;
 	unsigned long return_address;
+	/*
+	 * On x86_32, all segment registers except gs are saved at kernel
+	 * entry in pt_regs.
+	 */
+	u16 gs;
 	bool misc_enable_saved;
 } __attribute__((packed));
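The reorder matters because struct saved_context is packed: with u16 gs first, every later member sits at an offset shifted by two bytes, so the pointer held inside saved_msrs ends up misaligned. kmemleak scans memory at pointer-size granularity and therefore never sees that reference, flagging the allocation made in msr_build_context() as a leak. Moving gs to the tail restores natural alignment for the pointer-bearing members. A standalone sketch of the layout effect (simplified stand-in structs, 64-bit userspace assumed):

#include <stdio.h>
#include <stddef.h>

struct saved_msrs { void *array; };	/* stand-in: holds a heap pointer */

struct ctx_old {			/* gs first: pointer lands misaligned */
	unsigned short gs;
	unsigned long cr0, cr2, cr3, cr4;
	struct saved_msrs saved_msrs;
} __attribute__((packed));

struct ctx_new {			/* gs last: pointer stays aligned */
	unsigned long cr0, cr2, cr3, cr4;
	struct saved_msrs saved_msrs;
	unsigned short gs;
} __attribute__((packed));

int main(void)
{
	/* kmemleak only scans pointer-aligned words, so a pointer stored at
	 * an offset that is not a multiple of sizeof(void *) is invisible. */
	printf("old: offset %zu -> %s\n", offsetof(struct ctx_old, saved_msrs),
	       offsetof(struct ctx_old, saved_msrs) % sizeof(void *) ? "missed" : "seen");
	printf("new: offset %zu -> %s\n", offsetof(struct ctx_new, saved_msrs),
	       offsetof(struct ctx_new, saved_msrs) % sizeof(void *) ? "missed" : "seen");
	return 0;
}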

arch/x86/kernel/cpu/common.c

+9
@@ -2307,6 +2307,8 @@ void arch_smt_update(void)
 
 void __init arch_cpu_finalize_init(void)
 {
+	struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
+
 	identify_boot_cpu();
 
 	select_idle_routine();
@@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void)
 	fpu__init_system();
 	fpu__init_cpu();
 
+	/*
+	 * Ensure that access to the per CPU representation has the initial
+	 * boot CPU configuration.
+	 */
+	*c = boot_cpu_data;
+	c->initialized = true;
+
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
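The subtlety here is ordering: on UP builds the old copy ran from smp_prepare_boot_cpu(), which start_kernel() invokes before arch_cpu_finalize_init(), i.e. before identify_boot_cpu() has produced the final boot_cpu_data. Roughly, as a simplified reduction of the init flow (not verbatim kernel code):

/* Simplified boot ordering on UP (our reduction, not verbatim):
 *
 *   start_kernel()
 *     ...
 *     smp_prepare_boot_cpu();	// old copy point: boot_cpu_data not final yet
 *     ...
 *     arch_cpu_finalize_init()
 *       identify_boot_cpu();	// finalizes boot_cpu_data
 *       ...
 *       *c = boot_cpu_data;	// new copy point: snapshot is complete
 *       c->initialized = true;
 */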

arch/x86/kernel/cpu/topology.c

+11
@@ -415,6 +415,17 @@ void __init topology_init_possible_cpus(void)
 	unsigned int total = assigned + disabled;
 	u32 apicid, firstid;
 
+	/*
+	 * If there was no APIC registered, then fake one so that the
+	 * topology bitmap is populated. That ensures that the code below
+	 * is valid and the various query interfaces can be used
+	 * unconditionally. This does not affect the actual APIC code in
+	 * any way because either the local APIC address has not been
+	 * registered or the local APIC was disabled on the command line.
+	 */
+	if (topo_info.boot_cpu_apic_id == BAD_APICID)
+		topology_register_boot_apic(0);
+
 	if (!restrict_to_up()) {
 		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
 			disabled += assigned - nr_cpu_ids;

arch/x86/kernel/cpu/topology_common.c

+7-5
@@ -140,16 +140,18 @@ static void parse_topology(struct topo_scan *tscan, bool early)
 	}
 }
 
-static void topo_set_ids(struct topo_scan *tscan)
+static void topo_set_ids(struct topo_scan *tscan, bool early)
 {
 	struct cpuinfo_x86 *c = tscan->c;
 	u32 apicid = c->topo.apicid;
 
 	c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN);
 	c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN);
 
-	c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
-	c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+	if (!early) {
+		c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
+		c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+	}
 
 	/* Package relative core ID */
 	c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >>
@@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c)
 			tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]);
 	}
 
-	topo_set_ids(&tscan);
+	topo_set_ids(&tscan, false);
 }
 
 void __init cpu_init_topology(struct cpuinfo_x86 *c)
@@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c)
 		x86_topo_system.dom_size[dom] = 1U << sft;
 	}
 
-	topo_set_ids(&tscan);
+	topo_set_ids(&tscan, true);
 
 	/*
 	 * AMD systems have Nodes per package which cannot be mapped to

arch/x86/kernel/e820.c

+1-16
@@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void)
 
 		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 
-		/*
-		 * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
-		 * kexec and do not need to be reserved.
-		 */
-		if (data->type != SETUP_EFI &&
-		    data->type != SETUP_IMA &&
-		    data->type != SETUP_RNG_SEED)
-			e820__range_update_kexec(pa_data,
-						 sizeof(*data) + data->len,
-						 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-
 		if (data->type == SETUP_INDIRECT) {
 			len += data->len;
 			early_memunmap(data, sizeof(*data));
@@ -1038,20 +1027,16 @@ void __init e820__reserve_setup_data(void)
 
 			indirect = (struct setup_indirect *)data->data;
 
-			if (indirect->type != SETUP_INDIRECT) {
+			if (indirect->type != SETUP_INDIRECT)
 				e820__range_update(indirect->addr, indirect->len,
 						   E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-				e820__range_update_kexec(indirect->addr, indirect->len,
-							 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-			}
 		}
 
 		pa_data = pa_next;
 		early_memunmap(data, len);
 	}
 
 	e820__update_table(e820_table);
-	e820__update_table(e820_table_kexec);
 
 	pr_info("extended physical RAM map:\n");
 	e820__print_table("reserve setup_data");

arch/x86/kernel/fpu/xstate.c

+3-2
@@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void)
 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
 	 * lazy passthrough. Write independent of the dynamic state static
 	 * key as that does not work on the boot CPU. This also ensures
-	 * that any stale state is wiped out from XFD.
+	 * that any stale state is wiped out from XFD. Reset the per CPU
+	 * xfd cache too.
 	 */
 	if (cpu_feature_enabled(X86_FEATURE_XFD))
-		wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
+		xfd_set_state(init_fpstate.xfd);
 
 	/*
 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features

arch/x86/kernel/fpu/xstate.h

+10-4
@@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs
 #endif
 
 #ifdef CONFIG_X86_64
+static inline void xfd_set_state(u64 xfd)
+{
+	wrmsrl(MSR_IA32_XFD, xfd);
+	__this_cpu_write(xfd_state, xfd);
+}
+
 static inline void xfd_update_state(struct fpstate *fpstate)
 {
 	if (fpu_state_size_dynamic()) {
 		u64 xfd = fpstate->xfd;
 
-		if (__this_cpu_read(xfd_state) != xfd) {
-			wrmsrl(MSR_IA32_XFD, xfd);
-			__this_cpu_write(xfd_state, xfd);
-		}
+		if (__this_cpu_read(xfd_state) != xfd)
+			xfd_set_state(xfd);
 	}
 }
 
 extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
 #else
+static inline void xfd_set_state(u64 xfd) { }
+
 static inline void xfd_update_state(struct fpstate *fpstate) { }
 
 static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
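To see why the helper matters, here is a small userspace model of the hazard the merge message describes; 'msr' stands in for MSR_IA32_XFD, 'cache' for the per-CPU xfd_state shadow, and 0x40000 is a placeholder for init_fpstate.xfd:

#include <assert.h>

static unsigned long long msr;		/* simulated MSR_IA32_XFD */
static unsigned long long cache;	/* simulated per-CPU xfd_state shadow */

static void xfd_set_state(unsigned long long xfd)
{
	msr = xfd;			/* models wrmsrl(MSR_IA32_XFD, xfd) */
	cache = xfd;			/* models __this_cpu_write(xfd_state, xfd) */
}

static void xfd_update_state(unsigned long long xfd)
{
	if (cache != xfd)		/* skip redundant, slow MSR writes */
		xfd_set_state(xfd);
}

int main(void)
{
	xfd_update_state(0);		/* task runs with its dynamic features armed */

	/* CPU goes offline and back online: the init path used to do a bare
	 * MSR write without refreshing the shadow. */
	msr = 0x40000;			/* placeholder for init_fpstate.xfd */

	xfd_update_state(0);		/* cache still reads 0, so the write is skipped... */
	assert(msr == 0x40000);		/* ...and the MSR silently stays wrong */
	return 0;
}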

arch/x86/kernel/head64.c

+8-10
@@ -81,6 +81,13 @@ static inline bool check_la57_support(void)
 	if (!(native_read_cr4() & X86_CR4_LA57))
 		return false;
 
+	RIP_REL_REF(__pgtable_l5_enabled) = 1;
+	RIP_REL_REF(pgdir_shift) = 48;
+	RIP_REL_REF(ptrs_per_p4d) = 512;
+	RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5;
+	RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5;
+	RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5;
+
 	return true;
 }
 
@@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 		p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
 		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
 
-		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC;
+		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
 	}
 
 	RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
@@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
 			 (__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
-	if (check_la57_support()) {
-		__pgtable_l5_enabled = 1;
-		pgdir_shift = 48;
-		ptrs_per_p4d = 512;
-		page_offset_base = __PAGE_OFFSET_BASE_L5;
-		vmalloc_base = __VMALLOC_BASE_L5;
-		vmemmap_base = __VMEMMAP_BASE_L5;
-	}
-
 	cr4_init_shadow();
 
 	/* Kill off the identity-map trampoline */
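The RIP_REL_REF() accessor matters because check_la57_support() now runs from the early, not-yet-relocated entry path, where a plain global access could resolve to an unmapped link-time address. Paraphrasing the 6.8-era helper from arch/x86/include/asm/asm.h (treat this as a sketch; details may differ):

/* Sketch of forcing a RIP-relative reference on x86-64: leaq with %rip
 * computes the symbol address relative to the current instruction
 * pointer, which is correct both before and after relocation. */
static __always_inline void *rip_rel_ptr(void *p)
{
	asm("leaq %c1(%%rip), %0" : "=r" (p) : "i" (p));
	return p;
}

#define RIP_REL_REF(var)	(*(typeof(&(var)))rip_rel_ptr(&(var)))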

arch/x86/kernel/kprobes/core.c

+10-1
@@ -373,7 +373,16 @@ static bool can_probe(unsigned long paddr)
 kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
 					 bool *on_func_entry)
 {
-	if (is_endbr(*(u32 *)addr)) {
+	u32 insn;
+
+	/*
+	 * Since 'addr' is not guaranteed to be safe to access, use
+	 * copy_from_kernel_nofault() to read the instruction:
+	 */
+	if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
+		return NULL;
+
+	if (is_endbr(insn)) {
 		*on_func_entry = !offset || offset == 4;
 		if (*on_func_entry)
 			offset = 4;
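The same pattern applies to any code handed an address it cannot vouch for: read through copy_from_kernel_nofault() rather than dereferencing directly. A hedged sketch of a reusable helper (insn_word_at() is our name, not a kernel API):

#include <linux/uaccess.h>
#include <linux/types.h>

/* Fetch a 32-bit instruction word without risking an oops: returns false
 * if 'addr' is not a valid, mapped kernel address. No fault is taken. */
static bool insn_word_at(unsigned long addr, u32 *insn)
{
	return copy_from_kernel_nofault(insn, (void *)addr, sizeof(*insn)) == 0;
}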

arch/x86/kernel/mpparse.c

+5-5
@@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 	if (!smp_check_mpc(mpc, oem, str))
 		return 0;
 
-	/* Initialize the lapic mapping */
-	if (!acpi_lapic)
-		register_lapic_address(mpc->lapic);
-
-	if (early)
+	if (early) {
+		/* Initialize the lapic mapping */
+		if (!acpi_lapic)
+			register_lapic_address(mpc->lapic);
 		return 1;
+	}
 
 	/* Now process the configuration blocks. */
 	while (count < mpc->length) {

arch/x86/kernel/setup.c

-10
@@ -1206,16 +1206,6 @@ void __init i386_reserve_resources(void)
 
 #endif /* CONFIG_X86_32 */
 
-#ifndef CONFIG_SMP
-void __init smp_prepare_boot_cpu(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	*c = boot_cpu_data;
-	c->initialized = true;
-}
-#endif
-
 static struct notifier_block kernel_offset_notifier = {
 	.notifier_call = dump_kernel_offset
 };

arch/x86/kernel/smpboot.c

+5-27
@@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused)
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
-static void __init smp_store_boot_cpu_info(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	*c = boot_cpu_data;
-	c->initialized = true;
-}
-
 /*
  * The bootstrap kernel entry code has set these up. Save them for
  * a given CPU
@@ -1039,29 +1031,15 @@ static __init void disable_smp(void)
 	cpumask_set_cpu(0, topology_die_cpumask(0));
 }
 
-static void __init smp_cpu_index_default(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-
-	for_each_possible_cpu(i) {
-		c = &cpu_data(i);
-		/* mark all to hotplug */
-		c->cpu_index = nr_cpu_ids;
-	}
-}
-
 void __init smp_prepare_cpus_common(void)
 {
 	unsigned int i;
 
-	smp_cpu_index_default();
-
-	/*
-	 * Setup boot CPU information
-	 */
-	smp_store_boot_cpu_info(); /* Final full version of the data */
-	mb();
+	/* Mark all except the boot CPU as hotpluggable */
+	for_each_possible_cpu(i) {
+		if (i)
+			per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids;
+	}
 
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
