Skip to content

Commit 470bfbb

Browse files
committed
Merge: edac: add FRU Text decoding support
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/5869
JIRA: https://issues.redhat.com/browse/RHEL-52657
Signed-off-by: Aristeu Rozanski <[email protected]>
Approved-by: Rafael Aquini <[email protected]>
Approved-by: Steve Best <[email protected]>
Approved-by: David Arcari <[email protected]>
Approved-by: CKI KWF Bot <[email protected]>
Merged-by: Augusto Caringi <[email protected]>
2 parents 9fab276 + 49a7613 commit 470bfbb

File tree

12 files changed

+354
-195
lines changed

12 files changed

+354
-195
lines changed

arch/x86/include/asm/mce.h

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
* - TCC bit is present in MCx_STATUS.
6262
*/
6363
#define MCI_CONFIG_MCAX 0x1
64+
#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
6465
#define MCI_IPID_MCATYPE 0xFFFF0000
6566
#define MCI_IPID_HWID 0xFFF
6667

@@ -122,6 +123,9 @@
122123
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
123124
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
124125
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
126+
/* Registers MISC2 to MISC4 are at offsets B to D. */
127+
#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e
128+
#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f
125129
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
126130
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
127131
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -132,6 +136,8 @@
132136
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
133137
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
134138
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
139+
#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
140+
#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
135141

136142
#define XEC(x, mask) (((x) >> 16) & mask)
137143

@@ -187,6 +193,32 @@ enum mce_notifier_prios {
187193
MCE_PRIO_HIGHEST = MCE_PRIO_CEC
188194
};
189195

196+
/**
197+
* struct mce_hw_err - Hardware Error Record.
198+
* @m: Machine Check record.
199+
* @vendor: Vendor-specific error information.
200+
*
201+
* Vendor-specific fields should not be added to struct mce. Instead, vendors
202+
* should export their vendor-specific data through their structure in the
203+
* vendor union below.
204+
*
205+
* AMD's vendor data is parsed by error decoding tools for supplemental error
206+
* information. Thus, current offsets of existing fields must be maintained.
207+
* Only add new fields at the end of AMD's vendor structure.
208+
*/
209+
struct mce_hw_err {
210+
struct mce m;
211+
212+
union vendor_info {
213+
struct {
214+
u64 synd1; /* MCA_SYND1 MSR */
215+
u64 synd2; /* MCA_SYND2 MSR */
216+
} amd;
217+
} vendor;
218+
};
219+
220+
#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
221+
190222
struct notifier_block;
191223
extern void mce_register_decode_chain(struct notifier_block *nb);
192224
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -221,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
221253
u64 lapic_id) { return -EINVAL; }
222254
#endif
223255

224-
void mce_setup(struct mce *m);
225-
void mce_log(struct mce *m);
256+
void mce_prep_record(struct mce_hw_err *err);
257+
void mce_log(struct mce_hw_err *err);
226258
DECLARE_PER_CPU(struct device *, mce_device);
227259

228260
/* Maximum number of MCA banks per CPU. */

arch/x86/include/uapi/asm/mce.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
/*
99
* Fields are zero when not available. Also, this struct is shared with
1010
* userspace mcelog and thus must keep existing fields at current offsets.
11-
* Only add new fields to the end of the structure
11+
* Only add new, shared fields to the end of the structure.
12+
* Do not add vendor-specific fields.
1213
*/
1314
struct mce {
1415
__u64 status; /* Bank's MCi_STATUS MSR */

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -778,29 +778,33 @@ bool amd_mce_usable_address(struct mce *m)
778778

779779
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
780780
{
781-
struct mce m;
781+
struct mce_hw_err err;
782+
struct mce *m = &err.m;
782783

783-
mce_setup(&m);
784+
mce_prep_record(&err);
784785

785-
m.status = status;
786-
m.misc = misc;
787-
m.bank = bank;
788-
m.tsc = rdtsc();
786+
m->status = status;
787+
m->misc = misc;
788+
m->bank = bank;
789+
m->tsc = rdtsc();
789790

790-
if (m.status & MCI_STATUS_ADDRV) {
791-
m.addr = addr;
791+
if (m->status & MCI_STATUS_ADDRV) {
792+
m->addr = addr;
792793

793-
smca_extract_err_addr(&m);
794+
smca_extract_err_addr(m);
794795
}
795796

796797
if (mce_flags.smca) {
797-
rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
798+
rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
798799

799-
if (m.status & MCI_STATUS_SYNDV)
800-
rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
800+
if (m->status & MCI_STATUS_SYNDV) {
801+
rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
802+
rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
803+
rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
804+
}
801805
}
802806

803-
mce_log(&m);
807+
mce_log(&err);
804808
}
805809

806810
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)

arch/x86/kernel/cpu/mce/apei.c

Lines changed: 84 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828

2929
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
3030
{
31-
struct mce m;
31+
struct mce_hw_err err;
32+
struct mce *m;
3233
int lsb;
3334

3435
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
@@ -44,30 +45,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
4445
else
4546
lsb = PAGE_SHIFT;
4647

47-
mce_setup(&m);
48-
m.bank = -1;
48+
mce_prep_record(&err);
49+
m = &err.m;
50+
m->bank = -1;
4951
/* Fake a memory read error with unknown channel */
50-
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
51-
m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
52+
m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
53+
m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
5254

5355
if (severity >= GHES_SEV_RECOVERABLE)
54-
m.status |= MCI_STATUS_UC;
56+
m->status |= MCI_STATUS_UC;
5557

5658
if (severity >= GHES_SEV_PANIC) {
57-
m.status |= MCI_STATUS_PCC;
58-
m.tsc = rdtsc();
59+
m->status |= MCI_STATUS_PCC;
60+
m->tsc = rdtsc();
5961
}
6062

61-
m.addr = mem_err->physical_addr;
62-
mce_log(&m);
63+
m->addr = mem_err->physical_addr;
64+
mce_log(&err);
6365
}
6466
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
6567

6668
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
6769
{
6870
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
69-
unsigned int cpu;
70-
struct mce m;
71+
unsigned int cpu, num_regs;
72+
bool apicid_found = false;
73+
struct mce_hw_err err;
74+
struct mce *m;
7175

7276
if (!boot_cpu_has(X86_FEATURE_SMCA))
7377
return -EINVAL;
@@ -85,41 +89,86 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
8589
return -EINVAL;
8690

8791
/*
88-
* The register array size must be large enough to include all the
89-
* SMCA registers which need to be extracted.
90-
*
9192
* The number of registers in the register array is determined by
9293
* Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
93-
* The register layout is fixed and currently the raw data in the
94-
* register array includes 6 SMCA registers which the kernel can
95-
* extract.
94+
* Sanity-check registers array size.
9695
*/
97-
if (ctx_info->reg_arr_size < 48)
96+
num_regs = ctx_info->reg_arr_size >> 3;
97+
if (!num_regs)
9898
return -EINVAL;
9999

100-
mce_setup(&m);
101-
102-
m.extcpu = -1;
103-
m.socketid = -1;
104-
105100
for_each_possible_cpu(cpu) {
106101
if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
107-
m.extcpu = cpu;
108-
m.socketid = cpu_data(m.extcpu).topo.pkg_id;
102+
apicid_found = true;
109103
break;
110104
}
111105
}
112106

113-
m.apicid = lapic_id;
114-
m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
115-
m.status = *i_mce;
116-
m.addr = *(i_mce + 1);
117-
m.misc = *(i_mce + 2);
118-
/* Skipping MCA_CONFIG */
119-
m.ipid = *(i_mce + 4);
120-
m.synd = *(i_mce + 5);
107+
if (!apicid_found)
108+
return -EINVAL;
109+
110+
m = &err.m;
111+
memset(&err, 0, sizeof(struct mce_hw_err));
112+
mce_prep_record_common(m);
113+
mce_prep_record_per_cpu(cpu, m);
114+
115+
m->bank = (ctx_info->msr_addr >> 4) & 0xFF;
116+
117+
/*
118+
* The SMCA register layout is fixed and includes 16 registers.
119+
* The end of the array may be variable, but the beginning is known.
120+
* Cap the number of registers to the expected max (15): the array starts at MCA_STATUS, so MCA_CTL is not part of it.
121+
*/
122+
if (num_regs > 15)
123+
num_regs = 15;
124+
125+
switch (num_regs) {
126+
/* MCA_SYND2 */
127+
case 15:
128+
err.vendor.amd.synd2 = *(i_mce + 14);
129+
fallthrough;
130+
/* MCA_SYND1 */
131+
case 14:
132+
err.vendor.amd.synd1 = *(i_mce + 13);
133+
fallthrough;
134+
/* MCA_MISC4 */
135+
case 13:
136+
/* MCA_MISC3 */
137+
case 12:
138+
/* MCA_MISC2 */
139+
case 11:
140+
/* MCA_MISC1 */
141+
case 10:
142+
/* MCA_DEADDR */
143+
case 9:
144+
/* MCA_DESTAT */
145+
case 8:
146+
/* reserved */
147+
case 7:
148+
/* MCA_SYND */
149+
case 6:
150+
m->synd = *(i_mce + 5);
151+
fallthrough;
152+
/* MCA_IPID */
153+
case 5:
154+
m->ipid = *(i_mce + 4);
155+
fallthrough;
156+
/* MCA_CONFIG */
157+
case 4:
158+
/* MCA_MISC0 */
159+
case 3:
160+
m->misc = *(i_mce + 2);
161+
fallthrough;
162+
/* MCA_ADDR */
163+
case 2:
164+
m->addr = *(i_mce + 1);
165+
fallthrough;
166+
/* MCA_STATUS */
167+
case 1:
168+
m->status = *i_mce;
169+
}
121170

122-
mce_log(&m);
171+
mce_log(&err);
123172

124173
return 0;
125174
}

0 commit comments

Comments
 (0)