--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -187,6 +187,10 @@ enum mce_notifier_prios {
MCE_PRIO_HIGHEST = MCE_PRIO_CEC
};
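+/*
+ * Wrapper for 'struct mce' so that additional, vendor-specific error
+ * information can be carried alongside the architectural record.
+ */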
+struct mce_hw_err {
+ struct mce m;
+};
+
struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -222,7 +226,7 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
#endif
void mce_setup(struct mce *m);
-void mce_log(struct mce *m);
+void mce_log(struct mce_hw_err *err);
DECLARE_PER_CPU(struct device *, mce_device);
/* Maximum number of MCA banks per CPU. */
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -909,9 +909,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
* 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
* log it.
*/
-static void handle_smca_dfr_error(struct mce *m)
+static void handle_smca_dfr_error(struct mce_hw_err *err)
{
- struct mce m_dfr;
+ struct mce_hw_err err_dfr;
u64 mca_destat;
/* Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. */
@@ -919,26 +919,26 @@ static void handle_smca_dfr_error(struct mce *m)
return;
/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
- if (m->status & MCI_STATUS_DEFERRED)
+ if (err->m.status & MCI_STATUS_DEFERRED)
goto out;
/* MCA_STATUS didn't have a deferred error, so check MCA_DESTAT for one. */
- mca_destat = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
+ mca_destat = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DESTAT(err->m.bank));
if (!(mca_destat & MCI_STATUS_VAL))
return;
/* Reuse the same data collected from machine_check_poll(). */
- memcpy(&m_dfr, m, sizeof(m_dfr));
+ memcpy(&err_dfr, err, sizeof(err_dfr));
/* Save the MCA_DE{STAT,ADDR} values. */
- m_dfr.status = mca_destat;
- m_dfr.addr = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DEADDR(m_dfr.bank));
+ err_dfr.m.status = mca_destat;
+ err_dfr.m.addr = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DEADDR(err_dfr.m.bank));
- mce_log(&m_dfr);
+ mce_log(&err_dfr);
out:
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(err->m.bank), 0);
}
static void reset_block(struct threshold_block *block)
@@ -997,10 +997,10 @@ static void amd_mca_interrupt(void)
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
}
-void amd_handle_error(struct mce *m)
+void amd_handle_error(struct mce_hw_err *err)
{
- reset_thr_blocks(m->bank);
- handle_smca_dfr_error(m);
+ reset_thr_blocks(err->m.bank);
+ handle_smca_dfr_error(err);
}
/*
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -28,9 +28,12 @@
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m = &err.m;
int lsb;
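+	/* Zero the whole wrapper, not just the embedded 'struct mce'. */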
+ memset(&err, 0, sizeof(struct mce_hw_err));
+
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
@@ -44,30 +47,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
else
lsb = PAGE_SHIFT;
- mce_setup(&m);
- m.bank = -1;
+ mce_setup(m);
+ m->bank = -1;
/* Fake a memory read error with unknown channel */
- m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
- m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
+ m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+ m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
if (severity >= GHES_SEV_RECOVERABLE)
- m.status |= MCI_STATUS_UC;
+ m->status |= MCI_STATUS_UC;
if (severity >= GHES_SEV_PANIC) {
- m.status |= MCI_STATUS_PCC;
- m.tsc = rdtsc();
+ m->status |= MCI_STATUS_PCC;
+ m->tsc = rdtsc();
}
- m.addr = mem_err->physical_addr;
- mce_log(&m);
+ m->addr = mem_err->physical_addr;
+ mce_log(&err);
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
+ struct mce_hw_err err;
+ struct mce *m = &err.m;
unsigned int cpu;
- struct mce m;
+
+ memset(&err, 0, sizeof(struct mce_hw_err));
if (!boot_cpu_has(X86_FEATURE_SMCA))
return -EINVAL;
@@ -105,19 +111,19 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
if (!cpu_possible(cpu))
return -EINVAL;
- mce_setup_global(&m);
- m.cpu = m.extcpu = cpu;
- mce_setup_per_cpu(&m);
+ mce_setup_global(m);
+ m->cpu = m->extcpu = cpu;
+ mce_setup_per_cpu(m);
- m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
- m.status = *i_mce;
- m.addr = *(i_mce + 1);
- m.misc = *(i_mce + 2);
+ m->bank = (ctx_info->msr_addr >> 4) & 0xFF;
+ m->status = *i_mce;
+ m->addr = *(i_mce + 1);
+ m->misc = *(i_mce + 2);
/* Skipping MCA_CONFIG */
- m.ipid = *(i_mce + 4);
- m.synd = *(i_mce + 5);
+ m->ipid = *(i_mce + 4);
+ m->synd = *(i_mce + 5);
- mce_log(&m);
+ mce_log(&err);
return 0;
}
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -86,7 +86,7 @@ struct mca_config mca_cfg __read_mostly = {
.monarch_timeout = -1
};
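+/*
+ * Per-CPU copy of the worst error each CPU saw during the current machine
+ * check; mce_reign() scans these to pick the overall worst.
+ */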
-static DEFINE_PER_CPU(struct mce, mces_seen);
+static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
static unsigned long mce_need_notify;
/*
@@ -145,9 +145,9 @@ void mce_setup(struct mce *m)
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);
-void mce_log(struct mce *m)
+void mce_log(struct mce_hw_err *err)
{
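+	/* If queueing into the event pool succeeds, kick the notifier irq_work. */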
- if (!mce_gen_pool_add(m))
+ if (!mce_gen_pool_add(err))
irq_work_queue(&mce_irq_work);
}
EXPORT_SYMBOL_GPL(mce_log);
@@ -168,8 +168,10 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
-static void __print_mce(struct mce *m)
+static void __print_mce(struct mce_hw_err *err)
{
+ struct mce *m = &err->m;
+
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
m->extcpu,
(m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
@@ -211,9 +213,11 @@ static void __print_mce(struct mce *m)
m->microcode);
}
-static void print_mce(struct mce *m)
+static void print_mce(struct mce_hw_err *err)
{
- __print_mce(m);
+ struct mce *m = &err->m;
+
+ __print_mce(err);
if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
@@ -240,7 +244,7 @@ static void wait_for_panic(void)
panic("Panicing machine check CPU died");
}
-static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
+static noinstr void mce_panic(const char *msg, struct mce_hw_err *final, char *exp)
{
struct llist_node *pending;
struct mce_evt_llist *l;
@@ -271,20 +275,22 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
llist_for_each_entry(l, pending, llnode) {
- struct mce *m = &l->mce;
+ struct mce_hw_err *err = &l->err;
+ struct mce *m = &err->m;
if (!(m->status & MCI_STATUS_UC)) {
- print_mce(m);
+ print_mce(err);
if (!apei_err)
apei_err = apei_write_mce(m);
}
}
/* Now print uncorrected but with the final one last */
llist_for_each_entry(l, pending, llnode) {
- struct mce *m = &l->mce;
+ struct mce_hw_err *err = &l->err;
+ struct mce *m = &err->m;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || mce_cmp(m, final)) {
- print_mce(m);
+ if (!final || mce_cmp(m, &final->m)) {
+ print_mce(err);
if (!apei_err)
apei_err = apei_write_mce(m);
}
@@ -292,7 +298,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
if (final) {
print_mce(final);
if (!apei_err)
- apei_err = apei_write_mce(final);
+ apei_err = apei_write_mce(&final->m);
}
if (exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
@@ -307,8 +313,8 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
* panic.
*/
if (kexec_crash_loaded()) {
- if (final && (final->status & MCI_STATUS_ADDRV)) {
- p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
+ if (final && (final->m.status & MCI_STATUS_ADDRV)) {
+ p = pfn_to_online_page(final->m.addr >> PAGE_SHIFT);
if (p)
SetPageHWPoison(p);
}
@@ -557,13 +563,13 @@ EXPORT_SYMBOL_GPL(mce_is_correctable);
static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
- struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = (struct mce_hw_err *)data;
- if (!m)
+ if (!err)
return NOTIFY_DONE;
/* Emit the trace record: */
- trace_mce_record(m);
+ trace_mce_record(err);
set_bit(0, &mce_need_notify);
@@ -607,13 +613,13 @@ static struct notifier_block mce_uc_nb = {
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
- struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = (struct mce_hw_err *)data;
- if (!m)
+ if (!err)
return NOTIFY_DONE;
- if (mca_cfg.print_all || !m->kflags)
- __print_mce(m);
+ if (mca_cfg.print_all || !(err->m.kflags))
+ __print_mce(err);
return NOTIFY_DONE;
}
@@ -655,10 +661,10 @@ static noinstr void mce_read_aux(struct mce *m, int i)
}
}
-static void vendor_handle_error(struct mce *m)
+static void vendor_handle_error(struct mce_hw_err *err)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return amd_handle_error(m);
+ return amd_handle_error(err);
}
DEFINE_PER_CPU(unsigned, mce_poll_count);
@@ -690,31 +696,34 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
bool error_seen = false;
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m = &err.m;
int i;
+ memset(&err, 0, sizeof(struct mce_hw_err));
+
this_cpu_inc(mce_poll_count);
- mce_gather_info(&m, NULL);
+ mce_gather_info(m, NULL);
if (flags & MCP_TIMESTAMP)
- m.tsc = rdtsc();
+ m->tsc = rdtsc();
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
+ m->misc = 0;
+ m->addr = 0;
+ m->bank = i;
barrier();
- m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
/* If this entry is not valid, ignore it */
- if (!(m.status & MCI_STATUS_VAL)) {
+ if (!(m->status & MCI_STATUS_VAL)) {
if (smca_destat_is_valid(i)) {
- mce_read_aux(&m, i);
+ mce_read_aux(m, i);
goto clear_it;
}
@@ -725,7 +734,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* If we are logging everything (at CPU online) or this
* is a corrected error, then we must log it.
*/
- if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
+ if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
goto log_it;
/*
@@ -735,20 +744,20 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* everything else.
*/
if (!mca_cfg.ser) {
- if (m.status & MCI_STATUS_UC)
+ if (m->status & MCI_STATUS_UC)
continue;
goto log_it;
}
/* Log "not enabled" (speculative) errors */
- if (!(m.status & MCI_STATUS_EN))
+ if (!(m->status & MCI_STATUS_EN))
goto log_it;
/*
* Log UCNA (SDM: 15.6.3 "UCR Error Classification")
* UC == 1 && PCC == 0 && S == 0
*/
- if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
+ if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
goto log_it;
/*
@@ -764,23 +773,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (flags & MCP_DONTLOG)
goto clear_it;
- mce_read_aux(&m, i);
- m.severity = mce_severity(&m, NULL, NULL, false);
+ mce_read_aux(m, i);
+ m->severity = mce_severity(m, NULL, NULL, false);
+
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
+ if (mca_cfg.dont_log_ce && !mce_usable_address(m))
goto clear_it;
if (flags & MCP_QUEUE_LOG)
- mce_gen_pool_add(&m);
+ mce_gen_pool_add(&err);
else
- mce_log(&m);
+ mce_log(&err);
clear_it:
- vendor_handle_error(&m);
+ vendor_handle_error(&err);
/*
* Clear state for this bank.
@@ -1017,6 +1027,7 @@ static noinstr int mce_timed_out(u64 *t, const char *msg)
static void mce_reign(void)
{
int cpu;
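+	/* 'err' will point at the wrapper of the worst error found; 'm' at its record. */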
+ struct mce_hw_err *err = NULL;
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
@@ -1027,11 +1038,13 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- struct mce *mtmp = &per_cpu(mces_seen, cpu);
+ struct mce_hw_err *etmp = &per_cpu(hw_errs_seen, cpu);
+ struct mce *mtmp = &etmp->m;
if (mtmp->severity > global_worst) {
global_worst = mtmp->severity;
- m = &per_cpu(mces_seen, cpu);
+ err = &per_cpu(hw_errs_seen, cpu);
+ m = &err->m;
}
}
@@ -1043,7 +1056,7 @@ static void mce_reign(void)
if (m && global_worst >= MCE_PANIC_SEVERITY) {
/* call mce_severity() to get "msg" for panic */
mce_severity(m, NULL, &msg, true);
- mce_panic("Fatal machine check", m, msg);
+ mce_panic("Fatal machine check", err, msg);
}
/*
@@ -1060,11 +1073,11 @@ static void mce_reign(void)
mce_panic("Fatal machine check from unknown source", NULL, NULL);
/*
- * Now clear all the mces_seen so that they don't reappear on
+ * Now clear all the hw_errs_seen so that they don't reappear on
* the next mce.
*/
for_each_possible_cpu(cpu)
- memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
+ memset(&per_cpu(hw_errs_seen, cpu), 0, sizeof(struct mce_hw_err));
}
static atomic_t global_nwo;
@@ -1268,12 +1281,13 @@ static noinstr bool mce_check_crashing_cpu(void)
}
static __always_inline int
-__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
+__mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
int *worst)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
+ struct mce *m = &err->m;
int severity, i, taint = 0;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
@@ -1329,7 +1343,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
* done in #MC context, where instrumentation is disabled.
*/
instrumentation_begin();
- mce_log(m);
+ mce_log(err);
instrumentation_end();
if (severity > *worst) {
@@ -1399,8 +1413,9 @@ static void kill_me_never(struct callback_head *cb)
set_mce_nospec(pfn);
}
-static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
+static void queue_task_work(struct mce_hw_err *err, char *msg, void (*func)(struct callback_head *))
{
+ struct mce *m = &err->m;
int count = ++current->mce_count;
/* First call, save all the details */
@@ -1414,11 +1429,12 @@ static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callba
/* Ten is likely overkill. Don't expect more than two faults before task_work() */
if (count > 10)
- mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
+ mce_panic("Too many consecutive machine checks while accessing user data",
+ err, msg);
/* Second or later call, make sure page address matches the one from first call */
if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
- mce_panic("Consecutive machine checks to different user pages", m, msg);
+ mce_panic("Consecutive machine checks to different user pages", err, msg);
/* Do not call task_work_add() more than once */
if (count > 1)
@@ -1467,8 +1483,14 @@ noinstr void do_machine_check(struct pt_regs *regs)
int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
- struct mce m, *final;
+ struct mce_hw_err *final;
+ struct mce_hw_err err;
char *msg = NULL;
+ struct mce *m;
+
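+	/* The on-stack wrapper is mirrored into this CPU's hw_errs_seen ('final') below. */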
+ memset(&err, 0, sizeof(struct mce_hw_err));
+
+ m = &err.m;
if (unlikely(mce_flags.p5))
return pentium_machine_check(regs);
@@ -1506,13 +1528,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
this_cpu_inc(mce_exception_count);
- mce_gather_info(&m, regs);
- m.tsc = rdtsc();
+ mce_gather_info(m, regs);
+ m->tsc = rdtsc();
- final = this_cpu_ptr(&mces_seen);
- *final = m;
+ final = this_cpu_ptr(&hw_errs_seen);
+ final->m = *m;
- no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
+ no_way_out = mce_no_way_out(m, &msg, valid_banks, regs);
barrier();
@@ -1521,15 +1543,15 @@ noinstr void do_machine_check(struct pt_regs *regs)
* Assume the worst for now, but if we find the
* severity is MCE_AR_SEVERITY we have other options.
*/
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
kill_current_task = 1;
/*
* Check if this MCE is signaled to only this logical processor,
* on Intel, Zhaoxin only.
*/
- if (m.cpuvendor == X86_VENDOR_INTEL ||
- m.cpuvendor == X86_VENDOR_ZHAOXIN)
- lmce = m.mcgstatus & MCG_STATUS_LMCES;
+ if (m->cpuvendor == X86_VENDOR_INTEL ||
+ m->cpuvendor == X86_VENDOR_ZHAOXIN)
+ lmce = m->mcgstatus & MCG_STATUS_LMCES;
/*
* Local machine check may already know that we have to panic.
@@ -1540,12 +1562,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
*/
if (lmce) {
if (no_way_out)
- mce_panic("Fatal local machine check", &m, msg);
+ mce_panic("Fatal local machine check", &err, msg);
} else {
order = mce_start(&no_way_out);
}
- taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
+ taint = __mc_scan_banks(&err, regs, &final->m, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1560,7 +1582,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
no_way_out = worst >= MCE_PANIC_SEVERITY;
if (no_way_out)
- mce_panic("Fatal machine check on current CPU", &m, msg);
+ mce_panic("Fatal machine check on current CPU", &err, msg);
}
} else {
/*
@@ -1572,8 +1594,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY) {
- mce_severity(&m, regs, &msg, true);
- mce_panic("Local fatal machine check!", &m, msg);
+ mce_severity(m, regs, &msg, true);
+ mce_panic("Local fatal machine check!", &err, msg);
}
}
@@ -1591,14 +1613,14 @@ noinstr void do_machine_check(struct pt_regs *regs)
goto out;
/* Fault was in user mode and we need to take some action */
- if ((m.cs & 3) == 3) {
+ if ((m->cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- if (!mce_usable_address(&m))
- queue_task_work(&m, msg, kill_me_now);
+ if (!mce_usable_address(m))
+ queue_task_work(&err, msg, kill_me_now);
else
- queue_task_work(&m, msg, kill_me_maybe);
+ queue_task_work(&err, msg, kill_me_maybe);
} else {
/*
@@ -1610,13 +1632,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
* corresponding exception handler which would do that is the
* proper one.
*/
- if (m.kflags & MCE_IN_KERNEL_RECOV) {
+ if (m->kflags & MCE_IN_KERNEL_RECOV) {
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
- mce_panic("Failed kernel mode recovery", &m, msg);
+ mce_panic("Failed kernel mode recovery", &err, msg);
}
- if (m.kflags & MCE_IN_KERNEL_COPYIN)
- queue_task_work(&m, msg, kill_me_never);
+ if (m->kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&err, msg, kill_me_never);
}
out:
--- a/arch/x86/kernel/cpu/mce/genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -31,15 +31,15 @@ static char gen_pool_buf[MCE_POOLSZ];
*/
static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
{
+ struct mce_hw_err *err1, *err2;
struct mce_evt_llist *node;
- struct mce *m1, *m2;
- m1 = &t->mce;
+ err1 = &t->err;
llist_for_each_entry(node, &l->llnode, llnode) {
- m2 = &node->mce;
+ err2 = &node->err;
- if (!mce_cmp(m1, m2))
+ if (!mce_cmp(&err1->m, &err2->m))
return true;
}
return false;
@@ -73,9 +73,9 @@ struct llist_node *mce_gen_pool_prepare_records(void)
void mce_gen_pool_process(struct work_struct *__unused)
{
+ struct mce_hw_err *err;
struct llist_node *head;
struct mce_evt_llist *node, *tmp;
- struct mce *mce;
head = llist_del_all(&mce_event_llist);
if (!head)
@@ -83,8 +83,8 @@ void mce_gen_pool_process(struct work_struct *__unused)
head = llist_reverse_order(head);
llist_for_each_entry_safe(node, tmp, head, llnode) {
- mce = &node->mce;
- blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+ err = &node->err;
+ blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, err);
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
}
}
@@ -94,11 +94,11 @@ bool mce_gen_pool_empty(void)
return llist_empty(&mce_event_llist);
}
-int mce_gen_pool_add(struct mce *mce)
+int mce_gen_pool_add(struct mce_hw_err *err)
{
struct mce_evt_llist *node;
- if (filter_mce(mce))
+ if (filter_mce(&err->m))
return -EINVAL;
if (!mce_evt_pool)
@@ -110,7 +110,7 @@ int mce_gen_pool_add(struct mce *mce)
return -ENOMEM;
}
- memcpy(&node->mce, mce, sizeof(*mce));
+ memcpy(&node->err, err, sizeof(*err));
llist_add(&node->llnode, &mce_event_llist);
return 0;
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -500,6 +500,7 @@ static void prepare_msrs(void *info)
static void do_inject(void)
{
+ struct mce_hw_err err;
u64 mcg_status = 0;
unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank;
@@ -515,7 +516,8 @@ static void do_inject(void)
i_mce.status |= MCI_STATUS_SYNDV;
if (inj_type == SW_INJ) {
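+		/* Software injection: wrap the prepared record and log it directly. */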
- mce_log(&i_mce);
+ err.m = i_mce;
+ mce_log(&err);
return;
}
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -26,12 +26,12 @@ extern struct blocking_notifier_head x86_mce_decoder_chain;
struct mce_evt_llist {
struct llist_node llnode;
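+	/* Full error wrapper carried with each queued event. */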
- struct mce mce;
+ struct mce_hw_err err;
};
void mce_gen_pool_process(struct work_struct *__unused);
bool mce_gen_pool_empty(void);
-int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_add(struct mce_hw_err *err);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
@@ -212,7 +212,7 @@ void mce_setup_per_cpu(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
bool amd_mce_usable_address(struct mce *m);
-void amd_handle_error(struct mce *m);
+void amd_handle_error(struct mce_hw_err *err);
/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -241,7 +241,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
#else
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
-static inline void amd_handle_error(struct mce *m) { }
+static inline void amd_handle_error(struct mce_hw_err *err) { }
static inline void smca_extract_err_addr(struct mce *m) { }
#endif
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -11,9 +11,9 @@
TRACE_EVENT(mce_record,
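+	/* All trace fields below are filled from the embedded record (err->m). */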
- TP_PROTO(struct mce *m),
+ TP_PROTO(struct mce_hw_err *err),
- TP_ARGS(m),
+ TP_ARGS(err),
TP_STRUCT__entry(
__field( u64, mcgcap )
@@ -36,23 +36,23 @@ TRACE_EVENT(mce_record,
),
TP_fast_assign(
- __entry->mcgcap = m->mcgcap;
- __entry->mcgstatus = m->mcgstatus;
- __entry->status = m->status;
- __entry->addr = m->addr;
- __entry->misc = m->misc;
- __entry->synd = m->synd;
- __entry->ipid = m->ipid;
- __entry->ip = m->ip;
- __entry->tsc = m->tsc;
- __entry->walltime = m->time;
- __entry->cpu = m->extcpu;
- __entry->cpuid = m->cpuid;
- __entry->apicid = m->apicid;
- __entry->socketid = m->socketid;
- __entry->cs = m->cs;
- __entry->bank = m->bank;
- __entry->cpuvendor = m->cpuvendor;
+ __entry->mcgcap = err->m.mcgcap;
+ __entry->mcgstatus = err->m.mcgstatus;
+ __entry->status = err->m.status;
+ __entry->addr = err->m.addr;
+ __entry->misc = err->m.misc;
+ __entry->synd = err->m.synd;
+ __entry->ipid = err->m.ipid;
+ __entry->ip = err->m.ip;
+ __entry->tsc = err->m.tsc;
+ __entry->walltime = err->m.time;
+ __entry->cpu = err->m.extcpu;
+ __entry->cpuid = err->m.cpuid;
+ __entry->apicid = err->m.apicid;
+ __entry->socketid = err->m.socketid;
+ __entry->cs = err->m.cs;
+ __entry->bank = err->m.bank;
+ __entry->cpuvendor = err->m.cpuvendor;
),
TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",