Diffstat (limited to 'sys/x86/x86/mca.c')
-rw-r--r--  sys/x86/x86/mca.c | 92
1 file changed, 80 insertions(+), 12 deletions(-)
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index 1851df8d00a0..4b40f343ac90 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -124,6 +124,17 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN,
     &workaround_erratum383, 0,
     "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
 
+#ifdef DIAGNOSTIC
+static uint64_t fake_status;
+SYSCTL_U64(_hw_mca, OID_AUTO, fake_status, CTLFLAG_RW,
+    &fake_status, 0,
+    "Insert artificial MCA with given status (testing purpose only)");
+static int fake_bank;
+SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW,
+    &fake_bank, 0,
+    "Bank to use for artificial MCAs (testing purpose only)");
+#endif
+
 static STAILQ_HEAD(, mca_internal) mca_freelist;
 static int mca_freecount;
 static STAILQ_HEAD(, mca_internal) mca_records;
@@ -131,8 +142,10 @@ static STAILQ_HEAD(, mca_internal) mca_pending;
 static int mca_ticks = 300;
 static struct taskqueue *mca_tq;
 static struct task mca_resize_task;
+static struct task mca_postscan_task;
 static struct timeout_task mca_scan_task;
 static struct mtx mca_lock;
+static bool mca_startup_done = false;
 
 /* Statistics on number of MCA events by type, updated atomically. */
 static uint64_t mca_stats[MCA_T_COUNT];
@@ -699,8 +712,24 @@ mca_check_status(enum scan_mode mode, uint64_t mcg_cap, int bank,
 	bool mce, recover;
 
 	status = rdmsr(mca_msr_ops.status(bank));
-	if (!(status & MC_STATUS_VAL))
+	if (!(status & MC_STATUS_VAL)) {
+#ifdef DIAGNOSTIC
+		/*
+		 * Check if we have a pending artificial event to generate.
+		 * Note that this is potentially racy with the sysctl. The
+		 * tradeoff is deemed acceptable given the test nature
+		 * of the code.
+		 */
+		if (fake_status && bank == fake_bank) {
+			status = fake_status;
+			fake_status = 0;
+		}
+		if (!(status & MC_STATUS_VAL))
+			return (0);
+#else
 		return (0);
+#endif
+	}
 
 	recover = *recoverablep;
 	mce = mca_is_mce(mcg_cap, status, &recover);
@@ -794,9 +823,9 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record)
 	mtx_lock_spin(&mca_lock);
 	rec = STAILQ_FIRST(&mca_freelist);
 	if (rec == NULL) {
+		mtx_unlock_spin(&mca_lock);
 		printf("MCA: Unable to allocate space for an event.\n");
 		mca_log(record);
-		mtx_unlock_spin(&mca_lock);
 		return;
 	}
 	STAILQ_REMOVE_HEAD(&mca_freelist, link);
@@ -1015,18 +1044,49 @@ static void
 mca_process_records(enum scan_mode mode)
 {
 	struct mca_internal *mca;
+	STAILQ_HEAD(, mca_internal) tmplist;
 
+	/*
+	 * If in an interrupt context, defer the post-scan activities to a
+	 * task queue.
+	 */
+	if (mode != POLLED) {
+		if (mca_startup_done)
+			taskqueue_enqueue(mca_tq, &mca_postscan_task);
+		return;
+	}
+
+	/*
+	 * Copy the pending list to the stack so we can drop the spin lock
+	 * while we are emitting logs.
+	 */
+	STAILQ_INIT(&tmplist);
 	mtx_lock_spin(&mca_lock);
-	while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) {
-		STAILQ_REMOVE_HEAD(&mca_pending, link);
+	STAILQ_SWAP(&mca_pending, &tmplist, mca_internal);
+	mtx_unlock_spin(&mca_lock);
+
+	STAILQ_FOREACH(mca, &tmplist, link)
 		mca_log(&mca->rec);
+
+	mtx_lock_spin(&mca_lock);
+	while ((mca = STAILQ_FIRST(&tmplist)) != NULL) {
+		STAILQ_REMOVE_HEAD(&tmplist, link);
 		mca_store_record(mca);
 	}
 	mtx_unlock_spin(&mca_lock);
-	if (mode == POLLED)
-		mca_resize_freelist();
-	else if (!cold)
-		taskqueue_enqueue(mca_tq, &mca_resize_task);
+	mca_resize_freelist();
+}
+
+/*
+ * Emit log entries and resize the free list. This is intended to be called
+ * from a task queue to handle work which does not need to be done (or cannot
+ * be done) in an interrupt context.
+ */
+static void
+mca_postscan(void *context __unused, int pending __unused)
+{
+
+	mca_process_records(POLLED);
 }
 
 /*
@@ -1097,7 +1157,7 @@ sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS)
 		doresize = true;
 	}
 	mtx_unlock_spin(&mca_lock);
-	if (doresize && !cold)
+	if (doresize && mca_startup_done)
 		taskqueue_enqueue(mca_tq, &mca_resize_task);
 	return (error);
 }
@@ -1109,12 +1169,16 @@ mca_startup(void *dummy)
 {
 	if (mca_banks <= 0)
 		return;
 
-	/* CMCIs during boot may have claimed items from the freelist. */
-	mca_resize_freelist();
-
 	taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq");
 	taskqueue_enqueue_timeout_sbt(mca_tq, &mca_scan_task,
 	    mca_ticks * SBT_1S, 0, C_PREL(1));
+	mca_startup_done = true;
+
+	/*
+	 * CMCIs during boot may have recorded entries. Conduct the post-scan
+	 * activities now.
+	 */
+	mca_postscan(NULL, 0);
 }
 SYSINIT(mca_startup, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, mca_startup, NULL);
@@ -1174,6 +1238,7 @@ mca_setup(uint64_t mcg_cap)
 	TIMEOUT_TASK_INIT(mca_tq, &mca_scan_task, 0, mca_scan_cpus, NULL);
 	STAILQ_INIT(&mca_freelist);
 	TASK_INIT(&mca_resize_task, 0, mca_resize, NULL);
+	TASK_INIT(&mca_postscan_task, 0, mca_postscan, NULL);
 	mca_resize_freelist();
 	SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0,
@@ -1577,6 +1642,9 @@ mca_intr(void)
 		panic("Unrecoverable machine check exception");
 	}
 
+	if (count)
+		mca_process_records(MCE);
+
 	/* Clear MCIP. */
 	wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
 }
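
Two ideas in this diff are worth spelling out. The rewritten mca_process_records() never holds the spin lock while logging: it steals the entire pending list in O(1) with STAILQ_SWAP() under mca_lock, emits the log entries with the lock dropped, then relocks to file the records away. And when the scan runs from an interrupt context (a CMCI, or the MC# handler, which now calls mca_process_records(MCE)), even that much is deferred: the function just enqueues mca_postscan_task and returns. What follows is a minimal userland sketch of the swap-then-work pattern only, not the kernel code: struct event, the evlist head, process_pending(), and the pthread mutex are invented stand-ins for struct mca_internal, mca_pending, mca_process_records(), and mca_lock, and the sketch frees the drained entries where the kernel instead recycles them through mca_store_record().

/*
 * Hypothetical userland demo of the swap-then-work pattern; all
 * names here are invented for illustration, none come from mca.c.
 */
#include <sys/queue.h>

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct event {
	int id;
	STAILQ_ENTRY(event) link;
};

static STAILQ_HEAD(evlist, event) pending = STAILQ_HEAD_INITIALIZER(pending);
static pthread_mutex_t evlock = PTHREAD_MUTEX_INITIALIZER;

static void
process_pending(void)
{
	struct evlist tmplist = STAILQ_HEAD_INITIALIZER(tmplist);
	struct event *ev;

	/* Steal the whole shared list in O(1) while holding the lock. */
	pthread_mutex_lock(&evlock);
	STAILQ_SWAP(&pending, &tmplist, event);
	pthread_mutex_unlock(&evlock);

	/* The slow work (logging) runs with the lock dropped. */
	STAILQ_FOREACH(ev, &tmplist, link)
		printf("event %d\n", ev->id);

	/* Dispose of the drained entries; the kernel re-files them instead. */
	while ((ev = STAILQ_FIRST(&tmplist)) != NULL) {
		STAILQ_REMOVE_HEAD(&tmplist, link);
		free(ev);
	}
}

int
main(void)
{
	struct event *ev;

	for (int i = 0; i < 3; i++) {
		if ((ev = malloc(sizeof(*ev))) == NULL)
			abort();
		ev->id = i;
		pthread_mutex_lock(&evlock);
		STAILQ_INSERT_TAIL(&pending, ev, link);
		pthread_mutex_unlock(&evlock);
	}
	process_pending();
	return (0);
}

This builds on FreeBSD with cc -o evswap evswap.c -lpthread (the <sys/queue.h> shipped there provides STAILQ_SWAP(); some other libcs carry a sys/queue.h without it). As for the new DIAGNOSTIC-only knobs, the check added to mca_check_status() implies the intended workflow: point hw.mca.fake_bank at a bank, then write a status word with MC_STATUS_VAL set into hw.mca.fake_status, and the next scan of that bank consumes the fake status as if the hardware had latched it.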