aboutsummaryrefslogtreecommitdiff
path: root/sys/x86
diff options
context:
space:
mode:
Diffstat (limited to 'sys/x86')
-rw-r--r--sys/x86/acpica/acpi_apm.c25
-rw-r--r--sys/x86/include/mca.h25
-rw-r--r--sys/x86/include/ucode.h8
-rw-r--r--sys/x86/iommu/amd_intrmap.c14
-rw-r--r--sys/x86/iommu/intel_intrmap.c8
-rw-r--r--sys/x86/x86/identcpu.c4
-rw-r--r--sys/x86/x86/mca.c355
-rw-r--r--sys/x86/x86/ucode.c59
-rw-r--r--sys/x86/x86/ucode_subr.c10
9 files changed, 394 insertions, 114 deletions
diff --git a/sys/x86/acpica/acpi_apm.c b/sys/x86/acpica/acpi_apm.c
index be161cd6171b..8e5785cf0ed6 100644
--- a/sys/x86/acpica/acpi_apm.c
+++ b/sys/x86/acpica/acpi_apm.c
@@ -235,7 +235,7 @@ apmdtor(void *data)
acpi_sc = clone->acpi_sc;
/* We are about to lose a reference so check if suspend should occur */
- if (acpi_sc->acpi_next_sstate != 0 &&
+ if (acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE &&
clone->notify_status != APM_EV_ACKED)
acpi_AckSleepState(clone, 0);
@@ -283,10 +283,10 @@ apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
case APMIO_SUSPEND:
if ((flag & FWRITE) == 0)
return (EPERM);
- if (acpi_sc->acpi_next_sstate == 0) {
- if (acpi_sc->acpi_suspend_sx != ACPI_STATE_S5) {
+ if (acpi_sc->acpi_next_stype == POWER_STYPE_AWAKE) {
+ if (power_suspend_stype != POWER_STYPE_POWEROFF) {
error = acpi_ReqSleepState(acpi_sc,
- acpi_sc->acpi_suspend_sx);
+ power_suspend_stype);
} else {
printf(
"power off via apm suspend not supported\n");
@@ -298,10 +298,10 @@ apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
case APMIO_STANDBY:
if ((flag & FWRITE) == 0)
return (EPERM);
- if (acpi_sc->acpi_next_sstate == 0) {
- if (acpi_sc->acpi_standby_sx != ACPI_STATE_S5) {
+ if (acpi_sc->acpi_next_stype == POWER_STYPE_AWAKE) {
+ if (power_standby_stype != POWER_STYPE_POWEROFF) {
error = acpi_ReqSleepState(acpi_sc,
- acpi_sc->acpi_standby_sx);
+ power_standby_stype);
} else {
printf(
"power off via apm standby not supported\n");
@@ -313,10 +313,11 @@ apmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td
case APMIO_NEXTEVENT:
printf("apm nextevent start\n");
ACPI_LOCK(acpi);
- if (acpi_sc->acpi_next_sstate != 0 && clone->notify_status ==
- APM_EV_NONE) {
+ if (acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE &&
+ clone->notify_status == APM_EV_NONE) {
ev_info = (struct apm_event_info *)addr;
- if (acpi_sc->acpi_next_sstate <= ACPI_STATE_S3)
+ /* XXX Check this: the old test sent STANDBYREQ for any state <= S3. */
+ if (acpi_sc->acpi_next_stype == POWER_STYPE_STANDBY)
ev_info->type = PMEV_STANDBYREQ;
else
ev_info->type = PMEV_SUSPENDREQ;
@@ -392,7 +393,7 @@ apmpoll(struct cdev *dev, int events, struct thread *td)
revents = 0;
devfs_get_cdevpriv((void **)&clone);
ACPI_LOCK(acpi);
- if (clone->acpi_sc->acpi_next_sstate)
+ if (clone->acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE)
revents |= events & (POLLIN | POLLRDNORM);
else
selrecord(td, &clone->sel_read);
@@ -433,7 +434,7 @@ apmreadfilt(struct knote *kn, long hint)
ACPI_LOCK(acpi);
clone = kn->kn_hook;
- sleeping = clone->acpi_sc->acpi_next_sstate ? 1 : 0;
+ sleeping = clone->acpi_sc->acpi_next_stype != POWER_STYPE_AWAKE;
ACPI_UNLOCK(acpi);
return (sleeping);
}
diff --git a/sys/x86/include/mca.h b/sys/x86/include/mca.h
index 183480625f6d..553b5d765f17 100644
--- a/sys/x86/include/mca.h
+++ b/sys/x86/include/mca.h
@@ -44,6 +44,31 @@ struct mca_record {
int mr_cpu;
};
+enum mca_stat_types { /* Event classes counted in mca_stats[], assigned by mca_log(). */
+ MCA_T_NONE = 0, /* error code 0x0000, "no error" */
+ MCA_T_UNCLASSIFIED, /* error code 0x0001, "unclassified error" */
+ MCA_T_UCODE_ROM_PARITY, /* error code 0x0002, "ucode ROM parity error" */
+ MCA_T_EXTERNAL, /* error code 0x0003, "external error" */
+ MCA_T_FRC, /* error code 0x0004, "FRC error" */
+ MCA_T_INTERNAL_PARITY, /* error code 0x0005, "internal parity error" */
+ MCA_T_SMM_HANDLER, /* error code 0x0006, "SMM handler code access violation" */
+ MCA_T_INTERNAL_TIMER, /* error code 0x0400, "internal timer error" */
+ MCA_T_GENERIC_IO, /* error code 0x0e0b, "generic I/O error" */
+ MCA_T_INTERNAL, /* other codes with (code & 0xfc00) == 0x0400, "internal error" */
+ MCA_T_MEMORY, /* "memory error" (memory hierarchy) */
+ MCA_T_TLB, /* "TLB ... error" */
+ MCA_T_MEMCONTROLLER_GEN, /* memory controller error, mca_error_mmtype() "GEN" */
+ MCA_T_MEMCONTROLLER_RD, /* memory controller error, mca_error_mmtype() "RD" */
+ MCA_T_MEMCONTROLLER_WR, /* memory controller error, mca_error_mmtype() "WR" */
+ MCA_T_MEMCONTROLLER_AC, /* memory controller error, mca_error_mmtype() "AC" */
+ MCA_T_MEMCONTROLLER_MS, /* memory controller error, mca_error_mmtype() "MS" */
+ MCA_T_MEMCONTROLLER_OTHER, /* memory controller error, mca_error_mmtype() "???" */
+ MCA_T_CACHE, /* "CACHE ... error" */
+ MCA_T_BUS, /* "BUS..." bus and/or interconnect error */
+ MCA_T_UNKNOWN, /* "unknown error"; also the fallback for an invalid type */
+ MCA_T_COUNT /* Must stay last; used as the size of mca_stats[] */
+};
+
#ifdef _KERNEL
void cmc_intr(void);
diff --git a/sys/x86/include/ucode.h b/sys/x86/include/ucode.h
index 0338d48a0832..ea7cb07669a4 100644
--- a/sys/x86/include/ucode.h
+++ b/sys/x86/include/ucode.h
@@ -62,12 +62,14 @@ struct ucode_intel_extsig_table {
} entries[0];
};
+typedef enum { SAFE, UNSAFE, EARLY } ucode_load_how; /* MSR write used by the loaders: SAFE = wrmsr_safe(), UNSAFE = wrmsr(), EARLY = wrmsr_early_safe() on amd64 (plain wrmsr() elsewhere) */
+
const void *ucode_amd_find(const char *path, uint32_t signature,
- uint32_t revision, const uint8_t *fw_data, size_t fw_size,
+ uint32_t *revision, const uint8_t *fw_data, size_t fw_size,
size_t *selected_sizep);
-int ucode_intel_load(const void *data, bool unsafe,
+int ucode_intel_load(const void *data, ucode_load_how how, /* how: which MSR write flavor to use */
uint64_t *nrevp, uint64_t *orevp);
-int ucode_amd_load(const void *data, bool unsafe,
+int ucode_amd_load(const void *data, ucode_load_how how,
uint64_t *nrevp, uint64_t *orevp);
size_t ucode_load_bsp(uintptr_t free);
void ucode_load_ap(int cpu);
diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c
index a4c1a7836268..f8900fe0561f 100644
--- a/sys/x86/iommu/amd_intrmap.c
+++ b/sys/x86/iommu/amd_intrmap.c
@@ -112,6 +112,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
{
struct amdiommu_ctx *ctx;
struct amdiommu_unit *unit;
+ device_t requester;
+ int error __diagused;
uint16_t rid;
bool is_iommu;
@@ -180,7 +182,8 @@ amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
*addr |= ((uint64_t)cpu & 0xffffff00) << 32;
}
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
@@ -220,6 +223,7 @@ static struct amdiommu_ctx *
amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
{
devclass_t src_class;
+ device_t requester;
struct amdiommu_unit *unit;
struct amdiommu_ctx *ctx;
uint32_t edte;
@@ -251,7 +255,8 @@ amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte,
bootverbose);
if (error == 0) {
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
ctx = amdiommu_get_ctx_for_dev(unit, src,
rid, 0, false /* XXXKIB */, false, dte, edte);
}
@@ -266,6 +271,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
u_int cookie)
{
struct amdiommu_unit *unit;
+ device_t requester;
+ int error __diagused;
uint16_t rid;
MPASS(ctx != NULL);
@@ -291,7 +298,8 @@ amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
atomic_thread_fence_rel();
bzero(irte, sizeof(*irte));
}
- iommu_get_requester(src, &rid);
+ error = iommu_get_requester(src, &requester, &rid);
+ MPASS(error == 0);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c
index 06e41523624b..f12a0c9bae9b 100644
--- a/sys/x86/iommu/intel_intrmap.c
+++ b/sys/x86/iommu/intel_intrmap.c
@@ -234,6 +234,8 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
{
devclass_t src_class;
struct dmar_unit *unit;
+ device_t requester;
+ int error __diagused;
/*
* We need to determine if the interrupt source generates FSB
@@ -253,8 +255,10 @@ dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
unit = dmar_find_hpet(src, rid);
} else {
unit = dmar_find(src, bootverbose);
- if (unit != NULL && rid != NULL)
- iommu_get_requester(src, rid);
+ if (unit != NULL && rid != NULL) {
+ error = iommu_get_requester(src, &requester, rid);
+ MPASS(error == 0);
+ }
}
return (unit);
}
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index 4d64eaf78b29..7661c82f4394 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -2613,7 +2613,7 @@ print_vmx_info(void)
"\020EPT#VE" /* EPT-violation #VE */
"\021XSAVES" /* Enable XSAVES/XRSTORS */
);
- printf("\n Exit Controls=0x%b", mask,
+ printf("\n Exit Controls=0x%b", exit,
"\020"
"\003DR" /* Save debug controls */
/* Ignore Host address-space size */
@@ -2625,7 +2625,7 @@ print_vmx_info(void)
"\026EFER-LD" /* Load MSR_EFER */
"\027PTMR-SV" /* Save VMX-preemption timer value */
);
- printf("\n Entry Controls=0x%b", mask,
+ printf("\n Entry Controls=0x%b", entry,
"\020"
"\003DR" /* Save debug controls */
/* Ignore IA-32e mode guest */
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index 4ba49469d3a2..735efe307215 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -46,9 +46,11 @@
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/intr_machdep.h>
@@ -124,6 +126,22 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN,
&workaround_erratum383, 0,
"Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+#ifdef DIAGNOSTIC
+static uint64_t fake_status;
+SYSCTL_U64(_hw_mca, OID_AUTO, fake_status, CTLFLAG_RW,
+ &fake_status, 0,
+ "Insert artificial MCA with given status (testing purpose only)");
+static int fake_bank;
+SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW,
+ &fake_bank, 0,
+ "Bank to use for artificial MCAs (testing purpose only)");
+#endif
+
+static bool mca_uselog = false;
+SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0,
+ "Should the system send non-fatal machine check errors to the log "
+ "(instead of the console)?");
+
static STAILQ_HEAD(, mca_internal) mca_freelist;
static int mca_freecount;
static STAILQ_HEAD(, mca_internal) mca_records;
@@ -131,8 +149,44 @@ static STAILQ_HEAD(, mca_internal) mca_pending;
static int mca_ticks = 300;
static struct taskqueue *mca_tq;
static struct task mca_resize_task;
+static struct task mca_postscan_task;
static struct timeout_task mca_scan_task;
static struct mtx mca_lock;
+static bool mca_startup_done = false;
+
+/* Static buffer to compose messages while in an interrupt context. */
+static char mca_msg_buf[1024];
+static struct mtx mca_msg_buf_lock;
+
+/* Statistics on number of MCA events by type, updated with the mca_lock. */
+static uint64_t mca_stats[MCA_T_COUNT];
+SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP,
+ mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]),
+ "S", "Array of MCA events by type");
+
+/* Variables to track and control message rate limiting. */
+static struct timeval mca_last_log_time;
+static struct timeval mca_log_interval;
+static int mca_log_skipped;
+
+static int
+sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS) /* Handler exposing mca_log_interval.tv_sec as an unsigned int. */
+{
+ int error;
+ u_int val;
+
+ val = mca_log_interval.tv_sec; /* start from the current setting */
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL) /* failed, or no new value was supplied */
+ return (error);
+ mca_log_interval.tv_sec = val; /* only tv_sec is changed by this handler */
+ return (0);
+}
+SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval,
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0,
+ sysctl_mca_log_interval, "IU",
+ "Minimum number of seconds between logging correctable MCAs"
+ " (0 = no limit)");
static unsigned int
mca_ia32_ctl_reg(int bank)
@@ -356,21 +410,27 @@ mca_error_request(uint16_t mca_error)
}
static const char *
-mca_error_mmtype(uint16_t mca_error)
+mca_error_mmtype(uint16_t mca_error, enum mca_stat_types *event_type) /* *event_type is written on every return path */
{
switch ((mca_error & 0x70) >> 4) {
case 0x0:
+ *event_type = MCA_T_MEMCONTROLLER_GEN;
return ("GEN");
case 0x1:
+ *event_type = MCA_T_MEMCONTROLLER_RD;
return ("RD");
case 0x2:
+ *event_type = MCA_T_MEMCONTROLLER_WR;
return ("WR");
case 0x3:
+ *event_type = MCA_T_MEMCONTROLLER_AC;
return ("AC");
case 0x4:
+ *event_type = MCA_T_MEMCONTROLLER_MS;
return ("MS");
}
+ *event_type = MCA_T_MEMCONTROLLER_OTHER; /* field values 0x5-0x7 have no named type */
return ("???");
}
@@ -423,87 +483,111 @@ mca_mute(const struct mca_record *rec)
/* Dump details about a single machine check. */
static void
-mca_log(const struct mca_record *rec)
+mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal)
{
+ int error, numskipped;
uint16_t mca_error;
+ enum mca_stat_types event_type;
+ struct sbuf sb;
+ bool uncor, using_shared_buf;
if (mca_mute(rec))
return;
- if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 &&
- (!tes_supported(rec->mr_mcg_cap) ||
+ uncor = (rec->mr_status & MC_STATUS_UC) != 0;
+
+ if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) ||
((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2))
return;
- printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
+ /* Try to use an allocated buffer when not in an interrupt context. */
+ if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL)
+ using_shared_buf = false;
+ else {
+ using_shared_buf = true;
+ mtx_lock_spin(&mca_msg_buf_lock);
+ sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN);
+ }
+
+ sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
(long long)rec->mr_status);
- printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+ sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
(long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
- printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
- rec->mr_cpu_id, rec->mr_apic_id);
- printf("MCA: CPU %d ", rec->mr_cpu);
+ sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n",
+ cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id);
+ sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu);
if (rec->mr_status & MC_STATUS_UC)
- printf("UNCOR ");
+ sbuf_printf(&sb, "UNCOR ");
else {
- printf("COR ");
+ sbuf_printf(&sb, "COR ");
if (cmci_supported(rec->mr_mcg_cap))
- printf("(%lld) ", ((long long)rec->mr_status &
+ sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status &
MC_STATUS_COR_COUNT) >> 38);
if (tes_supported(rec->mr_mcg_cap)) {
switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) {
case 0x1:
- printf("(Green) ");
+ sbuf_printf(&sb, "(Green) ");
break;
case 0x2:
- printf("(Yellow) ");
+ sbuf_printf(&sb, "(Yellow) ");
break;
}
}
}
if (rec->mr_status & MC_STATUS_EN)
- printf("EN ");
+ sbuf_printf(&sb, "EN ");
if (rec->mr_status & MC_STATUS_PCC)
- printf("PCC ");
+ sbuf_printf(&sb, "PCC ");
if (ser_supported(rec->mr_mcg_cap)) {
if (rec->mr_status & MC_STATUS_S)
- printf("S ");
+ sbuf_printf(&sb, "S ");
if (rec->mr_status & MC_STATUS_AR)
- printf("AR ");
+ sbuf_printf(&sb, "AR ");
}
if (rec->mr_status & MC_STATUS_OVER)
- printf("OVER ");
+ sbuf_printf(&sb, "OVER ");
mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
+ event_type = MCA_T_COUNT;
switch (mca_error) {
/* Simple error codes. */
case 0x0000:
- printf("no error");
+ sbuf_printf(&sb, "no error");
+ event_type = MCA_T_NONE;
break;
case 0x0001:
- printf("unclassified error");
+ sbuf_printf(&sb, "unclassified error");
+ event_type = MCA_T_UNCLASSIFIED;
break;
case 0x0002:
- printf("ucode ROM parity error");
+ sbuf_printf(&sb, "ucode ROM parity error");
+ event_type = MCA_T_UCODE_ROM_PARITY;
break;
case 0x0003:
- printf("external error");
+ sbuf_printf(&sb, "external error");
+ event_type = MCA_T_EXTERNAL;
break;
case 0x0004:
- printf("FRC error");
+ sbuf_printf(&sb, "FRC error");
+ event_type = MCA_T_FRC;
break;
case 0x0005:
- printf("internal parity error");
+ sbuf_printf(&sb, "internal parity error");
+ event_type = MCA_T_INTERNAL_PARITY;
break;
case 0x0006:
- printf("SMM handler code access violation");
+ sbuf_printf(&sb, "SMM handler code access violation");
+ event_type = MCA_T_SMM_HANDLER;
break;
case 0x0400:
- printf("internal timer error");
+ sbuf_printf(&sb, "internal timer error");
+ event_type = MCA_T_INTERNAL_TIMER;
break;
case 0x0e0b:
- printf("generic I/O error");
+ sbuf_printf(&sb, "generic I/O error");
+ event_type = MCA_T_GENERIC_IO;
if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL &&
(rec->mr_status & MC_STATUS_MISCV)) {
- printf(" (pci%d:%d:%d:%d)",
+ sbuf_printf(&sb, " (pci%d:%d:%d:%d)",
(int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32),
(int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24),
(int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19),
@@ -512,7 +596,9 @@ mca_log(const struct mca_record *rec)
break;
default:
if ((mca_error & 0xfc00) == 0x0400) {
- printf("internal error %x", mca_error & 0x03ff);
+ sbuf_printf(&sb, "internal error %x",
+ mca_error & 0x03ff);
+ event_type = MCA_T_INTERNAL;
break;
}
@@ -520,101 +606,168 @@ mca_log(const struct mca_record *rec)
/* Memory hierarchy error. */
if ((mca_error & 0xeffc) == 0x000c) {
- printf("%s memory error", mca_error_level(mca_error));
+ sbuf_printf(&sb, "%s memory error",
+ mca_error_level(mca_error));
+ event_type = MCA_T_MEMORY;
break;
}
/* TLB error. */
if ((mca_error & 0xeff0) == 0x0010) {
- printf("%sTLB %s error", mca_error_ttype(mca_error),
+ sbuf_printf(&sb, "%sTLB %s error",
+ mca_error_ttype(mca_error),
mca_error_level(mca_error));
+ event_type = MCA_T_TLB;
break;
}
/* Memory controller error. */
if ((mca_error & 0xef80) == 0x0080) {
- printf("%s channel ", mca_error_mmtype(mca_error));
+ sbuf_printf(&sb, "%s channel ",
+ mca_error_mmtype(mca_error, &event_type));
if ((mca_error & 0x000f) != 0x000f)
- printf("%d", mca_error & 0x000f);
+ sbuf_printf(&sb, "%d", mca_error & 0x000f);
else
- printf("??");
- printf(" memory error");
+ sbuf_printf(&sb, "??");
+ sbuf_printf(&sb, " memory error");
break;
}
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
- printf("%sCACHE %s %s error",
+ sbuf_printf(&sb, "%sCACHE %s %s error",
mca_error_ttype(mca_error),
mca_error_level(mca_error),
mca_error_request(mca_error));
+ event_type = MCA_T_CACHE;
break;
}
/* Extended memory error. */
if ((mca_error & 0xef80) == 0x0280) {
- printf("%s channel ", mca_error_mmtype(mca_error));
+ sbuf_printf(&sb, "%s channel ",
+ mca_error_mmtype(mca_error, &event_type));
if ((mca_error & 0x000f) != 0x000f)
- printf("%d", mca_error & 0x000f);
+ sbuf_printf(&sb, "%d", mca_error & 0x000f);
else
- printf("??");
- printf(" extended memory error");
+ sbuf_printf(&sb, "??");
+ sbuf_printf(&sb, " extended memory error");
break;
}
/* Bus and/or Interconnect error. */
if ((mca_error & 0xe800) == 0x0800) {
- printf("BUS%s ", mca_error_level(mca_error));
+ sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error));
+ event_type = MCA_T_BUS;
switch ((mca_error & 0x0600) >> 9) {
case 0:
- printf("Source");
+ sbuf_printf(&sb, "Source");
break;
case 1:
- printf("Responder");
+ sbuf_printf(&sb, "Responder");
break;
case 2:
- printf("Observer");
+ sbuf_printf(&sb, "Observer");
break;
default:
- printf("???");
+ sbuf_printf(&sb, "???");
break;
}
- printf(" %s ", mca_error_request(mca_error));
+ sbuf_printf(&sb, " %s ", mca_error_request(mca_error));
switch ((mca_error & 0x000c) >> 2) {
case 0:
- printf("Memory");
+ sbuf_printf(&sb, "Memory");
break;
case 2:
- printf("I/O");
+ sbuf_printf(&sb, "I/O");
break;
case 3:
- printf("Other");
+ sbuf_printf(&sb, "Other");
break;
default:
- printf("???");
+ sbuf_printf(&sb, "???");
break;
}
if (mca_error & 0x0100)
- printf(" timed out");
+ sbuf_printf(&sb, " timed out");
break;
}
- printf("unknown error %x", mca_error);
+ sbuf_printf(&sb, "unknown error %x", mca_error);
+ event_type = MCA_T_UNKNOWN;
break;
}
- printf("\n");
+ sbuf_printf(&sb, "\n");
if (rec->mr_status & MC_STATUS_ADDRV) {
- printf("MCA: Address 0x%llx", (long long)rec->mr_addr);
+ sbuf_printf(&sb, "MCA: Address 0x%llx",
+ (long long)rec->mr_addr);
if (ser_supported(rec->mr_mcg_cap) &&
(rec->mr_status & MC_STATUS_MISCV)) {
- printf(" (Mode: %s, LSB: %d)",
+ sbuf_printf(&sb, " (Mode: %s, LSB: %d)",
mca_addres_mode(rec->mr_misc),
(int)(rec->mr_misc & MC_MISC_RA_LSB));
}
- printf("\n");
+ sbuf_printf(&sb, "\n");
}
if (rec->mr_status & MC_STATUS_MISCV)
- printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+ sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+
+ if (event_type < 0 || event_type >= MCA_T_COUNT) {
+ KASSERT(0, ("%s: invalid event type (%d)", __func__,
+ event_type));
+ event_type = MCA_T_UNKNOWN;
+ }
+ numskipped = 0;
+ if (!fatal && !uncor) {
+ /*
+ * Update statistics and check the rate limit for
+ * correctable errors. The rate limit is only applied
+ * after the system records a reasonable number of errors
+ * of the same type. The goal is to reduce the impact of
+ * the system seeing and attempting to log a burst of
+ * similar errors, which (especially when printed to the
+ * console) can be expensive.
+ */
+ mtx_lock_spin(&mca_lock);
+ mca_stats[event_type]++;
+ if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 &&
+ ratecheck(&mca_last_log_time, &mca_log_interval) == 0) {
+ mca_log_skipped++;
+ mtx_unlock_spin(&mca_lock);
+ goto done;
+ }
+ numskipped = mca_log_skipped;
+ mca_log_skipped = 0;
+ mtx_unlock_spin(&mca_lock);
+ }
+
+ error = sbuf_finish(&sb);
+ if (fatal || !mca_uselog) {
+ if (numskipped > 0)
+ printf("MCA: %d events skipped due to rate limit\n",
+ numskipped);
+ if (error)
+ printf("MCA: error logging message (sbuf error %d)\n",
+ error);
+ else
+ sbuf_putbuf(&sb);
+ } else {
+ if (numskipped > 0)
+ log(LOG_ERR,
+ "MCA: %d events skipped due to rate limit\n",
+ numskipped);
+ if (error)
+ log(LOG_ERR,
+ "MCA: error logging message (sbuf error %d)\n",
+ error);
+ else
+ log(uncor ? LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb));
+ }
+
+done:
+ sbuf_delete(&sb);
+ if (using_shared_buf)
+ mtx_unlock_spin(&mca_msg_buf_lock);
}
static bool
@@ -662,8 +815,24 @@ mca_check_status(enum scan_mode mode, uint64_t mcg_cap, int bank,
bool mce, recover;
status = rdmsr(mca_msr_ops.status(bank));
- if (!(status & MC_STATUS_VAL))
+ if (!(status & MC_STATUS_VAL)) {
+#ifdef DIAGNOSTIC
+ /*
+ * Check if we have a pending artificial event to generate.
+ * Note that this is potentially racy with the sysctl. The
+ * tradeoff is deemed acceptable given the test nature
+ * of the code.
+ */
+ if (fake_status && bank == fake_bank) {
+ status = fake_status;
+ fake_status = 0;
+ }
+ if (!(status & MC_STATUS_VAL))
+ return (0);
+#else
return (0);
+#endif
+ }
recover = *recoverablep;
mce = mca_is_mce(mcg_cap, status, &recover);
@@ -757,9 +926,9 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record)
mtx_lock_spin(&mca_lock);
rec = STAILQ_FIRST(&mca_freelist);
if (rec == NULL) {
- printf("MCA: Unable to allocate space for an event.\n");
- mca_log(record);
mtx_unlock_spin(&mca_lock);
+ printf("MCA: Unable to allocate space for an event.\n");
+ mca_log(mode, record, false);
return;
}
STAILQ_REMOVE_HEAD(&mca_freelist, link);
@@ -916,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep)
if (*recoverablep)
mca_record_entry(mode, &rec);
else
- mca_log(&rec);
+ mca_log(mode, &rec, true);
}
#ifdef DEV_APIC
@@ -978,18 +1147,49 @@ static void
mca_process_records(enum scan_mode mode)
{
struct mca_internal *mca;
+ STAILQ_HEAD(, mca_internal) tmplist; /* private list that takes over the contents of mca_pending */
+
+ /*
+ * If in an interrupt context, defer the post-scan activities to a
+ * task queue.
+ */
+ if (mode != POLLED) {
+ if (mca_startup_done) /* set by mca_startup() once the taskqueue threads are started */
+ taskqueue_enqueue(mca_tq, &mca_postscan_task);
+ return; /* before startup the records stay pending; mca_startup() calls mca_postscan() */
+ }
+
+ /*
+ * Copy the pending list to the stack so we can drop the spin lock
+ * while we are emitting logs.
+ */
+ STAILQ_INIT(&tmplist);
+ mtx_lock_spin(&mca_lock);
+ STAILQ_SWAP(&mca_pending, &tmplist, mca_internal);
+ mtx_unlock_spin(&mca_lock);
+
+ STAILQ_FOREACH(mca, &tmplist, link) /* mca_lock is not held while logging */
+ mca_log(mode, &mca->rec, false);
mtx_lock_spin(&mca_lock);
- while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) {
- STAILQ_REMOVE_HEAD(&mca_pending, link);
- mca_log(&mca->rec);
+ while ((mca = STAILQ_FIRST(&tmplist)) != NULL) { /* pass each logged record to mca_store_record() */
+ STAILQ_REMOVE_HEAD(&tmplist, link);
mca_store_record(mca);
}
mtx_unlock_spin(&mca_lock);
- if (mode == POLLED)
- mca_resize_freelist();
- else if (!cold)
- taskqueue_enqueue(mca_tq, &mca_resize_task);
+ mca_resize_freelist(); /* only reached with mode == POLLED */
+}
+
+/*
+ * Emit log entries and resize the free list. This is intended to be called
+ * from a task queue to handle work which does not need to be done (or cannot
+ * be done) in an interrupt context. Both arguments are unused.
+ */
+static void
+mca_postscan(void *context __unused, int pending __unused)
+{
+
+ mca_process_records(POLLED); /* POLLED makes mca_process_records() do the work now instead of re-queueing */
}
/*
@@ -1060,7 +1260,7 @@ sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS)
doresize = true;
}
mtx_unlock_spin(&mca_lock);
- if (doresize && !cold)
+ if (doresize && mca_startup_done)
taskqueue_enqueue(mca_tq, &mca_resize_task);
return (error);
}
@@ -1072,12 +1272,16 @@ mca_startup(void *dummy)
if (mca_banks <= 0)
return;
- /* CMCIs during boot may have claimed items from the freelist. */
- mca_resize_freelist();
-
taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq");
taskqueue_enqueue_timeout_sbt(mca_tq, &mca_scan_task,
mca_ticks * SBT_1S, 0, C_PREL(1));
+ mca_startup_done = true;
+
+ /*
+ * CMCIs during boot may have recorded entries. Conduct the post-scan
+ * activities now.
+ */
+ mca_postscan(NULL, 0);
}
SYSINIT(mca_startup, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, mca_startup, NULL);
@@ -1130,6 +1334,7 @@ mca_setup(uint64_t mcg_cap)
mca_banks = mcg_cap & MCG_CAP_COUNT;
mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
+ mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN);
STAILQ_INIT(&mca_records);
STAILQ_INIT(&mca_pending);
mca_tq = taskqueue_create_fast("mca", M_WAITOK,
@@ -1137,6 +1342,7 @@ mca_setup(uint64_t mcg_cap)
TIMEOUT_TASK_INIT(mca_tq, &mca_scan_task, 0, mca_scan_cpus, NULL);
STAILQ_INIT(&mca_freelist);
TASK_INIT(&mca_resize_task, 0, mca_resize, NULL);
+ TASK_INIT(&mca_postscan_task, 0, mca_postscan, NULL);
mca_resize_freelist();
SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
"count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0,
@@ -1540,6 +1746,9 @@ mca_intr(void)
panic("Unrecoverable machine check exception");
}
+ if (count)
+ mca_process_records(MCE);
+
/* Clear MCIP. */
wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
}
diff --git a/sys/x86/x86/ucode.c b/sys/x86/x86/ucode.c
index 0c153c0b656c..72133de211f8 100644
--- a/sys/x86/x86/ucode.c
+++ b/sys/x86/x86/ucode.c
@@ -40,6 +40,7 @@
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/md_var.h>
#include <x86/specialreg.h>
#include <x86/ucode.h>
#include <x86/x86_smp.h>
@@ -58,7 +59,7 @@ static const void *ucode_amd_match(const uint8_t *data, size_t *len);
static struct ucode_ops {
const char *vendor;
- int (*load)(const void *, bool, uint64_t *, uint64_t *);
+ int (*load)(const void *, ucode_load_how how, uint64_t *, uint64_t *);
const void *(*match)(const uint8_t *, size_t *);
} loaders[] = {
{
@@ -83,6 +84,7 @@ enum {
NO_ERROR,
NO_MATCH,
VERIFICATION_FAILED,
+ LOAD_FAILED,
} ucode_error = NO_ERROR;
static uint64_t ucode_nrev, ucode_orev;
@@ -103,6 +105,9 @@ log_msg(void *arg __unused)
case VERIFICATION_FAILED:
printf("CPU microcode: microcode verification failed\n");
break;
+ case LOAD_FAILED:
+ printf("CPU microcode load failed. BIOS update advised\n");
+ break;
default:
break;
}
@@ -110,7 +115,8 @@ log_msg(void *arg __unused)
SYSINIT(ucode_log, SI_SUB_CPU, SI_ORDER_FIRST, log_msg, NULL);
int
-ucode_intel_load(const void *data, bool unsafe, uint64_t *nrevp, uint64_t *orevp)
+ucode_intel_load(const void *data, ucode_load_how how, uint64_t *nrevp,
+ uint64_t *orevp)
{
uint64_t nrev, orev;
uint32_t cpuid[4];
@@ -122,10 +128,23 @@ ucode_intel_load(const void *data, bool unsafe, uint64_t *nrevp, uint64_t *orevp
* undocumented errata applying to some Broadwell CPUs.
*/
wbinvd();
- if (unsafe)
+ switch (how) {
+ case SAFE:
wrmsr_safe(MSR_BIOS_UPDT_TRIG, (uint64_t)(uintptr_t)data);
- else
+ break;
+ case EARLY:
+#ifdef __amd64__
+ wrmsr_early_safe_start();
+ if (wrmsr_early_safe(MSR_BIOS_UPDT_TRIG,
+ (uint64_t)(uintptr_t)data) != 0)
+ ucode_error = LOAD_FAILED;
+ wrmsr_early_safe_end();
+ break;
+#endif
+ case UNSAFE:
wrmsr(MSR_BIOS_UPDT_TRIG, (uint64_t)(uintptr_t)data);
+ break;
+ }
wrmsr(MSR_BIOS_SIGN, 0);
/*
@@ -233,20 +252,31 @@ ucode_intel_match(const uint8_t *data, size_t *len)
}
int
-ucode_amd_load(const void *data, bool unsafe, uint64_t *nrevp, uint64_t *orevp)
+ucode_amd_load(const void *data, ucode_load_how how, uint64_t *nrevp,
+ uint64_t *orevp)
{
uint64_t nrev, orev;
uint32_t cpuid[4];
orev = rdmsr(MSR_BIOS_SIGN);
- /*
- * Perform update.
- */
- if (unsafe)
+ switch (how) {
+ case SAFE:
wrmsr_safe(MSR_K8_UCODE_UPDATE, (uint64_t)(uintptr_t)data);
- else
+ break;
+ case EARLY:
+#ifdef __amd64__
+ wrmsr_early_safe_start();
+ if (wrmsr_early_safe(MSR_K8_UCODE_UPDATE,
+ (uint64_t)(uintptr_t)data) != 0)
+ ucode_error = LOAD_FAILED;
+ wrmsr_early_safe_end();
+ break;
+#endif
+ case UNSAFE:
wrmsr(MSR_K8_UCODE_UPDATE, (uint64_t)(uintptr_t)data);
+ break;
+ }
/*
* Serialize instruction flow.
@@ -277,7 +307,8 @@ ucode_amd_match(const uint8_t *data, size_t *len)
signature = regs[0];
revision = rdmsr(MSR_BIOS_SIGN);
- return (ucode_amd_find("loader blob", signature, revision, data, *len, len));
+ return (ucode_amd_find("loader blob", signature, &revision, data, *len,
+ len));
}
/*
@@ -326,8 +357,8 @@ ucode_load_ap(int cpu)
return;
#endif
- if (ucode_data != NULL)
- (void)ucode_loader->load(ucode_data, false, NULL, NULL);
+ if (ucode_data != NULL && ucode_error != LOAD_FAILED)
+ (void)ucode_loader->load(ucode_data, UNSAFE, NULL, NULL);
}
static void *
@@ -414,7 +445,7 @@ ucode_load_bsp(uintptr_t free)
memcpy_early(addr, match, len);
match = addr;
- error = ucode_loader->load(match, false, &nrev, &orev);
+ error = ucode_loader->load(match, EARLY, &nrev, &orev);
if (error == 0) {
ucode_data = early_ucode_data = match;
ucode_nrev = nrev;
diff --git a/sys/x86/x86/ucode_subr.c b/sys/x86/x86/ucode_subr.c
index 9e128ad2bf04..53d7cfc06769 100644
--- a/sys/x86/x86/ucode_subr.c
+++ b/sys/x86/x86/ucode_subr.c
@@ -94,7 +94,7 @@ typedef struct container_header {
* source code.
*/
const void *
-ucode_amd_find(const char *path, uint32_t signature, uint32_t revision,
+ucode_amd_find(const char *path, uint32_t signature, uint32_t *revision,
const uint8_t *fw_data, size_t fw_size, size_t *selected_sizep)
{
const amd_10h_fw_header_t *fw_header;
@@ -112,7 +112,7 @@ ucode_amd_find(const char *path, uint32_t signature, uint32_t revision,
(signature >> 4) & 0x0f,
(signature >> 0) & 0x0f, (signature >> 20) & 0xff,
(signature >> 16) & 0x0f);
- WARNX(1, "microcode revision %#x", revision);
+ WARNX(1, "microcode revision %#x", *revision);
nextfile:
WARNX(1, "checking %s for update.", path);
@@ -212,9 +212,9 @@ nextfile:
fw_header->processor_rev_id, equiv_id);
continue; /* different cpu */
}
- if (fw_header->patch_id <= revision) {
+ if (fw_header->patch_id <= *revision) {
WARNX(1, "patch_id %x, revision %x",
- fw_header->patch_id, revision);
+ fw_header->patch_id, *revision);
continue; /* not newer revision */
}
if (fw_header->nb_dev_id != 0 || fw_header->sb_dev_id != 0) {
@@ -222,7 +222,7 @@ nextfile:
}
WARNX(3, "selecting revision: %x", fw_header->patch_id);
- revision = fw_header->patch_id;
+ *revision = fw_header->patch_id;
selected_fw = fw_header;
selected_size = section_header->size;
}