diff options
| author | Ali Mashtizadeh <ali@mashtizadeh.com> | 2026-01-30 09:12:17 +0000 |
|---|---|---|
| committer | Warner Losh <imp@FreeBSD.org> | 2026-02-27 21:22:16 +0000 |
| commit | e51ef8ae490fc9f73191f33e7ad388c2511c454a (patch) | |
| tree | b5644c5b723e8d249e2c9e9aaeb8a567813e971f | |
| parent | 00c0a1f0bf6c07e63384a389060dfc10924c0ed6 (diff) | |
hwpmc: Initial support for AMD IBS
This patch adds support for AMD IBS. It adds a new class of performance
counter that contains two events: ibs-fetch and ibs-op. Unlike
most existing sampled events, IBS events provide a number of values
containing extra information regarding the sample. To support this, we
use the existing callchain event and introduce a new flag for multipart
payloads. The first 8 bytes of the pc_sample contain a header that
defines up to four payloads.
Sponsored by: Netflix
Reviewed by: imp,mhorne
Pull Request: https://github.com/freebsd/freebsd-src/pull/2022
| -rw-r--r-- | lib/libpmc/libpmc.c | 64 | ||||
| -rw-r--r-- | sys/amd64/include/pmc_mdep.h | 5 | ||||
| -rw-r--r-- | sys/conf/files.x86 | 1 | ||||
| -rw-r--r-- | sys/dev/hwpmc/hwpmc_amd.c | 25 | ||||
| -rw-r--r-- | sys/dev/hwpmc/hwpmc_ibs.c | 614 | ||||
| -rw-r--r-- | sys/dev/hwpmc/hwpmc_ibs.h | 176 | ||||
| -rw-r--r-- | sys/dev/hwpmc/hwpmc_mod.c | 96 | ||||
| -rw-r--r-- | sys/dev/hwpmc/pmc_events.h | 13 | ||||
| -rw-r--r-- | sys/i386/include/pmc_mdep.h | 5 | ||||
| -rw-r--r-- | sys/modules/hwpmc/Makefile | 4 | ||||
| -rw-r--r-- | sys/sys/pmc.h | 19 | ||||
| -rw-r--r-- | sys/sys/pmclog.h | 14 | ||||
| -rw-r--r-- | sys/x86/x86/local_apic.c | 27 | ||||
| -rw-r--r-- | usr.sbin/pmcstat/pmcstat_log.c | 99 |
14 files changed, 1130 insertions, 32 deletions
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 10e357f55935..155da7cf6a7b 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -50,8 +50,8 @@ #if defined(__amd64__) || defined(__i386__) static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); -#endif -#if defined(__amd64__) || defined(__i386__) +static int ibs_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif @@ -132,6 +132,7 @@ struct pmc_class_descr { PMC_CLASSDEP_TABLE(iaf, IAF); PMC_CLASSDEP_TABLE(k8, K8); +PMC_CLASSDEP_TABLE(ibs, IBS); PMC_CLASSDEP_TABLE(armv7, ARMV7); PMC_CLASSDEP_TABLE(armv8, ARMV8); PMC_CLASSDEP_TABLE(cmn600_pmu, CMN600_PMU); @@ -201,8 +202,7 @@ static const struct pmc_class_descr NAME##_class_table_descr = \ #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(k8, K8, k8, k8); -#endif -#if defined(__i386__) || defined(__amd64__) +PMC_CLASS_TABLE_DESC(ibs, IBS, ibs, ibs); PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc); #endif #if defined(__arm__) @@ -691,9 +691,49 @@ k8_allocate_pmc(enum pmc_event pe, char *ctrspec, return (0); } -#endif +static int +ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + char *e, *p, *q; + uint64_t ctl; + + pmc_config->pm_caps |= + (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE); + pmc_config->pm_md.pm_ibs.ibs_ctl = 0; + + /* setup parsing tables */ + switch (pe) { + case PMC_EV_IBS_FETCH: + pmc_config->pm_md.pm_ibs.ibs_type = IBS_PMC_FETCH; + break; + case PMC_EV_IBS_OP: + pmc_config->pm_md.pm_ibs.ibs_type = IBS_PMC_OP; + break; + default: + return (-1); + } + + /* parse parameters */ + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWPREFIXMATCH(p, "ctl=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return (-1); + + ctl = strtoull(q, &e, 0); + if (e == q || *e != 
'\0') + return (-1); + + pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl; + } else { + return (-1); + } + } + + return (0); +} -#if defined(__i386__) || defined(__amd64__) static int tsc_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) @@ -1268,6 +1308,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, ev = k8_event_table; count = PMC_EVENT_TABLE_SIZE(k8); break; + case PMC_CLASS_IBS: + ev = ibs_event_table; + count = PMC_EVENT_TABLE_SIZE(ibs); + break; case PMC_CLASS_ARMV7: switch (cpu_info.pm_cputype) { default: @@ -1471,6 +1515,10 @@ pmc_init(void) case PMC_CLASS_K8: pmc_class_table[n++] = &k8_class_table_descr; break; + + case PMC_CLASS_IBS: + pmc_class_table[n++] = &ibs_class_table_descr; + break; #endif case PMC_CLASS_SOFT: @@ -1676,7 +1724,9 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) { ev = k8_event_table; evfence = k8_event_table + PMC_EVENT_TABLE_SIZE(k8); - + } else if (pe >= PMC_EV_IBS_FIRST && pe <= PMC_EV_IBS_LAST) { + ev = ibs_event_table; + evfence = ibs_event_table + PMC_EVENT_TABLE_SIZE(ibs); } else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) { switch (cpu) { case PMC_CPU_ARMV7_CORTEX_A8: diff --git a/sys/amd64/include/pmc_mdep.h b/sys/amd64/include/pmc_mdep.h index 5c20d8473855..24b785312a16 100644 --- a/sys/amd64/include/pmc_mdep.h +++ b/sys/amd64/include/pmc_mdep.h @@ -41,6 +41,7 @@ struct pmc_mdep; #include <dev/hwpmc/hwpmc_amd.h> #include <dev/hwpmc/hwpmc_core.h> +#include <dev/hwpmc/hwpmc_ibs.h> #include <dev/hwpmc/hwpmc_tsc.h> #include <dev/hwpmc/hwpmc_uncore.h> @@ -51,6 +52,7 @@ struct pmc_mdep; */ #define PMC_MDEP_CLASS_INDEX_TSC 1 #define PMC_MDEP_CLASS_INDEX_K8 2 +#define PMC_MDEP_CLASS_INDEX_IBS 3 #define PMC_MDEP_CLASS_INDEX_P4 2 #define PMC_MDEP_CLASS_INDEX_IAP 2 #define PMC_MDEP_CLASS_INDEX_IAF 3 @@ -62,6 +64,7 @@ struct pmc_mdep; * * TSC The timestamp counter * K8 AMD Athlon64 and Opteron PMCs in 
64 bit mode. + * IBS AMD IBS * PIV Intel P4/HTT and P4/EMT64 * IAP Intel Core/Core2/Atom CPUs in 64 bits mode. * IAF Intel fixed-function PMCs in Core2 and later CPUs. @@ -71,6 +74,7 @@ struct pmc_mdep; union pmc_md_op_pmcallocate { struct pmc_md_amd_op_pmcallocate pm_amd; + struct pmc_md_ibs_op_pmcallocate pm_ibs; struct pmc_md_iap_op_pmcallocate pm_iap; struct pmc_md_ucf_op_pmcallocate pm_ucf; struct pmc_md_ucp_op_pmcallocate pm_ucp; @@ -85,6 +89,7 @@ union pmc_md_op_pmcallocate { union pmc_md_pmc { struct pmc_md_amd_pmc pm_amd; + struct pmc_md_ibs_pmc pm_ibs; struct pmc_md_iaf_pmc pm_iaf; struct pmc_md_iap_pmc pm_iap; struct pmc_md_ucf_pmc pm_ucf; diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 index fabcd5d9ebe5..8a7e0b78feb4 100644 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -114,6 +114,7 @@ dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hptrr/$M-elf.hptrr_lib.o optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc +dev/hwpmc/hwpmc_ibs.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c index b34cbffcffa8..cf44f9362a72 100644 --- a/sys/dev/hwpmc/hwpmc_amd.c +++ b/sys/dev/hwpmc/hwpmc_amd.c @@ -543,6 +543,10 @@ amd_intr(struct trapframe *tf) pac = amd_pcpu[cpu]; + retval = pmc_ibs_intr(tf); + if (retval) + goto done; + /* * look for all PMCs that have interrupted: * - look for a running, sampling PMC which has overflowed @@ -613,6 +617,7 @@ amd_intr(struct trapframe *tf) } } +done: if (retval) counter_u64_add(pmc_stats.pm_intr_processed, 1); else @@ -760,7 +765,7 @@ pmc_amd_initialize(void) struct pmc_classdep *pcd; struct pmc_mdep *pmc_mdep; enum pmc_cputype cputype; - int error, i, ncpus; + int error, i, ncpus, nclasses; int family, model, stepping; int amd_core_npmcs, amd_l3_npmcs, amd_df_npmcs; struct amd_descr *d; @@ -884,10 +889,16 @@ pmc_amd_initialize(void) 
M_WAITOK | M_ZERO); /* - * These processors have two classes of PMCs: the TSC and - * programmable PMCs. + * These processors have two or three classes of PMCs: the TSC, + * programmable PMCs, and AMD IBS. */ - pmc_mdep = pmc_mdep_alloc(2); + if ((amd_feature2 & AMDID2_IBS) != 0) { + nclasses = 3; + } else { + nclasses = 2; + } + + pmc_mdep = pmc_mdep_alloc(nclasses); ncpus = pmc_cpu_max(); @@ -927,6 +938,12 @@ pmc_amd_initialize(void) PMCDBG0(MDP, INI, 0, "amd-initialize"); + if (nclasses >= 3) { + error = pmc_ibs_initialize(pmc_mdep, ncpus); + if (error != 0) + goto error; + } + return (pmc_mdep); error: diff --git a/sys/dev/hwpmc/hwpmc_ibs.c b/sys/dev/hwpmc/hwpmc_ibs.c new file mode 100644 index 000000000000..66d3260cf040 --- /dev/null +++ b/sys/dev/hwpmc/hwpmc_ibs.c @@ -0,0 +1,614 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2026, Ali Jose Mashtizadeh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Support for the AMD IBS */ + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/pmc.h> +#include <sys/pmckern.h> +#include <sys/pmclog.h> +#include <sys/smp.h> +#include <sys/systm.h> + +#include <machine/cpu.h> +#include <machine/cpufunc.h> +#include <machine/md_var.h> +#include <machine/specialreg.h> + +#define IBS_STOP_ITER 50 /* Stopping iterations */ + +/* AMD IBS PMCs */ +struct ibs_descr { + struct pmc_descr pm_descr; /* "base class" */ +}; + +/* + * Globals + */ +static uint64_t ibs_features; + +/* + * Per-processor information + */ +#define IBS_CPU_RUNNING 1 +#define IBS_CPU_STOPPING 2 +#define IBS_CPU_STOPPED 3 + +struct ibs_cpu { + int pc_status; + struct pmc_hw pc_ibspmcs[IBS_NPMCS]; +}; +static struct ibs_cpu **ibs_pcpu; + +/* + * Read a PMC value from the MSR. 
+ *
+ * The value stored in '*v' is whatever the fetch or op control MSR
+ * selected by 'ri' currently holds.  Always returns 0.
+ */
+static int
+ibs_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
+{
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row-index %d", __LINE__, ri));
+	KASSERT(ibs_pcpu[cpu],
+	    ("[ibs,%d] null per-cpu, cpu %d", __LINE__, cpu));
+
+	/* Pick the control MSR that belongs to this row. */
+	if (ri == IBS_PMC_FETCH)
+		*v = rdmsr(IBS_FETCH_CTL);
+	else if (ri == IBS_PMC_OP)
+		*v = rdmsr(IBS_OP_CTL);
+
+	PMCDBG2(MDP, REA, 2, "ibs-read id=%d -> %jd", ri, *v);
+
+	return (0);
+}
+
+/*
+ * "Write" hook for the IBS class.
+ *
+ * No MSR is touched here: the arguments are sanity-checked, the request
+ * is traced, and 0 is returned.
+ */
+static int
+ibs_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
+{
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row-index %d", __LINE__, ri));
+
+	PMCDBG3(MDP, WRI, 1, "ibs-write cpu=%d ri=%d v=%jx", cpu, ri, v);
+
+	return (0);
+}
+
+/*
+ * Bind 'pm' to (or, when 'pm' is NULL, unbind it from) the hardware
+ * descriptor for row 'ri' on 'cpu'.  Always returns 0.
+ */
+static int
+ibs_config_pmc(int cpu, int ri, struct pmc *pm)
+{
+	struct pmc_hw *slot;
+
+	PMCDBG3(MDP, CFG, 1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row-index %d", __LINE__, ri));
+
+	slot = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
+
+	/* A slot may only be claimed while it is empty. */
+	KASSERT(pm == NULL || slot->phw_pmc == NULL,
+	    ("[ibs,%d] pm=%p phw->pm=%p hwpmc not unconfigured",
+	    __LINE__, pm, slot->phw_pmc));
+
+	slot->phw_pmc = pm;
+
+	return (0);
+}
+
+/*
+ * Hand back, through 'ppm', the PMC currently bound to row 'ri' on 'cpu'
+ * (NULL when the slot is unconfigured).  Always returns 0.
+ */
+static int
+ibs_get_config(int cpu, int ri, struct pmc **ppm)
+{
+	struct pmc_hw *slot;
+
+	slot = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
+	*ppm = slot->phw_pmc;
+
+	return (0);
+}
+
+/*
+ * Check if a given PMC allocation is feasible.
+ *
+ * The request must be for the IBS class, its ibs_type must equal the row
+ * index being allocated, and the PMC must be a system-mode PMC; otherwise
+ * EINVAL is returned.  On success the caller-supplied control value is
+ * recorded in the PMC's machine-dependent state and 0 is returned.
+ */
+static int
+ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
+    const struct pmc_op_pmcallocate *a)
+{
+	uint64_t caps, config;
+
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row index %d", __LINE__, ri));
+
+	/* check class match */
+	if (a->pm_class != PMC_CLASS_IBS)
+		return (EINVAL);
+	if (a->pm_md.pm_ibs.ibs_type != ri)
+		return (EINVAL);
+
+	caps = pm->pm_caps;
+
+	PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps);
+
+	if ((caps & PMC_CAP_SYSTEM) == 0)
+		return (EINVAL);
+
+	/* Remember the control bits; they are programmed at start time. */
+	config = a->pm_md.pm_ibs.ibs_ctl;
+	pm->pm_md.pm_ibs.ibs_ctl = config;
+
+	PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%x", ri, config);
+
+	return (0);
+}
+
+/*
+ * Release machine dependent state associated with a PMC.  This is a
+ * no-op on this architecture apart from asserting that the hardware
+ * slot has already been unconfigured.
+ */
+static int
+ibs_release_pmc(int cpu, int ri, struct pmc *pmc __unused)
+{
+	struct pmc_hw *phw __diagused;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row-index %d", __LINE__, ri));
+
+	PMCDBG1(MDP, ALL, 1, "ibs-release ri=%d", ri);
+
+	phw = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
+
+	KASSERT(phw->phw_pmc == NULL,
+	    ("[ibs,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc));
+
+	return (0);
+}
+
+/*
+ * Start a PMC.
+ *
+ * Marks the CPU as running and programs the fetch or op control MSR
+ * selected by 'ri' with the recorded control value plus the ENABLE bit.
+ * Always returns 0.
+ */
+static int
+ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm)
+{
+	uint64_t config;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row-index %d", __LINE__, ri));
+
+	PMCDBG2(MDP, STA, 1, "ibs-start cpu=%d ri=%d", cpu, ri);
+
+	/*
+	 * This is used to handle spurious NMIs.  All that matters is that it
+	 * is not in the stopping state.
+	 */
+	atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_RUNNING);
+
+	/*
+	 * Turn on the ENABLE bit.  Zeroing out the control register eliminates
+	 * stale valid bits from spurious NMIs and it resets the counter.
+	 */
+	switch (ri) {
+	case IBS_PMC_FETCH:
+		wrmsr(IBS_FETCH_CTL, 0);
+		config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE;
+		wrmsr(IBS_FETCH_CTL, config);
+		break;
+	case IBS_PMC_OP:
+		wrmsr(IBS_OP_CTL, 0);
+		config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE;
+		wrmsr(IBS_OP_CTL, config);
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Stop a PMC.
+ *
+ * Clears the maximum count and then the ENABLE bit of the control MSR
+ * selected by 'ri', and keeps zeroing that MSR for IBS_STOP_ITER
+ * microseconds while the CPU is flagged as stopping.  Always returns 0.
+ */
+static int
+ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
+{
+	int i;
+	uint64_t config;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] illegal row-index %d", __LINE__, ri));
+
+	PMCDBG1(MDP, STO, 1, "ibs-stop ri=%d", ri);
+
+	/*
+	 * Turn off the ENABLE bit, but unfortunately there are a few quirks
+	 * that generate excess NMIs.  Workaround #420 in the Revision Guide
+	 * for AMD Family 10h Processors 41322 Rev. 3.92 March 2012 requires
+	 * that we clear the count before clearing enable.
+	 *
+	 * Even after clearing the counter spurious NMIs are still possible so
+	 * we use a per-CPU atomic variable to notify the interrupt handler we
+	 * are stopping and discard spurious NMIs.  We then retry clearing the
+	 * control register for 50us.  This gives us enough time and ensures
+	 * that the valid bit is not accidentally stuck after a spurious NMI.
+	 */
+	config = pm->pm_md.pm_ibs.ibs_ctl;
+
+	atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING);
+
+	switch (ri) {
+	case IBS_PMC_FETCH:
+		wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK);
+		DELAY(1);
+		config &= ~IBS_FETCH_CTL_ENABLE;
+		wrmsr(IBS_FETCH_CTL, config);
+		break;
+	case IBS_PMC_OP:
+		/*
+		 * The count must be cleared in the op control register;
+		 * writing the op configuration to IBS_FETCH_CTL here would
+		 * leave the op count untouched and clobber the fetch
+		 * sampler instead.
+		 */
+		wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_MAXCNTMASK);
+		DELAY(1);
+		config &= ~IBS_OP_CTL_ENABLE;
+		wrmsr(IBS_OP_CTL, config);
+		break;
+	}
+
+	/* Keep the control register clear while late NMIs drain. */
+	for (i = 0; i < IBS_STOP_ITER; i++) {
+		DELAY(1);
+
+		switch (ri) {
+		case IBS_PMC_FETCH:
+			wrmsr(IBS_FETCH_CTL, 0);
+			break;
+		case IBS_PMC_OP:
+			wrmsr(IBS_OP_CTL, 0);
+			break;
+		}
+	}
+
+	atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPED);
+
+	return (0);
+}
+
+/*
+ * Package one IBS fetch sample as a four-entry multipart payload and hand
+ * it to the MI sampling code.  'config' is the IBS_FETCH_CTL value read by
+ * the interrupt handler.  Nothing is recorded when no PMC is bound to the
+ * slot or the PMC is not in the running state.
+ */
+static void
+pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config)
+{
+	struct pmc_multipart mpd;
+
+	if (pm == NULL)
+		return;
+
+	if (pm->pm_state != PMC_STATE_RUNNING)
+		return;
+
+	/* Entries that are not filled in below are reported as zero. */
+	memset(&mpd, 0, sizeof(mpd));
+
+	mpd.pl_type = PMC_CC_MULTIPART_IBS_FETCH;
+	mpd.pl_length = 4;
+	mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL] = config;
+	/*
+	 * NOTE(review): any non-zero CPUID feature word enables this read;
+	 * presumably it should test the "fetch control extended" feature
+	 * bit specifically -- confirm against CPUID Fn8000_001B.
+	 */
+	if (ibs_features) {
+		mpd.pl_mpdata[PMC_MPIDX_FETCH_EXTCTL] = rdmsr(IBS_FETCH_EXTCTL);
+	}
+	mpd.pl_mpdata[PMC_MPIDX_FETCH_LINADDR] = rdmsr(IBS_FETCH_LINADDR);
+	if ((config & IBS_FETCH_CTL_PHYSADDRVALID) != 0) {
+		mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] =
+		    rdmsr(IBS_FETCH_PHYSADDR);
+	}
+
+	pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
+
+	/*
+	 * NOTE(review): unlike pmc_ibs_process_op(), the control MSR is not
+	 * rewritten after the sample is taken -- confirm whether fetch
+	 * sampling needs to be re-armed here.
+	 */
+}
+
+/*
+ * Package one IBS op sample as an eight-entry multipart payload, hand it
+ * to the MI sampling code, and rewrite IBS_OP_CTL with the recorded
+ * control value plus ENABLE.  'config' is the IBS_OP_CTL value read by the
+ * interrupt handler.  Nothing is done when no PMC is bound to the slot or
+ * the PMC is not in the running state.
+ */
+static void
+pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config)
+{
+	struct pmc_multipart mpd;
+
+	if (pm == NULL)
+		return;
+
+	if (pm->pm_state != PMC_STATE_RUNNING)
+		return;
+
+	/*
+	 * Only the first seven entries are filled in; the eighth entry
+	 * covered by pl_length stays zero from this memset.
+	 */
+	memset(&mpd, 0, sizeof(mpd));
+
+	mpd.pl_type = PMC_CC_MULTIPART_IBS_OP;
+	mpd.pl_length = 8;
+	mpd.pl_mpdata[PMC_MPIDX_OP_CTL] = config;
+	mpd.pl_mpdata[PMC_MPIDX_OP_RIP] = rdmsr(IBS_OP_RIP);
+	mpd.pl_mpdata[PMC_MPIDX_OP_DATA] = rdmsr(IBS_OP_DATA);
+	mpd.pl_mpdata[PMC_MPIDX_OP_DATA2] = rdmsr(IBS_OP_DATA2);
+	mpd.pl_mpdata[PMC_MPIDX_OP_DATA3] = rdmsr(IBS_OP_DATA3);
+	mpd.pl_mpdata[PMC_MPIDX_OP_DC_LINADDR] = rdmsr(IBS_OP_DC_LINADDR);
+	mpd.pl_mpdata[PMC_MPIDX_OP_DC_PHYSADDR] = rdmsr(IBS_OP_DC_PHYSADDR);
+
+	pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
+
+	/* Re-arm op sampling with the configured control value. */
+	wrmsr(IBS_OP_CTL, pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE);
+}
+
+/*
+ * Interrupt handler.  This function needs to return '1' if the
+ * interrupt was this CPU's PMCs or '0' otherwise.  It is not allowed
+ * to sleep or do anything a 'fast' interrupt handler is not allowed
+ * to do.
+ *
+ * A fetch sample takes precedence: the op control register is only
+ * acted upon when no valid fetch sample was found.  An NMI with no valid
+ * sample is still claimed while a stop is in progress on this CPU.
+ */
+int
+pmc_ibs_intr(struct trapframe *tf)
+{
+	struct ibs_cpu *pac;
+	struct pmc *pm;
+	int retval, cpu;
+	uint64_t config;
+
+	cpu = curcpu;
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] out of range CPU %d", __LINE__, cpu));
+
+	PMCDBG3(MDP, INT, 1, "cpu=%d tf=%p um=%d", cpu, tf, TRAPF_USERMODE(tf));
+
+	/*
+	 * amd_intr() calls this handler unconditionally, including on
+	 * systems where pmc_ibs_initialize() never ran.  Without per-CPU
+	 * state, leave the IBS MSRs alone and disclaim the interrupt.
+	 */
+	if (ibs_pcpu == NULL)
+		return (0);
+	pac = ibs_pcpu[cpu];
+	if (pac == NULL)
+		return (0);
+
+	retval = 0;
+
+	config = rdmsr(IBS_FETCH_CTL);
+	if ((config & IBS_FETCH_CTL_VALID) != 0) {
+		pm = pac->pc_ibspmcs[IBS_PMC_FETCH].phw_pmc;
+
+		retval = 1;
+
+		pmc_ibs_process_fetch(pm, tf, config);
+	}
+
+	config = rdmsr(IBS_OP_CTL);
+	if ((retval == 0) && ((config & IBS_OP_CTL_VALID) != 0)) {
+		pm = pac->pc_ibspmcs[IBS_PMC_OP].phw_pmc;
+
+		retval = 1;
+
+		pmc_ibs_process_op(pm, tf, config);
+	}
+
+	if (retval == 0) {
+		/* Claim a stray NMI that arrives while we are stopping. */
+		if (atomic_load_int(&pac->pc_status) == IBS_CPU_STOPPING) {
+			return (1);
+		}
+	}
+
+	/*
+	 * NOTE(review): amd_intr() also updates these two counters after
+	 * this function returns, so IBS interrupts look to be accounted
+	 * twice -- confirm which side should own the statistics.
+	 */
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
+
+	PMCDBG1(MDP, INT, 2, "retval=%d", retval);
+
+	return (retval);
+}
+
+/*
+ * Describe a PMC.
+ *
+ * Fills in the name ("IBS-FETCH" for the fetch row, "IBS-OP" otherwise),
+ * class and enabled flag for row 'ri', and returns the PMC currently bound
+ * to that row on 'cpu' through 'ppmc'.  Always returns 0.
+ */
+static int
+ibs_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
+{
+	const char *label;
+	struct pmc_hw *slot;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] illegal CPU %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < IBS_NPMCS,
+	    ("[ibs,%d] row-index %d out of range", __LINE__, ri));
+
+	slot = &ibs_pcpu[cpu]->pc_ibspmcs[ri];
+
+	/* Only the name differs between the two rows. */
+	label = (ri == IBS_PMC_FETCH) ? "IBS-FETCH" : "IBS-OP";
+
+	strlcpy(pi->pm_name, label, sizeof(pi->pm_name));
+	pi->pm_class = PMC_CLASS_IBS;
+	pi->pm_enabled = true;
+	*ppmc = slot->phw_pmc;
+
+	return (0);
+}
+
+/*
+ * Processor-dependent initialization.
+ *
+ * Allocates the zeroed per-CPU IBS state for 'cpu' and publishes each of
+ * its hardware descriptors in the MI per-CPU table, starting at the row
+ * index recorded for the IBS class.  Always returns 0.
+ */
+static int
+ibs_pcpu_init(struct pmc_mdep *md, int cpu)
+{
+	struct ibs_cpu *state;
+	struct pmc_cpu *mi_cpu;
+	struct pmc_hw *slot;
+	int base_ri, row;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[ibs,%d] insane cpu number %d", __LINE__, cpu));
+
+	PMCDBG1(MDP, INI, 1, "ibs-init cpu=%d", cpu);
+
+	state = malloc(sizeof(struct ibs_cpu), M_PMC, M_WAITOK | M_ZERO);
+	ibs_pcpu[cpu] = state;
+
+	/*
+	 * Put every hardware descriptor into a known state and hook it
+	 * into the MI per-cpu descriptor.
+	 */
+	mi_cpu = pmc_pcpu[cpu];
+	base_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS].pcd_ri;
+
+	KASSERT(mi_cpu != NULL, ("[ibs,%d] NULL per-cpu pointer", __LINE__));
+
+	for (row = 0; row < IBS_NPMCS; row++) {
+		slot = &state->pc_ibspmcs[row];
+		slot->phw_state = PMC_PHW_FLAG_IS_ENABLED |
+		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(row);
+		slot->phw_pmc = NULL;
+		mi_cpu->pc_hwpmcs[row + base_ri] = slot;
+	}
+
+	return (0);
+}
+
+/*
+ * Processor-dependent cleanup prior to the KLD being unloaded.
+ */ +static int +ibs_pcpu_fini(struct pmc_mdep *md, int cpu) +{ + struct ibs_cpu *pac; + struct pmc_cpu *pc; + int first_ri, i; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[ibs,%d] insane cpu number (%d)", __LINE__, cpu)); + + PMCDBG1(MDP, INI, 1, "ibs-cleanup cpu=%d", cpu); + + /* + * Turn off IBS. + */ + wrmsr(IBS_FETCH_CTL, 0); + wrmsr(IBS_OP_CTL, 0); + + /* + * Free up allocated space. + */ + if ((pac = ibs_pcpu[cpu]) == NULL) + return (0); + + ibs_pcpu[cpu] = NULL; + + pc = pmc_pcpu[cpu]; + KASSERT(pc != NULL, ("[ibs,%d] NULL per-cpu state", __LINE__)); + + first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS].pcd_ri; + + /* + * Reset pointers in the MI 'per-cpu' state. + */ + for (i = 0; i < IBS_NPMCS; i++) + pc->pc_hwpmcs[i + first_ri] = NULL; + + free(pac, M_PMC); + + return (0); +} + +/* + * Initialize ourselves. + */ +int +pmc_ibs_initialize(struct pmc_mdep *pmc_mdep, int ncpus) +{ + u_int regs[4]; + struct pmc_classdep *pcd; + + /* + * Allocate space for pointers to PMC HW descriptors and for + * the MDEP structure used by MI code. + */ + ibs_pcpu = malloc(sizeof(struct ibs_cpu *) * pmc_cpu_max(), M_PMC, + M_WAITOK | M_ZERO); + + /* Initialize AMD IBS handling. 
*/ + pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_IBS]; + + pcd->pcd_caps = IBS_PMC_CAPS; + pcd->pcd_class = PMC_CLASS_IBS; + pcd->pcd_num = IBS_NPMCS; + pcd->pcd_ri = pmc_mdep->pmd_npmc; + pcd->pcd_width = 0; + + pcd->pcd_allocate_pmc = ibs_allocate_pmc; + pcd->pcd_config_pmc = ibs_config_pmc; + pcd->pcd_describe = ibs_describe; + pcd->pcd_get_config = ibs_get_config; + pcd->pcd_pcpu_fini = ibs_pcpu_fini; + pcd->pcd_pcpu_init = ibs_pcpu_init; + pcd->pcd_release_pmc = ibs_release_pmc; + pcd->pcd_start_pmc = ibs_start_pmc; + pcd->pcd_stop_pmc = ibs_stop_pmc; + pcd->pcd_read_pmc = ibs_read_pmc; + pcd->pcd_write_pmc = ibs_write_pmc; + + pmc_mdep->pmd_npmc += IBS_NPMCS; + + if (cpu_exthigh >= CPUID_IBSID) { + do_cpuid(CPUID_IBSID, regs); + ibs_features = regs[0]; + } else { + ibs_features = 0; + } + + PMCDBG0(MDP, INI, 0, "ibs-initialize"); + + return (0); +} + +/* + * Finalization code for AMD CPUs. + */ +void +pmc_ibs_finalize(struct pmc_mdep *md) +{ + PMCDBG0(MDP, INI, 1, "ibs-finalize"); + + for (int i = 0; i < pmc_cpu_max(); i++) + KASSERT(ibs_pcpu[i] == NULL, + ("[ibs,%d] non-null pcpu cpu %d", __LINE__, i)); + + free(ibs_pcpu, M_PMC); + ibs_pcpu = NULL; +} diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h new file mode 100644 index 000000000000..4449b44c8368 --- /dev/null +++ b/sys/dev/hwpmc/hwpmc_ibs.h @@ -0,0 +1,176 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2026, Ali Jose Mashtizadeh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWPMC_IBS_H_ +#define _DEV_HWPMC_IBS_H_ 1 + +#define IBS_NPMCS 2 +#define IBS_PMC_FETCH 0 +#define IBS_PMC_OP 1 + +/* + * All of the CPUID definitions come from AMD PPR Vol 1 for AMD Family 1Ah + * Model 02h C1 (57238) 2024-09-29 Revision 0.24. 
+ *
+ * Every feature flag below is a distinct single bit of the EAX value
+ * returned by the CPUID_IBSID leaf: bits 0 through 12, plus bit 19.
+ */
+#define	CPUID_IBSID			0x8000001B
+#define	CPUID_IBSID_IBSFFV		0x00000001 /* IBS Feature Flags Valid */
+#define	CPUID_IBSID_FETCHSAM		0x00000002 /* IBS Fetch Sampling */
+#define	CPUID_IBSID_OPSAM		0x00000004 /* IBS Execution Sampling */
+#define	CPUID_IBSID_RDWROPCNT		0x00000008 /* RdWr Operation Counter */
+#define	CPUID_IBSID_OPCNT		0x00000010 /* Operation Counter */
+#define	CPUID_IBSID_BRNTRGT		0x00000020 /* Branch Target Address */
+#define	CPUID_IBSID_OPCNTEXT		0x00000040 /* Extend Counter */
+#define	CPUID_IBSID_RIPINVALIDCHK	0x00000080 /* Invalid RIP Indication */
+#define	CPUID_IBSID_OPFUSE		0x00000100 /* Fused Branch Operation */
+#define	CPUID_IBSID_IBSFETCHCTLEXTD	0x00000200 /* IBS Fetch Control Ext */
+#define	CPUID_IBSID_IBSOPDATA4		0x00000400 /* IBS OP DATA4 */
+#define	CPUID_IBSID_ZEN4IBSEXTENSIONS	0x00000800 /* IBS Zen 4 Extensions */
+#define	CPUID_IBSID_IBSLOADLATENCYFILT	0x00001000 /* Load Latency Filtering */
+#define	CPUID_IBSID_IBSUPDTDDTLBSTATS	0x00080000 /* Simplified DTLB Stats */
+
+/*
+ * All of these definitions here come from AMD64 Architecture Programmer's
+ * Manual Volume 2: System Programming (24593) 2025-07-02 Version 3.43. with
+ * the following exceptions:
+ *
+ * OpData4 and fields come from the BKDG for AMD Family 15h Model 70-7Fh
+ * (55072) 2018-06-20 Revision 3.09.
+ */
+
+/* IBS MSRs */
+#define	IBS_CTL				0xC001103A /* IBS Control */
+#define	IBS_CTL_LVTOFFSETVALID		(1ULL << 8)
+#define	IBS_CTL_LVTOFFSETMASK		0x0000000F
+
+/* IBS Fetch Control */
+#define	IBS_FETCH_CTL			0xC0011030 /* IBS Fetch Control */
+#define	IBS_FETCH_CTL_L3MISS		(1ULL << 61) /* L3 Cache Miss */
+#define	IBS_FETCH_CTL_OPCACHEMISS	(1ULL << 60) /* Op Cache Miss */
+#define	IBS_FETCH_CTL_L3MISSONLY	(1ULL << 59) /* L3 Miss Filtering */
+#define	IBS_FETCH_CTL_RANDOMIZE		(1ULL << 57) /* Randomized Tagging */
+#define	IBS_FETCH_CTL_L1TLBMISS		(1ULL << 55) /* L1 TLB Miss */
+/* Page size 54:53 */
+#define	IBS_FETCH_CTL_PHYSADDRVALID	(1ULL << 52) /* PHYSADDR Valid */
+#define	IBS_FETCH_CTL_ICMISS		(1ULL << 51) /* Inst. Cache Miss */
+#define	IBS_FETCH_CTL_COMPLETE		(1ULL << 50) /* Complete */
+#define	IBS_FETCH_CTL_VALID		(1ULL << 49) /* Valid */
+#define	IBS_FETCH_CTL_ENABLE		(1ULL << 48) /* Enable */
+#define	IBS_FETCH_CTL_MAXCNTMASK	0x0000FFFFULL
+
+/*
+ * Extract bits 47:32 of a fetch control value.  The argument is
+ * parenthesized so that an expression can be passed safely.
+ */
+#define	IBS_FETCH_CTL_TO_LAT(_c)	(((_c) >> 32) & 0x0000FFFF)
+
+#define	IBS_FETCH_LINADDR		0xC0011031 /* Fetch Linear Address */
+#define	IBS_FETCH_PHYSADDR		0xC0011032 /* Fetch Physical Address */
+#define	IBS_FETCH_EXTCTL		0xC001103C /* Fetch Control Extended */
+
+/* Positions of the fetch values within a multipart payload. */
+#define	PMC_MPIDX_FETCH_CTL		0
+#define	PMC_MPIDX_FETCH_EXTCTL		1
+#define	PMC_MPIDX_FETCH_LINADDR		2
+#define	PMC_MPIDX_FETCH_PHYSADDR	3
+
+/* IBS Execution Control */
+#define	IBS_OP_CTL			0xC0011033 /* IBS Execution Control */
+#define	IBS_OP_CTL_COUNTERCONTROL	(1ULL << 19) /* Counter Control */
+#define	IBS_OP_CTL_VALID		(1ULL << 18) /* Valid */
+#define	IBS_OP_CTL_ENABLE		(1ULL << 17) /* Enable */
+#define	IBS_OP_CTL_L3MISSONLY		(1ULL << 16) /* L3 Miss Filtering */
+#define	IBS_OP_CTL_MAXCNTMASK		0x0000FFFFULL
+
+#define	IBS_OP_RIP			0xC0011034 /* IBS Op RIP */
+#define	IBS_OP_DATA			0xC0011035 /* IBS Op Data */
+#define	IBS_OP_DATA_RIPINVALID		(1ULL << 38) /* RIP Invalid */
+#define	IBS_OP_DATA_BRANCHRETIRED	(1ULL << 37) /* Branch Retired */
+#define	IBS_OP_DATA_BRANCHMISPREDICTED	(1ULL << 36) /* Branch Mispredicted */
+#define	IBS_OP_DATA_BRANCHTAKEN		(1ULL << 35) /* Branch Taken */
+#define	IBS_OP_DATA_RETURN		(1ULL << 34) /* Return */
+
+#define	IBS_OP_DATA2			0xC0011036 /* IBS Op Data 2 */
+#define	IBS_OP_DATA3			0xC0011037 /* IBS Op Data 3 */
+#define	IBS_OP_DATA3_DCPHYADDRVALID	(1ULL << 18) /* DC Physical Address */
+#define	IBS_OP_DATA3_DCLINADDRVALID	(1ULL << 17) /* DC Linear Address */
+#define	IBS_OP_DATA3_LOCKEDOP		(1ULL << 15) /* DC Locked Op */
+#define	IBS_OP_DATA3_UCMEMACCESS	(1ULL << 14) /* DC UC Memory Access */
+#define	IBS_OP_DATA3_WCMEMACCESS	(1ULL << 13) /* DC WC Memory Access */
+#define	IBS_OP_DATA3_DCMISALIGN		(1ULL << 8) /* DC Misaligned Access */
+#define	IBS_OP_DATA3_DCMISS		(1ULL << 7) /* DC Miss */
+#define	IBS_OP_DATA3_DCL1TLBHIT1G	(1ULL << 5) /* DC L1 TLB Hit 1-GB */
+#define	IBS_OP_DATA3_DCL1TLBHIT2M	(1ULL << 4) /* DC L1 TLB Hit 2-MB */
+#define	IBS_OP_DATA3_DCL1TLBMISS	(1ULL << 2) /* DC L1 TLB Miss */
+#define	IBS_OP_DATA3_STORE		(1ULL << 1) /* Store */
+#define	IBS_OP_DATA3_LOAD		(1ULL << 0) /* Load */
+/* Extract bits 47:32 of an Op Data 3 value; argument is parenthesized. */
+#define	IBS_OP_DATA3_TO_DCLAT(_c)	(((_c) >> 32) & 0x0000FFFF)
+
+#define	IBS_OP_DC_LINADDR		0xC0011038 /* IBS DC Linear Address */
+#define	IBS_OP_DC_PHYSADDR		0xC0011039 /* IBS DC Physical Address */
+#define	IBS_TGT_RIP			0xC001103B /* IBS Branch Target */
+#define	IBS_OP_DATA4			0xC001103D /* IBS Op Data 4 */
+#define	IBS_OP_DATA4_LDRESYNC		(1ULL << 0) /* Load Resync */
+
+/* Positions of the op values within a multipart payload. */
+#define	PMC_MPIDX_OP_CTL		0
+#define	PMC_MPIDX_OP_RIP		1
+#define	PMC_MPIDX_OP_DATA		2
+#define	PMC_MPIDX_OP_DATA2		3
+#define	PMC_MPIDX_OP_DATA3		4
+#define	PMC_MPIDX_OP_DC_LINADDR		5
+#define	PMC_MPIDX_OP_DC_PHYSADDR	6
+#define	PMC_MPIDX_OP_TGT_RIP		7
+#define	PMC_MPIDX_OP_DATA4		8
+
+/*
+ * IBS data is encoded using the multipart flag in the existing callchain
+ * structure.  The PMC ID number tells you if the sample contains a fetch or an
+ * op sample.
The available payload will be encoded in the MSR order with a + * variable length. + */ + +struct pmc_md_ibs_op_pmcallocate { + uint32_t ibs_flag; + uint32_t ibs_type; + uint64_t ibs_ctl; + uint64_t ibs_ctl2; +}; + +#ifdef _KERNEL + +/* MD extension for 'struct pmc' */ +struct pmc_md_ibs_pmc { + uint32_t ibs_flag; + uint32_t ibs_type; + uint64_t ibs_ctl; + uint64_t ibs_ctl2; +}; + +#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | \ + PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE) + +int pmc_ibs_initialize(struct pmc_mdep *md, int ncpu); +void pmc_ibs_finalize(struct pmc_mdep *md); +int pmc_ibs_intr(struct trapframe *tf); + +#endif /* _KERNEL */ +#endif /* _DEV_HWPMC_IBS_H_ */ diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index a6a6ae68996c..1fa021429c5a 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -198,9 +198,15 @@ static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif +static bool pmc_is_multipart(struct pmc_sample *ps); +static void pmc_multipart_add(struct pmc_sample *ps, int type, + int length); +static void pmc_multipart_copydata(struct pmc_sample *ps, + struct pmc_multipart *mp); + static int load(struct module *module, int cmd, void *arg); static int pmc_add_sample(ring_type_t ring, struct pmc *pm, - struct trapframe *tf); + struct trapframe *tf, struct pmc_multipart *mp); static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp); static int pmc_attach_process(struct proc *p, struct pmc *pm); @@ -4587,6 +4593,53 @@ pmc_post_callchain_callback(void) return; } +static bool +pmc_is_multipart(struct pmc_sample *ps) +{ + return ((ps->ps_flags & PMC_CC_F_MULTIPART) != 0); +} + +static void +pmc_multipart_add(struct pmc_sample *ps, int type, int length) +{ + int i; + uint8_t *hdr; + + MPASS(ps->ps_pc != NULL); + MPASS(ps->ps_nsamples_actual != 0); + + hdr = (uint8_t *)ps->ps_pc; + + for (i = 0; i 
< PMC_MULTIPART_HEADER_ENTRIES; i++) { + if (hdr[2 * i] == PMC_CC_MULTIPART_NONE) { + hdr[2 * i] = type; + hdr[2 * i + 1] = length; + ps->ps_nsamples_actual += length; + return; + } + } + + KASSERT(false, ("Too many parts in the multipart header!")); +} + +static void +pmc_multipart_copydata(struct pmc_sample *ps, struct pmc_multipart *mp) +{ + int i, scale; + uint64_t *ps_pc; + + MPASS(ps->ps_pc != NULL); + MPASS(ps->ps_nsamples_actual != 0); + + ps_pc = (uint64_t *)ps->ps_pc; + + for (i = 0; i < mp->pl_length; i++) + ps_pc[i + 1] = mp->pl_mpdata[i]; + + scale = sizeof(uint64_t) / sizeof(uintptr_t); + pmc_multipart_add(ps, mp->pl_type, scale * mp->pl_length); +} + /* * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit @@ -4597,7 +4650,8 @@ pmc_post_callchain_callback(void) * use any of the locking primitives supplied by the OS. */ static int -pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf) +pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf, + struct pmc_multipart *mp) { struct pmc_sample *ps; struct pmc_samplebuffer *psb; @@ -4641,21 +4695,33 @@ pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf) ps->ps_ticks = ticks; ps->ps_cpu = cpu; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; + ps->ps_nsamples_actual = 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; MPASS(ps->ps_pc != NULL); + + if (mp != NULL) { + /* Set multipart flag, clear header and copy data */ + ps->ps_flags |= PMC_CC_F_MULTIPART; + ps->ps_pc[0] = 0; + ps->ps_nsamples_actual = 1; + pmc_multipart_copydata(ps, mp); + } + if (callchaindepth == 1) { - ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); + ps->ps_pc[ps->ps_nsamples_actual] = PMC_TRAPFRAME_TO_PC(tf); } else { /* * Kernel stack traversals can be done immediately, while we * defer to an AST for user space traversals. 
*/ if (!inuserspace) { - callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, - callchaindepth, tf); + callchaindepth = pmc_save_kernel_callchain( + ps->ps_pc + ps->ps_nsamples_actual, + callchaindepth - ps->ps_nsamples_actual, tf); + callchaindepth += ps->ps_nsamples_actual; } else { pmc_post_callchain_callback(); callchaindepth = PMC_USER_CALLCHAIN_PENDING; @@ -4664,7 +4730,7 @@ pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf) ps->ps_nsamples = callchaindepth; /* mark entry as in-use */ if (ring == PMC_UR) { - ps->ps_nsamples_actual = callchaindepth; + ps->ps_nsamples_actual = ps->ps_nsamples; ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING; } @@ -4690,7 +4756,8 @@ done: * locking primitives supplied by the OS. */ int -pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf) +pmc_process_interrupt_mp(int ring, struct pmc *pm, struct trapframe *tf, + struct pmc_multipart *mp) { struct thread *td; @@ -4698,9 +4765,15 @@ pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf) if ((pm->pm_flags & PMC_F_USERCALLCHAIN) && (td->td_proc->p_flag & P_KPROC) == 0 && !TRAPF_USERMODE(tf)) { atomic_add_int(&td->td_pmcpend, 1); - return (pmc_add_sample(PMC_UR, pm, tf)); + return (pmc_add_sample(PMC_UR, pm, tf, mp)); } - return (pmc_add_sample(ring, pm, tf)); + return (pmc_add_sample(ring, pm, tf, mp)); +} + +int +pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf) +{ + return (pmc_process_interrupt_mp(ring, pm, tf, NULL)); } /* @@ -4763,10 +4836,9 @@ restart: (uintmax_t)counter_u64_fetch(pm->pm_runcount))); if (ring == PMC_UR) { - nsamples = ps->ps_nsamples_actual; counter_u64_add(pmc_stats.pm_merges, 1); - } else - nsamples = 0; + } + nsamples = ps->ps_nsamples_actual; /* * Retrieve the callchain and mark the sample buffer diff --git a/sys/dev/hwpmc/pmc_events.h b/sys/dev/hwpmc/pmc_events.h index ab157cb05dcf..b22ec1919b32 100644 --- a/sys/dev/hwpmc/pmc_events.h +++ b/sys/dev/hwpmc/pmc_events.h @@ -149,6 +149,15 
@@ __PMC_EV(K8, NB_HT_BUS2_BANDWIDTH) #define PMC_EV_K8_FIRST PMC_EV_K8_FP_DISPATCHED_FPU_OPS #define PMC_EV_K8_LAST PMC_EV_K8_NB_HT_BUS2_BANDWIDTH +/* AMD IBS PMCs */ + +#define __PMC_EV_IBS() \ +__PMC_EV(IBS, FETCH) \ +__PMC_EV(IBS, OP) + +#define PMC_EV_IBS_FIRST PMC_EV_IBS_FETCH +#define PMC_EV_IBS_LAST PMC_EV_IBS_OP + /* * Events supported by Intel architectural fixed function counters, * from the "Intel 64 and IA-32 Architectures Software Developer's @@ -2398,7 +2407,7 @@ __PMC_EV_ALIAS("unhalted-reference-cycles", IAF_CPU_CLK_UNHALTED_REF) * START #EVENTS DESCRIPTION * 0 0x1000 Reserved * 0x1000 0x0001 TSC - * 0x2000 0x0080 free (was AMD K7 events) + * 0x2000 0x0080 AMD IBS (was AMD K7 events) * 0x2080 0x0100 AMD K8 events * 0x10000 0x0080 INTEL architectural fixed-function events * 0x10080 0x0F80 free (was INTEL architectural programmable events) @@ -2424,6 +2433,8 @@ __PMC_EV_ALIAS("unhalted-reference-cycles", IAF_CPU_CLK_UNHALTED_REF) #define __PMC_EVENTS() \ __PMC_EV_BLOCK(TSC, 0x01000) \ __PMC_EV_TSC() \ + __PMC_EV_BLOCK(IBS, 0x02000) \ + __PMC_EV_IBS() \ __PMC_EV_BLOCK(K8, 0x02080) \ __PMC_EV_K8() \ __PMC_EV_BLOCK(IAF, 0x10000) \ diff --git a/sys/i386/include/pmc_mdep.h b/sys/i386/include/pmc_mdep.h index 77b57b3163f2..31eded611a1e 100644 --- a/sys/i386/include/pmc_mdep.h +++ b/sys/i386/include/pmc_mdep.h @@ -43,6 +43,7 @@ struct pmc_mdep; * TSC The timestamp counter * K7 AMD Athlon XP/MP and other 32 bit processors. * K8 AMD Athlon64 and Opteron PMCs in 32 bit mode. + * IBS AMD IBS * IAP Intel Core/Core2/Atom programmable PMCs. * IAF Intel fixed-function PMCs. * UCP Intel Uncore programmable PMCs. 
@@ -50,6 +51,7 @@ struct pmc_mdep; */ #include <dev/hwpmc/hwpmc_amd.h> /* K7 and K8 */ +#include <dev/hwpmc/hwpmc_ibs.h> #include <dev/hwpmc/hwpmc_core.h> #include <dev/hwpmc/hwpmc_tsc.h> #include <dev/hwpmc/hwpmc_uncore.h> @@ -62,6 +64,7 @@ struct pmc_mdep; #define PMC_MDEP_CLASS_INDEX_TSC 1 #define PMC_MDEP_CLASS_INDEX_K7 2 #define PMC_MDEP_CLASS_INDEX_K8 2 +#define PMC_MDEP_CLASS_INDEX_IBS 3 #define PMC_MDEP_CLASS_INDEX_IAP 2 #define PMC_MDEP_CLASS_INDEX_IAF 3 #define PMC_MDEP_CLASS_INDEX_UCP 4 @@ -73,6 +76,7 @@ struct pmc_mdep; union pmc_md_op_pmcallocate { struct pmc_md_amd_op_pmcallocate pm_amd; + struct pmc_md_ibs_op_pmcallocate pm_ibs; struct pmc_md_iap_op_pmcallocate pm_iap; struct pmc_md_ucf_op_pmcallocate pm_ucf; struct pmc_md_ucp_op_pmcallocate pm_ucp; @@ -88,6 +92,7 @@ union pmc_md_op_pmcallocate { /* MD extension for 'struct pmc' */ union pmc_md_pmc { struct pmc_md_amd_pmc pm_amd; + struct pmc_md_ibs_pmc pm_ibs; struct pmc_md_iaf_pmc pm_iaf; struct pmc_md_iap_pmc pm_iap; struct pmc_md_ucf_pmc pm_ucf; diff --git a/sys/modules/hwpmc/Makefile b/sys/modules/hwpmc/Makefile index 812e3fbf182c..d13f52009600 100644 --- a/sys/modules/hwpmc/Makefile +++ b/sys/modules/hwpmc/Makefile @@ -16,7 +16,7 @@ SRCS.DEV_ACPI+= hwpmc_dmc620.c pmu_dmc620.c .endif .if ${MACHINE_CPUARCH} == "amd64" -SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_tsc.c +SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_ibs.c hwpmc_intel.c hwpmc_tsc.c SRCS+= hwpmc_x86.c hwpmc_uncore.c .endif @@ -29,7 +29,7 @@ SRCS+= hwpmc_armv7.c .endif .if ${MACHINE_CPUARCH} == "i386" -SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c +SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_ibs.c hwpmc_intel.c SRCS+= hwpmc_tsc.c hwpmc_x86.c hwpmc_uncore.c .endif diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h index 12b8ddcb156f..28484eed187e 100644 --- a/sys/sys/pmc.h +++ b/sys/sys/pmc.h @@ -141,6 +141,7 @@ enum pmc_cputype { #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance 
counters") \ + __PMC_CLASS(IBS, 0x03, "AMD IBS performance counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ @@ -386,6 +387,7 @@ enum pmc_ops { #define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ +#define PMC_CC_F_MULTIPART 0x02 /*multipart data*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. @@ -960,6 +962,18 @@ struct pmc_samplebuffer { #define PMC_PROD_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask]) + +/* + * struct pmc_multipart + * + * Multipart payload + */ +struct pmc_multipart { + char pl_type; + char pl_length; + uint64_t pl_mpdata[10]; +}; + /* * struct pmc_cpustate * @@ -1226,7 +1240,10 @@ MALLOC_DECLARE(M_PMC); struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); -int pmc_process_interrupt(int _ring, struct pmc *_pm, struct trapframe *_tf); +int pmc_process_interrupt_mp(int _ring, struct pmc *_pm, + struct trapframe *_tf, struct pmc_multipart *mp); +int pmc_process_interrupt(int _ring, struct pmc *_pm, + struct trapframe *_tf); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, diff --git a/sys/sys/pmclog.h b/sys/sys/pmclog.h index 3f79f17f306c..362792ae9ad1 100644 --- a/sys/sys/pmclog.h +++ b/sys/sys/pmclog.h @@ -125,6 +125,20 @@ struct pmclog_callchain { #define PMC_CALLCHAIN_TO_CPUFLAGS(CPU,FLAGS) \ (((CPU) << 16) | ((FLAGS) & 0xFFFF)) +/* + * If the multipart flag is set, then pl_pc contains multiple data types. The + * first 8 bytes is a header made up of a 1 byte type and 1 byte length that + * describes the use of the remaining pl_pc array. 
+ */ + +#define PMC_MULTIPART_HEADER_LENGTH 8 +#define PMC_MULTIPART_HEADER_ENTRIES 4 + +#define PMC_CC_MULTIPART_NONE 0 +#define PMC_CC_MULTIPART_CALLCHAIN 1 +#define PMC_CC_MULTIPART_IBS_FETCH 2 +#define PMC_CC_MULTIPART_IBS_OP 3 + struct pmclog_closelog { PMCLOG_ENTRY_HEADER }; diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 54026f83dc15..8a8fb8ef41f6 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -230,11 +230,11 @@ static struct lvt elvts[] = { .lvt_edgetrigger = 1, .lvt_activehi = 1, .lvt_masked = 1, - .lvt_active = 0, - .lvt_mode = APIC_LVT_DM_FIXED, + .lvt_active = 1, + .lvt_mode = APIC_LVT_DM_NMI, .lvt_vector = 0, .lvt_reg = LAPIC_EXT_LVT0, - .lvt_desc = "ELVT0", + .lvt_desc = "IBS", }, [APIC_ELVT_MCA] = { .lvt_edgetrigger = 1, @@ -528,7 +528,10 @@ elvt_mode(struct lapic *la, u_int idx, uint32_t value) KASSERT(idx <= APIC_ELVT_MAX, ("%s: idx %u out of range", __func__, idx)); - elvt = &la->la_elvts[idx]; + if (la->la_elvts[idx].lvt_active) + elvt = &la->la_elvts[idx]; + else + elvt = &elvts[idx]; KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); KASSERT(elvt->lvt_edgetrigger, ("%s: ELVT%u is not edge triggered", __func__, idx)); @@ -963,9 +966,16 @@ lapic_reenable_pcint(void) if (refcount_load(&pcint_refcnt) == 0) return; + value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); + + if ((amd_feature2 & AMDID2_IBS) != 0) { + value = lapic_read32(LAPIC_EXT_LVT0); + value &= ~APIC_LVT_M; + lapic_write32(LAPIC_EXT_LVT0, value); + } } static void @@ -976,6 +986,11 @@ lapic_update_pcint(void *dummy) la = &lapics[lapic_id()]; lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); + + if ((amd_feature2 & AMDID2_IBS) != 0) { + lapic_write32(LAPIC_EXT_LVT0, elvt_mode(la, APIC_ELVT_IBS, + lapic_read32(LAPIC_EXT_LVT0))); + } } void @@ -1022,6 +1037,9 @@ lapic_enable_pcint(void) return (1); lvts[APIC_LVT_PMC].lvt_masked = 
0; + if ((amd_feature2 & AMDID2_IBS) != 0) + elvts[APIC_ELVT_IBS].lvt_masked = 0; + MPASS(mp_ncpus == 1 || smp_started); smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL); return (1); @@ -1045,6 +1063,7 @@ lapic_disable_pcint(void) if (!refcount_release(&pcint_refcnt)) return; lvts[APIC_LVT_PMC].lvt_masked = 1; + elvts[APIC_ELVT_IBS].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index c8fc414f6214..db9cbfabd9da 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -56,6 +56,7 @@ #include <errno.h> #include <fcntl.h> #include <gelf.h> +#include <inttypes.h> #include <libgen.h> #include <limits.h> #include <netdb.h> @@ -367,6 +368,97 @@ pmcstat_pmcindex_to_pmcr(int pmcin) return NULL; } +#if defined(__amd64__) || defined(__i386__) +static void +pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset) +{ + uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset]; + uint64_t ctl; + + ctl = ibsbuf[PMC_MPIDX_FETCH_CTL]; + PMCSTAT_PRINT_ENTRY("ibs-fetch", "%s%s%s%s", + (ctl & IBS_FETCH_CTL_ICMISS) ? "icmiss " : "", + (ctl & IBS_FETCH_CTL_L1TLBMISS) ? "l1tlbmiss " : "", + (ctl & IBS_FETCH_CTL_OPCACHEMISS) ? "opcachemiss " : "", + (ctl & IBS_FETCH_CTL_L3MISS) ? 
"l3miss" : ""); + PMCSTAT_PRINT_ENTRY("ibs-fetch", "Latency %" PRIu64, + IBS_FETCH_CTL_TO_LAT(ctl)); + PMCSTAT_PRINT_ENTRY("IBS", "Address %" PRIx64, + ibsbuf[PMC_MPIDX_FETCH_LINADDR]); + if ((ctl & IBS_FETCH_CTL_PHYSADDRVALID) != 0) { + PMCSTAT_PRINT_ENTRY("IBS", "Physical Address %" PRIx64, + ibsbuf[PMC_MPIDX_FETCH_PHYSADDR]); + } +} + +static void +pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset) +{ + uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset]; + uint64_t data, data3; + + data = ibsbuf[PMC_MPIDX_OP_DATA]; + data3 = ibsbuf[PMC_MPIDX_OP_DATA3]; + + if ((data & IBS_OP_DATA_RIPINVALID) == 0) { + PMCSTAT_PRINT_ENTRY("ibs-op", "RIP %" PRIx64, + ibsbuf[PMC_MPIDX_OP_RIP]); + } + PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s%s%s", + (data & IBS_OP_DATA_BRANCHRETIRED) ? "branchretired " : "", + (data & IBS_OP_DATA_BRANCHMISPREDICTED) ? "branchmispredicted " : "", + (data & IBS_OP_DATA_BRANCHTAKEN) ? "branchtaken " : "", + (data & IBS_OP_DATA_RETURN) ? "return" : ""); + PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s%s%s%s", + (data3 & IBS_OP_DATA3_LOAD) ? "load " : "", + (data3 & IBS_OP_DATA3_STORE) ? "store " : "", + (data3 & IBS_OP_DATA3_LOCKEDOP) ? "lock " : "", + (data3 & IBS_OP_DATA3_DCL1TLBMISS) ? "l1tlbmiss " : "", + (data3 & IBS_OP_DATA3_DCMISS) ? 
"dcmiss " : ""); + PMCSTAT_PRINT_ENTRY("ibs-op", "Latency %" PRIu64, + IBS_OP_DATA3_TO_DCLAT(data3)); + if ((data3 & IBS_OP_DATA3_DCLINADDRVALID) != 0) { + PMCSTAT_PRINT_ENTRY("ibs-op", "Address %" PRIx64, + ibsbuf[PMC_MPIDX_OP_DC_LINADDR]); + } + if ((data3 & IBS_OP_DATA3_DCPHYADDRVALID) != 0) { + PMCSTAT_PRINT_ENTRY("ibs-op", "Physical Address %" PRIx64, + ibsbuf[PMC_MPIDX_OP_DC_PHYSADDR]); + } +} +#endif + +static int +pmcstat_print_multipart(struct pmclog_ev_callchain *cc) +{ + int i; + uint8_t *hdr = (uint8_t *)&cc->pl_pc[0]; + int offset = PMC_MULTIPART_HEADER_LENGTH / sizeof(uintptr_t); + + for (i = 0; i < PMC_MULTIPART_HEADER_ENTRIES; i++) { + uint8_t type = hdr[2 * i]; + uint8_t len = hdr[2 * i + 1]; + + if (type == PMC_CC_MULTIPART_NONE) { + break; + } else if (type == PMC_CC_MULTIPART_CALLCHAIN) { + return (offset); +#if defined(__amd64__) || defined(__i386__) + } else if (type == PMC_CC_MULTIPART_IBS_FETCH) { + pmcstat_print_ibs_fetch(cc, offset); + } else if (type == PMC_CC_MULTIPART_IBS_OP) { + pmcstat_print_ibs_op(cc, offset); +#endif + } else { + PMCSTAT_PRINT_ENTRY("unsupported multipart type!"); + } + + offset += len; + } + + return (offset); +} + /* * Print log entries as text. */ @@ -388,7 +480,12 @@ pmcstat_print_log(void) pl_cpuflags), ev.pl_u.pl_cc.pl_npc, PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\ pl_cpuflags) ? 'u' : 's'); - for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++) + if ((ev.pl_u.pl_cc.pl_cpuflags & PMC_CC_F_MULTIPART) + != 0) + npc = pmcstat_print_multipart(&ev.pl_u.pl_cc); + else + npc = 0; + for (; npc < ev.pl_u.pl_cc.pl_npc; npc++) PMCSTAT_PRINT_ENTRY("...", "%p", (void *) ev.pl_u.pl_cc.pl_pc[npc]); break; |
