diff options
author | Justin Hibbits <jhibbits@FreeBSD.org> | 2022-05-31 15:40:20 +0000 |
---|---|---|
committer | Justin Hibbits <jhibbits@FreeBSD.org> | 2022-06-22 15:46:40 +0000 |
commit | c70276686572fa4976babed8aa936ed3d3f2a52d (patch) | |
tree | 52aec5185f1bb1c494c73b230b9765782c94a80b | |
parent | c20ed7dbba589efc9167dfdcc35c98da7f4f8021 (diff) | |
download | src-c70276686572fa4976babed8aa936ed3d3f2a52d.tar.gz src-c70276686572fa4976babed8aa936ed3d3f2a52d.zip |
arm64: Print per-CPU cache summary
Summary:
It can be useful to see a summary of CPU caches on bootup. This is done
for most platforms already, so add this to arm64, in the form of (taken
from an Apple M1 Pro test):
L1 cache: 192KB (instruction), 128KB (data)
L2 cache: 12288KB (unified)
This is printed out per-CPU, only under bootverbose.
Future refinements could instead determine if a cache level is shared
with other cores (L2 is shared among cores on some SoCs, for instance),
and perform a better calculation of the full, true cache sizes. For
instance, it's known that the M1 pro, on which this test was done, has 2
12MB L2 clusters, for a total of 24MB. Seeing each CPU with 12288KB L2
would make one think that there's 12MB * NCPUs, for possibly 120MB
cache, which is incorrect.
Sponsored by: Juniper Networks, Inc.
Reviewed by: #arm64, andrew
Differential Revision: https://reviews.freebsd.org/D35366
(cherry picked from commit 139ba152c9c91fad9b63ccd2382a80f753f217b9)
-rw-r--r-- | sys/arm64/arm64/identcpu.c | 83 | ||||
-rw-r--r-- | sys/arm64/include/armreg.h | 30 |
2 files changed, 112 insertions, 1 deletions
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c index 0b376cd02286..ed92cf412b33 100644 --- a/sys/arm64/arm64/identcpu.c +++ b/sys/arm64/arm64/identcpu.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); static int ident_lock; static void print_cpu_features(u_int cpu); static u_long parse_cpu_features_hwcap(u_int cpu); +static void print_cpu_caches(struct sbuf *sb, u_int); char machine[] = "arm64"; @@ -59,6 +60,8 @@ static char cpu_model[64]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, sizeof(cpu_model), "Machine model"); +#define MAX_CACHES 8 /* Maximum number of caches supported + architecturally. */ /* * Per-CPU affinity as provided in MPIDR_EL1 * Indexed by CPU number in logical order selected by the system. @@ -92,6 +95,8 @@ struct cpu_desc { uint64_t id_aa64mmfr2; uint64_t id_aa64pfr0; uint64_t id_aa64pfr1; + uint64_t clidr; + uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */ }; struct cpu_desc cpu_desc[MAXCPU]; @@ -521,6 +526,62 @@ parse_cpu_features_hwcap(u_int cpu) } static void +print_cpu_cache(u_int cpu, struct sbuf *sb, uint64_t ccs, bool icache, + bool unified) +{ + size_t cache_size; + size_t line_size; + + /* LineSize is Log2(S) - 4. */ + line_size = 1 << ((ccs & CCSIDR_LineSize_MASK) + 4); + /* + * Calculate cache size (sets * ways * line size). There are different + * formats depending on the FEAT_CCIDX bit in ID_AA64MMFR2 feature + * register. + */ + if ((cpu_desc[cpu].id_aa64mmfr2 & ID_AA64MMFR2_CCIDX_64)) + cache_size = (CCSIDR_NSETS_64(ccs) + 1) * + (CCSIDR_ASSOC_64(ccs) + 1); + else + cache_size = (CCSIDR_NSETS(ccs) + 1) * (CCSIDR_ASSOC(ccs) + 1); + + cache_size *= line_size; + sbuf_printf(sb, "%zuKB (%s)", cache_size / 1024, + icache ? "instruction" : unified ? 
"unified" : "data"); +} + +static void +print_cpu_caches(struct sbuf *sb, u_int cpu) +{ + /* Print out each cache combination */ + uint64_t clidr; + int i = 1; + clidr = cpu_desc[cpu].clidr; + + for (i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) { + int j = 0; + int ctype_m = (clidr & CLIDR_CTYPE_MASK); + + sbuf_printf(sb, " L%d cache: ", i + 1); + if ((clidr & CLIDR_CTYPE_IO)) { + print_cpu_cache(cpu, sb, cpu_desc[cpu].ccsidr[i][j++], + true, false); + /* If there's more, add to the line. */ + if ((ctype_m & ~CLIDR_CTYPE_IO) != 0) + sbuf_printf(sb, ", "); + } + if ((ctype_m & ~CLIDR_CTYPE_IO) != 0) { + print_cpu_cache(cpu, sb, cpu_desc[cpu].ccsidr[i][j], + false, (clidr & CLIDR_CTYPE_UNIFIED)); + } + sbuf_printf(sb, "\n"); + + } + sbuf_finish(sb); + printf("%s", sbuf_data(sb)); +} + +static void print_cpu_features(u_int cpu) { struct sbuf *sb; @@ -1351,6 +1412,8 @@ print_cpu_features(u_int cpu) printf(" Auxiliary Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64afr1); } + if (bootverbose) + print_cpu_caches(sb, cpu); sbuf_delete(sb); sb = NULL; @@ -1360,6 +1423,7 @@ print_cpu_features(u_int cpu) void identify_cpu(void) { + uint64_t clidr; u_int midr; u_int impl_id; u_int part_id; @@ -1407,7 +1471,6 @@ identify_cpu(void) /* Save affinity for current CPU */ cpu_desc[cpu].mpidr = get_mpidr(); CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK; - cpu_desc[cpu].id_aa64dfr0 = READ_SPECIALREG(ID_AA64DFR0_EL1); cpu_desc[cpu].id_aa64dfr1 = READ_SPECIALREG(ID_AA64DFR1_EL1); cpu_desc[cpu].id_aa64isar0 = READ_SPECIALREG(ID_AA64ISAR0_EL1); @@ -1418,6 +1481,24 @@ identify_cpu(void) cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(ID_AA64PFR0_EL1); cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(ID_AA64PFR1_EL1); + cpu_desc[cpu].clidr = READ_SPECIALREG(clidr_el1); + + clidr = cpu_desc[cpu].clidr; + + for (int i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) { + int j = 0; + if ((clidr & CLIDR_CTYPE_IO)) { + WRITE_SPECIALREG(CSSELR_EL1, + CSSELR_Level(i) | 
CSSELR_InD); + cpu_desc[cpu].ccsidr[i][j++] = + READ_SPECIALREG(CCSIDR_EL1); + } + if ((clidr & ~CLIDR_CTYPE_IO) == 0) + continue; + WRITE_SPECIALREG(CSSELR_EL1, CSSELR_Level(i)); + cpu_desc[cpu].ccsidr[i][j] = READ_SPECIALREG(CCSIDR_EL1); + } + if (cpu != 0) { /* * This code must run on one cpu at a time, but we are diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index d528f1af2377..c3fc7da46c6e 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -45,6 +45,32 @@ #define UL(x) UINT64_C(x) +/* CCSIDR_EL1 - Cache Size ID Register */ +#define CCSIDR_NumSets_MASK 0x0FFFE000 +#define CCSIDR_NumSets64_MASK 0x00FFFFFF00000000 +#define CCSIDR_NumSets_SHIFT 13 +#define CCSIDR_NumSets64_SHIFT 32 +#define CCSIDR_Assoc_MASK 0x00001FF8 +#define CCSIDR_Assoc64_MASK 0x0000000000FFFFF8 +#define CCSIDR_Assoc_SHIFT 3 +#define CCSIDR_Assoc64_SHIFT 3 +#define CCSIDR_LineSize_MASK 0x7 +#define CCSIDR_NSETS(idr) \ + (((idr) & CCSIDR_NumSets_MASK) >> CCSIDR_NumSets_SHIFT) +#define CCSIDR_ASSOC(idr) \ + (((idr) & CCSIDR_Assoc_MASK) >> CCSIDR_Assoc_SHIFT) +#define CCSIDR_NSETS_64(idr) \ + (((idr) & CCSIDR_NumSets64_MASK) >> CCSIDR_NumSets64_SHIFT) +#define CCSIDR_ASSOC_64(idr) \ + (((idr) & CCSIDR_Assoc64_MASK) >> CCSIDR_Assoc64_SHIFT) + +/* CLIDR_EL1 - Cache level ID register */ +#define CLIDR_CTYPE_MASK 0x7 /* Cache type mask bits */ +#define CLIDR_CTYPE_IO 0x1 /* Instruction only */ +#define CLIDR_CTYPE_DO 0x2 /* Data only */ +#define CLIDR_CTYPE_ID 0x3 /* Split instruction and data */ +#define CLIDR_CTYPE_UNIFIED 0x4 /* Unified */ + /* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */ #define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */ #define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */ @@ -60,6 +86,10 @@ #define CPACR_FPEN_TRAP_NONE (0x3 << 20) /* No traps */ #define CPACR_TTA (0x1 << 28) +/* CSSELR_EL1 - Cache size selection register */ +#define CSSELR_Level(i) (i << 1) +#define CSSELR_InD 
0x00000001 + /* CTR_EL0 - Cache Type Register */ #define CTR_DLINE_SHIFT 16 #define CTR_DLINE_MASK (0xf << CTR_DLINE_SHIFT) |