aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Hibbits <jhibbits@FreeBSD.org>2022-05-31 15:40:20 +0000
committerJustin Hibbits <jhibbits@FreeBSD.org>2022-06-22 15:46:40 +0000
commitc70276686572fa4976babed8aa936ed3d3f2a52d (patch)
tree52aec5185f1bb1c494c73b230b9765782c94a80b
parentc20ed7dbba589efc9167dfdcc35c98da7f4f8021 (diff)
downloadsrc-c70276686572fa4976babed8aa936ed3d3f2a52d.tar.gz
src-c70276686572fa4976babed8aa936ed3d3f2a52d.zip
arm64: Print per-CPU cache summary
Summary: It can be useful to see a summary of CPU caches on bootup. This is done for most platforms already, so add this to arm64, in the form of (taken from Apple M1 Pro test):

    L1 cache: 192KB (instruction), 128KB (data)
    L2 cache: 12288KB (unified)

This is printed out per-CPU, only under bootverbose.

Future refinements could instead determine if a cache level is shared with other cores (L2 is shared among cores on some SoCs, for instance), and perform a better calculation of the full true cache sizes. For instance, it's known that the M1 Pro, on which this test was done, has 2 12MB L2 clusters, for a total of 24MB. Seeing each CPU with 12288KB L2 would make one think that there's 12MB * NCPUs, for possibly 120MB cache, which is incorrect.

Sponsored by: Juniper Networks, Inc.
Reviewed by: #arm64, andrew
Differential Revision: https://reviews.freebsd.org/D35366

(cherry picked from commit 139ba152c9c91fad9b63ccd2382a80f753f217b9)
-rw-r--r--sys/arm64/arm64/identcpu.c83
-rw-r--r--sys/arm64/include/armreg.h30
2 files changed, 112 insertions, 1 deletion
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 0b376cd02286..ed92cf412b33 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
static int ident_lock;
static void print_cpu_features(u_int cpu);
static u_long parse_cpu_features_hwcap(u_int cpu);
+static void print_cpu_caches(struct sbuf *sb, u_int);
char machine[] = "arm64";
@@ -59,6 +60,8 @@ static char cpu_model[64];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
cpu_model, sizeof(cpu_model), "Machine model");
+#define MAX_CACHES 8 /* Maximum number of caches supported
+ architecturally. */
/*
* Per-CPU affinity as provided in MPIDR_EL1
* Indexed by CPU number in logical order selected by the system.
@@ -92,6 +95,8 @@ struct cpu_desc {
uint64_t id_aa64mmfr2;
uint64_t id_aa64pfr0;
uint64_t id_aa64pfr1;
+ uint64_t clidr;
+ uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
};
struct cpu_desc cpu_desc[MAXCPU];
@@ -521,6 +526,62 @@ parse_cpu_features_hwcap(u_int cpu)
}
+/*
+ * Append one cache's summary to sb as "<size>KB (<kind>)", where <kind>
+ * is "instruction", "data", or "unified" per the icache/unified flags.
+ * ccs is the raw CCSIDR_EL1 value previously captured for this cache
+ * level/type in identify_cpu(); cpu selects whose ID_AA64MMFR2 copy
+ * decides the CCSIDR layout.
+ */
static void
+print_cpu_cache(u_int cpu, struct sbuf *sb, uint64_t ccs, bool icache,
+ bool unified)
+{
+ size_t cache_size;
+ size_t line_size;
+
+ /* LineSize is Log2(S) - 4. */
+ line_size = 1 << ((ccs & CCSIDR_LineSize_MASK) + 4);
+ /*
+ * Calculate cache size (sets * ways * line size). There are different
+ * formats depending on the FEAT_CCIDX bit in ID_AA64MMFR2 feature
+ * register.
+ */
+ if ((cpu_desc[cpu].id_aa64mmfr2 & ID_AA64MMFR2_CCIDX_64))
+ cache_size = (CCSIDR_NSETS_64(ccs) + 1) *
+ (CCSIDR_ASSOC_64(ccs) + 1);
+ else
+ cache_size = (CCSIDR_NSETS(ccs) + 1) * (CCSIDR_ASSOC(ccs) + 1);
+
+ /* Total bytes = sets * ways * line size; report in KB. */
+ cache_size *= line_size;
+ sbuf_printf(sb, "%zuKB (%s)", cache_size / 1024,
+ icache ? "instruction" : unified ? "unified" : "data");
+}
+
+/*
+ * Emit a per-CPU cache summary, one " L<n> cache: ..." line per cache
+ * level, into sb, then finish the sbuf and print it to the console.
+ *
+ * Walks the 3-bit Ctype fields of the CLIDR_EL1 value saved in
+ * cpu_desc[cpu].clidr, stopping at the first level with no cache.
+ * ccsidr[i][0] holds the instruction-side (or sole) CCSIDR_EL1 value for
+ * level i; ccsidr[i][1] holds the data/unified one when an instruction
+ * cache is also present — both captured earlier in identify_cpu().
+ */
+static void
+print_cpu_caches(struct sbuf *sb, u_int cpu)
+{
+ /* Print out each cache combination */
+ uint64_t clidr;
+ int i = 1; /* NOTE(review): initializer is dead; loop below resets i. */
+ clidr = cpu_desc[cpu].clidr;
+
+ for (i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) {
+ int j = 0;
+ int ctype_m = (clidr & CLIDR_CTYPE_MASK);
+
+ /* Cache levels are reported 1-based (L1, L2, ...). */
+ sbuf_printf(sb, " L%d cache: ", i + 1);
+ if ((clidr & CLIDR_CTYPE_IO)) {
+ /* Instruction cache present at this level. */
+ print_cpu_cache(cpu, sb, cpu_desc[cpu].ccsidr[i][j++],
+ true, false);
+ /* If there's more, add to the line. */
+ if ((ctype_m & ~CLIDR_CTYPE_IO) != 0)
+ sbuf_printf(sb, ", ");
+ }
+ if ((ctype_m & ~CLIDR_CTYPE_IO) != 0) {
+ /* Data-only or unified cache; label per Ctype. */
+ print_cpu_cache(cpu, sb, cpu_desc[cpu].ccsidr[i][j],
+ false, (clidr & CLIDR_CTYPE_UNIFIED));
+ }
+ sbuf_printf(sb, "\n");
+
+ }
+ sbuf_finish(sb);
+ printf("%s", sbuf_data(sb));
+}
+
+static void
print_cpu_features(u_int cpu)
{
struct sbuf *sb;
@@ -1351,6 +1412,8 @@ print_cpu_features(u_int cpu)
printf(" Auxiliary Features 1 = <%#lx>\n",
cpu_desc[cpu].id_aa64afr1);
}
+ if (bootverbose)
+ print_cpu_caches(sb, cpu);
sbuf_delete(sb);
sb = NULL;
@@ -1360,6 +1423,7 @@ print_cpu_features(u_int cpu)
void
identify_cpu(void)
{
+ uint64_t clidr;
u_int midr;
u_int impl_id;
u_int part_id;
@@ -1407,7 +1471,6 @@ identify_cpu(void)
/* Save affinity for current CPU */
cpu_desc[cpu].mpidr = get_mpidr();
CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK;
-
cpu_desc[cpu].id_aa64dfr0 = READ_SPECIALREG(ID_AA64DFR0_EL1);
cpu_desc[cpu].id_aa64dfr1 = READ_SPECIALREG(ID_AA64DFR1_EL1);
cpu_desc[cpu].id_aa64isar0 = READ_SPECIALREG(ID_AA64ISAR0_EL1);
@@ -1418,6 +1481,24 @@ identify_cpu(void)
cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(ID_AA64PFR0_EL1);
cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(ID_AA64PFR1_EL1);
+ cpu_desc[cpu].clidr = READ_SPECIALREG(clidr_el1);
+
+ clidr = cpu_desc[cpu].clidr;
+
+ for (int i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) {
+ int j = 0;
+ if ((clidr & CLIDR_CTYPE_IO)) {
+ WRITE_SPECIALREG(CSSELR_EL1,
+ CSSELR_Level(i) | CSSELR_InD);
+ cpu_desc[cpu].ccsidr[i][j++] =
+ READ_SPECIALREG(CCSIDR_EL1);
+ }
+ if ((clidr & ~CLIDR_CTYPE_IO) == 0)
+ continue;
+ WRITE_SPECIALREG(CSSELR_EL1, CSSELR_Level(i));
+ cpu_desc[cpu].ccsidr[i][j] = READ_SPECIALREG(CCSIDR_EL1);
+ }
+
if (cpu != 0) {
/*
* This code must run on one cpu at a time, but we are
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
index d528f1af2377..c3fc7da46c6e 100644
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -45,6 +45,32 @@
#define UL(x) UINT64_C(x)
+/* CCSIDR_EL1 - Cache Size ID Register */
+#define CCSIDR_NumSets_MASK 0x0FFFE000
+#define CCSIDR_NumSets64_MASK 0x00FFFFFF00000000
+#define CCSIDR_NumSets_SHIFT 13
+#define CCSIDR_NumSets64_SHIFT 32
+#define CCSIDR_Assoc_MASK 0x00001FF8
+#define CCSIDR_Assoc64_MASK 0x0000000000FFFFF8
+#define CCSIDR_Assoc_SHIFT 3
+#define CCSIDR_Assoc64_SHIFT 3
+#define CCSIDR_LineSize_MASK 0x7
+#define CCSIDR_NSETS(idr) \
+ (((idr) & CCSIDR_NumSets_MASK) >> CCSIDR_NumSets_SHIFT)
+#define CCSIDR_ASSOC(idr) \
+ (((idr) & CCSIDR_Assoc_MASK) >> CCSIDR_Assoc_SHIFT)
+#define CCSIDR_NSETS_64(idr) \
+ (((idr) & CCSIDR_NumSets64_MASK) >> CCSIDR_NumSets64_SHIFT)
+#define CCSIDR_ASSOC_64(idr) \
+ (((idr) & CCSIDR_Assoc64_MASK) >> CCSIDR_Assoc64_SHIFT)
+
+/* CLIDR_EL1 - Cache level ID register */
+#define CLIDR_CTYPE_MASK 0x7 /* Cache type mask bits */
+#define CLIDR_CTYPE_IO 0x1 /* Instruction only */
+#define CLIDR_CTYPE_DO 0x2 /* Data only */
+#define CLIDR_CTYPE_ID 0x3 /* Split instruction and data */
+#define CLIDR_CTYPE_UNIFIED 0x4 /* Unified */
+
/* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */
#define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */
#define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */
@@ -60,6 +86,10 @@
#define CPACR_FPEN_TRAP_NONE (0x3 << 20) /* No traps */
#define CPACR_TTA (0x1 << 28)
+/* CSSELR_EL1 - Cache size selection register */
+#define CSSELR_Level(i) (i << 1)
+#define CSSELR_InD 0x00000001
+
/* CTR_EL0 - Cache Type Register */
#define CTR_DLINE_SHIFT 16
#define CTR_DLINE_MASK (0xf << CTR_DLINE_SHIFT)