aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarius Strobl <marius@FreeBSD.org>2011-07-02 11:14:54 +0000
committerMarius Strobl <marius@FreeBSD.org>2011-07-02 11:14:54 +0000
commit4a35efc72064b9d2a22ff24f2e5edc9d2aa9307a (patch)
tree507ef2da7eed7ae7dd4734f80eb1110001daa7f7
parent80006832f68246053b97877c5fdcf131282d519b (diff)
downloadsrc-4a35efc72064b9d2a22ff24f2e5edc9d2aa9307a.tar.gz
src-4a35efc72064b9d2a22ff24f2e5edc9d2aa9307a.zip
- For Cheetah- and Zeus-class CPUs don't flush all unlocked entries from
  the TLBs in order to get rid of the user mappings but instead traverse
  them and flush only the latter like we also do for the Spitfire-class.
  Also flushing the unlocked kernel entries can cause instant faults which
  when called from within cpu_switch() are handled with the scheduler lock
  held which in turn can cause timeouts on the acquisition of the lock by
  other CPUs. This was easily seen with a 16-core V890 but occasionally
  also happened with 2-way machines. While at it, move the SPARC64-V
  support code entirely to zeus.c. This causes a little bit of duplication
  but is less confusing than partially using Cheetah-class bits for these.
- For SPARC64-V ensure that 4-Mbyte page entries are stored in the
  1024-entry, 2-way set associative TLB.
- In {d,i}tlb_get_data_sun4u() turn off the interrupts in order to ensure
  that ASI_{D,I}TLB_DATA_ACCESS_REG actually are read twice back-to-back.

Tested by: Peter Jeremy (16-core US-IV), Michael Moll (2-way SPARC64-V)
Notes
Notes: svn path=/head/; revision=223719
-rw-r--r--sys/boot/sparc64/loader/main.c88
-rw-r--r--sys/sparc64/include/cache.h9
-rw-r--r--sys/sparc64/include/cpu.h1
-rw-r--r--sys/sparc64/include/tlb.h15
-rw-r--r--sys/sparc64/sparc64/cache.c6
-rw-r--r--sys/sparc64/sparc64/cheetah.c107
-rw-r--r--sys/sparc64/sparc64/machdep.c5
-rw-r--r--sys/sparc64/sparc64/mp_machdep.c6
-rw-r--r--sys/sparc64/sparc64/pmap.c22
-rw-r--r--sys/sparc64/sparc64/spitfire.c38
-rw-r--r--sys/sparc64/sparc64/zeus.c108
11 files changed, 312 insertions, 93 deletions
diff --git a/sys/boot/sparc64/loader/main.c b/sys/boot/sparc64/loader/main.c
index 9a3ae58b2ddf..be0819f35258 100644
--- a/sys/boot/sparc64/loader/main.c
+++ b/sys/boot/sparc64/loader/main.c
@@ -96,10 +96,10 @@ static struct mmu_ops {
typedef void kernel_entry_t(vm_offset_t mdp, u_long o1, u_long o2, u_long o3,
void *openfirmware);
-static inline u_long dtlb_get_data_sun4u(u_int);
+static inline u_long dtlb_get_data_sun4u(u_int, u_int);
static int dtlb_enter_sun4u(u_int, u_long data, vm_offset_t);
static vm_offset_t dtlb_va_to_pa_sun4u(vm_offset_t);
-static inline u_long itlb_get_data_sun4u(u_int);
+static inline u_long itlb_get_data_sun4u(u_int, u_int);
static int itlb_enter_sun4u(u_int, u_long data, vm_offset_t);
static vm_offset_t itlb_va_to_pa_sun4u(vm_offset_t);
static void itlb_relocate_locked0_sun4u(void);
@@ -136,6 +136,7 @@ u_int itlb_slot;
static int cpu_impl;
static u_int dtlb_slot_max;
static u_int itlb_slot_max;
+static u_int tlb_locked;
static vm_offset_t curkva = 0;
static vm_offset_t heapva;
@@ -355,42 +356,55 @@ __elfN(exec)(struct preloaded_file *fp)
}
static inline u_long
-dtlb_get_data_sun4u(u_int slot)
+dtlb_get_data_sun4u(u_int tlb, u_int slot)
{
+ u_long data, pstate;
+ slot = TLB_DAR_SLOT(tlb, slot);
/*
- * We read ASI_DTLB_DATA_ACCESS_REG twice in order to work
- * around errata of USIII and beyond.
+ * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
+ * work around errata of USIII and beyond.
*/
- (void)ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG);
- return (ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG));
+ pstate = rdpr(pstate);
+ wrpr(pstate, pstate & ~PSTATE_IE, 0);
+ (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ wrpr(pstate, pstate, 0);
+ return (data);
}
static inline u_long
-itlb_get_data_sun4u(u_int slot)
+itlb_get_data_sun4u(u_int tlb, u_int slot)
{
+ u_long data, pstate;
+ slot = TLB_DAR_SLOT(tlb, slot);
/*
- * We read ASI_ITLB_DATA_ACCESS_REG twice in order to work
- * around errata of USIII and beyond.
+ * We read ASI_ITLB_DATA_ACCESS_REG twice back-to-back in order to
+ * work around errata of USIII and beyond.
*/
- (void)ldxa(TLB_DAR_SLOT(slot), ASI_ITLB_DATA_ACCESS_REG);
- return (ldxa(TLB_DAR_SLOT(slot), ASI_ITLB_DATA_ACCESS_REG));
+ pstate = rdpr(pstate);
+ wrpr(pstate, pstate & ~PSTATE_IE, 0);
+ (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ wrpr(pstate, pstate, 0);
+ return (data);
}
static vm_offset_t
dtlb_va_to_pa_sun4u(vm_offset_t va)
{
u_long pstate, reg;
- int i;
+ u_int i, tlb;
pstate = rdpr(pstate);
wrpr(pstate, pstate & ~PSTATE_IE, 0);
for (i = 0; i < dtlb_slot_max; i++) {
- reg = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_TAG_READ_REG);
+ reg = ldxa(TLB_DAR_SLOT(tlb_locked, i),
+ ASI_DTLB_TAG_READ_REG);
if (TLB_TAR_VA(reg) != va)
continue;
- reg = dtlb_get_data_sun4u(i);
+ reg = dtlb_get_data_sun4u(tlb_locked, i);
wrpr(pstate, pstate, 0);
reg >>= TD_PA_SHIFT;
if (cpu_impl == CPU_IMPL_SPARC64V ||
@@ -411,10 +425,11 @@ itlb_va_to_pa_sun4u(vm_offset_t va)
pstate = rdpr(pstate);
wrpr(pstate, pstate & ~PSTATE_IE, 0);
for (i = 0; i < itlb_slot_max; i++) {
- reg = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_TAG_READ_REG);
+ reg = ldxa(TLB_DAR_SLOT(tlb_locked, i),
+ ASI_ITLB_TAG_READ_REG);
if (TLB_TAR_VA(reg) != va)
continue;
- reg = itlb_get_data_sun4u(i);
+ reg = itlb_get_data_sun4u(tlb_locked, i);
wrpr(pstate, pstate, 0);
reg >>= TD_PA_SHIFT;
if (cpu_impl == CPU_IMPL_SPARC64V ||
@@ -458,14 +473,14 @@ itlb_relocate_locked0_sun4u(void)
pstate = rdpr(pstate);
wrpr(pstate, pstate & ~PSTATE_IE, 0);
- data = itlb_get_data_sun4u(0);
+ data = itlb_get_data_sun4u(tlb_locked, 0);
if ((data & (TD_V | TD_L)) != (TD_V | TD_L)) {
wrpr(pstate, pstate, 0);
return;
}
/* Flush the mapping of slot 0. */
- tag = ldxa(TLB_DAR_SLOT(0), ASI_ITLB_TAG_READ_REG);
+ tag = ldxa(TLB_DAR_SLOT(tlb_locked, 0), ASI_ITLB_TAG_READ_REG);
stxa(TLB_DEMAP_VA(TLB_TAR_VA(tag)) | TLB_DEMAP_PRIMARY |
TLB_DEMAP_PAGE, ASI_IMMU_DEMAP, 0);
flush(0); /* The USIII-family ignores the address. */
@@ -475,11 +490,12 @@ itlb_relocate_locked0_sun4u(void)
* that formerly were in slot 0.
*/
for (i = 1; i < itlb_slot_max; i++) {
- if ((itlb_get_data_sun4u(i) & TD_V) != 0)
+ if ((itlb_get_data_sun4u(tlb_locked, i) & TD_V) != 0)
continue;
stxa(AA_IMMU_TAR, ASI_IMMU, tag);
- stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, data);
+ stxa(TLB_DAR_SLOT(tlb_locked, i), ASI_ITLB_DATA_ACCESS_REG,
+ data);
flush(0); /* The USIII-family ignores the address. */
break;
}
@@ -651,6 +667,26 @@ tlb_init_sun4u(void)
phandle_t bsp;
cpu_impl = VER_IMPL(rdpr(ver));
+ switch (cpu_impl) {
+ case CPU_IMPL_SPARC64:
+ case CPU_IMPL_ULTRASPARCI:
+ case CPU_IMPL_ULTRASPARCII:
+ case CPU_IMPL_ULTRASPARCIIi:
+ case CPU_IMPL_ULTRASPARCIIe:
+ tlb_locked = TLB_DAR_T32;
+ break;
+ case CPU_IMPL_ULTRASPARCIII:
+ case CPU_IMPL_ULTRASPARCIIIp:
+ case CPU_IMPL_ULTRASPARCIIIi:
+ case CPU_IMPL_ULTRASPARCIIIip:
+ case CPU_IMPL_ULTRASPARCIV:
+ case CPU_IMPL_ULTRASPARCIVp:
+ tlb_locked = TLB_DAR_T16;
+ break;
+ case CPU_IMPL_SPARC64V:
+ tlb_locked = TLB_DAR_FTLB;
+ break;
+ }
bsp = find_bsp_sun4u(OF_child(root), cpu_get_mid_sun4u());
if (bsp == 0)
panic("%s: no node for bootcpu?!?!", __func__);
@@ -821,21 +857,23 @@ pmap_print_tlb_sun4u(void)
pstate = rdpr(pstate);
for (i = 0; i < itlb_slot_max; i++) {
wrpr(pstate, pstate & ~PSTATE_IE, 0);
- tte = itlb_get_data_sun4u(i);
+ tte = itlb_get_data_sun4u(tlb_locked, i);
wrpr(pstate, pstate, 0);
if (!(tte & TD_V))
continue;
- tag = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_TAG_READ_REG);
+ tag = ldxa(TLB_DAR_SLOT(tlb_locked, i),
+ ASI_ITLB_TAG_READ_REG);
printf("iTLB-%2u: ", i);
pmap_print_tte_sun4u(tag, tte);
}
for (i = 0; i < dtlb_slot_max; i++) {
wrpr(pstate, pstate & ~PSTATE_IE, 0);
- tte = dtlb_get_data_sun4u(i);
+ tte = dtlb_get_data_sun4u(tlb_locked, i);
wrpr(pstate, pstate, 0);
if (!(tte & TD_V))
continue;
- tag = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_TAG_READ_REG);
+ tag = ldxa(TLB_DAR_SLOT(tlb_locked, i),
+ ASI_DTLB_TAG_READ_REG);
printf("dTLB-%2u: ", i);
pmap_print_tte_sun4u(tag, tte);
}
diff --git a/sys/sparc64/include/cache.h b/sys/sparc64/include/cache.h
index 3852da4e5113..3d0ac4f17648 100644
--- a/sys/sparc64/include/cache.h
+++ b/sys/sparc64/include/cache.h
@@ -110,15 +110,16 @@ cache_flush_t spitfire_cache_flush;
dcache_page_inval_t spitfire_dcache_page_inval;
icache_page_inval_t spitfire_icache_page_inval;
+cache_enable_t zeus_cache_enable;
+cache_flush_t zeus_cache_flush;
+dcache_page_inval_t zeus_dcache_page_inval;
+icache_page_inval_t zeus_icache_page_inval;
+
extern cache_enable_t *cache_enable;
extern cache_flush_t *cache_flush;
extern dcache_page_inval_t *dcache_page_inval;
extern icache_page_inval_t *icache_page_inval;
-cache_flush_t zeus_cache_flush;
-dcache_page_inval_t zeus_dcache_page_inval;
-icache_page_inval_t zeus_icache_page_inval;
-
#endif /* KERNEL */
#endif /* !LOCORE */
diff --git a/sys/sparc64/include/cpu.h b/sys/sparc64/include/cpu.h
index c0845a0771b1..1634477f4833 100644
--- a/sys/sparc64/include/cpu.h
+++ b/sys/sparc64/include/cpu.h
@@ -57,6 +57,7 @@ void cpu_halt(void);
void cpu_reset(void);
void fork_trampoline(void);
void swi_vm(void *v);
+void zeus_init(u_int cpu_impl);
static __inline u_int64_t
get_cyclecount(void)
diff --git a/sys/sparc64/include/tlb.h b/sys/sparc64/include/tlb.h
index b813b0fe28f7..3f2c3c57639b 100644
--- a/sys/sparc64/include/tlb.h
+++ b/sys/sparc64/include/tlb.h
@@ -44,7 +44,17 @@
(TD_V | TD_4M | (TLB_DIRECT_ADDRESS_MASK - TLB_DIRECT_PAGE_MASK))
#define TLB_DAR_SLOT_SHIFT (3)
-#define TLB_DAR_SLOT(slot) ((slot) << TLB_DAR_SLOT_SHIFT)
+#define TLB_DAR_TLB_SHIFT (16)
+#define TLB_DAR_SLOT(tlb, slot) \
+ ((tlb) << TLB_DAR_TLB_SHIFT | (slot) << TLB_DAR_SLOT_SHIFT)
+#define TLB_DAR_T16 (0) /* US-III{,i,+}, IV{,+} */
+#define TLB_DAR_T32 (0) /* US-I, II{,e,i} */
+#define TLB_DAR_DT512_0 (2) /* US-III{,i,+}, IV{,+} */
+#define TLB_DAR_DT512_1 (3) /* US-III{,i,+}, IV{,+} */
+#define TLB_DAR_IT128 (2) /* US-III{,i,+}, IV */
+#define TLB_DAR_IT512 (2) /* US-IV+ */
+#define TLB_DAR_FTLB (0) /* SPARC64 V, VI, VII, VIIIfx */
+#define TLB_DAR_STLB (2) /* SPARC64 V, VI, VII, VIIIfx */
#define TAR_VPN_SHIFT (13)
#define TAR_CTX_MASK ((1 << TAR_VPN_SHIFT) - 1)
@@ -156,6 +166,9 @@ tlb_flush_user_t cheetah_tlb_flush_user;
tlb_flush_nonlocked_t spitfire_tlb_flush_nonlocked;
tlb_flush_user_t spitfire_tlb_flush_user;
+tlb_flush_nonlocked_t zeus_tlb_flush_nonlocked;
+tlb_flush_user_t zeus_tlb_flush_user;
+
extern tlb_flush_nonlocked_t *tlb_flush_nonlocked;
extern tlb_flush_user_t *tlb_flush_user;
diff --git a/sys/sparc64/sparc64/cache.c b/sys/sparc64/sparc64/cache.c
index 636c18a2a0f2..d29a294fa1ef 100644
--- a/sys/sparc64/sparc64/cache.c
+++ b/sys/sparc64/sparc64/cache.c
@@ -169,12 +169,12 @@ cache_init(struct pcpu *pcpu)
tlb_flush_nonlocked = cheetah_tlb_flush_nonlocked;
tlb_flush_user = cheetah_tlb_flush_user;
} else if (pcpu->pc_impl == CPU_IMPL_SPARC64V) {
- cache_enable = cheetah_cache_enable;
+ cache_enable = zeus_cache_enable;
cache_flush = zeus_cache_flush;
dcache_page_inval = zeus_dcache_page_inval;
icache_page_inval = zeus_icache_page_inval;
- tlb_flush_nonlocked = cheetah_tlb_flush_nonlocked;
- tlb_flush_user = cheetah_tlb_flush_user;
+ tlb_flush_nonlocked = zeus_tlb_flush_nonlocked;
+ tlb_flush_user = zeus_tlb_flush_user;
} else if (pcpu->pc_impl >= CPU_IMPL_ULTRASPARCI &&
pcpu->pc_impl < CPU_IMPL_ULTRASPARCIII) {
cache_enable = spitfire_cache_enable;
diff --git a/sys/sparc64/sparc64/cheetah.c b/sys/sparc64/sparc64/cheetah.c
index 99d38c966ffb..8ecc62f65b52 100644
--- a/sys/sparc64/sparc64/cheetah.c
+++ b/sys/sparc64/sparc64/cheetah.c
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2003 Jake Burkholder.
- * Copyright (c) 2005, 2008, 2010 Marius Strobl <marius@FreeBSD.org>
+ * Copyright (c) 2005 - 2011 Marius Strobl <marius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,8 +28,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_pmap.h"
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
@@ -45,17 +43,19 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/dcr.h>
#include <machine/lsu.h>
-#include <machine/mcntl.h>
#include <machine/smp.h>
#include <machine/tlb.h>
#include <machine/ver.h>
#include <machine/vmparam.h>
#define CHEETAH_ICACHE_TAG_LOWER 0x30
+#define CHEETAH_T16_ENTRIES 16
+#define CHEETAH_DT512_ENTRIES 512
+#define CHEETAH_IT128_ENTRIES 128
+#define CHEETAH_IT512_ENTRIES 512
/*
- * CPU-specific initialization - this is used for both the Sun Cheetah and
- * later as well as the Fujitsu Zeus and later CPUs.
+ * CPU-specific initialization for Sun Cheetah and later CPUs
*/
void
cheetah_init(u_int cpu_impl)
@@ -78,14 +78,6 @@ cheetah_init(u_int cpu_impl)
stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0);
membar(Sync);
- if (cpu_impl == CPU_IMPL_SPARC64V) {
- /* Ensure MCNTL_JPS1_TSBP is 0. */
- val = ldxa(AA_MCNTL, ASI_MCNTL);
- val &= ~MCNTL_JPS1_TSBP;
- stxa(AA_MCNTL, ASI_MCNTL, val);
- return;
- }
-
/*
* Configure the first large dTLB to hold 4MB pages (e.g. for direct
* mappings) for all three contexts and ensure the second one is set
@@ -223,33 +215,92 @@ cheetah_icache_page_inval(vm_paddr_t pa __unused)
}
-#define cheetah_dmap_all() do { \
- stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0); \
- stxa(TLB_DEMAP_ALL, ASI_IMMU_DEMAP, 0); \
- flush(KERNBASE); \
-} while (0)
-
/*
- * Flush all non-locked mappings from the TLB.
+ * Flush all non-locked mappings from the TLBs.
*/
void
cheetah_tlb_flush_nonlocked(void)
{
- cheetah_dmap_all();
+ stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
+ stxa(TLB_DEMAP_ALL, ASI_IMMU_DEMAP, 0);
+ flush(KERNBASE);
}
/*
- * Flush all user mappings from the TLB.
+ * Flush all user mappings from the TLBs.
*/
void
-cheetah_tlb_flush_user()
+cheetah_tlb_flush_user(void)
{
+ u_long data, tag;
+ register_t s;
+ u_int i, slot;
/*
- * Just use cheetah_dmap_all() and accept somes TLB misses
- * rather than searching all 1040 D-TLB and 144 I-TLB slots
- * for non-kernel mappings.
+ * We read ASI_{D,I}TLB_DATA_ACCESS_REG twice back-to-back in order
+ * to work around errata of USIII and beyond.
*/
- cheetah_dmap_all();
+ for (i = 0; i < CHEETAH_T16_ENTRIES; i++) {
+ slot = TLB_DAR_SLOT(TLB_DAR_T16, i);
+ s = intr_disable();
+ (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ s = intr_disable();
+ (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
+ }
+ for (i = 0; i < CHEETAH_DT512_ENTRIES; i++) {
+ slot = TLB_DAR_SLOT(TLB_DAR_DT512_0, i);
+ s = intr_disable();
+ (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ slot = TLB_DAR_SLOT(TLB_DAR_DT512_1, i);
+ s = intr_disable();
+ (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ }
+ if (PCPU_GET(impl) == CPU_IMPL_ULTRASPARCIVp) {
+ for (i = 0; i < CHEETAH_IT512_ENTRIES; i++) {
+ slot = TLB_DAR_SLOT(TLB_DAR_IT512, i);
+ s = intr_disable();
+ (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
+ }
+ } else {
+ for (i = 0; i < CHEETAH_IT128_ENTRIES; i++) {
+ slot = TLB_DAR_SLOT(TLB_DAR_IT128, i);
+ s = intr_disable();
+ (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
+ }
+ }
}
diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c
index abf3a91a9215..c0cc75da6f5f 100644
--- a/sys/sparc64/sparc64/machdep.c
+++ b/sys/sparc64/sparc64/machdep.c
@@ -348,9 +348,10 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
/*
* Do CPU-specific initialization.
*/
- if (cpu_impl == CPU_IMPL_SPARC64V ||
- cpu_impl >= CPU_IMPL_ULTRASPARCIII)
+ if (cpu_impl >= CPU_IMPL_ULTRASPARCIII)
cheetah_init(cpu_impl);
+ else if (cpu_impl == CPU_IMPL_SPARC64V)
+ zeus_init(cpu_impl);
/*
* Clear (S)TICK timer (including NPT).
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 83d8e9f021cb..1345201510ee 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -428,9 +428,11 @@ cpu_mp_bootstrap(struct pcpu *pc)
csa = &cpu_start_args;
/* Do CPU-specific initialization. */
- if (pc->pc_impl == CPU_IMPL_SPARC64V ||
- pc->pc_impl >= CPU_IMPL_ULTRASPARCIII)
+ if (pc->pc_impl >= CPU_IMPL_ULTRASPARCIII)
cheetah_init(pc->pc_impl);
+ else if (pc->pc_impl == CPU_IMPL_SPARC64V)
+ zeus_init(pc->pc_impl);
+
/*
* Enable the caches. Note that his may include applying workarounds.
*/
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index f917af923304..9ec19e1fff1e 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -247,7 +247,7 @@ PMAP_STATS_VAR(pmap_ncopy_page_soc);
PMAP_STATS_VAR(pmap_nnew_thread);
PMAP_STATS_VAR(pmap_nnew_thread_oc);
-static inline u_long dtlb_get_data(u_int slot);
+static inline u_long dtlb_get_data(u_int tlb, u_int slot);
/*
* Quick sort callout for comparing memory regions
@@ -288,15 +288,21 @@ om_cmp(const void *a, const void *b)
}
static inline u_long
-dtlb_get_data(u_int slot)
+dtlb_get_data(u_int tlb, u_int slot)
{
+ u_long data;
+ register_t s;
+ slot = TLB_DAR_SLOT(tlb, slot);
/*
- * We read ASI_DTLB_DATA_ACCESS_REG twice in order to work
- * around errata of USIII and beyond.
+ * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
+ * work around errata of USIII and beyond.
*/
- (void)ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG);
- return (ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG));
+ s = intr_disable();
+ (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ intr_restore(s);
+ return (data);
}
/*
@@ -392,7 +398,9 @@ pmap_bootstrap(u_int cpu_impl)
} else {
dtlb_slots_avail = 0;
for (i = 0; i < dtlb_slots; i++) {
- data = dtlb_get_data(i);
+ data = dtlb_get_data(cpu_impl ==
+ CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
+ TLB_DAR_T32, i);
if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
dtlb_slots_avail++;
}
diff --git a/sys/sparc64/sparc64/spitfire.c b/sys/sparc64/sparc64/spitfire.c
index d6e25b970b73..7e51f2dca0c6 100644
--- a/sys/sparc64/sparc64/spitfire.c
+++ b/sys/sparc64/sparc64/spitfire.c
@@ -140,47 +140,45 @@ spitfire_icache_page_inval(vm_paddr_t pa)
}
/*
- * Flush all non-locked mappings from the TLB.
+ * Flush all non-locked mappings from the TLBs.
*/
void
spitfire_tlb_flush_nonlocked(void)
{
- int i;
+ u_int i;
+ u_int slot;
for (i = 0; i < SPITFIRE_TLB_ENTRIES; i++) {
- if ((ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG) &
- TD_L) == 0)
- stxa_sync(TLB_DAR_SLOT(i),
- ASI_DTLB_DATA_ACCESS_REG, 0);
- if ((ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG) &
- TD_L) == 0)
- stxa_sync(TLB_DAR_SLOT(i),
- ASI_ITLB_DATA_ACCESS_REG, 0);
+ slot = TLB_DAR_SLOT(TLB_DAR_T32, i);
+ if ((ldxa(slot, ASI_DTLB_DATA_ACCESS_REG) & TD_L) == 0)
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ if ((ldxa(slot, ASI_ITLB_DATA_ACCESS_REG) & TD_L) == 0)
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
}
}
/*
- * Flush all user mappings from the TLB.
+ * Flush all user mappings from the TLBs.
*/
void
spitfire_tlb_flush_user(void)
{
u_long data;
u_long tag;
- int i;
+ u_int i;
+ u_int slot;
for (i = 0; i < SPITFIRE_TLB_ENTRIES; i++) {
- data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG);
- tag = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_TAG_READ_REG);
+ slot = TLB_DAR_SLOT(TLB_DAR_T32, i);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
- stxa_sync(TLB_DAR_SLOT(i),
- ASI_DTLB_DATA_ACCESS_REG, 0);
- data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG);
- tag = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_TAG_READ_REG);
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
- stxa_sync(TLB_DAR_SLOT(i),
- ASI_ITLB_DATA_ACCESS_REG, 0);
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
}
}
diff --git a/sys/sparc64/sparc64/zeus.c b/sys/sparc64/sparc64/zeus.c
index a3dc4e56e922..e6a31aafe890 100644
--- a/sys/sparc64/sparc64/zeus.c
+++ b/sys/sparc64/sparc64/zeus.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2010 Marius Strobl <marius@FreeBSD.org>
+ * Copyright (c) 2010 - 2011 Marius Strobl <marius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,64 @@ __FBSDID("$FreeBSD$");
#include <machine/asi.h>
#include <machine/cache.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
+#include <machine/mcntl.h>
+#include <machine/lsu.h>
+#include <machine/tlb.h>
+#include <machine/tte.h>
+#include <machine/vmparam.h>
+
+#define ZEUS_FTLB_ENTRIES 32
+#define ZEUS_STLB_ENTRIES 2048
+
+/*
+ * CPU-specific initialization for Fujitsu Zeus CPUs
+ */
+void
+zeus_init(u_int cpu_impl)
+{
+ u_long val;
+
+ /* Ensure the TSB Extension Registers hold 0 as TSB_Base. */
+
+ stxa(AA_DMMU_TSB_PEXT_REG, ASI_DMMU, 0);
+ stxa(AA_IMMU_TSB_PEXT_REG, ASI_IMMU, 0);
+ membar(Sync);
+
+ stxa(AA_DMMU_TSB_SEXT_REG, ASI_DMMU, 0);
+ /*
+ * NB: the secondary context was removed from the iMMU.
+ */
+ membar(Sync);
+
+ stxa(AA_DMMU_TSB_NEXT_REG, ASI_DMMU, 0);
+ stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0);
+ membar(Sync);
+
+ val = ldxa(AA_MCNTL, ASI_MCNTL);
+ /* Ensure MCNTL_JPS1_TSBP is 0. */
+ val &= ~MCNTL_JPS1_TSBP;
+ /*
+ * Ensure 4-Mbyte page entries are stored in the 1024-entry, 2-way set
+ * associative TLB.
+ */
+ val = (val & ~MCNTL_RMD_MASK) | MCNTL_RMD_1024;
+ stxa(AA_MCNTL, ASI_MCNTL, val);
+}
+
+/*
+ * Enable level 1 caches.
+ */
+void
+zeus_cache_enable(u_int cpu_impl)
+{
+ u_long lsu;
+
+ lsu = ldxa(0, ASI_LSU_CTL_REG);
+ stxa(0, ASI_LSU_CTL_REG, lsu | LSU_IC | LSU_DC);
+ flush(KERNBASE);
+}
/*
* Flush all lines from the level 1 caches.
@@ -63,3 +120,52 @@ zeus_icache_page_inval(vm_paddr_t pa __unused)
{
}
+
+/*
+ * Flush all non-locked mappings from the TLBs.
+ */
+void
+zeus_tlb_flush_nonlocked(void)
+{
+
+ stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
+ stxa(TLB_DEMAP_ALL, ASI_IMMU_DEMAP, 0);
+ flush(KERNBASE);
+}
+
+/*
+ * Flush all user mappings from the TLBs.
+ */
+void
+zeus_tlb_flush_user(void)
+{
+ u_long data, tag;
+ u_int i, slot;
+
+ for (i = 0; i < ZEUS_FTLB_ENTRIES; i++) {
+ slot = TLB_DAR_SLOT(TLB_DAR_FTLB, i);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
+ }
+ for (i = 0; i < ZEUS_STLB_ENTRIES; i++) {
+ slot = TLB_DAR_SLOT(TLB_DAR_STLB, i);
+ data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
+ tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
+ data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
+ tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
+ if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
+ TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
+ stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
+ }
+}