Diffstat (limited to 'sys/x86/iommu')
-rw-r--r--  sys/x86/iommu/intel_ctx.c       100
-rw-r--r--  sys/x86/iommu/intel_dmar.h      116
-rw-r--r--  sys/x86/iommu/intel_drv.c       236
-rw-r--r--  sys/x86/iommu/intel_fault.c      17
-rw-r--r--  sys/x86/iommu/intel_idpgtbl.c   135
-rw-r--r--  sys/x86/iommu/intel_intrmap.c    27
-rw-r--r--  sys/x86/iommu/intel_qi.c        365
-rw-r--r--  sys/x86/iommu/intel_quirks.c      8
-rw-r--r--  sys/x86/iommu/intel_reg.h        15
-rw-r--r--  sys/x86/iommu/intel_utils.c     157
-rw-r--r--  sys/x86/iommu/iommu_utils.c     751
-rw-r--r--  sys/x86/iommu/x86_iommu.h       196
12 files changed, 1314 insertions, 809 deletions
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 49c87cf0b39f..5047acd283e9 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -65,6 +65,7 @@
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
@@ -74,6 +75,9 @@ static void dmar_unref_domain_locked(struct dmar_unit *dmar,
struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);
+static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
+static void dmar_free_ctx(struct dmar_ctx *ctx);
+
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
@@ -84,7 +88,7 @@ dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
/*
* Allocated context page must be linked.
*/
- ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
+ ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
if (ctxm != NULL)
return;
@@ -95,14 +99,14 @@ dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
* threads are equal.
*/
TD_PREP_PINNED_ASSERT;
- ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
+ ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
IOMMU_PGF_WAITOK);
- re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
+ re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
re += bus;
dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
VM_PAGE_TO_PHYS(ctxm)));
dmar_flush_root_to_ram(dmar, re);
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
TD_PINNED_ASSERT;
}
@@ -114,32 +118,13 @@ dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
dmar = CTX2DMAR(ctx);
- ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
+ ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
ctxp += ctx->context.rid & 0xff;
return (ctxp);
}
static void
-device_tag_init(struct dmar_ctx *ctx, device_t dev)
-{
- struct dmar_domain *domain;
- bus_addr_t maxaddr;
-
- domain = CTX2DOM(ctx);
- maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
- ctx->context.tag->common.impl = &bus_dma_iommu_impl;
- ctx->context.tag->common.boundary = 0;
- ctx->context.tag->common.lowaddr = maxaddr;
- ctx->context.tag->common.highaddr = maxaddr;
- ctx->context.tag->common.maxsize = maxaddr;
- ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
- ctx->context.tag->common.maxsegsz = maxaddr;
- ctx->context.tag->ctx = CTX2IOCTX(ctx);
- ctx->context.tag->owner = dev;
-}
-
-static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
vm_page_t ctx_root)
{
@@ -186,7 +171,7 @@ ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
("ctx %p non-null pgtbl_obj", ctx));
ctx_root = NULL;
} else {
- ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
+ ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0,
IOMMU_PGF_NOALLOC);
}
@@ -272,7 +257,7 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
"region (%jx, %jx) corrected\n",
domain->iodom.iommu->unit, start, end);
}
- entry->end += DMAR_PAGE_SIZE * 0x20;
+ entry->end += IOMMU_PAGE_SIZE * 0x20;
}
size = OFF_TO_IDX(entry->end - entry->start);
ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
@@ -419,7 +404,7 @@ dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
}
domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
} else {
- error = domain_alloc_pgtbl(domain);
+ error = dmar_domain_alloc_pgtbl(domain);
if (error != 0)
goto fail;
/* Disable local apic region access */
@@ -505,7 +490,7 @@ dmar_domain_destroy(struct dmar_domain *domain)
if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
if (domain->pgtbl_obj != NULL)
DMAR_DOMAIN_PGLOCK(domain);
- domain_free_pgtbl(domain);
+ dmar_domain_free_pgtbl(domain);
}
iommu_domain_fini(iodom);
dmar = DOM2DMAR(domain);
@@ -582,7 +567,7 @@ dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
ctx = ctx1;
dmar_ctx_link(ctx);
ctx->context.tag->owner = dev;
- device_tag_init(ctx, dev);
+ iommu_device_tag_init(CTX2IOCTX(ctx), dev);
/*
* This is the first activated context for the
@@ -601,9 +586,9 @@ dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
func, rid, domain->domain, domain->mgaw,
domain->agaw, id_mapped ? "id" : "re");
}
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
} else {
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
dmar_domain_destroy(domain1);
/* Nothing needs to be done to destroy ctx1. */
free(ctx1, M_DMAR_CTX);
@@ -703,7 +688,7 @@ dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
ctx->context.domain = &domain->iodom;
dmar_ctx_link(ctx);
ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
error = dmar_flush_for_ctx_entry(dmar, true);
/* If flush failed, rolling back would not work as well. */
printf("dmar%d rid %x domain %d->%d %s-mapped\n",
@@ -744,7 +729,7 @@ dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
dmar_domain_destroy(domain);
}
-void
+static void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
struct sf_buf *sf;
@@ -787,7 +772,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
if (ctx->refs > 1) {
ctx->refs--;
DMAR_UNLOCK(dmar);
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
TD_PINNED_ASSERT;
return;
}
@@ -809,7 +794,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
else
dmar_inv_iotlb_glob(dmar);
}
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
domain = CTX2DOM(ctx);
dmar_ctx_unlink(ctx);
free(ctx->context.tag, M_DMAR_CTX);
@@ -818,7 +803,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
TD_PINNED_ASSERT;
}
-void
+static void
dmar_free_ctx(struct dmar_ctx *ctx)
{
struct dmar_unit *dmar;
@@ -848,25 +833,12 @@ dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
return (NULL);
}
-void
-dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
-{
- if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
- iommu_gas_free_region(entry);
- else
- iommu_gas_free_space(entry);
- if (free)
- iommu_gas_free_entry(entry);
- else
- entry->flags = 0;
-}
-
/*
* If the given value for "free" is true, then the caller must not be using
* the entry's dmamap_link field.
*/
void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep)
{
struct dmar_domain *domain;
@@ -883,17 +855,18 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
if (unit->qi_enabled) {
if (free) {
DMAR_LOCK(unit);
- dmar_qi_invalidate_locked(domain, entry, true);
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
+ true);
DMAR_UNLOCK(unit);
} else {
- dmar_qi_invalidate_sync(domain, entry->start,
+ iommu_qi_invalidate_sync(&domain->iodom, entry->start,
entry->end - entry->start, cansleep);
- dmar_domain_free_entry(entry, false);
+ iommu_domain_free_entry(entry, false);
}
} else {
domain_flush_iotlb_sync(domain, entry->start, entry->end -
entry->start);
- dmar_domain_free_entry(entry, free);
+ iommu_domain_free_entry(entry, free);
}
}
@@ -904,11 +877,11 @@ dmar_domain_unload_emit_wait(struct dmar_domain *domain,
if (TAILQ_NEXT(entry, dmamap_link) == NULL)
return (true);
- return (domain->batch_no++ % dmar_batch_coalesce == 0);
+ return (domain->batch_no++ % iommu_qi_batch_coalesce == 0);
}
void
-iommu_domain_unload(struct iommu_domain *iodom,
+dmar_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
struct dmar_domain *domain;
@@ -929,7 +902,7 @@ iommu_domain_unload(struct iommu_domain *iodom,
domain_flush_iotlb_sync(domain, entry->start,
entry->end - entry->start);
TAILQ_REMOVE(entries, entry, dmamap_link);
- dmar_domain_free_entry(entry, true);
+ iommu_domain_free_entry(entry, true);
}
}
if (TAILQ_EMPTY(entries))
@@ -939,44 +912,41 @@ iommu_domain_unload(struct iommu_domain *iodom,
DMAR_LOCK(unit);
while ((entry = TAILQ_FIRST(entries)) != NULL) {
TAILQ_REMOVE(entries, entry, dmamap_link);
- dmar_qi_invalidate_locked(domain, entry,
+ iommu_qi_invalidate_locked(&domain->iodom, entry,
dmar_domain_unload_emit_wait(domain, entry));
}
DMAR_UNLOCK(unit);
}
struct iommu_ctx *
-iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
+dmar_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
bool id_mapped, bool rmrr_init)
{
struct dmar_unit *dmar;
struct dmar_ctx *ret;
dmar = IOMMU2DMAR(iommu);
-
ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
-
return (CTX2IOCTX(ret));
}
void
-iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
+dmar_free_ctx_locked_method(struct iommu_unit *iommu,
+ struct iommu_ctx *context)
{
struct dmar_unit *dmar;
struct dmar_ctx *ctx;
dmar = IOMMU2DMAR(iommu);
ctx = IOCTX2CTX(context);
-
dmar_free_ctx_locked(dmar, ctx);
}
void
-iommu_free_ctx(struct iommu_ctx *context)
+dmar_free_ctx_method(struct iommu_ctx *context)
{
struct dmar_ctx *ctx;
ctx = IOCTX2CTX(context);
-
dmar_free_ctx(ctx);
}
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index e20144094c80..188e40dec36c 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -87,15 +87,15 @@ struct dmar_ctx {
#define DMAR_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock)
#define DMAR_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->iodom.lock, MA_OWNED)
-#define DMAR2IOMMU(dmar) &((dmar)->iommu)
+#define DMAR2IOMMU(dmar) (&((dmar)->iommu))
#define IOMMU2DMAR(dmar) \
__containerof((dmar), struct dmar_unit, iommu)
-#define DOM2IODOM(domain) &((domain)->iodom)
+#define DOM2IODOM(domain) (&((domain)->iodom))
#define IODOM2DOM(domain) \
__containerof((domain), struct dmar_domain, iodom)
-#define CTX2IOCTX(ctx) &((ctx)->context)
+#define CTX2IOCTX(ctx) (&((ctx)->context))
#define IOCTX2CTX(ctx) \
__containerof((ctx), struct dmar_ctx, context)
@@ -103,27 +103,13 @@ struct dmar_ctx {
#define CTX2DMAR(ctx) (CTX2DOM(ctx)->dmar)
#define DOM2DMAR(domain) ((domain)->dmar)
-struct dmar_msi_data {
- int irq;
- int irq_rid;
- struct resource *irq_res;
- void *intr_handle;
- int (*handler)(void *);
- int msi_data_reg;
- int msi_addr_reg;
- int msi_uaddr_reg;
- void (*enable_intr)(struct dmar_unit *);
- void (*disable_intr)(struct dmar_unit *);
- const char *name;
-};
-
#define DMAR_INTR_FAULT 0
#define DMAR_INTR_QI 1
#define DMAR_INTR_TOTAL 2
struct dmar_unit {
struct iommu_unit iommu;
- device_t dev;
+ struct x86_unit_common x86c;
uint16_t segment;
uint64_t base;
@@ -131,8 +117,6 @@ struct dmar_unit {
int reg_rid;
struct resource *regs;
- struct dmar_msi_data intrs[DMAR_INTR_TOTAL];
-
/* Hardware registers cache */
uint32_t hw_ver;
uint64_t hw_cap;
@@ -156,17 +140,6 @@ struct dmar_unit {
/* QI */
int qi_enabled;
- char *inv_queue;
- vm_size_t inv_queue_size;
- uint32_t inv_queue_avail;
- uint32_t inv_queue_tail;
- volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
- descr completion */
- uint64_t inv_waitd_seq_hw_phys;
- uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
- u_int inv_waitd_gen; /* seq number generation AKA seq overflows */
- u_int inv_seq_waiters; /* count of waiters for seq */
- u_int inv_queue_full; /* informational counter */
/* IR */
int ir_enabled;
@@ -174,41 +147,11 @@ struct dmar_unit {
dmar_irte_t *irt;
u_int irte_cnt;
vmem_t *irtids;
-
- /*
- * Delayed freeing of map entries queue processing:
- *
- * tlb_flush_head and tlb_flush_tail are used to implement a FIFO
- * queue that supports concurrent dequeues and enqueues. However,
- * there can only be a single dequeuer (accessing tlb_flush_head) and
- * a single enqueuer (accessing tlb_flush_tail) at a time. Since the
- * unit's qi_task is the only dequeuer, it can access tlb_flush_head
- * without any locking. In contrast, there may be multiple enqueuers,
- * so the enqueuers acquire the iommu unit lock to serialize their
- * accesses to tlb_flush_tail.
- *
- * In this FIFO queue implementation, the key to enabling concurrent
- * dequeues and enqueues is that the dequeuer never needs to access
- * tlb_flush_tail and the enqueuer never needs to access
- * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail
- * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
- * update both. Instead, tlb_flush_head always points to a "zombie"
- * struct, which previously held the last dequeued item. Thus, the
- * zombie's next field actually points to the struct holding the first
- * item in the queue. When an item is dequeued, the current zombie is
- * finally freed, and the struct that held the just dequeued item
- * becomes the new zombie. When the queue is empty, tlb_flush_tail
- * also points to the zombie.
- */
- struct iommu_map_entry *tlb_flush_head;
- struct iommu_map_entry *tlb_flush_tail;
- struct task qi_task;
- struct taskqueue *qi_taskqueue;
};
-#define DMAR_LOCK(dmar) mtx_lock(&(dmar)->iommu.lock)
-#define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->iommu.lock)
-#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->iommu.lock, MA_OWNED)
+#define DMAR_LOCK(dmar) mtx_lock(&DMAR2IOMMU(dmar)->lock)
+#define DMAR_UNLOCK(dmar) mtx_unlock(&DMAR2IOMMU(dmar)->lock)
+#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&DMAR2IOMMU(dmar)->lock, MA_OWNED)
#define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock)
#define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock)
@@ -223,6 +166,8 @@ struct dmar_unit {
#define DMAR_BARRIER_RMRR 0
#define DMAR_BARRIER_USEQ 1
+SYSCTL_DECL(_hw_iommu_dmar);
+
struct dmar_unit *dmar_find(device_t dev, bool verbose);
struct dmar_unit *dmar_find_hpet(device_t dev, uint16_t *rid);
struct dmar_unit *dmar_find_ioapic(u_int apic_id, uint16_t *rid);
@@ -232,22 +177,15 @@ bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl);
int domain_set_agaw(struct dmar_domain *domain, int mgaw);
int dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr,
bool allow_less);
-vm_pindex_t pglvl_max_pages(int pglvl);
int domain_is_sp_lvl(struct dmar_domain *domain, int lvl);
-iommu_gaddr_t pglvl_page_size(int total_pglvl, int lvl);
iommu_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl);
int calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size,
iommu_gaddr_t *isizep);
-struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags);
-void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags);
-void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
- struct sf_buf **sf);
-void dmar_unmap_pgtbl(struct sf_buf *sf);
int dmar_load_root_entry_ptr(struct dmar_unit *unit);
int dmar_inv_ctx_glob(struct dmar_unit *unit);
int dmar_inv_iotlb_glob(struct dmar_unit *unit);
int dmar_flush_write_bufs(struct dmar_unit *unit);
-void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst);
+void dmar_flush_pte_to_ram(struct dmar_unit *unit, iommu_pte_t *dst);
void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst);
void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst);
int dmar_disable_protected_regions(struct dmar_unit *unit);
@@ -262,14 +200,14 @@ uint64_t dmar_get_timeout(void);
void dmar_update_timeout(uint64_t newval);
int dmar_fault_intr(void *arg);
-void dmar_enable_fault_intr(struct dmar_unit *unit);
-void dmar_disable_fault_intr(struct dmar_unit *unit);
+void dmar_enable_fault_intr(struct iommu_unit *unit);
+void dmar_disable_fault_intr(struct iommu_unit *unit);
int dmar_init_fault_log(struct dmar_unit *unit);
void dmar_fini_fault_log(struct dmar_unit *unit);
int dmar_qi_intr(void *arg);
-void dmar_enable_qi_intr(struct dmar_unit *unit);
-void dmar_disable_qi_intr(struct dmar_unit *unit);
+void dmar_enable_qi_intr(struct iommu_unit *unit);
+void dmar_disable_qi_intr(struct iommu_unit *unit);
int dmar_init_qi(struct dmar_unit *unit);
void dmar_fini_qi(struct dmar_unit *unit);
void dmar_qi_invalidate_locked(struct dmar_domain *domain,
@@ -286,8 +224,8 @@ vm_object_t domain_get_idmap_pgtbl(struct dmar_domain *domain,
void put_idmap_pgtbl(vm_object_t obj);
void domain_flush_iotlb_sync(struct dmar_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size);
-int domain_alloc_pgtbl(struct dmar_domain *domain);
-void domain_free_pgtbl(struct dmar_domain *domain);
+int dmar_domain_alloc_pgtbl(struct dmar_domain *domain);
+void dmar_domain_free_pgtbl(struct dmar_domain *domain);
extern const struct iommu_domain_map_ops dmar_domain_map_ops;
int dmar_dev_depth(device_t child);
@@ -299,10 +237,16 @@ struct dmar_ctx *dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
bool id_mapped, bool rmrr_init);
int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx);
-void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
-void dmar_free_ctx(struct dmar_ctx *ctx);
+void dmar_free_ctx_locked_method(struct iommu_unit *dmar,
+ struct iommu_ctx *ctx);
+void dmar_free_ctx_method(struct iommu_ctx *ctx);
struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid);
-void dmar_domain_free_entry(struct iommu_map_entry *entry, bool free);
+struct iommu_ctx *dmar_get_ctx(struct iommu_unit *iommu, device_t dev,
+ uint16_t rid, bool id_mapped, bool rmrr_init);
+void dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep);
+void dmar_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep);
void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain,
int dev_busno, const void *dev_path, int dev_path_len,
@@ -314,11 +258,15 @@ void dmar_quirks_pre_use(struct iommu_unit *dmar);
int dmar_init_irt(struct dmar_unit *unit);
void dmar_fini_irt(struct dmar_unit *unit);
+int dmar_alloc_msi_intr(device_t src, u_int *cookies, u_int count);
+int dmar_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
+ uint64_t *addr, uint32_t *data);
+int dmar_unmap_msi_intr(device_t src, u_int cookie);
+int dmar_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
+ bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo);
+int dmar_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie);
-extern iommu_haddr_t dmar_high;
extern int haw;
-extern int dmar_tbl_pagecnt;
-extern int dmar_batch_coalesce;
extern int dmar_rmrr_enable;
static inline uint32_t
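
The intel_dmar.h hunks above strip struct dmar_unit of its per-driver MSI bookkeeping, the invalidation-queue state, and the delayed-freeing FIFO whose design the removed comment documents (a single dequeuer owning tlb_flush_head, enqueuers serialized on the unit lock owning tlb_flush_tail, and a permanent "zombie" element at the head); those fields move into the shared struct x86_unit_common. A minimal sketch of that FIFO discipline follows; the node type, helper names, and the free() call are assumptions made for the example, and in the driver the link is the tlb_flush_next field of struct iommu_map_entry.

/*
 * Illustrative sketch only -- not part of the patch.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <machine/atomic.h>

struct tf_node {
	struct tf_node *next;		/* stands in for tlb_flush_next */
};

/* Enqueue: callers serialize on the unit lock (multiple producers). */
static void
tf_enqueue(struct tf_node **tailp, struct tf_node *n)
{
	n->next = NULL;
	/* Publish the fully initialized node before linking it. */
	atomic_store_rel_ptr((uintptr_t *)&(*tailp)->next, (uintptr_t)n);
	*tailp = n;
}

/*
 * Dequeue: only the unit's qi task calls this, so no lock is taken.
 * The head always points at a zombie node; its next pointer is the
 * first real element, and the dequeued node becomes the new zombie.
 */
static struct tf_node *
tf_dequeue(struct tf_node **headp)
{
	struct tf_node *zombie, *n;

	zombie = *headp;
	n = (struct tf_node *)atomic_load_acq_ptr((uintptr_t *)&zombie->next);
	if (n == NULL)
		return (NULL);		/* empty: tail also points at zombie */
	*headp = n;			/* n becomes the new zombie */
	free(zombie, M_TEMP);		/* the old zombie is finally freed */
	return (n);
}
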
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index 7346162d1502..05fb49538add 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -64,9 +64,12 @@
#include <dev/pci/pcivar.h>
#include <machine/bus.h>
#include <machine/pci_cfgreg.h>
+#include <machine/md_var.h>
+#include <machine/cputypes.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
#ifdef DEV_APIC
@@ -179,9 +182,9 @@ dmar_identify(driver_t *driver, device_t parent)
return;
haw = dmartbl->Width + 1;
if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR)
- dmar_high = BUS_SPACE_MAXADDR;
+ iommu_high = BUS_SPACE_MAXADDR;
else
- dmar_high = 1ULL << (haw + 1);
+ iommu_high = 1ULL << (haw + 1);
if (bootverbose) {
printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width,
(unsigned)dmartbl->Flags,
@@ -228,22 +231,6 @@ dmar_probe(device_t dev)
}
static void
-dmar_release_intr(device_t dev, struct dmar_unit *unit, int idx)
-{
- struct dmar_msi_data *dmd;
-
- dmd = &unit->intrs[idx];
- if (dmd->irq == -1)
- return;
- bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
- bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
- bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
- PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
- dev, dmd->irq);
- dmd->irq = -1;
-}
-
-static void
dmar_release_resources(device_t dev, struct dmar_unit *unit)
{
int i;
@@ -253,7 +240,7 @@ dmar_release_resources(device_t dev, struct dmar_unit *unit)
dmar_fini_qi(unit);
dmar_fini_fault_log(unit);
for (i = 0; i < DMAR_INTR_TOTAL; i++)
- dmar_release_intr(dev, unit, i);
+ iommu_release_intr(DMAR2IOMMU(unit), i);
if (unit->regs != NULL) {
bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
unit->regs);
@@ -271,84 +258,19 @@ dmar_release_resources(device_t dev, struct dmar_unit *unit)
}
}
-static int
-dmar_alloc_irq(device_t dev, struct dmar_unit *unit, int idx)
-{
- device_t pcib;
- struct dmar_msi_data *dmd;
- uint64_t msi_addr;
- uint32_t msi_data;
- int error;
-
- dmd = &unit->intrs[idx];
- pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
- error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
- if (error != 0) {
- device_printf(dev, "cannot allocate %s interrupt, %d\n",
- dmd->name, error);
- goto err1;
- }
- error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
- dmd->irq, 1);
- if (error != 0) {
- device_printf(dev, "cannot set %s interrupt resource, %d\n",
- dmd->name, error);
- goto err2;
- }
- dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
- &dmd->irq_rid, RF_ACTIVE);
- if (dmd->irq_res == NULL) {
- device_printf(dev,
- "cannot allocate resource for %s interrupt\n", dmd->name);
- error = ENXIO;
- goto err3;
- }
- error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
- dmd->handler, NULL, unit, &dmd->intr_handle);
- if (error != 0) {
- device_printf(dev, "cannot setup %s interrupt, %d\n",
- dmd->name, error);
- goto err4;
- }
- bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
- error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
- if (error != 0) {
- device_printf(dev, "cannot map %s interrupt, %d\n",
- dmd->name, error);
- goto err5;
- }
- dmar_write4(unit, dmd->msi_data_reg, msi_data);
- dmar_write4(unit, dmd->msi_addr_reg, msi_addr);
- /* Only for xAPIC mode */
- dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32);
- return (0);
-
-err5:
- bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
-err4:
- bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
-err3:
- bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
-err2:
- PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
- dmd->irq = -1;
-err1:
- return (error);
-}
-
#ifdef DEV_APIC
static int
dmar_remap_intr(device_t dev, device_t child, u_int irq)
{
struct dmar_unit *unit;
- struct dmar_msi_data *dmd;
+ struct iommu_msi_data *dmd;
uint64_t msi_addr;
uint32_t msi_data;
int i, error;
unit = device_get_softc(dev);
for (i = 0; i < DMAR_INTR_TOTAL; i++) {
- dmd = &unit->intrs[i];
+ dmd = &unit->x86c.intrs[i];
if (irq == dmd->irq) {
error = PCIB_MAP_MSI(device_get_parent(
device_get_parent(dev)),
@@ -356,11 +278,14 @@ dmar_remap_intr(device_t dev, device_t child, u_int irq)
if (error != 0)
return (error);
DMAR_LOCK(unit);
- (dmd->disable_intr)(unit);
- dmar_write4(unit, dmd->msi_data_reg, msi_data);
- dmar_write4(unit, dmd->msi_addr_reg, msi_addr);
- dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32);
- (dmd->enable_intr)(unit);
+ dmd->msi_data = msi_data;
+ dmd->msi_addr = msi_addr;
+ (dmd->disable_intr)(DMAR2IOMMU(unit));
+ dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
+ dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
+ dmar_write4(unit, dmd->msi_uaddr_reg,
+ dmd->msi_addr >> 32);
+ (dmd->enable_intr)(DMAR2IOMMU(unit));
DMAR_UNLOCK(unit);
return (0);
}
@@ -404,12 +329,12 @@ dmar_attach(device_t dev)
{
struct dmar_unit *unit;
ACPI_DMAR_HARDWARE_UNIT *dmaru;
+ struct iommu_msi_data *dmd;
uint64_t timeout;
int disable_pmr;
int i, error;
unit = device_get_softc(dev);
- unit->dev = dev;
unit->iommu.unit = device_get_unit(dev);
unit->iommu.dev = dev;
dmaru = dmar_find_by_index(unit->iommu.unit);
@@ -422,6 +347,7 @@ dmar_attach(device_t dev)
&unit->reg_rid, RF_ACTIVE);
if (unit->regs == NULL) {
device_printf(dev, "cannot allocate register window\n");
+ dmar_devs[unit->iommu.unit] = NULL;
return (ENOMEM);
}
unit->hw_ver = dmar_read4(unit, DMAR_VER_REG);
@@ -436,35 +362,47 @@ dmar_attach(device_t dev)
dmar_update_timeout(timeout);
for (i = 0; i < DMAR_INTR_TOTAL; i++)
- unit->intrs[i].irq = -1;
-
- unit->intrs[DMAR_INTR_FAULT].name = "fault";
- unit->intrs[DMAR_INTR_FAULT].irq_rid = DMAR_FAULT_IRQ_RID;
- unit->intrs[DMAR_INTR_FAULT].handler = dmar_fault_intr;
- unit->intrs[DMAR_INTR_FAULT].msi_data_reg = DMAR_FEDATA_REG;
- unit->intrs[DMAR_INTR_FAULT].msi_addr_reg = DMAR_FEADDR_REG;
- unit->intrs[DMAR_INTR_FAULT].msi_uaddr_reg = DMAR_FEUADDR_REG;
- unit->intrs[DMAR_INTR_FAULT].enable_intr = dmar_enable_fault_intr;
- unit->intrs[DMAR_INTR_FAULT].disable_intr = dmar_disable_fault_intr;
- error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT);
+ unit->x86c.intrs[i].irq = -1;
+
+ dmd = &unit->x86c.intrs[DMAR_INTR_FAULT];
+ dmd->name = "fault";
+ dmd->irq_rid = DMAR_FAULT_IRQ_RID;
+ dmd->handler = dmar_fault_intr;
+ dmd->msi_data_reg = DMAR_FEDATA_REG;
+ dmd->msi_addr_reg = DMAR_FEADDR_REG;
+ dmd->msi_uaddr_reg = DMAR_FEUADDR_REG;
+ dmd->enable_intr = dmar_enable_fault_intr;
+ dmd->disable_intr = dmar_disable_fault_intr;
+ error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_FAULT);
if (error != 0) {
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
+ dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
+ dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
+ dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32);
+
if (DMAR_HAS_QI(unit)) {
- unit->intrs[DMAR_INTR_QI].name = "qi";
- unit->intrs[DMAR_INTR_QI].irq_rid = DMAR_QI_IRQ_RID;
- unit->intrs[DMAR_INTR_QI].handler = dmar_qi_intr;
- unit->intrs[DMAR_INTR_QI].msi_data_reg = DMAR_IEDATA_REG;
- unit->intrs[DMAR_INTR_QI].msi_addr_reg = DMAR_IEADDR_REG;
- unit->intrs[DMAR_INTR_QI].msi_uaddr_reg = DMAR_IEUADDR_REG;
- unit->intrs[DMAR_INTR_QI].enable_intr = dmar_enable_qi_intr;
- unit->intrs[DMAR_INTR_QI].disable_intr = dmar_disable_qi_intr;
- error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI);
+ dmd = &unit->x86c.intrs[DMAR_INTR_QI];
+ dmd->name = "qi";
+ dmd->irq_rid = DMAR_QI_IRQ_RID;
+ dmd->handler = dmar_qi_intr;
+ dmd->msi_data_reg = DMAR_IEDATA_REG;
+ dmd->msi_addr_reg = DMAR_IEADDR_REG;
+ dmd->msi_uaddr_reg = DMAR_IEUADDR_REG;
+ dmd->enable_intr = dmar_enable_qi_intr;
+ dmd->disable_intr = dmar_disable_qi_intr;
+ error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_QI);
if (error != 0) {
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
+
+ dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
+ dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
+ dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32);
}
mtx_init(&unit->iommu.lock, "dmarhw", NULL, MTX_DEF);
@@ -490,18 +428,20 @@ dmar_attach(device_t dev)
* address translation after the required invalidations are
* done.
*/
- dmar_pgalloc(unit->ctx_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO);
+ iommu_pgalloc(unit->ctx_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO);
DMAR_LOCK(unit);
error = dmar_load_root_entry_ptr(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_inv_ctx_glob(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) {
@@ -509,6 +449,7 @@ dmar_attach(device_t dev)
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
}
@@ -517,16 +458,19 @@ dmar_attach(device_t dev)
error = dmar_init_fault_log(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_init_qi(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_init_irt(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
@@ -542,6 +486,7 @@ dmar_attach(device_t dev)
error = iommu_init_busdma(&unit->iommu);
if (error != 0) {
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
@@ -551,6 +496,7 @@ dmar_attach(device_t dev)
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
+ dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
DMAR_UNLOCK(unit);
@@ -1289,20 +1235,20 @@ dmar_print_one(int idx, bool show_domains, bool show_mappings)
db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) "
"size 0x%jx\n"
" head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
- " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n",
- (uintmax_t)unit->inv_queue,
+ " hw compl 0x%jx@%p/phys@%jx next seq 0x%x gen 0x%x\n",
+ (uintmax_t)unit->x86c.inv_queue,
(uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
- (uintmax_t)unit->inv_queue_size,
+ (uintmax_t)unit->x86c.inv_queue_size,
dmar_read4(unit, DMAR_IQH_REG),
dmar_read4(unit, DMAR_IQT_REG),
- unit->inv_queue_avail,
+ unit->x86c.inv_queue_avail,
dmar_read4(unit, DMAR_ICS_REG),
dmar_read4(unit, DMAR_IECTL_REG),
- unit->inv_waitd_seq_hw,
- &unit->inv_waitd_seq_hw,
- (uintmax_t)unit->inv_waitd_seq_hw_phys,
- unit->inv_waitd_seq,
- unit->inv_waitd_gen);
+ (uintmax_t)unit->x86c.inv_waitd_seq_hw,
+ &unit->x86c.inv_waitd_seq_hw,
+ (uintmax_t)unit->x86c.inv_waitd_seq_hw_phys,
+ unit->x86c.inv_waitd_seq,
+ unit->x86c.inv_waitd_gen);
} else {
db_printf("qi is disabled\n");
}
@@ -1346,12 +1292,52 @@ DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars)
}
#endif
-struct iommu_unit *
-iommu_find(device_t dev, bool verbose)
+static struct iommu_unit *
+dmar_find_method(device_t dev, bool verbose)
{
struct dmar_unit *dmar;
dmar = dmar_find(dev, verbose);
-
return (&dmar->iommu);
}
+
+static struct x86_unit_common *
+dmar_get_x86_common(struct iommu_unit *unit)
+{
+ struct dmar_unit *dmar;
+
+ dmar = IOMMU2DMAR(unit);
+ return (&dmar->x86c);
+}
+
+static void
+dmar_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+ dmar_quirks_pre_use(unit);
+ dmar_instantiate_rmrr_ctxs(unit);
+}
+
+static struct x86_iommu dmar_x86_iommu = {
+ .get_x86_common = dmar_get_x86_common,
+ .unit_pre_instantiate_ctx = dmar_unit_pre_instantiate_ctx,
+ .domain_unload_entry = dmar_domain_unload_entry,
+ .domain_unload = dmar_domain_unload,
+ .get_ctx = dmar_get_ctx,
+ .free_ctx_locked = dmar_free_ctx_locked_method,
+ .free_ctx = dmar_free_ctx_method,
+ .find = dmar_find_method,
+ .alloc_msi_intr = dmar_alloc_msi_intr,
+ .map_msi_intr = dmar_map_msi_intr,
+ .unmap_msi_intr = dmar_unmap_msi_intr,
+ .map_ioapic_intr = dmar_map_ioapic_intr,
+ .unmap_ioapic_intr = dmar_unmap_ioapic_intr,
+};
+
+static void
+x86_iommu_set_intel(void *arg __unused)
+{
+ if (cpu_vendor_id == CPU_VENDOR_INTEL)
+ set_x86_iommu(&dmar_x86_iommu);
+}
+
+SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_intel, NULL);
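
The dmar_x86_iommu ops vector and the SYSINIT above are the new registration point: on Intel CPUs the DMAR driver installs itself as the x86 IOMMU backend, and the formerly global entry points (iommu_get_ctx(), iommu_domain_unload(), iommu_find(), and so on) are expected to become thin dispatchers in the new iommu_utils.c. Below is a hedged sketch of that dispatch layer, assuming a file-local pointer named x86_iommu_ops; only set_x86_iommu() and the method names come from this diff, the wrapper bodies and include list are illustrative.

/* Sketch only; not the committed iommu_utils.c. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>

static struct x86_iommu *x86_iommu_ops;

void
set_x86_iommu(struct x86_iommu *x)
{
	KASSERT(x86_iommu_ops == NULL, ("x86 iommu backend already set"));
	x86_iommu_ops = x;
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	/* Forward to the registered backend (DMAR on Intel). */
	return (x86_iommu_ops->get_ctx(iommu, dev, rid, id_mapped,
	    rmrr_init));
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	x86_iommu_ops->domain_unload(iodom, entries, cansleep);
}
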
diff --git a/sys/x86/iommu/intel_fault.c b/sys/x86/iommu/intel_fault.c
index e275304c8d51..1064165ea5d7 100644
--- a/sys/x86/iommu/intel_fault.c
+++ b/sys/x86/iommu/intel_fault.c
@@ -54,6 +54,7 @@
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <dev/iommu/busdma_iommu.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
/*
@@ -126,7 +127,7 @@ dmar_fault_intr(void *arg)
int fri, frir, faultp;
bool enqueue;
- unit = arg;
+ unit = IOMMU2DMAR((struct iommu_unit *)arg);
enqueue = false;
fsts = dmar_read4(unit, DMAR_FSTS_REG);
dmar_fault_intr_clear(unit, fsts);
@@ -275,9 +276,9 @@ dmar_init_fault_log(struct dmar_unit *unit)
"dmar%d fault taskq", unit->iommu.unit);
DMAR_LOCK(unit);
- dmar_disable_fault_intr(unit);
+ dmar_disable_fault_intr(&unit->iommu);
dmar_clear_faults(unit);
- dmar_enable_fault_intr(unit);
+ dmar_enable_fault_intr(&unit->iommu);
DMAR_UNLOCK(unit);
return (0);
@@ -291,7 +292,7 @@ dmar_fini_fault_log(struct dmar_unit *unit)
return;
DMAR_LOCK(unit);
- dmar_disable_fault_intr(unit);
+ dmar_disable_fault_intr(&unit->iommu);
DMAR_UNLOCK(unit);
taskqueue_drain(unit->fault_taskqueue, &unit->fault_task);
@@ -305,10 +306,12 @@ dmar_fini_fault_log(struct dmar_unit *unit)
}
void
-dmar_enable_fault_intr(struct dmar_unit *unit)
+dmar_enable_fault_intr(struct iommu_unit *iommu)
{
+ struct dmar_unit *unit;
uint32_t fectl;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
fectl = dmar_read4(unit, DMAR_FECTL_REG);
fectl &= ~DMAR_FECTL_IM;
@@ -316,10 +319,12 @@ dmar_enable_fault_intr(struct dmar_unit *unit)
}
void
-dmar_disable_fault_intr(struct dmar_unit *unit)
+dmar_disable_fault_intr(struct iommu_unit *iommu)
{
+ struct dmar_unit *unit;
uint32_t fectl;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
fectl = dmar_read4(unit, DMAR_FECTL_REG);
dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM);
diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c
index 929f8656d1eb..fbc0e9e97b64 100644
--- a/sys/x86/iommu/intel_idpgtbl.c
+++ b/sys/x86/iommu/intel_idpgtbl.c
@@ -47,6 +47,7 @@
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
+#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@@ -63,6 +64,7 @@
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
static int domain_unmap_buf_locked(struct dmar_domain *domain,
@@ -108,7 +110,7 @@ domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
iommu_gaddr_t addr)
{
vm_page_t m1;
- dmar_pte_t *pte;
+ iommu_pte_t *pte;
struct sf_buf *sf;
iommu_gaddr_t f, pg_sz;
vm_pindex_t base;
@@ -117,28 +119,28 @@ domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj);
if (addr >= tbl->maxaddr)
return;
- (void)dmar_pgalloc(tbl->pgtbl_obj, idx, IOMMU_PGF_OBJL |
+ (void)iommu_pgalloc(tbl->pgtbl_obj, idx, IOMMU_PGF_OBJL |
IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO);
- base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */
+ base = idx * IOMMU_NPTEPG + 1; /* Index of the first child page of idx */
pg_sz = pglvl_page_size(tbl->pglvl, lvl);
if (lvl != tbl->leaf) {
- for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz)
+ for (i = 0, f = addr; i < IOMMU_NPTEPG; i++, f += pg_sz)
domain_idmap_nextlvl(tbl, lvl + 1, base + i, f);
}
VM_OBJECT_WUNLOCK(tbl->pgtbl_obj);
- pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, IOMMU_PGF_WAITOK, &sf);
+ pte = iommu_map_pgtbl(tbl->pgtbl_obj, idx, IOMMU_PGF_WAITOK, &sf);
if (lvl == tbl->leaf) {
- for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) {
+ for (i = 0, f = addr; i < IOMMU_NPTEPG; i++, f += pg_sz) {
if (f >= tbl->maxaddr)
break;
pte[i].pte = (DMAR_PTE_ADDR_MASK & f) |
DMAR_PTE_R | DMAR_PTE_W;
}
} else {
- for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) {
+ for (i = 0, f = addr; i < IOMMU_NPTEPG; i++, f += pg_sz) {
if (f >= tbl->maxaddr)
break;
- m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i,
+ m1 = iommu_pgalloc(tbl->pgtbl_obj, base + i,
IOMMU_PGF_NOALLOC);
KASSERT(m1 != NULL, ("lost page table page"));
pte[i].pte = (DMAR_PTE_ADDR_MASK &
@@ -146,7 +148,7 @@ domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
}
}
/* domain_get_idmap_pgtbl flushes CPU cache if needed. */
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
VM_OBJECT_WLOCK(tbl->pgtbl_obj);
}
@@ -300,7 +302,7 @@ put_idmap_pgtbl(vm_object_t obj)
rmobj = tbl->pgtbl_obj;
if (rmobj->ref_count == 1) {
LIST_REMOVE(tbl, link);
- atomic_subtract_int(&dmar_tbl_pagecnt,
+ atomic_subtract_int(&iommu_tbl_pagecnt,
rmobj->resident_page_count);
vm_object_deallocate(rmobj);
free(tbl, M_DMAR_IDPGTBL);
@@ -314,61 +316,27 @@ put_idmap_pgtbl(vm_object_t obj)
* address. Support superpages.
*/
-/*
- * Index of the pte for the guest address base in the page table at
- * the level lvl.
- */
-static int
-domain_pgtbl_pte_off(struct dmar_domain *domain, iommu_gaddr_t base, int lvl)
-{
-
- base >>= DMAR_PAGE_SHIFT + (domain->pglvl - lvl - 1) *
- DMAR_NPTEPGSHIFT;
- return (base & DMAR_PTEMASK);
-}
-
-/*
- * Returns the page index of the page table page in the page table
- * object, which maps the given address base at the page table level
- * lvl.
- */
-static vm_pindex_t
-domain_pgtbl_get_pindex(struct dmar_domain *domain, iommu_gaddr_t base, int lvl)
-{
- vm_pindex_t idx, pidx;
- int i;
-
- KASSERT(lvl >= 0 && lvl < domain->pglvl,
- ("wrong lvl %p %d", domain, lvl));
-
- for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
- idx = domain_pgtbl_pte_off(domain, base, i) +
- pidx * DMAR_NPTEPG + 1;
- }
- return (idx);
-}
-
-static dmar_pte_t *
+static iommu_pte_t *
domain_pgtbl_map_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl,
int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
vm_page_t m;
struct sf_buf *sfp;
- dmar_pte_t *pte, *ptep;
+ iommu_pte_t *pte, *ptep;
vm_pindex_t idx, idx1;
DMAR_DOMAIN_ASSERT_PGLOCKED(domain);
KASSERT((flags & IOMMU_PGF_OBJL) != 0, ("lost PGF_OBJL"));
- idx = domain_pgtbl_get_pindex(domain, base, lvl);
+ idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
if (*sf != NULL && idx == *idxp) {
- pte = (dmar_pte_t *)sf_buf_kva(*sf);
+ pte = (iommu_pte_t *)sf_buf_kva(*sf);
} else {
if (*sf != NULL)
- dmar_unmap_pgtbl(*sf);
+ iommu_unmap_pgtbl(*sf);
*idxp = idx;
retry:
- pte = dmar_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
+ pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
if (pte == NULL) {
KASSERT(lvl > 0,
("lost root page table page %p", domain));
@@ -377,7 +345,7 @@ retry:
* it and create a pte in the preceeding page level
* to reference the allocated page table page.
*/
- m = dmar_pgalloc(domain->pgtbl_obj, idx, flags |
+ m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
IOMMU_PGF_ZERO);
if (m == NULL)
return (NULL);
@@ -389,7 +357,7 @@ retry:
* pte write and clean while the lock is
* dropped.
*/
- m->ref_count++;
+ vm_page_wire(m);
sfp = NULL;
ptep = domain_pgtbl_map_pte(domain, base, lvl - 1,
@@ -397,22 +365,22 @@ retry:
if (ptep == NULL) {
KASSERT(m->pindex != 0,
("loosing root page %p", domain));
- m->ref_count--;
- dmar_pgfree(domain->pgtbl_obj, m->pindex,
+ vm_page_unwire_noq(m);
+ iommu_pgfree(domain->pgtbl_obj, m->pindex,
flags);
return (NULL);
}
dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W |
VM_PAGE_TO_PHYS(m));
dmar_flush_pte_to_ram(domain->dmar, ptep);
- sf_buf_page(sfp)->ref_count += 1;
- m->ref_count--;
- dmar_unmap_pgtbl(sfp);
+ vm_page_wire(sf_buf_page(sfp));
+ vm_page_unwire_noq(m);
+ iommu_unmap_pgtbl(sfp);
/* Only executed once. */
goto retry;
}
}
- pte += domain_pgtbl_pte_off(domain, base, lvl);
+ pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
return (pte);
}
@@ -420,7 +388,7 @@ static int
domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags)
{
- dmar_pte_t *pte;
+ iommu_pte_t *pte;
struct sf_buf *sf;
iommu_gaddr_t pg_sz, base1;
vm_pindex_t pi, c, idx, run_sz;
@@ -437,7 +405,7 @@ domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
pi += run_sz) {
for (lvl = 0, c = 0, superpage = false;; lvl++) {
pg_sz = domain_page_size(domain, lvl);
- run_sz = pg_sz >> DMAR_PAGE_SHIFT;
+ run_sz = pg_sz >> IOMMU_PAGE_SHIFT;
if (lvl == domain->pglvl - 1)
break;
/*
@@ -476,7 +444,7 @@ domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
("failed waitable pte alloc %p", domain));
if (sf != NULL)
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
domain_unmap_buf_locked(domain, base1, base - base1,
flags);
TD_PINNED_ASSERT;
@@ -485,10 +453,10 @@ domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags |
(superpage ? DMAR_PTE_SP : 0));
dmar_flush_pte_to_ram(domain->dmar, pte);
- sf_buf_page(sf)->ref_count += 1;
+ vm_page_wire(sf_buf_page(sf));
}
if (sf != NULL)
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
TD_PINNED_ASSERT;
return (0);
}
@@ -510,12 +478,12 @@ domain_map_buf(struct iommu_domain *iodom, iommu_gaddr_t base,
domain = IODOM2DOM(iodom);
unit = domain->dmar;
- KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
+ KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
("modifying idmap pagetable domain %p", domain));
- KASSERT((base & DMAR_PAGE_MASK) == 0,
+ KASSERT((base & IOMMU_PAGE_MASK) == 0,
("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
- KASSERT((size & DMAR_PAGE_MASK) == 0,
+ KASSERT((size & IOMMU_PAGE_MASK) == 0,
("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
@@ -562,7 +530,7 @@ domain_map_buf(struct iommu_domain *iodom, iommu_gaddr_t base,
}
static void domain_unmap_clear_pte(struct dmar_domain *domain,
- iommu_gaddr_t base, int lvl, int flags, dmar_pte_t *pte,
+ iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
struct sf_buf **sf, bool free_fs);
static void
@@ -570,7 +538,7 @@ domain_free_pgtbl_pde(struct dmar_domain *domain, iommu_gaddr_t base,
int lvl, int flags)
{
struct sf_buf *sf;
- dmar_pte_t *pde;
+ iommu_pte_t *pde;
vm_pindex_t idx;
sf = NULL;
@@ -580,7 +548,7 @@ domain_free_pgtbl_pde(struct dmar_domain *domain, iommu_gaddr_t base,
static void
domain_unmap_clear_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl,
- int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf)
+ int flags, iommu_pte_t *pte, struct sf_buf **sf, bool free_sf)
{
vm_page_t m;
@@ -588,11 +556,10 @@ domain_unmap_clear_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl,
dmar_flush_pte_to_ram(domain->dmar, pte);
m = sf_buf_page(*sf);
if (free_sf) {
- dmar_unmap_pgtbl(*sf);
+ iommu_unmap_pgtbl(*sf);
*sf = NULL;
}
- m->ref_count--;
- if (m->ref_count != 0)
+ if (!vm_page_unwire_noq(m))
return;
KASSERT(lvl != 0,
("lost reference (lvl) on root pg domain %p base %jx lvl %d",
@@ -600,7 +567,7 @@ domain_unmap_clear_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl,
KASSERT(m->pindex != 0,
("lost reference (idx) on root pg domain %p base %jx lvl %d",
domain, (uintmax_t)base, lvl));
- dmar_pgfree(domain->pgtbl_obj, m->pindex, flags);
+ iommu_pgfree(domain->pgtbl_obj, m->pindex, flags);
domain_free_pgtbl_pde(domain, base, lvl - 1, flags);
}
@@ -611,7 +578,7 @@ static int
domain_unmap_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, int flags)
{
- dmar_pte_t *pte;
+ iommu_pte_t *pte;
struct sf_buf *sf;
vm_pindex_t idx;
iommu_gaddr_t pg_sz;
@@ -623,10 +590,10 @@ domain_unmap_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
("modifying idmap pagetable domain %p", domain));
- KASSERT((base & DMAR_PAGE_MASK) == 0,
+ KASSERT((base & IOMMU_PAGE_MASK) == 0,
("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
- KASSERT((size & DMAR_PAGE_MASK) == 0,
+ KASSERT((size & IOMMU_PAGE_MASK) == 0,
("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT(base < (1ULL << domain->agaw),
@@ -669,7 +636,7 @@ domain_unmap_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base,
(uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
}
if (sf != NULL)
- dmar_unmap_pgtbl(sf);
+ iommu_unmap_pgtbl(sf);
/*
* See 11.1 Write Buffer Flushing for an explanation why RWBF
* can be ignored there.
@@ -695,7 +662,7 @@ domain_unmap_buf(struct iommu_domain *iodom, iommu_gaddr_t base,
}
int
-domain_alloc_pgtbl(struct dmar_domain *domain)
+dmar_domain_alloc_pgtbl(struct dmar_domain *domain)
{
vm_page_t m;
@@ -705,10 +672,10 @@ domain_alloc_pgtbl(struct dmar_domain *domain)
domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
DMAR_DOMAIN_PGLOCK(domain);
- m = dmar_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
+ m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
/* No implicit free of the top level page table page. */
- m->ref_count = 1;
+ vm_page_wire(m);
DMAR_DOMAIN_PGUNLOCK(domain);
DMAR_LOCK(domain->dmar);
domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
@@ -717,7 +684,7 @@ domain_alloc_pgtbl(struct dmar_domain *domain)
}
void
-domain_free_pgtbl(struct dmar_domain *domain)
+dmar_domain_free_pgtbl(struct dmar_domain *domain)
{
vm_object_t obj;
vm_page_t m;
@@ -740,8 +707,10 @@ domain_free_pgtbl(struct dmar_domain *domain)
/* Obliterate ref_counts */
VM_OBJECT_ASSERT_WLOCKED(obj);
- for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
- m->ref_count = 0;
+ for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) {
+ vm_page_clearref(m);
+ vm_wire_sub(1);
+ }
VM_OBJECT_WUNLOCK(obj);
vm_object_deallocate(obj);
}
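
The intel_idpgtbl.c hunks above stop manipulating m->ref_count by hand on page-table pages and switch to vm_page_wire()/vm_page_unwire_noq(), which also keep the system wired-page counter consistent; that is why dmar_domain_free_pgtbl() now pairs vm_page_clearref() with vm_wire_sub(1) when it obliterates the remaining references. A minimal fragment showing the replacement pattern (illustrative only, reusing names from the hunks above):

	/* Take a reference on a child page-table page. */
	vm_page_wire(m);		/* was: m->ref_count++ */

	/* Drop a reference; free the page when the last one goes away. */
	if (vm_page_unwire_noq(m)) {	/* was: m->ref_count--; check for 0 */
		iommu_pgfree(domain->pgtbl_obj, m->pindex, flags);
		domain_free_pgtbl_pde(domain, base, lvl - 1, flags);
	}
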
diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c
index 09271a6f6cc9..ec3cd35e4f4e 100644
--- a/sys/x86/iommu/intel_intrmap.c
+++ b/sys/x86/iommu/intel_intrmap.c
@@ -54,6 +54,7 @@
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
#include <x86/iommu/iommu_intrmap.h>
@@ -64,7 +65,7 @@ static void dmar_ir_program_irte(struct dmar_unit *unit, u_int idx,
static int dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie);
int
-iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
+dmar_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
struct dmar_unit *unit;
vmem_addr_t vmem_res;
@@ -92,7 +93,7 @@ iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
}
int
-iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
+dmar_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
uint64_t *addr, uint32_t *data)
{
struct dmar_unit *unit;
@@ -138,7 +139,7 @@ iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
}
int
-iommu_unmap_msi_intr(device_t src, u_int cookie)
+dmar_unmap_msi_intr(device_t src, u_int cookie)
{
struct dmar_unit *unit;
@@ -149,7 +150,7 @@ iommu_unmap_msi_intr(device_t src, u_int cookie)
}
int
-iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
+dmar_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
struct dmar_unit *unit;
@@ -212,7 +213,7 @@ iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
}
int
-iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
+dmar_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
struct dmar_unit *unit;
u_int idx;
@@ -270,7 +271,7 @@ dmar_ir_program_irte(struct dmar_unit *unit, u_int idx, uint64_t low,
high = DMAR_IRTE2_SVT_RID | DMAR_IRTE2_SQ_RID |
DMAR_IRTE2_SID_RID(rid);
if (bootverbose) {
- device_printf(unit->dev,
+ device_printf(unit->iommu.dev,
"programming irte[%d] rid %#x high %#jx low %#jx\n",
idx, rid, (uintmax_t)high, (uintmax_t)low);
}
@@ -314,13 +315,6 @@ dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie)
return (0);
}
-static u_int
-clp2(u_int v)
-{
-
- return (powerof2(v) ? v : 1 << fls(v));
-}
-
int
dmar_init_irt(struct dmar_unit *unit)
{
@@ -329,18 +323,19 @@ dmar_init_irt(struct dmar_unit *unit)
return (0);
unit->ir_enabled = 1;
TUNABLE_INT_FETCH("hw.dmar.ir", &unit->ir_enabled);
+ TUNABLE_INT_FETCH("hw.iommu.ir", &unit->ir_enabled);
if (!unit->ir_enabled)
return (0);
if (!unit->qi_enabled) {
unit->ir_enabled = 0;
if (bootverbose)
- device_printf(unit->dev,
+ device_printf(unit->iommu.dev,
"QI disabled, disabling interrupt remapping\n");
return (0);
}
- unit->irte_cnt = clp2(num_io_irqs);
+ unit->irte_cnt = roundup_pow_of_two(num_io_irqs);
unit->irt = kmem_alloc_contig(unit->irte_cnt * sizeof(dmar_irte_t),
- M_ZERO | M_WAITOK, 0, dmar_high, PAGE_SIZE, 0,
+ M_ZERO | M_WAITOK, 0, iommu_high, PAGE_SIZE, 0,
DMAR_IS_COHERENT(unit) ?
VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE);
if (unit->irt == NULL)
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 37e2bf211e32..c11946ad9447 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -55,19 +55,9 @@
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
-static bool
-dmar_qi_seq_processed(const struct dmar_unit *unit,
- const struct iommu_qi_genseq *pseq)
-{
- u_int gen;
-
- gen = unit->inv_waitd_gen;
- return (pseq->gen < gen ||
- (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw));
-}
-
static int
dmar_enable_qi(struct dmar_unit *unit)
{
@@ -95,32 +85,36 @@ dmar_disable_qi(struct dmar_unit *unit)
}
static void
-dmar_qi_advance_tail(struct dmar_unit *unit)
+dmar_qi_advance_tail(struct iommu_unit *iommu)
{
+ struct dmar_unit *unit;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
- dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
+ dmar_write4(unit, DMAR_IQT_REG, unit->x86c.inv_queue_tail);
}
static void
-dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
+dmar_qi_ensure(struct iommu_unit *iommu, int descr_count)
{
+ struct dmar_unit *unit;
uint32_t head;
int bytes;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
for (;;) {
- if (bytes <= unit->inv_queue_avail)
+ if (bytes <= unit->x86c.inv_queue_avail)
break;
/* refill */
head = dmar_read4(unit, DMAR_IQH_REG);
head &= DMAR_IQH_MASK;
- unit->inv_queue_avail = head - unit->inv_queue_tail -
+ unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
DMAR_IQ_DESCR_SZ;
- if (head <= unit->inv_queue_tail)
- unit->inv_queue_avail += unit->inv_queue_size;
- if (bytes <= unit->inv_queue_avail)
+ if (head <= unit->x86c.inv_queue_tail)
+ unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
+ if (bytes <= unit->x86c.inv_queue_avail)
break;
/*
@@ -133,11 +127,11 @@ dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
* See dmar_qi_invalidate_locked() for a discussion
* about data race prevention.
*/
- dmar_qi_advance_tail(unit);
- unit->inv_queue_full++;
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
+ unit->x86c.inv_queue_full++;
cpu_spinwait();
}
- unit->inv_queue_avail -= bytes;
+ unit->x86c.inv_queue_avail -= bytes;
}
static void
@@ -145,208 +139,106 @@ dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
{
DMAR_ASSERT_LOCKED(unit);
- *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
- unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
- KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
- ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
- (uintmax_t)unit->inv_queue_size));
- unit->inv_queue_tail &= unit->inv_queue_size - 1;
- *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
- unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
- KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
- ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
- (uintmax_t)unit->inv_queue_size));
- unit->inv_queue_tail &= unit->inv_queue_size - 1;
+#ifdef __LP64__
+ atomic_store_64((uint64_t *)(unit->x86c.inv_queue +
+ unit->x86c.inv_queue_tail), data1);
+#else
+ *(volatile uint64_t *)(unit->x86c.inv_queue +
+ unit->x86c.inv_queue_tail) = data1;
+#endif
+ unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+#ifdef __LP64__
+ atomic_store_64((uint64_t *)(unit->x86c.inv_queue +
+ unit->x86c.inv_queue_tail), data2);
+#else
+ *(volatile uint64_t *)(unit->x86c.inv_queue +
+ unit->x86c.inv_queue_tail) = data2;
+#endif
+ unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+ KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+ (uintmax_t)unit->x86c.inv_queue_size));
+ unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
}
static void
-dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
+dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr,
bool memw, bool fence)
{
+ struct dmar_unit *unit;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
(intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
(memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
(fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
(memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
- memw ? unit->inv_waitd_seq_hw_phys : 0);
+ memw ? unit->x86c.inv_waitd_seq_hw_phys : 0);
}
static void
-dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq,
- bool emit_wait)
-{
- struct iommu_qi_genseq gsec;
- uint32_t seq;
-
- KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
- DMAR_ASSERT_LOCKED(unit);
- if (unit->inv_waitd_seq == 0xffffffff) {
- gsec.gen = unit->inv_waitd_gen;
- gsec.seq = unit->inv_waitd_seq;
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
- dmar_qi_advance_tail(unit);
- while (!dmar_qi_seq_processed(unit, &gsec))
- cpu_spinwait();
- unit->inv_waitd_gen++;
- unit->inv_waitd_seq = 1;
- }
- seq = unit->inv_waitd_seq++;
- pseq->gen = unit->inv_waitd_gen;
- pseq->seq = seq;
- if (emit_wait) {
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_descr(unit, seq, true, true, false);
- }
-}
-
-/*
- * To avoid missed wakeups, callers must increment the unit's waiters count
- * before advancing the tail past the wait descriptor.
- */
-static void
-dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq,
- bool nowait)
-{
-
- DMAR_ASSERT_LOCKED(unit);
- KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__));
- while (!dmar_qi_seq_processed(unit, gseq)) {
- if (cold || nowait) {
- cpu_spinwait();
- } else {
- msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0,
- "dmarse", hz);
- }
- }
- unit->inv_seq_waiters--;
-}
-
-static void
-dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base,
+dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base,
iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
{
struct dmar_unit *unit;
+ struct dmar_domain *domain;
iommu_gaddr_t isize;
int am;
+ domain = __containerof(idomain, struct dmar_domain, iodom);
unit = domain->dmar;
DMAR_ASSERT_LOCKED(unit);
for (; size > 0; base += isize, size -= isize) {
am = calc_am(unit, base, size, &isize);
- dmar_qi_ensure(unit, 1);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 1);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
DMAR_IQ_DESCR_IOTLB_DR |
DMAR_IQ_DESCR_IOTLB_DID(domain->domain),
base | am);
}
- dmar_qi_emit_wait_seq(unit, pseq, emit_wait);
-}
-
-/*
- * The caller must not be using the entry's dmamap_link field.
- */
-void
-dmar_qi_invalidate_locked(struct dmar_domain *domain,
- struct iommu_map_entry *entry, bool emit_wait)
-{
- struct dmar_unit *unit;
-
- unit = domain->dmar;
- DMAR_ASSERT_LOCKED(unit);
- dmar_qi_invalidate_emit(domain, entry->start, entry->end -
- entry->start, &entry->gseq, emit_wait);
-
- /*
- * To avoid a data race in dmar_qi_task(), the entry's gseq must be
- * initialized before the entry is added to the TLB flush list, and the
- * entry must be added to that list before the tail is advanced. More
- * precisely, the tail must not be advanced past the wait descriptor
- * that will generate the interrupt that schedules dmar_qi_task() for
- * execution before the entry is added to the list. While an earlier
- * call to dmar_qi_ensure() might have advanced the tail, it will not
- * advance it past the wait descriptor.
- *
- * See the definition of struct dmar_unit for more information on
- * synchronization.
- */
- entry->tlb_flush_next = NULL;
- atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next,
- (uintptr_t)entry);
- unit->tlb_flush_tail = entry;
-
- dmar_qi_advance_tail(unit);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), pseq, emit_wait);
}
-void
-dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base,
- iommu_gaddr_t size, bool cansleep)
+static void
+dmar_qi_invalidate_glob_impl(struct dmar_unit *unit, uint64_t data1)
{
- struct dmar_unit *unit;
struct iommu_qi_genseq gseq;
- unit = domain->dmar;
- DMAR_LOCK(unit);
- dmar_qi_invalidate_emit(domain, base, size, &gseq, true);
-
- /*
- * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count
- * must be incremented before the tail is advanced.
- */
- unit->inv_seq_waiters++;
-
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
- DMAR_UNLOCK(unit);
+ DMAR_ASSERT_LOCKED(unit);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 2);
+ dmar_qi_emit(unit, data1, 0);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
+ /* See dmar_qi_invalidate_sync(). */
+ unit->x86c.inv_seq_waiters++;
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
+ iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
}
void
dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
{
- struct iommu_qi_genseq gseq;
-
- DMAR_ASSERT_LOCKED(unit);
- dmar_qi_ensure(unit, 2);
- dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
- /* See dmar_qi_invalidate_sync(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
+ dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_CTX_INV |
+ DMAR_IQ_DESCR_CTX_GLOB);
}
void
dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
{
- struct iommu_qi_genseq gseq;
-
- DMAR_ASSERT_LOCKED(unit);
- dmar_qi_ensure(unit, 2);
- dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
- DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
- /* See dmar_qi_invalidate_sync(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
+ dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_IOTLB_INV |
+ DMAR_IQ_DESCR_IOTLB_GLOB | DMAR_IQ_DESCR_IOTLB_DW |
+ DMAR_IQ_DESCR_IOTLB_DR);
}
void
dmar_qi_invalidate_iec_glob(struct dmar_unit *unit)
{
- struct iommu_qi_genseq gseq;
-
- DMAR_ASSERT_LOCKED(unit);
- dmar_qi_ensure(unit, 2);
- dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
- /* See dmar_qi_invalidate_sync(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
+ dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_IEC_INV);
}
void
@@ -362,21 +254,21 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
for (; cnt > 0; cnt -= c, start += c) {
l = ffs(start | cnt) - 1;
c = 1 << l;
- dmar_qi_ensure(unit, 1);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 1);
dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV |
DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) |
DMAR_IQ_DESCR_IEC_IM(l), 0);
}
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
+ dmar_qi_ensure(DMAR2IOMMU(unit), 1);
+ iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
/*
- * Since dmar_qi_wait_for_seq() will not sleep, this increment's
+ * Since iommu_qi_wait_for_seq() will not sleep, this increment's
* placement relative to advancing the tail doesn't matter.
*/
- unit->inv_seq_waiters++;
+ unit->x86c.inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
+ dmar_qi_advance_tail(DMAR2IOMMU(unit));
/*
* The caller of the function, in particular,
@@ -393,7 +285,7 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
* queue is processed, which includes requests possibly issued
* before our request.
*/
- dmar_qi_wait_for_seq(unit, &gseq, true);
+ iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true);
}
int
@@ -401,41 +293,21 @@ dmar_qi_intr(void *arg)
{
struct dmar_unit *unit;
- unit = arg;
+ unit = IOMMU2DMAR((struct iommu_unit *)arg);
KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled",
unit->iommu.unit));
- taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task);
+ taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task);
return (FILTER_HANDLED);
}
static void
-dmar_qi_drain_tlb_flush(struct dmar_unit *unit)
-{
- struct iommu_map_entry *entry, *head;
-
- for (head = unit->tlb_flush_head;; head = entry) {
- entry = (struct iommu_map_entry *)
- atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
- if (entry == NULL ||
- !dmar_qi_seq_processed(unit, &entry->gseq))
- break;
- unit->tlb_flush_head = entry;
- iommu_gas_free_entry(head);
- if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
- iommu_gas_free_region(entry);
- else
- iommu_gas_free_space(entry);
- }
-}
-
-static void
dmar_qi_task(void *arg, int pending __unused)
{
struct dmar_unit *unit;
uint32_t ics;
- unit = arg;
- dmar_qi_drain_tlb_flush(unit);
+ unit = IOMMU2DMAR(arg);
+ iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
/*
* Request an interrupt on the completion of the next invalidation
@@ -452,16 +324,16 @@ dmar_qi_task(void *arg, int pending __unused)
* Otherwise, such entries will linger until a later entry
* that requests an interrupt is processed.
*/
- dmar_qi_drain_tlb_flush(unit);
+ iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
}
- if (unit->inv_seq_waiters > 0) {
+ if (unit->x86c.inv_seq_waiters > 0) {
/*
* Acquire the DMAR lock so that wakeup() is called only after
* the waiter is sleeping.
*/
DMAR_LOCK(unit);
- wakeup(&unit->inv_seq_waiters);
+ wakeup(&unit->x86c.inv_seq_waiters);
DMAR_UNLOCK(unit);
}
}
@@ -471,7 +343,7 @@ dmar_init_qi(struct dmar_unit *unit)
{
uint64_t iqa;
uint32_t ics;
- int qi_sz;
+ u_int qi_sz;
if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
return (0);
@@ -480,34 +352,19 @@ dmar_init_qi(struct dmar_unit *unit)
if (!unit->qi_enabled)
return (0);
- unit->tlb_flush_head = unit->tlb_flush_tail =
- iommu_gas_alloc_entry(NULL, 0);
- TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
- unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK,
- taskqueue_thread_enqueue, &unit->qi_taskqueue);
- taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
- "dmar%d qi taskq", unit->iommu.unit);
-
- unit->inv_waitd_gen = 0;
- unit->inv_waitd_seq = 1;
-
- qi_sz = DMAR_IQA_QS_DEF;
- TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
- if (qi_sz > DMAR_IQA_QS_MAX)
- qi_sz = DMAR_IQA_QS_MAX;
- unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
- /* Reserve one descriptor to prevent wraparound. */
- unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;
-
- /* The invalidation queue reads by DMARs are always coherent. */
- unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK |
- M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
- unit->inv_waitd_seq_hw_phys = pmap_kextract(
- (vm_offset_t)&unit->inv_waitd_seq_hw);
+ unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX;
+ unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ;
+ iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task);
+ get_x86_iommu()->qi_ensure = dmar_qi_ensure;
+ get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr;
+ get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail;
+ get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit;
+
+ qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE);
DMAR_LOCK(unit);
dmar_write8(unit, DMAR_IQT_REG, 0);
- iqa = pmap_kextract((uintptr_t)unit->inv_queue);
+ iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
iqa |= qi_sz;
dmar_write8(unit, DMAR_IQA_REG, iqa);
dmar_enable_qi(unit);
@@ -516,49 +373,35 @@ dmar_init_qi(struct dmar_unit *unit)
ics = DMAR_ICS_IWC;
dmar_write4(unit, DMAR_ICS_REG, ics);
}
- dmar_enable_qi_intr(unit);
+ dmar_enable_qi_intr(DMAR2IOMMU(unit));
DMAR_UNLOCK(unit);
return (0);
}
+static void
+dmar_fini_qi_helper(struct iommu_unit *iommu)
+{
+ dmar_disable_qi_intr(iommu);
+ dmar_disable_qi(IOMMU2DMAR(iommu));
+}
+
void
dmar_fini_qi(struct dmar_unit *unit)
{
- struct iommu_qi_genseq gseq;
-
if (!unit->qi_enabled)
return;
- taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
- taskqueue_free(unit->qi_taskqueue);
- unit->qi_taskqueue = NULL;
-
- DMAR_LOCK(unit);
- /* quisce */
- dmar_qi_ensure(unit, 1);
- dmar_qi_emit_wait_seq(unit, &gseq, true);
- /* See dmar_qi_invalidate_sync_locked(). */
- unit->inv_seq_waiters++;
- dmar_qi_advance_tail(unit);
- dmar_qi_wait_for_seq(unit, &gseq, false);
- /* only after the quisce, disable queue */
- dmar_disable_qi_intr(unit);
- dmar_disable_qi(unit);
- KASSERT(unit->inv_seq_waiters == 0,
- ("dmar%d: waiters on disabled queue", unit->iommu.unit));
- DMAR_UNLOCK(unit);
-
- kmem_free(unit->inv_queue, unit->inv_queue_size);
- unit->inv_queue = NULL;
- unit->inv_queue_size = 0;
+ iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper);
unit->qi_enabled = 0;
}
void
-dmar_enable_qi_intr(struct dmar_unit *unit)
+dmar_enable_qi_intr(struct iommu_unit *iommu)
{
+ struct dmar_unit *unit;
uint32_t iectl;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported",
unit->iommu.unit));
@@ -568,10 +411,12 @@ dmar_enable_qi_intr(struct dmar_unit *unit)
}
void
-dmar_disable_qi_intr(struct dmar_unit *unit)
+dmar_disable_qi_intr(struct iommu_unit *iommu)
{
+ struct dmar_unit *unit;
uint32_t iectl;
+ unit = IOMMU2DMAR(iommu);
DMAR_ASSERT_LOCKED(unit);
KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported",
unit->iommu.unit));
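The dmar_init_qi() hunk above wires the unit's queue primitives (qi_ensure, qi_emit_wait_descr, qi_advance_tail, qi_invalidate_emit) into the shared ops table returned by get_x86_iommu(), so the generic iommu_qi_*() helpers can call back into the DMAR driver. The standalone sketch below models only that indirection pattern under those assumptions; it is not kernel code, and every demo_* and backend_* name is invented for illustration.

/*
 * Userspace sketch: a driver-supplied ops table with no-op defaults,
 * a set/get pair, and a backend that installs its hooks at init time,
 * loosely mirroring set_x86_iommu()/get_x86_iommu() and the qi_*
 * assignments in dmar_init_qi().  All demo_* names are invented.
 */
#include <assert.h>
#include <stdio.h>

struct demo_unit;

struct demo_ops {
	void	(*qi_ensure)(struct demo_unit *, int descr_count);
	void	(*qi_advance_tail)(struct demo_unit *);
};

static void demo_default_ensure(struct demo_unit *u, int n) { (void)u; (void)n; }
static void demo_default_advance(struct demo_unit *u) { (void)u; }

static struct demo_ops demo_no_ops = {
	.qi_ensure = demo_default_ensure,
	.qi_advance_tail = demo_default_advance,
};
static struct demo_ops *demo_ops = &demo_no_ops;

static void
demo_set_ops(struct demo_ops *o)
{
	/* Only one backend may register, as in set_x86_iommu(). */
	assert(demo_ops == &demo_no_ops);
	demo_ops = o;
}

struct demo_unit {
	int	tail;
};

/* Backend implementation, analogous to the dmar_qi_* hooks. */
static void backend_ensure(struct demo_unit *u, int n) { (void)u; (void)n; }
static void backend_advance(struct demo_unit *u) { u->tail++; }
static struct demo_ops backend_ops = {
	.qi_ensure = backend_ensure,
	.qi_advance_tail = backend_advance,
};

/* Shared helper that only knows the ops table, like the iommu_qi_*() code. */
static void
shared_kick(struct demo_unit *u)
{
	demo_ops->qi_ensure(u, 1);
	demo_ops->qi_advance_tail(u);
}

int
main(void)
{
	struct demo_unit u = { .tail = 0 };

	demo_set_ops(&backend_ops);	/* backend registers its hooks */
	shared_kick(&u);		/* generic code calls back into it */
	printf("tail=%d\n", u.tail);	/* prints tail=1 */
	return (0);
}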
diff --git a/sys/x86/iommu/intel_quirks.c b/sys/x86/iommu/intel_quirks.c
index cccb503b6047..751237a3ab54 100644
--- a/sys/x86/iommu/intel_quirks.c
+++ b/sys/x86/iommu/intel_quirks.c
@@ -58,6 +58,7 @@
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
typedef void (*dmar_quirk_cpu_fun)(struct dmar_unit *);
@@ -107,7 +108,7 @@ dmar_match_quirks(struct dmar_unit *dmar,
(nb_quirk->rev_no == rev_no ||
nb_quirk->rev_no == QUIRK_NB_ALL_REV)) {
if (bootverbose) {
- device_printf(dmar->dev,
+ device_printf(dmar->iommu.dev,
"NB IOMMU quirk %s\n",
nb_quirk->descr);
}
@@ -115,7 +116,8 @@ dmar_match_quirks(struct dmar_unit *dmar,
}
}
} else {
- device_printf(dmar->dev, "cannot find northbridge\n");
+ device_printf(dmar->iommu.dev,
+ "cannot find northbridge\n");
}
}
if (cpu_quirks != NULL) {
@@ -134,7 +136,7 @@ dmar_match_quirks(struct dmar_unit *dmar,
(cpu_quirk->stepping == -1 ||
cpu_quirk->stepping == stepping)) {
if (bootverbose) {
- device_printf(dmar->dev,
+ device_printf(dmar->iommu.dev,
"CPU IOMMU quirk %s\n",
cpu_quirk->descr);
}
diff --git a/sys/x86/iommu/intel_reg.h b/sys/x86/iommu/intel_reg.h
index 26a18ff94890..0fafcce7accf 100644
--- a/sys/x86/iommu/intel_reg.h
+++ b/sys/x86/iommu/intel_reg.h
@@ -31,16 +31,6 @@
#ifndef __X86_IOMMU_INTEL_REG_H
#define __X86_IOMMU_INTEL_REG_H
-#define DMAR_PAGE_SIZE PAGE_SIZE
-#define DMAR_PAGE_MASK (DMAR_PAGE_SIZE - 1)
-#define DMAR_PAGE_SHIFT PAGE_SHIFT
-#define DMAR_NPTEPG (DMAR_PAGE_SIZE / sizeof(dmar_pte_t))
-#define DMAR_NPTEPGSHIFT 9
-#define DMAR_PTEMASK (DMAR_NPTEPG - 1)
-
-#define IOMMU_PAGE_SIZE DMAR_PAGE_SIZE
-#define IOMMU_PAGE_MASK DMAR_PAGE_MASK
-
typedef struct dmar_root_entry {
uint64_t r1;
uint64_t r2;
@@ -49,7 +39,7 @@ typedef struct dmar_root_entry {
#define DMAR_ROOT_R1_CTP_MASK 0xfffffffffffff000 /* Mask for Context-Entry
Table Pointer */
-#define DMAR_CTX_CNT (DMAR_PAGE_SIZE / sizeof(dmar_root_entry_t))
+#define DMAR_CTX_CNT (IOMMU_PAGE_SIZE / sizeof(dmar_root_entry_t))
typedef struct dmar_ctx_entry {
uint64_t ctx1;
@@ -73,9 +63,6 @@ typedef struct dmar_ctx_entry {
#define DMAR_CTX2_DID(x) ((x) << 8) /* Domain Identifier */
#define DMAR_CTX2_GET_DID(ctx2) (((ctx2) & DMAR_CTX2_DID_MASK) >> 8)
-typedef struct dmar_pte {
- uint64_t pte;
-} dmar_pte_t;
#define DMAR_PTE_R 1 /* Read */
#define DMAR_PTE_W (1 << 1) /* Write */
#define DMAR_PTE_SP (1 << 7) /* Super Page */
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
index 4d680cc7d9e8..287b5fe9376a 100644
--- a/sys/x86/iommu/intel_utils.c
+++ b/sys/x86/iommu/intel_utils.c
@@ -63,6 +63,7 @@
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>
u_int
@@ -135,7 +136,7 @@ domain_set_agaw(struct dmar_domain *domain, int mgaw)
return (0);
}
}
- device_printf(domain->dmar->dev,
+ device_printf(domain->dmar->iommu.dev,
"context request mgaw %d: no agaw found, sagaw %x\n",
mgaw, sagaw);
return (EINVAL);
@@ -172,23 +173,6 @@ dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr, bool allow_less
}
/*
- * Calculate the total amount of page table pages needed to map the
- * whole bus address space on the context with the selected agaw.
- */
-vm_pindex_t
-pglvl_max_pages(int pglvl)
-{
- vm_pindex_t res;
- int i;
-
- for (res = 0, i = pglvl; i > 0; i--) {
- res *= DMAR_NPTEPG;
- res++;
- }
- return (res);
-}
-
-/*
* Return true if the page table level lvl supports the superpage for
* the context ctx.
*/
@@ -209,26 +193,6 @@ domain_is_sp_lvl(struct dmar_domain *domain, int lvl)
}
iommu_gaddr_t
-pglvl_page_size(int total_pglvl, int lvl)
-{
- int rlvl;
- static const iommu_gaddr_t pg_sz[] = {
- (iommu_gaddr_t)DMAR_PAGE_SIZE,
- (iommu_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
- (iommu_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
- (iommu_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
- (iommu_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
- (iommu_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
- };
-
- KASSERT(lvl >= 0 && lvl < total_pglvl,
- ("total %d lvl %d", total_pglvl, lvl));
- rlvl = total_pglvl - lvl - 1;
- KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
- return (pg_sz[rlvl]);
-}
-
-iommu_gaddr_t
domain_page_size(struct dmar_domain *domain, int lvl)
{
@@ -243,7 +207,7 @@ calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size,
int am;
for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
- isize = 1ULL << (am + DMAR_PAGE_SHIFT);
+ isize = 1ULL << (am + IOMMU_PAGE_SHIFT);
if ((base & (isize - 1)) == 0 && size >= isize)
break;
if (am == 0)
@@ -253,113 +217,9 @@ calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size,
return (am);
}
-iommu_haddr_t dmar_high;
int haw;
int dmar_tbl_pagecnt;
-vm_page_t
-dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
-{
- vm_page_t m;
- int zeroed, aflags;
-
- zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
- aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
- ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
- VM_ALLOC_NOWAIT);
- for (;;) {
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WLOCK(obj);
- m = vm_page_lookup(obj, idx);
- if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WUNLOCK(obj);
- break;
- }
- m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
- dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WUNLOCK(obj);
- if (m != NULL) {
- if (zeroed && (m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
- atomic_add_int(&dmar_tbl_pagecnt, 1);
- break;
- }
- if ((flags & IOMMU_PGF_WAITOK) == 0)
- break;
- }
- return (m);
-}
-
-void
-dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
-{
- vm_page_t m;
-
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WLOCK(obj);
- m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
- if (m != NULL) {
- vm_page_free(m);
- atomic_subtract_int(&dmar_tbl_pagecnt, 1);
- }
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WUNLOCK(obj);
-}
-
-void *
-dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
- struct sf_buf **sf)
-{
- vm_page_t m;
- bool allocated;
-
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WLOCK(obj);
- m = vm_page_lookup(obj, idx);
- if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
- m = dmar_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
- allocated = true;
- } else
- allocated = false;
- if (m == NULL) {
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WUNLOCK(obj);
- return (NULL);
- }
- /* Sleepable allocations cannot fail. */
- if ((flags & IOMMU_PGF_WAITOK) != 0)
- VM_OBJECT_WUNLOCK(obj);
- sched_pin();
- *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
- == 0 ? SFB_NOWAIT : 0));
- if (*sf == NULL) {
- sched_unpin();
- if (allocated) {
- VM_OBJECT_ASSERT_WLOCKED(obj);
- dmar_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL);
- }
- if ((flags & IOMMU_PGF_OBJL) == 0)
- VM_OBJECT_WUNLOCK(obj);
- return (NULL);
- }
- if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
- (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
- VM_OBJECT_WLOCK(obj);
- else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
- VM_OBJECT_WUNLOCK(obj);
- return ((void *)sf_buf_kva(*sf));
-}
-
-void
-dmar_unmap_pgtbl(struct sf_buf *sf)
-{
-
- sf_buf_free(sf);
- sched_unpin();
-}
-
static void
dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
{
@@ -374,7 +234,7 @@ dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
}
void
-dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
+dmar_flush_pte_to_ram(struct dmar_unit *unit, iommu_pte_t *dst)
{
dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
@@ -647,7 +507,6 @@ dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
DMAR_UNLOCK(dmar);
}
-int dmar_batch_coalesce = 100;
struct timespec dmar_hw_timeout = {
.tv_sec = 0,
.tv_nsec = 1000000
@@ -686,14 +545,6 @@ dmar_timeout_sysctl(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
- NULL, "");
-SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
- &dmar_tbl_pagecnt, 0,
- "Count of pages used for DMAR pagetables");
-SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
- &dmar_batch_coalesce, 0,
- "Number of qi batches between interrupt");
SYSCTL_PROC(_hw_iommu_dmar, OID_AUTO, timeout,
CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
dmar_timeout_sysctl, "QU",
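For reference, the calc_am() loop retained above picks the largest power-of-two invalidation size, 1 << (am + IOMMU_PAGE_SHIFT), that is both aligned with base and no larger than size, bounded by the hardware MAMV capability. A minimal userspace sketch of that selection follows, with the capability passed as a plain argument; the demo_* name and the example values are invented for illustration.

/* Standalone sketch of the address-mask selection done by calc_am(). */
#include <stdint.h>
#include <stdio.h>

#define	DEMO_PAGE_SHIFT	12

static int
demo_calc_am(int mamv, uint64_t base, uint64_t size, uint64_t *isizep)
{
	uint64_t isize;
	int am;

	for (am = mamv;; am--) {
		isize = 1ULL << (am + DEMO_PAGE_SHIFT);
		if ((base & (isize - 1)) == 0 && size >= isize)
			break;
		if (am == 0)
			break;
	}
	*isizep = isize;
	return (am);
}

int
main(void)
{
	uint64_t base = 0x200000, size = 0x300000, isize;
	int am;

	/*
	 * First chunk: base is 2MiB-aligned and size >= 2MiB, so am = 9
	 * (1 << (9 + 12) = 2MiB) is chosen; a caller would then advance
	 * base and shrink size, as the loop in dmar_qi_invalidate_emit()
	 * does.
	 */
	am = demo_calc_am(9, base, size, &isize);
	printf("am=%d isize=0x%llx\n", am, (unsigned long long)isize);
	return (0);
}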
diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c
new file mode 100644
index 000000000000..2011c632f770
--- /dev/null
+++ b/sys/x86/iommu/iommu_utils.c
@@ -0,0 +1,751 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_acpi.h"
+#if defined(__amd64__)
+#define DEV_APIC
+#else
+#include "opt_apic.h"
+#endif
+
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <dev/iommu/busdma_iommu.h>
+#include <dev/iommu/iommu.h>
+#include <x86/iommu/x86_iommu.h>
+#include <x86/iommu/iommu_intrmap.h>
+#ifdef DEV_APIC
+#include "pcib_if.h"
+#include <machine/intr_machdep.h>
+#include <x86/apicreg.h>
+#include <x86/apicvar.h>
+#endif
+
+vm_page_t
+iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+ vm_page_t m;
+ int zeroed, aflags;
+
+ zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
+ aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
+ ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
+ VM_ALLOC_NOWAIT);
+ for (;;) {
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ break;
+ }
+ m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
+ iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ if (m != NULL) {
+ if (zeroed && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ atomic_add_int(&iommu_tbl_pagecnt, 1);
+ break;
+ }
+ if ((flags & IOMMU_PGF_WAITOK) == 0)
+ break;
+ }
+ return (m);
+}
+
+void
+iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+ vm_page_t m;
+
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
+ if (m != NULL) {
+ vm_page_free(m);
+ atomic_subtract_int(&iommu_tbl_pagecnt, 1);
+ }
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+}
+
+void *
+iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
+ struct sf_buf **sf)
+{
+ vm_page_t m;
+ bool allocated;
+
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
+ m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
+ allocated = true;
+ } else
+ allocated = false;
+ if (m == NULL) {
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return (NULL);
+ }
+ /* Sleepable allocations cannot fail. */
+ if ((flags & IOMMU_PGF_WAITOK) != 0)
+ VM_OBJECT_WUNLOCK(obj);
+ sched_pin();
+ *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
+ == 0 ? SFB_NOWAIT : 0));
+ if (*sf == NULL) {
+ sched_unpin();
+ if (allocated) {
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL);
+ }
+ if ((flags & IOMMU_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return (NULL);
+ }
+ if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
+ (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
+ VM_OBJECT_WLOCK(obj);
+ else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return ((void *)sf_buf_kva(*sf));
+}
+
+void
+iommu_unmap_pgtbl(struct sf_buf *sf)
+{
+
+ sf_buf_free(sf);
+ sched_unpin();
+}
+
+iommu_haddr_t iommu_high;
+int iommu_tbl_pagecnt;
+
+SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, "");
+SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
+ &iommu_tbl_pagecnt, 0,
+ "Count of pages used for IOMMU pagetables");
+
+int iommu_qi_batch_coalesce = 100;
+SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
+ &iommu_qi_batch_coalesce, 0,
+ "Number of qi batches between interrupt");
+
+static struct iommu_unit *
+x86_no_iommu_find(device_t dev, bool verbose)
+{
+ return (NULL);
+}
+
+static int
+x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
+{
+ return (EOPNOTSUPP);
+}
+
+static int
+x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
+ u_int cookie, uint64_t *addr, uint32_t *data)
+{
+ return (EOPNOTSUPP);
+}
+
+static int
+x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
+{
+ return (0);
+}
+
+static int
+x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
+ bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
+ uint32_t *lo)
+{
+ return (EOPNOTSUPP);
+}
+
+static int
+x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
+{
+ return (0);
+}
+
+static struct x86_iommu x86_no_iommu = {
+ .find = x86_no_iommu_find,
+ .alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
+ .map_msi_intr = x86_no_iommu_map_msi_intr,
+ .unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
+ .map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
+ .unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
+};
+
+static struct x86_iommu *x86_iommu = &x86_no_iommu;
+
+void
+set_x86_iommu(struct x86_iommu *x)
+{
+ MPASS(x86_iommu == &x86_no_iommu);
+ x86_iommu = x;
+}
+
+struct x86_iommu *
+get_x86_iommu(void)
+{
+ return (x86_iommu);
+}
+
+void
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+ bool cansleep)
+{
+ x86_iommu->domain_unload_entry(entry, free, cansleep);
+}
+
+void
+iommu_domain_unload(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep)
+{
+ x86_iommu->domain_unload(iodom, entries, cansleep);
+}
+
+struct iommu_ctx *
+iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
+ bool id_mapped, bool rmrr_init)
+{
+ return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
+}
+
+void
+iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
+{
+ x86_iommu->free_ctx_locked(iommu, context);
+}
+
+void
+iommu_free_ctx(struct iommu_ctx *context)
+{
+ x86_iommu->free_ctx(context);
+}
+
+struct iommu_unit *
+iommu_find(device_t dev, bool verbose)
+{
+ return (x86_iommu->find(dev, verbose));
+}
+
+int
+iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
+{
+ return (x86_iommu->alloc_msi_intr(src, cookies, count));
+}
+
+int
+iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
+ uint64_t *addr, uint32_t *data)
+{
+ return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
+ addr, data));
+}
+
+int
+iommu_unmap_msi_intr(device_t src, u_int cookie)
+{
+ return (x86_iommu->unmap_msi_intr(src, cookie));
+}
+
+int
+iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
+ bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
+{
+ return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
+ activehi, irq, cookie, hi, lo));
+}
+
+int
+iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
+{
+ return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
+}
+
+void
+iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+ x86_iommu->unit_pre_instantiate_ctx(unit);
+}
+
+#define IOMMU2X86C(iommu) (x86_iommu->get_x86_common(iommu))
+
+static bool
+iommu_qi_seq_processed(struct iommu_unit *unit,
+ const struct iommu_qi_genseq *pseq)
+{
+ struct x86_unit_common *x86c;
+ u_int gen;
+
+ x86c = IOMMU2X86C(unit);
+ gen = x86c->inv_waitd_gen;
+ return (pseq->gen < gen ||
+ (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw));
+}
+
+void
+iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
+ bool emit_wait)
+{
+ struct x86_unit_common *x86c;
+ struct iommu_qi_genseq gsec;
+ uint32_t seq;
+
+ KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
+ IOMMU_ASSERT_LOCKED(unit);
+ x86c = IOMMU2X86C(unit);
+
+ if (x86c->inv_waitd_seq == 0xffffffff) {
+ gsec.gen = x86c->inv_waitd_gen;
+ gsec.seq = x86c->inv_waitd_seq;
+ x86_iommu->qi_ensure(unit, 1);
+ x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
+ true, false);
+ x86_iommu->qi_advance_tail(unit);
+ while (!iommu_qi_seq_processed(unit, &gsec))
+ cpu_spinwait();
+ x86c->inv_waitd_gen++;
+ x86c->inv_waitd_seq = 1;
+ }
+ seq = x86c->inv_waitd_seq++;
+ pseq->gen = x86c->inv_waitd_gen;
+ pseq->seq = seq;
+ if (emit_wait) {
+ x86_iommu->qi_ensure(unit, 1);
+ x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
+ }
+}
+
+/*
+ * To avoid missed wakeups, callers must increment the unit's waiters count
+ * before advancing the tail past the wait descriptor.
+ */
+void
+iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
+ gseq, bool nowait)
+{
+ struct x86_unit_common *x86c;
+
+ IOMMU_ASSERT_LOCKED(unit);
+ x86c = IOMMU2X86C(unit);
+
+ KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
+ while (!iommu_qi_seq_processed(unit, gseq)) {
+ if (cold || nowait) {
+ cpu_spinwait();
+ } else {
+ msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
+ "dmarse", hz);
+ }
+ }
+ x86c->inv_seq_waiters--;
+}
+
+/*
+ * The caller must not be using the entry's dmamap_link field.
+ */
+void
+iommu_qi_invalidate_locked(struct iommu_domain *domain,
+ struct iommu_map_entry *entry, bool emit_wait)
+{
+ struct iommu_unit *unit;
+ struct x86_unit_common *x86c;
+
+ unit = domain->iommu;
+ x86c = IOMMU2X86C(unit);
+ IOMMU_ASSERT_LOCKED(unit);
+
+ x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
+ entry->start, &entry->gseq, emit_wait);
+
+ /*
+ * To avoid a data race in dmar_qi_task(), the entry's gseq must be
+ * initialized before the entry is added to the TLB flush list, and the
+ * entry must be added to that list before the tail is advanced. More
+ * precisely, the tail must not be advanced past the wait descriptor
+ * that will generate the interrupt that schedules dmar_qi_task() for
+ * execution before the entry is added to the list. While an earlier
+ * call to dmar_qi_ensure() might have advanced the tail, it will not
+ * advance it past the wait descriptor.
+ *
+ * See the definition of struct dmar_unit for more information on
+ * synchronization.
+ */
+ entry->tlb_flush_next = NULL;
+ atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
+ tlb_flush_next, (uintptr_t)entry);
+ x86c->tlb_flush_tail = entry;
+
+ x86_iommu->qi_advance_tail(unit);
+}
+
+void
+iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, bool cansleep)
+{
+ struct iommu_unit *unit;
+ struct iommu_qi_genseq gseq;
+
+ unit = domain->iommu;
+ IOMMU_LOCK(unit);
+ x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
+
+ /*
+	 * To avoid a missed wakeup in the unit's QI task, the waiters
+	 * count must be incremented before the tail is advanced.
+ */
+ IOMMU2X86C(unit)->inv_seq_waiters++;
+
+ x86_iommu->qi_advance_tail(unit);
+ iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
+ IOMMU_UNLOCK(unit);
+}
+
+void
+iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
+{
+ struct x86_unit_common *x86c;
+ struct iommu_map_entry *entry, *head;
+
+ x86c = IOMMU2X86C(unit);
+ for (head = x86c->tlb_flush_head;; head = entry) {
+ entry = (struct iommu_map_entry *)
+ atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
+ if (entry == NULL ||
+ !iommu_qi_seq_processed(unit, &entry->gseq))
+ break;
+ x86c->tlb_flush_head = entry;
+ iommu_gas_free_entry(head);
+ if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+ iommu_gas_free_region(entry);
+ else
+ iommu_gas_free_space(entry);
+ }
+}
+
+void
+iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
+{
+ struct x86_unit_common *x86c;
+ u_int qi_sz;
+
+ x86c = IOMMU2X86C(unit);
+
+ x86c->tlb_flush_head = x86c->tlb_flush_tail =
+ iommu_gas_alloc_entry(NULL, 0);
+ TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
+ x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
+ taskqueue_thread_enqueue, &x86c->qi_taskqueue);
+ taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
+ "iommu%d qi taskq", unit->unit);
+
+ x86c->inv_waitd_gen = 0;
+ x86c->inv_waitd_seq = 1;
+
+ qi_sz = 3;
+ TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
+ if (qi_sz > x86c->qi_buf_maxsz)
+ qi_sz = x86c->qi_buf_maxsz;
+ x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
+ /* Reserve one descriptor to prevent wraparound. */
+ x86c->inv_queue_avail = x86c->inv_queue_size -
+ x86c->qi_cmd_sz;
+
+ /*
+	 * Reads of the invalidation queue by DMARs/AMD IOMMUs are
+	 * always coherent.
+ */
+ x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
+ M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
+ VM_MEMATTR_DEFAULT);
+ x86c->inv_waitd_seq_hw_phys = pmap_kextract(
+ (vm_offset_t)&x86c->inv_waitd_seq_hw);
+}
+
+void
+iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
+ struct iommu_unit *))
+{
+ struct x86_unit_common *x86c;
+ struct iommu_qi_genseq gseq;
+
+ x86c = IOMMU2X86C(unit);
+
+ taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
+ taskqueue_free(x86c->qi_taskqueue);
+ x86c->qi_taskqueue = NULL;
+
+ IOMMU_LOCK(unit);
+	/* quiesce */
+ x86_iommu->qi_ensure(unit, 1);
+ iommu_qi_emit_wait_seq(unit, &gseq, true);
+	/* See iommu_qi_invalidate_sync(). */
+ x86c->inv_seq_waiters++;
+ x86_iommu->qi_advance_tail(unit);
+ iommu_qi_wait_for_seq(unit, &gseq, false);
+	/* only after the quiesce, disable the queue */
+ disable_qi(unit);
+ KASSERT(x86c->inv_seq_waiters == 0,
+ ("iommu%d: waiters on disabled queue", unit->unit));
+ IOMMU_UNLOCK(unit);
+
+ kmem_free(x86c->inv_queue, x86c->inv_queue_size);
+ x86c->inv_queue = NULL;
+ x86c->inv_queue_size = 0;
+}
+
+int
+iommu_alloc_irq(struct iommu_unit *unit, int idx)
+{
+ device_t dev, pcib;
+ struct iommu_msi_data *dmd;
+ uint64_t msi_addr;
+ uint32_t msi_data;
+ int error;
+
+	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);
+
+ dev = unit->dev;
+ dmd = &IOMMU2X86C(unit)->intrs[idx];
+ pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
+ error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
+ if (error != 0) {
+ device_printf(dev, "cannot allocate %s interrupt, %d\n",
+ dmd->name, error);
+ goto err1;
+ }
+ error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
+ dmd->irq, 1);
+ if (error != 0) {
+ device_printf(dev, "cannot set %s interrupt resource, %d\n",
+ dmd->name, error);
+ goto err2;
+ }
+ dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+ &dmd->irq_rid, RF_ACTIVE);
+ if (dmd->irq_res == NULL) {
+ device_printf(dev,
+ "cannot allocate resource for %s interrupt\n", dmd->name);
+ error = ENXIO;
+ goto err3;
+ }
+ error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
+ dmd->handler, NULL, unit, &dmd->intr_handle);
+ if (error != 0) {
+ device_printf(dev, "cannot setup %s interrupt, %d\n",
+ dmd->name, error);
+ goto err4;
+ }
+ bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
+ error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
+ if (error != 0) {
+ device_printf(dev, "cannot map %s interrupt, %d\n",
+ dmd->name, error);
+ goto err5;
+ }
+
+ dmd->msi_data = msi_data;
+ dmd->msi_addr = msi_addr;
+
+ return (0);
+
+err5:
+ bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
+err4:
+ bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
+err3:
+ bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
+err2:
+ PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
+ dmd->irq = -1;
+err1:
+ return (error);
+}
+
+void
+iommu_release_intr(struct iommu_unit *unit, int idx)
+{
+ device_t dev;
+ struct iommu_msi_data *dmd;
+
+	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);
+
+ dmd = &IOMMU2X86C(unit)->intrs[idx];
+ if (dmd->handler == NULL || dmd->irq == -1)
+ return;
+ dev = unit->dev;
+
+ bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
+ bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
+ bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
+ PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
+ dev, dmd->irq);
+ dmd->irq = -1;
+}
+
+void
+iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
+{
+ bus_addr_t maxaddr;
+
+ maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
+ ctx->tag->common.impl = &bus_dma_iommu_impl;
+ ctx->tag->common.boundary = 0;
+ ctx->tag->common.lowaddr = maxaddr;
+ ctx->tag->common.highaddr = maxaddr;
+ ctx->tag->common.maxsize = maxaddr;
+ ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
+ ctx->tag->common.maxsegsz = maxaddr;
+ ctx->tag->ctx = ctx;
+ ctx->tag->owner = dev;
+}
+
+void
+iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
+{
+ if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+ iommu_gas_free_region(entry);
+ else
+ iommu_gas_free_space(entry);
+ if (free)
+ iommu_gas_free_entry(entry);
+ else
+ entry->flags = 0;
+}
+
+/*
+ * Index of the pte for the guest address base in the page table at
+ * the level lvl.
+ */
+int
+pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
+{
+
+ base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
+ IOMMU_NPTEPGSHIFT;
+ return (base & IOMMU_PTEMASK);
+}
+
+/*
+ * Returns the page index of the page table page in the page table
+ * object, which maps the given address base at the page table level
+ * lvl.
+ */
+vm_pindex_t
+pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
+{
+ vm_pindex_t idx, pidx;
+ int i;
+
+ KASSERT(lvl >= 0 && lvl < pglvl,
+ ("wrong lvl %d %d", pglvl, lvl));
+
+ for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
+ idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
+ pidx * IOMMU_NPTEPG + 1;
+ }
+ return (idx);
+}
+
+/*
+ * Calculate the total amount of page table pages needed to map the
+ * whole bus address space on the context with the selected agaw.
+ */
+vm_pindex_t
+pglvl_max_pages(int pglvl)
+{
+ vm_pindex_t res;
+ int i;
+
+ for (res = 0, i = pglvl; i > 0; i--) {
+ res *= IOMMU_NPTEPG;
+ res++;
+ }
+ return (res);
+}
+
+iommu_gaddr_t
+pglvl_page_size(int total_pglvl, int lvl)
+{
+ int rlvl;
+ static const iommu_gaddr_t pg_sz[] = {
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE,
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
+ (iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
+ };
+
+ KASSERT(lvl >= 0 && lvl < total_pglvl,
+ ("total %d lvl %d", total_pglvl, lvl));
+ rlvl = total_pglvl - lvl - 1;
+ KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
+ return (pg_sz[rlvl]);
+}
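A minimal userspace sketch of the page-table geometry computed by pglvl_pgtbl_pte_off() and pglvl_pgtbl_get_pindex() above, using local copies of the IOMMU_* constants; the demo_* names and the example address are invented for illustration.

/* Standalone sketch of the pglvl_* index math for a 3-level table. */
#include <stdint.h>
#include <stdio.h>

#define	DEMO_PAGE_SHIFT		12
#define	DEMO_NPTEPGSHIFT	9
#define	DEMO_NPTEPG		(1 << DEMO_NPTEPGSHIFT)
#define	DEMO_PTEMASK		(DEMO_NPTEPG - 1)

static int
demo_pte_off(int pglvl, uint64_t base, int lvl)
{

	base >>= DEMO_PAGE_SHIFT + (pglvl - lvl - 1) * DEMO_NPTEPGSHIFT;
	return (base & DEMO_PTEMASK);
}

static uint64_t
demo_get_pindex(int pglvl, uint64_t base, int lvl)
{
	uint64_t idx, pidx;
	int i;

	for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx)
		idx = demo_pte_off(pglvl, base, i) + pidx * DEMO_NPTEPG + 1;
	return (idx);
}

int
main(void)
{
	uint64_t base = 0x40201000;	/* pte offset 1 at every level */
	int lvl;

	/*
	 * For a 3-level table, the root page is pindex 0, the level-1
	 * page for this address is pindex 2, and the leaf page is
	 * pindex 1026 (= 1 + 2 * 512 + 1).
	 */
	for (lvl = 0; lvl < 3; lvl++)
		printf("lvl %d: pte_off %d pindex %llu\n", lvl,
		    demo_pte_off(3, base, lvl),
		    (unsigned long long)demo_get_pindex(3, base, lvl));
	return (0);
}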
diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h
new file mode 100644
index 000000000000..a1ed5c71c513
--- /dev/null
+++ b/sys/x86/iommu/x86_iommu.h
@@ -0,0 +1,196 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2013-2015, 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __X86_IOMMU_X86_IOMMU_H
+#define __X86_IOMMU_X86_IOMMU_H
+
+/* Intel and AMD both use 64-bit PTEs, so a single type serves both. */
+typedef struct iommu_pte {
+ uint64_t pte;
+} iommu_pte_t;
+
+#define IOMMU_PAGE_SIZE PAGE_SIZE
+#define IOMMU_PAGE_MASK (IOMMU_PAGE_SIZE - 1)
+#define IOMMU_PAGE_SHIFT PAGE_SHIFT
+#define IOMMU_NPTEPG (IOMMU_PAGE_SIZE / sizeof(iommu_pte_t))
+#define IOMMU_NPTEPGSHIFT 9
+#define IOMMU_PTEMASK (IOMMU_NPTEPG - 1)
+
+struct sf_buf;
+struct vm_object;
+
+struct vm_page *iommu_pgalloc(struct vm_object *obj, vm_pindex_t idx,
+ int flags);
+void iommu_pgfree(struct vm_object *obj, vm_pindex_t idx, int flags);
+void *iommu_map_pgtbl(struct vm_object *obj, vm_pindex_t idx, int flags,
+ struct sf_buf **sf);
+void iommu_unmap_pgtbl(struct sf_buf *sf);
+
+extern iommu_haddr_t iommu_high;
+extern int iommu_tbl_pagecnt;
+extern int iommu_qi_batch_coalesce;
+
+SYSCTL_DECL(_hw_iommu);
+
+struct x86_unit_common;
+
+struct x86_iommu {
+ struct x86_unit_common *(*get_x86_common)(struct
+ iommu_unit *iommu);
+ void (*unit_pre_instantiate_ctx)(struct iommu_unit *iommu);
+ void (*qi_ensure)(struct iommu_unit *unit, int descr_count);
+ void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq,
+ bool, bool, bool);
+ void (*qi_advance_tail)(struct iommu_unit *unit);
+ void (*qi_invalidate_emit)(struct iommu_domain *idomain,
+ iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *
+ pseq, bool emit_wait);
+ void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free,
+ bool cansleep);
+ void (*domain_unload)(struct iommu_domain *iodom,
+ struct iommu_map_entries_tailq *entries, bool cansleep);
+ struct iommu_ctx *(*get_ctx)(struct iommu_unit *iommu,
+ device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init);
+ void (*free_ctx_locked)(struct iommu_unit *iommu,
+ struct iommu_ctx *context);
+ void (*free_ctx)(struct iommu_ctx *context);
+ struct iommu_unit *(*find)(device_t dev, bool verbose);
+ int (*alloc_msi_intr)(device_t src, u_int *cookies, u_int count);
+ int (*map_msi_intr)(device_t src, u_int cpu, u_int vector,
+ u_int cookie, uint64_t *addr, uint32_t *data);
+ int (*unmap_msi_intr)(device_t src, u_int cookie);
+ int (*map_ioapic_intr)(u_int ioapic_id, u_int cpu, u_int vector,
+ bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
+ uint32_t *lo);
+ int (*unmap_ioapic_intr)(u_int ioapic_id, u_int *cookie);
+};
+void set_x86_iommu(struct x86_iommu *);
+struct x86_iommu *get_x86_iommu(void);
+
+struct iommu_msi_data {
+ int irq;
+ int irq_rid;
+ struct resource *irq_res;
+ void *intr_handle;
+ int (*handler)(void *);
+ int msi_data_reg;
+ int msi_addr_reg;
+ int msi_uaddr_reg;
+ uint64_t msi_addr;
+ uint32_t msi_data;
+ void (*enable_intr)(struct iommu_unit *);
+ void (*disable_intr)(struct iommu_unit *);
+ const char *name;
+};
+
+#define IOMMU_MAX_MSI 3
+
+struct x86_unit_common {
+ uint32_t qi_buf_maxsz;
+ uint32_t qi_cmd_sz;
+
+ char *inv_queue;
+ vm_size_t inv_queue_size;
+ uint32_t inv_queue_avail;
+ uint32_t inv_queue_tail;
+
+ /*
+	 * The hardware writes here on completion of wait descriptor
+	 * processing.  Intel writes 4 bytes, while AMD writes 8 bytes.
+	 * Because the value is little-endian and sequence numbers are
+	 * 4 bytes wide, the difference does not matter for us.
+ */
+ volatile uint64_t inv_waitd_seq_hw;
+
+ uint64_t inv_waitd_seq_hw_phys;
+ uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
+ u_int inv_waitd_gen; /* seq number generation AKA seq overflows */
+ u_int inv_seq_waiters; /* count of waiters for seq */
+ u_int inv_queue_full; /* informational counter */
+
+ /*
+ * Delayed freeing of map entries queue processing:
+ *
+ * tlb_flush_head and tlb_flush_tail are used to implement a FIFO
+ * queue that supports concurrent dequeues and enqueues. However,
+ * there can only be a single dequeuer (accessing tlb_flush_head) and
+ * a single enqueuer (accessing tlb_flush_tail) at a time. Since the
+ * unit's qi_task is the only dequeuer, it can access tlb_flush_head
+ * without any locking. In contrast, there may be multiple enqueuers,
+ * so the enqueuers acquire the iommu unit lock to serialize their
+ * accesses to tlb_flush_tail.
+ *
+ * In this FIFO queue implementation, the key to enabling concurrent
+ * dequeues and enqueues is that the dequeuer never needs to access
+ * tlb_flush_tail and the enqueuer never needs to access
+ * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail
+ * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
+ * update both. Instead, tlb_flush_head always points to a "zombie"
+ * struct, which previously held the last dequeued item. Thus, the
+ * zombie's next field actually points to the struct holding the first
+ * item in the queue. When an item is dequeued, the current zombie is
+ * finally freed, and the struct that held the just dequeued item
+ * becomes the new zombie. When the queue is empty, tlb_flush_tail
+ * also points to the zombie.
+ */
+ struct iommu_map_entry *tlb_flush_head;
+ struct iommu_map_entry *tlb_flush_tail;
+ struct task qi_task;
+ struct taskqueue *qi_taskqueue;
+
+ struct iommu_msi_data intrs[IOMMU_MAX_MSI];
+};
+
+void iommu_domain_free_entry(struct iommu_map_entry *entry, bool free);
+
+void iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *
+ pseq, bool emit_wait);
+void iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct
+ iommu_qi_genseq *gseq, bool nowait);
+void iommu_qi_drain_tlb_flush(struct iommu_unit *unit);
+void iommu_qi_invalidate_locked(struct iommu_domain *domain,
+ struct iommu_map_entry *entry, bool emit_wait);
+void iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
+ iommu_gaddr_t size, bool cansleep);
+void iommu_qi_common_init(struct iommu_unit *unit, task_fn_t taskfunc);
+void iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
+ struct iommu_unit *));
+
+int iommu_alloc_irq(struct iommu_unit *unit, int idx);
+void iommu_release_intr(struct iommu_unit *unit, int idx);
+
+void iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev);
+
+int pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl);
+vm_pindex_t pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl);
+vm_pindex_t pglvl_max_pages(int pglvl);
+iommu_gaddr_t pglvl_page_size(int total_pglvl, int lvl);
+
+#endif
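The tlb_flush_head/tlb_flush_tail comment in struct x86_unit_common above describes a FIFO whose head always points at a "zombie" node, so the single dequeuer and the (externally serialized) enqueuers never touch the same field. The standalone sketch below models just that structure, without the gseq check or the map-entry freeing done by iommu_qi_drain_tlb_flush(); all demo_* names are invented for illustration.

/* Userspace sketch of the zombie-headed TLB-flush FIFO. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_node {
	int val;
	_Atomic(struct demo_node *) next;
};

struct demo_fifo {
	struct demo_node *head;		/* zombie; touched by dequeuer only */
	struct demo_node *tail;		/* touched by enqueuers only */
};

static void
demo_init(struct demo_fifo *f)
{
	struct demo_node *z = calloc(1, sizeof(*z));

	atomic_init(&z->next, NULL);
	f->head = f->tail = z;		/* empty queue: head == tail == zombie */
}

static void
demo_enqueue(struct demo_fifo *f, int val)	/* callers serialized (unit lock) */
{
	struct demo_node *n = calloc(1, sizeof(*n));

	n->val = val;
	atomic_init(&n->next, NULL);
	/* Publish the node only after it is fully initialized. */
	atomic_store_explicit(&f->tail->next, n, memory_order_release);
	f->tail = n;
}

static int
demo_dequeue(struct demo_fifo *f, int *valp)	/* single dequeuer, lockless */
{
	struct demo_node *n;

	n = atomic_load_explicit(&f->head->next, memory_order_acquire);
	if (n == NULL)
		return (0);
	*valp = n->val;
	free(f->head);		/* free the old zombie ... */
	f->head = n;		/* ... dequeued node becomes the new zombie */
	return (1);
}

int
main(void)
{
	struct demo_fifo f;
	int v;

	demo_init(&f);
	demo_enqueue(&f, 1);
	demo_enqueue(&f, 2);
	while (demo_dequeue(&f, &v))
		printf("%d\n", v);	/* prints 1 then 2 */
	return (0);
}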