Diffstat (limited to 'sys/x86/iommu')
-rw-r--r--   sys/x86/iommu/intel_ctx.c      | 100
-rw-r--r--   sys/x86/iommu/intel_dmar.h     | 116
-rw-r--r--   sys/x86/iommu/intel_drv.c      | 236
-rw-r--r--   sys/x86/iommu/intel_fault.c    |  17
-rw-r--r--   sys/x86/iommu/intel_idpgtbl.c  | 135
-rw-r--r--   sys/x86/iommu/intel_intrmap.c  |  27
-rw-r--r--   sys/x86/iommu/intel_qi.c       | 365
-rw-r--r--   sys/x86/iommu/intel_quirks.c   |   8
-rw-r--r--   sys/x86/iommu/intel_reg.h      |  15
-rw-r--r--   sys/x86/iommu/intel_utils.c    | 157
-rw-r--r--   sys/x86/iommu/iommu_utils.c    | 751
-rw-r--r--   sys/x86/iommu/x86_iommu.h      | 196
12 files changed, 1314 insertions(+), 809 deletions(-)
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index 49c87cf0b39f..5047acd283e9 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -65,6 +65,7 @@ #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context"); @@ -74,6 +75,9 @@ static void dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain); static void dmar_domain_destroy(struct dmar_domain *domain); +static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); +static void dmar_free_ctx(struct dmar_ctx *ctx); + static void dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) { @@ -84,7 +88,7 @@ dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) /* * Allocated context page must be linked. */ - ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC); + ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC); if (ctxm != NULL) return; @@ -95,14 +99,14 @@ dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) * threads are equal. */ TD_PREP_PINNED_ASSERT; - ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO | + ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO | IOMMU_PGF_WAITOK); - re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf); + re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf); re += bus; dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK & VM_PAGE_TO_PHYS(ctxm))); dmar_flush_root_to_ram(dmar, re); - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); TD_PINNED_ASSERT; } @@ -114,32 +118,13 @@ dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp) dmar = CTX2DMAR(ctx); - ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid), + ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid), IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp); ctxp += ctx->context.rid & 0xff; return (ctxp); } static void -device_tag_init(struct dmar_ctx *ctx, device_t dev) -{ - struct dmar_domain *domain; - bus_addr_t maxaddr; - - domain = CTX2DOM(ctx); - maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR); - ctx->context.tag->common.impl = &bus_dma_iommu_impl; - ctx->context.tag->common.boundary = 0; - ctx->context.tag->common.lowaddr = maxaddr; - ctx->context.tag->common.highaddr = maxaddr; - ctx->context.tag->common.maxsize = maxaddr; - ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED; - ctx->context.tag->common.maxsegsz = maxaddr; - ctx->context.tag->ctx = CTX2IOCTX(ctx); - ctx->context.tag->owner = dev; -} - -static void ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain, vm_page_t ctx_root) { @@ -186,7 +171,7 @@ ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move, ("ctx %p non-null pgtbl_obj", ctx)); ctx_root = NULL; } else { - ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, + ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_NOALLOC); } @@ -272,7 +257,7 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus, "region (%jx, %jx) corrected\n", domain->iodom.iommu->unit, start, end); } - entry->end += DMAR_PAGE_SIZE * 0x20; + entry->end += IOMMU_PAGE_SIZE * 0x20; } size = OFF_TO_IDX(entry->end - entry->start); ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK); @@ -419,7 +404,7 @@ dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped) } domain->iodom.flags |= IOMMU_DOMAIN_IDMAP; } else { - error = 
domain_alloc_pgtbl(domain); + error = dmar_domain_alloc_pgtbl(domain); if (error != 0) goto fail; /* Disable local apic region access */ @@ -505,7 +490,7 @@ dmar_domain_destroy(struct dmar_domain *domain) if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) { if (domain->pgtbl_obj != NULL) DMAR_DOMAIN_PGLOCK(domain); - domain_free_pgtbl(domain); + dmar_domain_free_pgtbl(domain); } iommu_domain_fini(iodom); dmar = DOM2DMAR(domain); @@ -582,7 +567,7 @@ dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid, ctx = ctx1; dmar_ctx_link(ctx); ctx->context.tag->owner = dev; - device_tag_init(ctx, dev); + iommu_device_tag_init(CTX2IOCTX(ctx), dev); /* * This is the first activated context for the @@ -601,9 +586,9 @@ dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid, func, rid, domain->domain, domain->mgaw, domain->agaw, id_mapped ? "id" : "re"); } - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); } else { - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); dmar_domain_destroy(domain1); /* Nothing needs to be done to destroy ctx1. */ free(ctx1, M_DMAR_CTX); @@ -703,7 +688,7 @@ dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx) ctx->context.domain = &domain->iodom; dmar_ctx_link(ctx); ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100); - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); error = dmar_flush_for_ctx_entry(dmar, true); /* If flush failed, rolling back would not work as well. */ printf("dmar%d rid %x domain %d->%d %s-mapped\n", @@ -744,7 +729,7 @@ dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain) dmar_domain_destroy(domain); } -void +static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) { struct sf_buf *sf; @@ -787,7 +772,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) if (ctx->refs > 1) { ctx->refs--; DMAR_UNLOCK(dmar); - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); TD_PINNED_ASSERT; return; } @@ -809,7 +794,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) else dmar_inv_iotlb_glob(dmar); } - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); domain = CTX2DOM(ctx); dmar_ctx_unlink(ctx); free(ctx->context.tag, M_DMAR_CTX); @@ -818,7 +803,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) TD_PINNED_ASSERT; } -void +static void dmar_free_ctx(struct dmar_ctx *ctx) { struct dmar_unit *dmar; @@ -848,25 +833,12 @@ dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid) return (NULL); } -void -dmar_domain_free_entry(struct iommu_map_entry *entry, bool free) -{ - if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) - iommu_gas_free_region(entry); - else - iommu_gas_free_space(entry); - if (free) - iommu_gas_free_entry(entry); - else - entry->flags = 0; -} - /* * If the given value for "free" is true, then the caller must not be using * the entry's dmamap_link field. 
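Several call sites above switch from the deleted dmar_domain_free_entry() to a shared iommu_domain_free_entry(). The shared helper is not part of this hunk; presumably it keeps the deleted function's behavior, reconstructed here for reference:

/*
 * Body of the deleted dmar_domain_free_entry(), reproduced from the hunk
 * above.  Assumption: the shared iommu_domain_free_entry() that replaces
 * it behaves the same way.
 */
static void
reference_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
    if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
        iommu_gas_free_region(entry);   /* RMRR entries are fixed regions */
    else
        iommu_gas_free_space(entry);    /* ordinary GAS allocations */
    if (free)
        iommu_gas_free_entry(entry);    /* release the entry itself */
    else
        entry->flags = 0;               /* keep the entry for reuse */
}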
*/ void -iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, +dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free, bool cansleep) { struct dmar_domain *domain; @@ -883,17 +855,18 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, if (unit->qi_enabled) { if (free) { DMAR_LOCK(unit); - dmar_qi_invalidate_locked(domain, entry, true); + iommu_qi_invalidate_locked(&domain->iodom, entry, + true); DMAR_UNLOCK(unit); } else { - dmar_qi_invalidate_sync(domain, entry->start, + iommu_qi_invalidate_sync(&domain->iodom, entry->start, entry->end - entry->start, cansleep); - dmar_domain_free_entry(entry, false); + iommu_domain_free_entry(entry, false); } } else { domain_flush_iotlb_sync(domain, entry->start, entry->end - entry->start); - dmar_domain_free_entry(entry, free); + iommu_domain_free_entry(entry, free); } } @@ -904,11 +877,11 @@ dmar_domain_unload_emit_wait(struct dmar_domain *domain, if (TAILQ_NEXT(entry, dmamap_link) == NULL) return (true); - return (domain->batch_no++ % dmar_batch_coalesce == 0); + return (domain->batch_no++ % iommu_qi_batch_coalesce == 0); } void -iommu_domain_unload(struct iommu_domain *iodom, +dmar_domain_unload(struct iommu_domain *iodom, struct iommu_map_entries_tailq *entries, bool cansleep) { struct dmar_domain *domain; @@ -929,7 +902,7 @@ iommu_domain_unload(struct iommu_domain *iodom, domain_flush_iotlb_sync(domain, entry->start, entry->end - entry->start); TAILQ_REMOVE(entries, entry, dmamap_link); - dmar_domain_free_entry(entry, true); + iommu_domain_free_entry(entry, true); } } if (TAILQ_EMPTY(entries)) @@ -939,44 +912,41 @@ iommu_domain_unload(struct iommu_domain *iodom, DMAR_LOCK(unit); while ((entry = TAILQ_FIRST(entries)) != NULL) { TAILQ_REMOVE(entries, entry, dmamap_link); - dmar_qi_invalidate_locked(domain, entry, + iommu_qi_invalidate_locked(&domain->iodom, entry, dmar_domain_unload_emit_wait(domain, entry)); } DMAR_UNLOCK(unit); } struct iommu_ctx * -iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, +dmar_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) { struct dmar_unit *dmar; struct dmar_ctx *ret; dmar = IOMMU2DMAR(iommu); - ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init); - return (CTX2IOCTX(ret)); } void -iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context) +dmar_free_ctx_locked_method(struct iommu_unit *iommu, + struct iommu_ctx *context) { struct dmar_unit *dmar; struct dmar_ctx *ctx; dmar = IOMMU2DMAR(iommu); ctx = IOCTX2CTX(context); - dmar_free_ctx_locked(dmar, ctx); } void -iommu_free_ctx(struct iommu_ctx *context) +dmar_free_ctx_method(struct iommu_ctx *context) { struct dmar_ctx *ctx; ctx = IOCTX2CTX(context); - dmar_free_ctx(ctx); } diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h index e20144094c80..188e40dec36c 100644 --- a/sys/x86/iommu/intel_dmar.h +++ b/sys/x86/iommu/intel_dmar.h @@ -87,15 +87,15 @@ struct dmar_ctx { #define DMAR_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock) #define DMAR_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->iodom.lock, MA_OWNED) -#define DMAR2IOMMU(dmar) &((dmar)->iommu) +#define DMAR2IOMMU(dmar) (&((dmar)->iommu)) #define IOMMU2DMAR(dmar) \ __containerof((dmar), struct dmar_unit, iommu) -#define DOM2IODOM(domain) &((domain)->iodom) +#define DOM2IODOM(domain) (&((domain)->iodom)) #define IODOM2DOM(domain) \ __containerof((domain), struct dmar_domain, iodom) -#define CTX2IOCTX(ctx) &((ctx)->context) +#define CTX2IOCTX(ctx) 
(&((ctx)->context)) #define IOCTX2CTX(ctx) \ __containerof((ctx), struct dmar_ctx, context) @@ -103,27 +103,13 @@ struct dmar_ctx { #define CTX2DMAR(ctx) (CTX2DOM(ctx)->dmar) #define DOM2DMAR(domain) ((domain)->dmar) -struct dmar_msi_data { - int irq; - int irq_rid; - struct resource *irq_res; - void *intr_handle; - int (*handler)(void *); - int msi_data_reg; - int msi_addr_reg; - int msi_uaddr_reg; - void (*enable_intr)(struct dmar_unit *); - void (*disable_intr)(struct dmar_unit *); - const char *name; -}; - #define DMAR_INTR_FAULT 0 #define DMAR_INTR_QI 1 #define DMAR_INTR_TOTAL 2 struct dmar_unit { struct iommu_unit iommu; - device_t dev; + struct x86_unit_common x86c; uint16_t segment; uint64_t base; @@ -131,8 +117,6 @@ struct dmar_unit { int reg_rid; struct resource *regs; - struct dmar_msi_data intrs[DMAR_INTR_TOTAL]; - /* Hardware registers cache */ uint32_t hw_ver; uint64_t hw_cap; @@ -156,17 +140,6 @@ struct dmar_unit { /* QI */ int qi_enabled; - char *inv_queue; - vm_size_t inv_queue_size; - uint32_t inv_queue_avail; - uint32_t inv_queue_tail; - volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait - descr completion */ - uint64_t inv_waitd_seq_hw_phys; - uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ - u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ - u_int inv_seq_waiters; /* count of waiters for seq */ - u_int inv_queue_full; /* informational counter */ /* IR */ int ir_enabled; @@ -174,41 +147,11 @@ struct dmar_unit { dmar_irte_t *irt; u_int irte_cnt; vmem_t *irtids; - - /* - * Delayed freeing of map entries queue processing: - * - * tlb_flush_head and tlb_flush_tail are used to implement a FIFO - * queue that supports concurrent dequeues and enqueues. However, - * there can only be a single dequeuer (accessing tlb_flush_head) and - * a single enqueuer (accessing tlb_flush_tail) at a time. Since the - * unit's qi_task is the only dequeuer, it can access tlb_flush_head - * without any locking. In contrast, there may be multiple enqueuers, - * so the enqueuers acquire the iommu unit lock to serialize their - * accesses to tlb_flush_tail. - * - * In this FIFO queue implementation, the key to enabling concurrent - * dequeues and enqueues is that the dequeuer never needs to access - * tlb_flush_tail and the enqueuer never needs to access - * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail - * are never NULL, so neither a dequeuer nor an enqueuer ever needs to - * update both. Instead, tlb_flush_head always points to a "zombie" - * struct, which previously held the last dequeued item. Thus, the - * zombie's next field actually points to the struct holding the first - * item in the queue. When an item is dequeued, the current zombie is - * finally freed, and the struct that held the just dequeued item - * becomes the new zombie. When the queue is empty, tlb_flush_tail - * also points to the zombie. 
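The block comment deleted above documents the deferred map-entry freeing FIFO whose fields (tlb_flush_head, tlb_flush_tail, qi_task, qi_taskqueue) move from struct dmar_unit into the shared struct x86_unit_common. A minimal sketch of the "zombie node" technique it describes, with hypothetical names; the real dequeuer additionally checks each entry's completed invalidation sequence before taking it:

#include <sys/types.h>
#include <machine/atomic.h>

/*
 * Single-producer/single-consumer FIFO with a sentinel ("zombie") node:
 * head and tail are never NULL, the zombie's next field points at the
 * first real item, and dequeueing frees the previous zombie.  Enqueuers
 * are serialized by the unit lock; the lone dequeuer needs no lock.
 */
struct tlbq_node {
    struct tlbq_node *next;
};

struct tlbq {
    struct tlbq_node *head;     /* zombie, touched only by the dequeuer */
    struct tlbq_node *tail;     /* touched only by (locked) enqueuers */
};

static void
tlbq_init(struct tlbq *q, struct tlbq_node *sentinel)
{
    sentinel->next = NULL;
    q->head = q->tail = sentinel;
}

static void
tlbq_enqueue(struct tlbq *q, struct tlbq_node *n)   /* unit lock held */
{
    n->next = NULL;
    /* Publish the node before making it reachable from the old tail. */
    atomic_store_rel_ptr((uintptr_t *)&q->tail->next, (uintptr_t)n);
    q->tail = n;
}

static struct tlbq_node *
tlbq_dequeue(struct tlbq *q)                        /* single dequeuer */
{
    struct tlbq_node *zombie, *first;

    zombie = q->head;
    first = (struct tlbq_node *)atomic_load_acq_ptr(
        (uintptr_t *)&zombie->next);
    if (first == NULL)
        return (NULL);          /* only the zombie: queue is empty */
    q->head = first;            /* "first" becomes the new zombie */
    /* The old zombie may now be freed by the dequeuer. */
    return (first);
}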
- */ - struct iommu_map_entry *tlb_flush_head; - struct iommu_map_entry *tlb_flush_tail; - struct task qi_task; - struct taskqueue *qi_taskqueue; }; -#define DMAR_LOCK(dmar) mtx_lock(&(dmar)->iommu.lock) -#define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->iommu.lock) -#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->iommu.lock, MA_OWNED) +#define DMAR_LOCK(dmar) mtx_lock(&DMAR2IOMMU(dmar)->lock) +#define DMAR_UNLOCK(dmar) mtx_unlock(&DMAR2IOMMU(dmar)->lock) +#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&DMAR2IOMMU(dmar)->lock, MA_OWNED) #define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock) @@ -223,6 +166,8 @@ struct dmar_unit { #define DMAR_BARRIER_RMRR 0 #define DMAR_BARRIER_USEQ 1 +SYSCTL_DECL(_hw_iommu_dmar); + struct dmar_unit *dmar_find(device_t dev, bool verbose); struct dmar_unit *dmar_find_hpet(device_t dev, uint16_t *rid); struct dmar_unit *dmar_find_ioapic(u_int apic_id, uint16_t *rid); @@ -232,22 +177,15 @@ bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl); int domain_set_agaw(struct dmar_domain *domain, int mgaw); int dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr, bool allow_less); -vm_pindex_t pglvl_max_pages(int pglvl); int domain_is_sp_lvl(struct dmar_domain *domain, int lvl); -iommu_gaddr_t pglvl_page_size(int total_pglvl, int lvl); iommu_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl); int calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size, iommu_gaddr_t *isizep); -struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags); -void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags); -void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, - struct sf_buf **sf); -void dmar_unmap_pgtbl(struct sf_buf *sf); int dmar_load_root_entry_ptr(struct dmar_unit *unit); int dmar_inv_ctx_glob(struct dmar_unit *unit); int dmar_inv_iotlb_glob(struct dmar_unit *unit); int dmar_flush_write_bufs(struct dmar_unit *unit); -void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst); +void dmar_flush_pte_to_ram(struct dmar_unit *unit, iommu_pte_t *dst); void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst); void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst); int dmar_disable_protected_regions(struct dmar_unit *unit); @@ -262,14 +200,14 @@ uint64_t dmar_get_timeout(void); void dmar_update_timeout(uint64_t newval); int dmar_fault_intr(void *arg); -void dmar_enable_fault_intr(struct dmar_unit *unit); -void dmar_disable_fault_intr(struct dmar_unit *unit); +void dmar_enable_fault_intr(struct iommu_unit *unit); +void dmar_disable_fault_intr(struct iommu_unit *unit); int dmar_init_fault_log(struct dmar_unit *unit); void dmar_fini_fault_log(struct dmar_unit *unit); int dmar_qi_intr(void *arg); -void dmar_enable_qi_intr(struct dmar_unit *unit); -void dmar_disable_qi_intr(struct dmar_unit *unit); +void dmar_enable_qi_intr(struct iommu_unit *unit); +void dmar_disable_qi_intr(struct iommu_unit *unit); int dmar_init_qi(struct dmar_unit *unit); void dmar_fini_qi(struct dmar_unit *unit); void dmar_qi_invalidate_locked(struct dmar_domain *domain, @@ -286,8 +224,8 @@ vm_object_t domain_get_idmap_pgtbl(struct dmar_domain *domain, void put_idmap_pgtbl(vm_object_t obj); void domain_flush_iotlb_sync(struct dmar_domain *domain, iommu_gaddr_t base, iommu_gaddr_t size); -int domain_alloc_pgtbl(struct dmar_domain *domain); -void domain_free_pgtbl(struct dmar_domain *domain); +int 
dmar_domain_alloc_pgtbl(struct dmar_domain *domain); +void dmar_domain_free_pgtbl(struct dmar_domain *domain); extern const struct iommu_domain_map_ops dmar_domain_map_ops; int dmar_dev_depth(device_t child); @@ -299,10 +237,16 @@ struct dmar_ctx *dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, bool id_mapped, bool rmrr_init); int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx); -void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); -void dmar_free_ctx(struct dmar_ctx *ctx); +void dmar_free_ctx_locked_method(struct iommu_unit *dmar, + struct iommu_ctx *ctx); +void dmar_free_ctx_method(struct iommu_ctx *ctx); struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid); -void dmar_domain_free_entry(struct iommu_map_entry *entry, bool free); +struct iommu_ctx *dmar_get_ctx(struct iommu_unit *iommu, device_t dev, + uint16_t rid, bool id_mapped, bool rmrr_init); +void dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free, + bool cansleep); +void dmar_domain_unload(struct iommu_domain *iodom, + struct iommu_map_entries_tailq *entries, bool cansleep); void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, @@ -314,11 +258,15 @@ void dmar_quirks_pre_use(struct iommu_unit *dmar); int dmar_init_irt(struct dmar_unit *unit); void dmar_fini_irt(struct dmar_unit *unit); +int dmar_alloc_msi_intr(device_t src, u_int *cookies, u_int count); +int dmar_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, + uint64_t *addr, uint32_t *data); +int dmar_unmap_msi_intr(device_t src, u_int cookie); +int dmar_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, + bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo); +int dmar_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie); -extern iommu_haddr_t dmar_high; extern int haw; -extern int dmar_tbl_pagecnt; -extern int dmar_batch_coalesce; extern int dmar_rmrr_enable; static inline uint32_t diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c index 7346162d1502..05fb49538add 100644 --- a/sys/x86/iommu/intel_drv.c +++ b/sys/x86/iommu/intel_drv.c @@ -64,9 +64,12 @@ #include <dev/pci/pcivar.h> #include <machine/bus.h> #include <machine/pci_cfgreg.h> +#include <machine/md_var.h> +#include <machine/cputypes.h> #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> #ifdef DEV_APIC @@ -179,9 +182,9 @@ dmar_identify(driver_t *driver, device_t parent) return; haw = dmartbl->Width + 1; if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR) - dmar_high = BUS_SPACE_MAXADDR; + iommu_high = BUS_SPACE_MAXADDR; else - dmar_high = 1ULL << (haw + 1); + iommu_high = 1ULL << (haw + 1); if (bootverbose) { printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width, (unsigned)dmartbl->Flags, @@ -228,22 +231,6 @@ dmar_probe(device_t dev) } static void -dmar_release_intr(device_t dev, struct dmar_unit *unit, int idx) -{ - struct dmar_msi_data *dmd; - - dmd = &unit->intrs[idx]; - if (dmd->irq == -1) - return; - bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); - bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); - bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); - PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)), - dev, dmd->irq); - dmd->irq = -1; -} - -static void 
dmar_release_resources(device_t dev, struct dmar_unit *unit) { int i; @@ -253,7 +240,7 @@ dmar_release_resources(device_t dev, struct dmar_unit *unit) dmar_fini_qi(unit); dmar_fini_fault_log(unit); for (i = 0; i < DMAR_INTR_TOTAL; i++) - dmar_release_intr(dev, unit, i); + iommu_release_intr(DMAR2IOMMU(unit), i); if (unit->regs != NULL) { bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); @@ -271,84 +258,19 @@ dmar_release_resources(device_t dev, struct dmar_unit *unit) } } -static int -dmar_alloc_irq(device_t dev, struct dmar_unit *unit, int idx) -{ - device_t pcib; - struct dmar_msi_data *dmd; - uint64_t msi_addr; - uint32_t msi_data; - int error; - - dmd = &unit->intrs[idx]; - pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */ - error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq); - if (error != 0) { - device_printf(dev, "cannot allocate %s interrupt, %d\n", - dmd->name, error); - goto err1; - } - error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid, - dmd->irq, 1); - if (error != 0) { - device_printf(dev, "cannot set %s interrupt resource, %d\n", - dmd->name, error); - goto err2; - } - dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, - &dmd->irq_rid, RF_ACTIVE); - if (dmd->irq_res == NULL) { - device_printf(dev, - "cannot allocate resource for %s interrupt\n", dmd->name); - error = ENXIO; - goto err3; - } - error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC, - dmd->handler, NULL, unit, &dmd->intr_handle); - if (error != 0) { - device_printf(dev, "cannot setup %s interrupt, %d\n", - dmd->name, error); - goto err4; - } - bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name); - error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data); - if (error != 0) { - device_printf(dev, "cannot map %s interrupt, %d\n", - dmd->name, error); - goto err5; - } - dmar_write4(unit, dmd->msi_data_reg, msi_data); - dmar_write4(unit, dmd->msi_addr_reg, msi_addr); - /* Only for xAPIC mode */ - dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); - return (0); - -err5: - bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); -err4: - bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); -err3: - bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); -err2: - PCIB_RELEASE_MSIX(pcib, dev, dmd->irq); - dmd->irq = -1; -err1: - return (error); -} - #ifdef DEV_APIC static int dmar_remap_intr(device_t dev, device_t child, u_int irq) { struct dmar_unit *unit; - struct dmar_msi_data *dmd; + struct iommu_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int i, error; unit = device_get_softc(dev); for (i = 0; i < DMAR_INTR_TOTAL; i++) { - dmd = &unit->intrs[i]; + dmd = &unit->x86c.intrs[i]; if (irq == dmd->irq) { error = PCIB_MAP_MSI(device_get_parent( device_get_parent(dev)), @@ -356,11 +278,14 @@ dmar_remap_intr(device_t dev, device_t child, u_int irq) if (error != 0) return (error); DMAR_LOCK(unit); - (dmd->disable_intr)(unit); - dmar_write4(unit, dmd->msi_data_reg, msi_data); - dmar_write4(unit, dmd->msi_addr_reg, msi_addr); - dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); - (dmd->enable_intr)(unit); + dmd->msi_data = msi_data; + dmd->msi_addr = msi_addr; + (dmd->disable_intr)(DMAR2IOMMU(unit)); + dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data); + dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr); + dmar_write4(unit, dmd->msi_uaddr_reg, + dmd->msi_addr >> 32); + (dmd->enable_intr)(DMAR2IOMMU(unit)); DMAR_UNLOCK(unit); return (0); } @@ -404,12 +329,12 @@ dmar_attach(device_t dev) { struct dmar_unit *unit; 
ACPI_DMAR_HARDWARE_UNIT *dmaru; + struct iommu_msi_data *dmd; uint64_t timeout; int disable_pmr; int i, error; unit = device_get_softc(dev); - unit->dev = dev; unit->iommu.unit = device_get_unit(dev); unit->iommu.dev = dev; dmaru = dmar_find_by_index(unit->iommu.unit); @@ -422,6 +347,7 @@ dmar_attach(device_t dev) &unit->reg_rid, RF_ACTIVE); if (unit->regs == NULL) { device_printf(dev, "cannot allocate register window\n"); + dmar_devs[unit->iommu.unit] = NULL; return (ENOMEM); } unit->hw_ver = dmar_read4(unit, DMAR_VER_REG); @@ -436,35 +362,47 @@ dmar_attach(device_t dev) dmar_update_timeout(timeout); for (i = 0; i < DMAR_INTR_TOTAL; i++) - unit->intrs[i].irq = -1; - - unit->intrs[DMAR_INTR_FAULT].name = "fault"; - unit->intrs[DMAR_INTR_FAULT].irq_rid = DMAR_FAULT_IRQ_RID; - unit->intrs[DMAR_INTR_FAULT].handler = dmar_fault_intr; - unit->intrs[DMAR_INTR_FAULT].msi_data_reg = DMAR_FEDATA_REG; - unit->intrs[DMAR_INTR_FAULT].msi_addr_reg = DMAR_FEADDR_REG; - unit->intrs[DMAR_INTR_FAULT].msi_uaddr_reg = DMAR_FEUADDR_REG; - unit->intrs[DMAR_INTR_FAULT].enable_intr = dmar_enable_fault_intr; - unit->intrs[DMAR_INTR_FAULT].disable_intr = dmar_disable_fault_intr; - error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT); + unit->x86c.intrs[i].irq = -1; + + dmd = &unit->x86c.intrs[DMAR_INTR_FAULT]; + dmd->name = "fault"; + dmd->irq_rid = DMAR_FAULT_IRQ_RID; + dmd->handler = dmar_fault_intr; + dmd->msi_data_reg = DMAR_FEDATA_REG; + dmd->msi_addr_reg = DMAR_FEADDR_REG; + dmd->msi_uaddr_reg = DMAR_FEUADDR_REG; + dmd->enable_intr = dmar_enable_fault_intr; + dmd->disable_intr = dmar_disable_fault_intr; + error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_FAULT); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } + dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data); + dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr); + dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32); + if (DMAR_HAS_QI(unit)) { - unit->intrs[DMAR_INTR_QI].name = "qi"; - unit->intrs[DMAR_INTR_QI].irq_rid = DMAR_QI_IRQ_RID; - unit->intrs[DMAR_INTR_QI].handler = dmar_qi_intr; - unit->intrs[DMAR_INTR_QI].msi_data_reg = DMAR_IEDATA_REG; - unit->intrs[DMAR_INTR_QI].msi_addr_reg = DMAR_IEADDR_REG; - unit->intrs[DMAR_INTR_QI].msi_uaddr_reg = DMAR_IEUADDR_REG; - unit->intrs[DMAR_INTR_QI].enable_intr = dmar_enable_qi_intr; - unit->intrs[DMAR_INTR_QI].disable_intr = dmar_disable_qi_intr; - error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI); + dmd = &unit->x86c.intrs[DMAR_INTR_QI]; + dmd->name = "qi"; + dmd->irq_rid = DMAR_QI_IRQ_RID; + dmd->handler = dmar_qi_intr; + dmd->msi_data_reg = DMAR_IEDATA_REG; + dmd->msi_addr_reg = DMAR_IEADDR_REG; + dmd->msi_uaddr_reg = DMAR_IEUADDR_REG; + dmd->enable_intr = dmar_enable_qi_intr; + dmd->disable_intr = dmar_disable_qi_intr; + error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_QI); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } + + dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data); + dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr); + dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32); } mtx_init(&unit->iommu.lock, "dmarhw", NULL, MTX_DEF); @@ -490,18 +428,20 @@ dmar_attach(device_t dev) * address translation after the required invalidations are * done. 
*/ - dmar_pgalloc(unit->ctx_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO); + iommu_pgalloc(unit->ctx_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO); DMAR_LOCK(unit); error = dmar_load_root_entry_ptr(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } error = dmar_inv_ctx_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) { @@ -509,6 +449,7 @@ dmar_attach(device_t dev) if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } } @@ -517,16 +458,19 @@ dmar_attach(device_t dev) error = dmar_init_fault_log(unit); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } error = dmar_init_qi(unit); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } error = dmar_init_irt(unit); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } @@ -542,6 +486,7 @@ dmar_attach(device_t dev) error = iommu_init_busdma(&unit->iommu); if (error != 0) { dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } @@ -551,6 +496,7 @@ dmar_attach(device_t dev) if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); + dmar_devs[unit->iommu.unit] = NULL; return (error); } DMAR_UNLOCK(unit); @@ -1289,20 +1235,20 @@ dmar_print_one(int idx, bool show_domains, bool show_mappings) db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) " "size 0x%jx\n" " head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n" - " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n", - (uintmax_t)unit->inv_queue, + " hw compl 0x%jx@%p/phys@%jx next seq 0x%x gen 0x%x\n", + (uintmax_t)unit->x86c.inv_queue, (uintmax_t)dmar_read8(unit, DMAR_IQA_REG), - (uintmax_t)unit->inv_queue_size, + (uintmax_t)unit->x86c.inv_queue_size, dmar_read4(unit, DMAR_IQH_REG), dmar_read4(unit, DMAR_IQT_REG), - unit->inv_queue_avail, + unit->x86c.inv_queue_avail, dmar_read4(unit, DMAR_ICS_REG), dmar_read4(unit, DMAR_IECTL_REG), - unit->inv_waitd_seq_hw, - &unit->inv_waitd_seq_hw, - (uintmax_t)unit->inv_waitd_seq_hw_phys, - unit->inv_waitd_seq, - unit->inv_waitd_gen); + (uintmax_t)unit->x86c.inv_waitd_seq_hw, + &unit->x86c.inv_waitd_seq_hw, + (uintmax_t)unit->x86c.inv_waitd_seq_hw_phys, + unit->x86c.inv_waitd_seq, + unit->x86c.inv_waitd_gen); } else { db_printf("qi is disabled\n"); } @@ -1346,12 +1292,52 @@ DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars) } #endif -struct iommu_unit * -iommu_find(device_t dev, bool verbose) +static struct iommu_unit * +dmar_find_method(device_t dev, bool verbose) { struct dmar_unit *dmar; dmar = dmar_find(dev, verbose); - return (&dmar->iommu); } + +static struct x86_unit_common * +dmar_get_x86_common(struct iommu_unit *unit) +{ + struct dmar_unit *dmar; + + dmar = IOMMU2DMAR(unit); + return (&dmar->x86c); +} + +static void +dmar_unit_pre_instantiate_ctx(struct iommu_unit *unit) +{ + dmar_quirks_pre_use(unit); + dmar_instantiate_rmrr_ctxs(unit); +} + +static struct x86_iommu dmar_x86_iommu = { + .get_x86_common = dmar_get_x86_common, + .unit_pre_instantiate_ctx = dmar_unit_pre_instantiate_ctx, + .domain_unload_entry = dmar_domain_unload_entry, + .domain_unload = dmar_domain_unload, + .get_ctx = dmar_get_ctx, + .free_ctx_locked = dmar_free_ctx_locked_method, + .free_ctx 
= dmar_free_ctx_method, + .find = dmar_find_method, + .alloc_msi_intr = dmar_alloc_msi_intr, + .map_msi_intr = dmar_map_msi_intr, + .unmap_msi_intr = dmar_unmap_msi_intr, + .map_ioapic_intr = dmar_map_ioapic_intr, + .unmap_ioapic_intr = dmar_unmap_ioapic_intr, +}; + +static void +x86_iommu_set_intel(void *arg __unused) +{ + if (cpu_vendor_id == CPU_VENDOR_INTEL) + set_x86_iommu(&dmar_x86_iommu); +} + +SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_intel, NULL); diff --git a/sys/x86/iommu/intel_fault.c b/sys/x86/iommu/intel_fault.c index e275304c8d51..1064165ea5d7 100644 --- a/sys/x86/iommu/intel_fault.c +++ b/sys/x86/iommu/intel_fault.c @@ -54,6 +54,7 @@ #include <x86/include/busdma_impl.h> #include <x86/iommu/intel_reg.h> #include <dev/iommu/busdma_iommu.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> /* @@ -126,7 +127,7 @@ dmar_fault_intr(void *arg) int fri, frir, faultp; bool enqueue; - unit = arg; + unit = IOMMU2DMAR((struct iommu_unit *)arg); enqueue = false; fsts = dmar_read4(unit, DMAR_FSTS_REG); dmar_fault_intr_clear(unit, fsts); @@ -275,9 +276,9 @@ dmar_init_fault_log(struct dmar_unit *unit) "dmar%d fault taskq", unit->iommu.unit); DMAR_LOCK(unit); - dmar_disable_fault_intr(unit); + dmar_disable_fault_intr(&unit->iommu); dmar_clear_faults(unit); - dmar_enable_fault_intr(unit); + dmar_enable_fault_intr(&unit->iommu); DMAR_UNLOCK(unit); return (0); @@ -291,7 +292,7 @@ dmar_fini_fault_log(struct dmar_unit *unit) return; DMAR_LOCK(unit); - dmar_disable_fault_intr(unit); + dmar_disable_fault_intr(&unit->iommu); DMAR_UNLOCK(unit); taskqueue_drain(unit->fault_taskqueue, &unit->fault_task); @@ -305,10 +306,12 @@ dmar_fini_fault_log(struct dmar_unit *unit) } void -dmar_enable_fault_intr(struct dmar_unit *unit) +dmar_enable_fault_intr(struct iommu_unit *iommu) { + struct dmar_unit *unit; uint32_t fectl; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); fectl = dmar_read4(unit, DMAR_FECTL_REG); fectl &= ~DMAR_FECTL_IM; @@ -316,10 +319,12 @@ dmar_enable_fault_intr(struct dmar_unit *unit) } void -dmar_disable_fault_intr(struct dmar_unit *unit) +dmar_disable_fault_intr(struct iommu_unit *iommu) { + struct dmar_unit *unit; uint32_t fectl; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); fectl = dmar_read4(unit, DMAR_FECTL_REG); dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM); diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c index 929f8656d1eb..fbc0e9e97b64 100644 --- a/sys/x86/iommu/intel_idpgtbl.c +++ b/sys/x86/iommu/intel_idpgtbl.c @@ -47,6 +47,7 @@ #include <sys/tree.h> #include <sys/uio.h> #include <sys/vmem.h> +#include <sys/vmmeter.h> #include <vm/vm.h> #include <vm/vm_extern.h> #include <vm/vm_kern.h> @@ -63,6 +64,7 @@ #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> static int domain_unmap_buf_locked(struct dmar_domain *domain, @@ -108,7 +110,7 @@ domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, iommu_gaddr_t addr) { vm_page_t m1; - dmar_pte_t *pte; + iommu_pte_t *pte; struct sf_buf *sf; iommu_gaddr_t f, pg_sz; vm_pindex_t base; @@ -117,28 +119,28 @@ domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj); if (addr >= tbl->maxaddr) return; - (void)dmar_pgalloc(tbl->pgtbl_obj, idx, IOMMU_PGF_OBJL | + (void)iommu_pgalloc(tbl->pgtbl_obj, idx, IOMMU_PGF_OBJL | IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO); - base = idx * 
DMAR_NPTEPG + 1; /* Index of the first child page of idx */ + base = idx * IOMMU_NPTEPG + 1; /* Index of the first child page of idx */ pg_sz = pglvl_page_size(tbl->pglvl, lvl); if (lvl != tbl->leaf) { - for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) + for (i = 0, f = addr; i < IOMMU_NPTEPG; i++, f += pg_sz) domain_idmap_nextlvl(tbl, lvl + 1, base + i, f); } VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); - pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, IOMMU_PGF_WAITOK, &sf); + pte = iommu_map_pgtbl(tbl->pgtbl_obj, idx, IOMMU_PGF_WAITOK, &sf); if (lvl == tbl->leaf) { - for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { + for (i = 0, f = addr; i < IOMMU_NPTEPG; i++, f += pg_sz) { if (f >= tbl->maxaddr) break; pte[i].pte = (DMAR_PTE_ADDR_MASK & f) | DMAR_PTE_R | DMAR_PTE_W; } } else { - for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { + for (i = 0, f = addr; i < IOMMU_NPTEPG; i++, f += pg_sz) { if (f >= tbl->maxaddr) break; - m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i, + m1 = iommu_pgalloc(tbl->pgtbl_obj, base + i, IOMMU_PGF_NOALLOC); KASSERT(m1 != NULL, ("lost page table page")); pte[i].pte = (DMAR_PTE_ADDR_MASK & @@ -146,7 +148,7 @@ domain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, } } /* domain_get_idmap_pgtbl flushes CPU cache if needed. */ - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); VM_OBJECT_WLOCK(tbl->pgtbl_obj); } @@ -300,7 +302,7 @@ put_idmap_pgtbl(vm_object_t obj) rmobj = tbl->pgtbl_obj; if (rmobj->ref_count == 1) { LIST_REMOVE(tbl, link); - atomic_subtract_int(&dmar_tbl_pagecnt, + atomic_subtract_int(&iommu_tbl_pagecnt, rmobj->resident_page_count); vm_object_deallocate(rmobj); free(tbl, M_DMAR_IDPGTBL); @@ -314,61 +316,27 @@ put_idmap_pgtbl(vm_object_t obj) * address. Support superpages. */ -/* - * Index of the pte for the guest address base in the page table at - * the level lvl. - */ -static int -domain_pgtbl_pte_off(struct dmar_domain *domain, iommu_gaddr_t base, int lvl) -{ - - base >>= DMAR_PAGE_SHIFT + (domain->pglvl - lvl - 1) * - DMAR_NPTEPGSHIFT; - return (base & DMAR_PTEMASK); -} - -/* - * Returns the page index of the page table page in the page table - * object, which maps the given address base at the page table level - * lvl. 
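The helper deleted above, domain_pgtbl_pte_off(), and its companion domain_pgtbl_get_pindex() (deleted just below) are replaced by the shared pglvl_pgtbl_pte_off()/pglvl_pgtbl_get_pindex(), which presumably keep the same arithmetic but are keyed off the level count instead of the domain. A sketch of the slot calculation with the constants spelled out (values assumed to match the IOMMU_* definitions used elsewhere in this diff):

#include <stdint.h>

#define SK_PAGE_SHIFT   12      /* assumed value of IOMMU_PAGE_SHIFT */
#define SK_NPTEPGSHIFT  9       /* log2(512 PTEs per page-table page) */
#define SK_PTEMASK      0x1ff

/*
 * 9-bit PTE slot for address "base" at page-table level "lvl",
 * where level 0 is the root of a pglvl-level table.
 */
static int
sketch_pte_off(int pglvl, uint64_t base, int lvl)
{
    return ((base >> (SK_PAGE_SHIFT +
        (pglvl - lvl - 1) * SK_NPTEPGSHIFT)) & SK_PTEMASK);
}

/*
 * Example: with pglvl = 4, address 0x40201000 resolves to slots
 * 0 / 1 / 1 / 1 from the root down to the leaf level.
 */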
- */ -static vm_pindex_t -domain_pgtbl_get_pindex(struct dmar_domain *domain, iommu_gaddr_t base, int lvl) -{ - vm_pindex_t idx, pidx; - int i; - - KASSERT(lvl >= 0 && lvl < domain->pglvl, - ("wrong lvl %p %d", domain, lvl)); - - for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) { - idx = domain_pgtbl_pte_off(domain, base, i) + - pidx * DMAR_NPTEPG + 1; - } - return (idx); -} - -static dmar_pte_t * +static iommu_pte_t * domain_pgtbl_map_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf) { vm_page_t m; struct sf_buf *sfp; - dmar_pte_t *pte, *ptep; + iommu_pte_t *pte, *ptep; vm_pindex_t idx, idx1; DMAR_DOMAIN_ASSERT_PGLOCKED(domain); KASSERT((flags & IOMMU_PGF_OBJL) != 0, ("lost PGF_OBJL")); - idx = domain_pgtbl_get_pindex(domain, base, lvl); + idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl); if (*sf != NULL && idx == *idxp) { - pte = (dmar_pte_t *)sf_buf_kva(*sf); + pte = (iommu_pte_t *)sf_buf_kva(*sf); } else { if (*sf != NULL) - dmar_unmap_pgtbl(*sf); + iommu_unmap_pgtbl(*sf); *idxp = idx; retry: - pte = dmar_map_pgtbl(domain->pgtbl_obj, idx, flags, sf); + pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf); if (pte == NULL) { KASSERT(lvl > 0, ("lost root page table page %p", domain)); @@ -377,7 +345,7 @@ retry: * it and create a pte in the preceeding page level * to reference the allocated page table page. */ - m = dmar_pgalloc(domain->pgtbl_obj, idx, flags | + m = iommu_pgalloc(domain->pgtbl_obj, idx, flags | IOMMU_PGF_ZERO); if (m == NULL) return (NULL); @@ -389,7 +357,7 @@ retry: * pte write and clean while the lock is * dropped. */ - m->ref_count++; + vm_page_wire(m); sfp = NULL; ptep = domain_pgtbl_map_pte(domain, base, lvl - 1, @@ -397,22 +365,22 @@ retry: if (ptep == NULL) { KASSERT(m->pindex != 0, ("loosing root page %p", domain)); - m->ref_count--; - dmar_pgfree(domain->pgtbl_obj, m->pindex, + vm_page_unwire_noq(m); + iommu_pgfree(domain->pgtbl_obj, m->pindex, flags); return (NULL); } dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W | VM_PAGE_TO_PHYS(m)); dmar_flush_pte_to_ram(domain->dmar, ptep); - sf_buf_page(sfp)->ref_count += 1; - m->ref_count--; - dmar_unmap_pgtbl(sfp); + vm_page_wire(sf_buf_page(sfp)); + vm_page_unwire_noq(m); + iommu_unmap_pgtbl(sfp); /* Only executed once. */ goto retry; } } - pte += domain_pgtbl_pte_off(domain, base, lvl); + pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl); return (pte); } @@ -420,7 +388,7 @@ static int domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags) { - dmar_pte_t *pte; + iommu_pte_t *pte; struct sf_buf *sf; iommu_gaddr_t pg_sz, base1; vm_pindex_t pi, c, idx, run_sz; @@ -437,7 +405,7 @@ domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, pi += run_sz) { for (lvl = 0, c = 0, superpage = false;; lvl++) { pg_sz = domain_page_size(domain, lvl); - run_sz = pg_sz >> DMAR_PAGE_SHIFT; + run_sz = pg_sz >> IOMMU_PAGE_SHIFT; if (lvl == domain->pglvl - 1) break; /* @@ -476,7 +444,7 @@ domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, KASSERT((flags & IOMMU_PGF_WAITOK) == 0, ("failed waitable pte alloc %p", domain)); if (sf != NULL) - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); domain_unmap_buf_locked(domain, base1, base - base1, flags); TD_PINNED_ASSERT; @@ -485,10 +453,10 @@ domain_map_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags | (superpage ? 
DMAR_PTE_SP : 0)); dmar_flush_pte_to_ram(domain->dmar, pte); - sf_buf_page(sf)->ref_count += 1; + vm_page_wire(sf_buf_page(sf)); } if (sf != NULL) - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); TD_PINNED_ASSERT; return (0); } @@ -510,12 +478,12 @@ domain_map_buf(struct iommu_domain *iodom, iommu_gaddr_t base, domain = IODOM2DOM(iodom); unit = domain->dmar; - KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0, + KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0, ("modifying idmap pagetable domain %p", domain)); - KASSERT((base & DMAR_PAGE_MASK) == 0, + KASSERT((base & IOMMU_PAGE_MASK) == 0, ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); - KASSERT((size & DMAR_PAGE_MASK) == 0, + KASSERT((size & IOMMU_PAGE_MASK) == 0, ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base, @@ -562,7 +530,7 @@ domain_map_buf(struct iommu_domain *iodom, iommu_gaddr_t base, } static void domain_unmap_clear_pte(struct dmar_domain *domain, - iommu_gaddr_t base, int lvl, int flags, dmar_pte_t *pte, + iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf, bool free_fs); static void @@ -570,7 +538,7 @@ domain_free_pgtbl_pde(struct dmar_domain *domain, iommu_gaddr_t base, int lvl, int flags) { struct sf_buf *sf; - dmar_pte_t *pde; + iommu_pte_t *pde; vm_pindex_t idx; sf = NULL; @@ -580,7 +548,7 @@ domain_free_pgtbl_pde(struct dmar_domain *domain, iommu_gaddr_t base, static void domain_unmap_clear_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl, - int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf) + int flags, iommu_pte_t *pte, struct sf_buf **sf, bool free_sf) { vm_page_t m; @@ -588,11 +556,10 @@ domain_unmap_clear_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl, dmar_flush_pte_to_ram(domain->dmar, pte); m = sf_buf_page(*sf); if (free_sf) { - dmar_unmap_pgtbl(*sf); + iommu_unmap_pgtbl(*sf); *sf = NULL; } - m->ref_count--; - if (m->ref_count != 0) + if (!vm_page_unwire_noq(m)) return; KASSERT(lvl != 0, ("lost reference (lvl) on root pg domain %p base %jx lvl %d", @@ -600,7 +567,7 @@ domain_unmap_clear_pte(struct dmar_domain *domain, iommu_gaddr_t base, int lvl, KASSERT(m->pindex != 0, ("lost reference (idx) on root pg domain %p base %jx lvl %d", domain, (uintmax_t)base, lvl)); - dmar_pgfree(domain->pgtbl_obj, m->pindex, flags); + iommu_pgfree(domain->pgtbl_obj, m->pindex, flags); domain_free_pgtbl_pde(domain, base, lvl - 1, flags); } @@ -611,7 +578,7 @@ static int domain_unmap_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, iommu_gaddr_t size, int flags) { - dmar_pte_t *pte; + iommu_pte_t *pte; struct sf_buf *sf; vm_pindex_t idx; iommu_gaddr_t pg_sz; @@ -623,10 +590,10 @@ domain_unmap_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0, ("modifying idmap pagetable domain %p", domain)); - KASSERT((base & DMAR_PAGE_MASK) == 0, + KASSERT((base & IOMMU_PAGE_MASK) == 0, ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); - KASSERT((size & DMAR_PAGE_MASK) == 0, + KASSERT((size & IOMMU_PAGE_MASK) == 0, ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, (uintmax_t)size)); KASSERT(base < (1ULL << domain->agaw), @@ -669,7 +636,7 @@ domain_unmap_buf_locked(struct dmar_domain *domain, iommu_gaddr_t base, (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); } if (sf != NULL) - dmar_unmap_pgtbl(sf); + iommu_unmap_pgtbl(sf); /* * See 11.1 Write Buffer 
Flushing for an explanation why RWBF * can be ignored there. @@ -695,7 +662,7 @@ domain_unmap_buf(struct iommu_domain *iodom, iommu_gaddr_t base, } int -domain_alloc_pgtbl(struct dmar_domain *domain) +dmar_domain_alloc_pgtbl(struct dmar_domain *domain) { vm_page_t m; @@ -705,10 +672,10 @@ domain_alloc_pgtbl(struct dmar_domain *domain) domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL); DMAR_DOMAIN_PGLOCK(domain); - m = dmar_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK | + m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO | IOMMU_PGF_OBJL); /* No implicit free of the top level page table page. */ - m->ref_count = 1; + vm_page_wire(m); DMAR_DOMAIN_PGUNLOCK(domain); DMAR_LOCK(domain->dmar); domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED; @@ -717,7 +684,7 @@ domain_alloc_pgtbl(struct dmar_domain *domain) } void -domain_free_pgtbl(struct dmar_domain *domain) +dmar_domain_free_pgtbl(struct dmar_domain *domain) { vm_object_t obj; vm_page_t m; @@ -740,8 +707,10 @@ domain_free_pgtbl(struct dmar_domain *domain) /* Obliterate ref_counts */ VM_OBJECT_ASSERT_WLOCKED(obj); - for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) - m->ref_count = 0; + for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) { + vm_page_clearref(m); + vm_wire_sub(1); + } VM_OBJECT_WUNLOCK(obj); vm_object_deallocate(obj); } diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c index 09271a6f6cc9..ec3cd35e4f4e 100644 --- a/sys/x86/iommu/intel_intrmap.c +++ b/sys/x86/iommu/intel_intrmap.c @@ -54,6 +54,7 @@ #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> #include <x86/iommu/iommu_intrmap.h> @@ -64,7 +65,7 @@ static void dmar_ir_program_irte(struct dmar_unit *unit, u_int idx, static int dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie); int -iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) +dmar_alloc_msi_intr(device_t src, u_int *cookies, u_int count) { struct dmar_unit *unit; vmem_addr_t vmem_res; @@ -92,7 +93,7 @@ iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) } int -iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, +dmar_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, uint64_t *addr, uint32_t *data) { struct dmar_unit *unit; @@ -138,7 +139,7 @@ iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, } int -iommu_unmap_msi_intr(device_t src, u_int cookie) +dmar_unmap_msi_intr(device_t src, u_int cookie) { struct dmar_unit *unit; @@ -149,7 +150,7 @@ iommu_unmap_msi_intr(device_t src, u_int cookie) } int -iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, +dmar_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo) { struct dmar_unit *unit; @@ -212,7 +213,7 @@ iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, } int -iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) +dmar_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) { struct dmar_unit *unit; u_int idx; @@ -270,7 +271,7 @@ dmar_ir_program_irte(struct dmar_unit *unit, u_int idx, uint64_t low, high = DMAR_IRTE2_SVT_RID | DMAR_IRTE2_SQ_RID | DMAR_IRTE2_SID_RID(rid); if (bootverbose) { - device_printf(unit->dev, + device_printf(unit->iommu.dev, "programming irte[%d] rid %#x high %#jx low %#jx\n", idx, 
rid, (uintmax_t)high, (uintmax_t)low); } @@ -314,13 +315,6 @@ dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie) return (0); } -static u_int -clp2(u_int v) -{ - - return (powerof2(v) ? v : 1 << fls(v)); -} - int dmar_init_irt(struct dmar_unit *unit) { @@ -329,18 +323,19 @@ dmar_init_irt(struct dmar_unit *unit) return (0); unit->ir_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.ir", &unit->ir_enabled); + TUNABLE_INT_FETCH("hw.iommu.ir", &unit->ir_enabled); if (!unit->ir_enabled) return (0); if (!unit->qi_enabled) { unit->ir_enabled = 0; if (bootverbose) - device_printf(unit->dev, + device_printf(unit->iommu.dev, "QI disabled, disabling interrupt remapping\n"); return (0); } - unit->irte_cnt = clp2(num_io_irqs); + unit->irte_cnt = roundup_pow_of_two(num_io_irqs); unit->irt = kmem_alloc_contig(unit->irte_cnt * sizeof(dmar_irte_t), - M_ZERO | M_WAITOK, 0, dmar_high, PAGE_SIZE, 0, + M_ZERO | M_WAITOK, 0, iommu_high, PAGE_SIZE, 0, DMAR_IS_COHERENT(unit) ? VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE); if (unit->irt == NULL) diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c index 37e2bf211e32..c11946ad9447 100644 --- a/sys/x86/iommu/intel_qi.c +++ b/sys/x86/iommu/intel_qi.c @@ -55,19 +55,9 @@ #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> -static bool -dmar_qi_seq_processed(const struct dmar_unit *unit, - const struct iommu_qi_genseq *pseq) -{ - u_int gen; - - gen = unit->inv_waitd_gen; - return (pseq->gen < gen || - (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw)); -} - static int dmar_enable_qi(struct dmar_unit *unit) { @@ -95,32 +85,36 @@ dmar_disable_qi(struct dmar_unit *unit) } static void -dmar_qi_advance_tail(struct dmar_unit *unit) +dmar_qi_advance_tail(struct iommu_unit *iommu) { + struct dmar_unit *unit; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); - dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail); + dmar_write4(unit, DMAR_IQT_REG, unit->x86c.inv_queue_tail); } static void -dmar_qi_ensure(struct dmar_unit *unit, int descr_count) +dmar_qi_ensure(struct iommu_unit *iommu, int descr_count) { + struct dmar_unit *unit; uint32_t head; int bytes; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT; for (;;) { - if (bytes <= unit->inv_queue_avail) + if (bytes <= unit->x86c.inv_queue_avail) break; /* refill */ head = dmar_read4(unit, DMAR_IQH_REG); head &= DMAR_IQH_MASK; - unit->inv_queue_avail = head - unit->inv_queue_tail - + unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail - DMAR_IQ_DESCR_SZ; - if (head <= unit->inv_queue_tail) - unit->inv_queue_avail += unit->inv_queue_size; - if (bytes <= unit->inv_queue_avail) + if (head <= unit->x86c.inv_queue_tail) + unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size; + if (bytes <= unit->x86c.inv_queue_avail) break; /* @@ -133,11 +127,11 @@ dmar_qi_ensure(struct dmar_unit *unit, int descr_count) * See dmar_qi_invalidate_locked() for a discussion * about data race prevention. 
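The dmar_qi_seq_processed() helper deleted near the start of the intel_qi.c hunk above decided whether a queued wait descriptor had already completed; the shared iommu_qi_* code this change switches to presumably performs the same generation-aware comparison. Reconstructed for reference:

/*
 * A wait is complete once hardware has either moved past the waiter's
 * generation (the 32-bit sequence counter wrapped around) or, within the
 * same generation, written a sequence number at or beyond the waiter's.
 * struct iommu_qi_genseq is the {gen, seq} pair used throughout this diff.
 */
static bool
sketch_qi_seq_processed(u_int hw_gen, uint32_t hw_seq,
    const struct iommu_qi_genseq *pseq)
{
    return (pseq->gen < hw_gen ||
        (pseq->gen == hw_gen && pseq->seq <= hw_seq));
}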
*/ - dmar_qi_advance_tail(unit); - unit->inv_queue_full++; + dmar_qi_advance_tail(DMAR2IOMMU(unit)); + unit->x86c.inv_queue_full++; cpu_spinwait(); } - unit->inv_queue_avail -= bytes; + unit->x86c.inv_queue_avail -= bytes; } static void @@ -145,208 +139,106 @@ dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2) { DMAR_ASSERT_LOCKED(unit); - *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1; - unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; - KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, - ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, - (uintmax_t)unit->inv_queue_size)); - unit->inv_queue_tail &= unit->inv_queue_size - 1; - *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2; - unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; - KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, - ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, - (uintmax_t)unit->inv_queue_size)); - unit->inv_queue_tail &= unit->inv_queue_size - 1; +#ifdef __LP64__ + atomic_store_64((uint64_t *)(unit->x86c.inv_queue + + unit->x86c.inv_queue_tail), data1); +#else + *(volatile uint64_t *)(unit->x86c.inv_queue + + unit->x86c.inv_queue_tail) = data1; +#endif + unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; + KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, + ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, + (uintmax_t)unit->x86c.inv_queue_size)); + unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; +#ifdef __LP64__ + atomic_store_64((uint64_t *)(unit->x86c.inv_queue + + unit->x86c.inv_queue_tail), data2); +#else + *(volatile uint64_t *)(unit->x86c.inv_queue + + unit->x86c.inv_queue_tail) = data2; +#endif + unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; + KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, + ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, + (uintmax_t)unit->x86c.inv_queue_size)); + unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; } static void -dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr, +dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr, bool memw, bool fence) { + struct dmar_unit *unit; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID | (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) | (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0), - memw ? unit->inv_waitd_seq_hw_phys : 0); + memw ? unit->x86c.inv_waitd_seq_hw_phys : 0); } static void -dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq, - bool emit_wait) -{ - struct iommu_qi_genseq gsec; - uint32_t seq; - - KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); - DMAR_ASSERT_LOCKED(unit); - if (unit->inv_waitd_seq == 0xffffffff) { - gsec.gen = unit->inv_waitd_gen; - gsec.seq = unit->inv_waitd_seq; - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false); - dmar_qi_advance_tail(unit); - while (!dmar_qi_seq_processed(unit, &gsec)) - cpu_spinwait(); - unit->inv_waitd_gen++; - unit->inv_waitd_seq = 1; - } - seq = unit->inv_waitd_seq++; - pseq->gen = unit->inv_waitd_gen; - pseq->seq = seq; - if (emit_wait) { - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_descr(unit, seq, true, true, false); - } -} - -/* - * To avoid missed wakeups, callers must increment the unit's waiters count - * before advancing the tail past the wait descriptor. 
- */ -static void -dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq, - bool nowait) -{ - - DMAR_ASSERT_LOCKED(unit); - KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__)); - while (!dmar_qi_seq_processed(unit, gseq)) { - if (cold || nowait) { - cpu_spinwait(); - } else { - msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0, - "dmarse", hz); - } - } - unit->inv_seq_waiters--; -} - -static void -dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base, +dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait) { struct dmar_unit *unit; + struct dmar_domain *domain; iommu_gaddr_t isize; int am; + domain = __containerof(idomain, struct dmar_domain, iodom); unit = domain->dmar; DMAR_ASSERT_LOCKED(unit); for (; size > 0; base += isize, size -= isize) { am = calc_am(unit, base, size, &isize); - dmar_qi_ensure(unit, 1); + dmar_qi_ensure(DMAR2IOMMU(unit), 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR | DMAR_IQ_DESCR_IOTLB_DID(domain->domain), base | am); } - dmar_qi_emit_wait_seq(unit, pseq, emit_wait); -} - -/* - * The caller must not be using the entry's dmamap_link field. - */ -void -dmar_qi_invalidate_locked(struct dmar_domain *domain, - struct iommu_map_entry *entry, bool emit_wait) -{ - struct dmar_unit *unit; - - unit = domain->dmar; - DMAR_ASSERT_LOCKED(unit); - dmar_qi_invalidate_emit(domain, entry->start, entry->end - - entry->start, &entry->gseq, emit_wait); - - /* - * To avoid a data race in dmar_qi_task(), the entry's gseq must be - * initialized before the entry is added to the TLB flush list, and the - * entry must be added to that list before the tail is advanced. More - * precisely, the tail must not be advanced past the wait descriptor - * that will generate the interrupt that schedules dmar_qi_task() for - * execution before the entry is added to the list. While an earlier - * call to dmar_qi_ensure() might have advanced the tail, it will not - * advance it past the wait descriptor. - * - * See the definition of struct dmar_unit for more information on - * synchronization. - */ - entry->tlb_flush_next = NULL; - atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next, - (uintptr_t)entry); - unit->tlb_flush_tail = entry; - - dmar_qi_advance_tail(unit); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), pseq, emit_wait); } -void -dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base, - iommu_gaddr_t size, bool cansleep) +static void +dmar_qi_invalidate_glob_impl(struct dmar_unit *unit, uint64_t data1) { - struct dmar_unit *unit; struct iommu_qi_genseq gseq; - unit = domain->dmar; - DMAR_LOCK(unit); - dmar_qi_invalidate_emit(domain, base, size, &gseq, true); - - /* - * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count - * must be incremented before the tail is advanced. - */ - unit->inv_seq_waiters++; - - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, !cansleep); - DMAR_UNLOCK(unit); + DMAR_ASSERT_LOCKED(unit); + dmar_qi_ensure(DMAR2IOMMU(unit), 2); + dmar_qi_emit(unit, data1, 0); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true); + /* See dmar_qi_invalidate_sync(). 
*/ + unit->x86c.inv_seq_waiters++; + dmar_qi_advance_tail(DMAR2IOMMU(unit)); + iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false); } void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit) { - struct iommu_qi_genseq gseq; - - DMAR_ASSERT_LOCKED(unit); - dmar_qi_ensure(unit, 2); - dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0); - dmar_qi_emit_wait_seq(unit, &gseq, true); - /* See dmar_qi_invalidate_sync(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); + dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_CTX_INV | + DMAR_IQ_DESCR_CTX_GLOB); } void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit) { - struct iommu_qi_genseq gseq; - - DMAR_ASSERT_LOCKED(unit); - dmar_qi_ensure(unit, 2); - dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB | - DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0); - dmar_qi_emit_wait_seq(unit, &gseq, true); - /* See dmar_qi_invalidate_sync(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); + dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_IOTLB_INV | + DMAR_IQ_DESCR_IOTLB_GLOB | DMAR_IQ_DESCR_IOTLB_DW | + DMAR_IQ_DESCR_IOTLB_DR); } void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit) { - struct iommu_qi_genseq gseq; - - DMAR_ASSERT_LOCKED(unit); - dmar_qi_ensure(unit, 2); - dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0); - dmar_qi_emit_wait_seq(unit, &gseq, true); - /* See dmar_qi_invalidate_sync(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); + dmar_qi_invalidate_glob_impl(unit, DMAR_IQ_DESCR_IEC_INV); } void @@ -362,21 +254,21 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) for (; cnt > 0; cnt -= c, start += c) { l = ffs(start | cnt) - 1; c = 1 << l; - dmar_qi_ensure(unit, 1); + dmar_qi_ensure(DMAR2IOMMU(unit), 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV | DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) | DMAR_IQ_DESCR_IEC_IM(l), 0); } - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_seq(unit, &gseq, true); + dmar_qi_ensure(DMAR2IOMMU(unit), 1); + iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true); /* - * Since dmar_qi_wait_for_seq() will not sleep, this increment's + * Since iommu_qi_wait_for_seq() will not sleep, this increment's * placement relative to advancing the tail doesn't matter. */ - unit->inv_seq_waiters++; + unit->x86c.inv_seq_waiters++; - dmar_qi_advance_tail(unit); + dmar_qi_advance_tail(DMAR2IOMMU(unit)); /* * The caller of the function, in particular, @@ -393,7 +285,7 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) * queue is processed, which includes requests possibly issued * before our request. 
*/ - dmar_qi_wait_for_seq(unit, &gseq, true); + iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true); } int @@ -401,41 +293,21 @@ dmar_qi_intr(void *arg) { struct dmar_unit *unit; - unit = arg; + unit = IOMMU2DMAR((struct iommu_unit *)arg); KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->iommu.unit)); - taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task); + taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task); return (FILTER_HANDLED); } static void -dmar_qi_drain_tlb_flush(struct dmar_unit *unit) -{ - struct iommu_map_entry *entry, *head; - - for (head = unit->tlb_flush_head;; head = entry) { - entry = (struct iommu_map_entry *) - atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); - if (entry == NULL || - !dmar_qi_seq_processed(unit, &entry->gseq)) - break; - unit->tlb_flush_head = entry; - iommu_gas_free_entry(head); - if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) - iommu_gas_free_region(entry); - else - iommu_gas_free_space(entry); - } -} - -static void dmar_qi_task(void *arg, int pending __unused) { struct dmar_unit *unit; uint32_t ics; - unit = arg; - dmar_qi_drain_tlb_flush(unit); + unit = IOMMU2DMAR(arg); + iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit)); /* * Request an interrupt on the completion of the next invalidation @@ -452,16 +324,16 @@ dmar_qi_task(void *arg, int pending __unused) * Otherwise, such entries will linger until a later entry * that requests an interrupt is processed. */ - dmar_qi_drain_tlb_flush(unit); + iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit)); } - if (unit->inv_seq_waiters > 0) { + if (unit->x86c.inv_seq_waiters > 0) { /* * Acquire the DMAR lock so that wakeup() is called only after * the waiter is sleeping. */ DMAR_LOCK(unit); - wakeup(&unit->inv_seq_waiters); + wakeup(&unit->x86c.inv_seq_waiters); DMAR_UNLOCK(unit); } } @@ -471,7 +343,7 @@ dmar_init_qi(struct dmar_unit *unit) { uint64_t iqa; uint32_t ics; - int qi_sz; + u_int qi_sz; if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0) return (0); @@ -480,34 +352,19 @@ dmar_init_qi(struct dmar_unit *unit) if (!unit->qi_enabled) return (0); - unit->tlb_flush_head = unit->tlb_flush_tail = - iommu_gas_alloc_entry(NULL, 0); - TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit); - unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK, - taskqueue_thread_enqueue, &unit->qi_taskqueue); - taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV, - "dmar%d qi taskq", unit->iommu.unit); - - unit->inv_waitd_gen = 0; - unit->inv_waitd_seq = 1; - - qi_sz = DMAR_IQA_QS_DEF; - TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz); - if (qi_sz > DMAR_IQA_QS_MAX) - qi_sz = DMAR_IQA_QS_MAX; - unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; - /* Reserve one descriptor to prevent wraparound. */ - unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ; - - /* The invalidation queue reads by DMARs are always coherent. 
*/ - unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK | - M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); - unit->inv_waitd_seq_hw_phys = pmap_kextract( - (vm_offset_t)&unit->inv_waitd_seq_hw); + unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX; + unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ; + iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task); + get_x86_iommu()->qi_ensure = dmar_qi_ensure; + get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr; + get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail; + get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit; + + qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE); DMAR_LOCK(unit); dmar_write8(unit, DMAR_IQT_REG, 0); - iqa = pmap_kextract((uintptr_t)unit->inv_queue); + iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue); iqa |= qi_sz; dmar_write8(unit, DMAR_IQA_REG, iqa); dmar_enable_qi(unit); @@ -516,49 +373,35 @@ dmar_init_qi(struct dmar_unit *unit) ics = DMAR_ICS_IWC; dmar_write4(unit, DMAR_ICS_REG, ics); } - dmar_enable_qi_intr(unit); + dmar_enable_qi_intr(DMAR2IOMMU(unit)); DMAR_UNLOCK(unit); return (0); } +static void +dmar_fini_qi_helper(struct iommu_unit *iommu) +{ + dmar_disable_qi_intr(iommu); + dmar_disable_qi(IOMMU2DMAR(iommu)); +} + void dmar_fini_qi(struct dmar_unit *unit) { - struct iommu_qi_genseq gseq; - if (!unit->qi_enabled) return; - taskqueue_drain(unit->qi_taskqueue, &unit->qi_task); - taskqueue_free(unit->qi_taskqueue); - unit->qi_taskqueue = NULL; - - DMAR_LOCK(unit); - /* quisce */ - dmar_qi_ensure(unit, 1); - dmar_qi_emit_wait_seq(unit, &gseq, true); - /* See dmar_qi_invalidate_sync_locked(). */ - unit->inv_seq_waiters++; - dmar_qi_advance_tail(unit); - dmar_qi_wait_for_seq(unit, &gseq, false); - /* only after the quisce, disable queue */ - dmar_disable_qi_intr(unit); - dmar_disable_qi(unit); - KASSERT(unit->inv_seq_waiters == 0, - ("dmar%d: waiters on disabled queue", unit->iommu.unit)); - DMAR_UNLOCK(unit); - - kmem_free(unit->inv_queue, unit->inv_queue_size); - unit->inv_queue = NULL; - unit->inv_queue_size = 0; + iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper); unit->qi_enabled = 0; } void -dmar_enable_qi_intr(struct dmar_unit *unit) +dmar_enable_qi_intr(struct iommu_unit *iommu) { + struct dmar_unit *unit; uint32_t iectl; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->iommu.unit)); @@ -568,10 +411,12 @@ dmar_enable_qi_intr(struct dmar_unit *unit) } void -dmar_disable_qi_intr(struct dmar_unit *unit) +dmar_disable_qi_intr(struct iommu_unit *iommu) { + struct dmar_unit *unit; uint32_t iectl; + unit = IOMMU2DMAR(iommu); DMAR_ASSERT_LOCKED(unit); KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->iommu.unit)); diff --git a/sys/x86/iommu/intel_quirks.c b/sys/x86/iommu/intel_quirks.c index cccb503b6047..751237a3ab54 100644 --- a/sys/x86/iommu/intel_quirks.c +++ b/sys/x86/iommu/intel_quirks.c @@ -58,6 +58,7 @@ #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> typedef void (*dmar_quirk_cpu_fun)(struct dmar_unit *); @@ -107,7 +108,7 @@ dmar_match_quirks(struct dmar_unit *dmar, (nb_quirk->rev_no == rev_no || nb_quirk->rev_no == QUIRK_NB_ALL_REV)) { if (bootverbose) { - device_printf(dmar->dev, + device_printf(dmar->iommu.dev, "NB IOMMU quirk %s\n", nb_quirk->descr); } @@ -115,7 +116,8 @@ dmar_match_quirks(struct dmar_unit *dmar, } } } else { - 
device_printf(dmar->dev, "cannot find northbridge\n"); + device_printf(dmar->iommu.dev, + "cannot find northbridge\n"); } } if (cpu_quirks != NULL) { @@ -134,7 +136,7 @@ dmar_match_quirks(struct dmar_unit *dmar, (cpu_quirk->stepping == -1 || cpu_quirk->stepping == stepping)) { if (bootverbose) { - device_printf(dmar->dev, + device_printf(dmar->iommu.dev, "CPU IOMMU quirk %s\n", cpu_quirk->descr); } diff --git a/sys/x86/iommu/intel_reg.h b/sys/x86/iommu/intel_reg.h index 26a18ff94890..0fafcce7accf 100644 --- a/sys/x86/iommu/intel_reg.h +++ b/sys/x86/iommu/intel_reg.h @@ -31,16 +31,6 @@ #ifndef __X86_IOMMU_INTEL_REG_H #define __X86_IOMMU_INTEL_REG_H -#define DMAR_PAGE_SIZE PAGE_SIZE -#define DMAR_PAGE_MASK (DMAR_PAGE_SIZE - 1) -#define DMAR_PAGE_SHIFT PAGE_SHIFT -#define DMAR_NPTEPG (DMAR_PAGE_SIZE / sizeof(dmar_pte_t)) -#define DMAR_NPTEPGSHIFT 9 -#define DMAR_PTEMASK (DMAR_NPTEPG - 1) - -#define IOMMU_PAGE_SIZE DMAR_PAGE_SIZE -#define IOMMU_PAGE_MASK DMAR_PAGE_MASK - typedef struct dmar_root_entry { uint64_t r1; uint64_t r2; @@ -49,7 +39,7 @@ typedef struct dmar_root_entry { #define DMAR_ROOT_R1_CTP_MASK 0xfffffffffffff000 /* Mask for Context-Entry Table Pointer */ -#define DMAR_CTX_CNT (DMAR_PAGE_SIZE / sizeof(dmar_root_entry_t)) +#define DMAR_CTX_CNT (IOMMU_PAGE_SIZE / sizeof(dmar_root_entry_t)) typedef struct dmar_ctx_entry { uint64_t ctx1; @@ -73,9 +63,6 @@ typedef struct dmar_ctx_entry { #define DMAR_CTX2_DID(x) ((x) << 8) /* Domain Identifier */ #define DMAR_CTX2_GET_DID(ctx2) (((ctx2) & DMAR_CTX2_DID_MASK) >> 8) -typedef struct dmar_pte { - uint64_t pte; -} dmar_pte_t; #define DMAR_PTE_R 1 /* Read */ #define DMAR_PTE_W (1 << 1) /* Write */ #define DMAR_PTE_SP (1 << 7) /* Super Page */ diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c index 4d680cc7d9e8..287b5fe9376a 100644 --- a/sys/x86/iommu/intel_utils.c +++ b/sys/x86/iommu/intel_utils.c @@ -63,6 +63,7 @@ #include <x86/include/busdma_impl.h> #include <dev/iommu/busdma_iommu.h> #include <x86/iommu/intel_reg.h> +#include <x86/iommu/x86_iommu.h> #include <x86/iommu/intel_dmar.h> u_int @@ -135,7 +136,7 @@ domain_set_agaw(struct dmar_domain *domain, int mgaw) return (0); } } - device_printf(domain->dmar->dev, + device_printf(domain->dmar->iommu.dev, "context request mgaw %d: no agaw found, sagaw %x\n", mgaw, sagaw); return (EINVAL); @@ -172,23 +173,6 @@ dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr, bool allow_less } /* - * Calculate the total amount of page table pages needed to map the - * whole bus address space on the context with the selected agaw. - */ -vm_pindex_t -pglvl_max_pages(int pglvl) -{ - vm_pindex_t res; - int i; - - for (res = 0, i = pglvl; i > 0; i--) { - res *= DMAR_NPTEPG; - res++; - } - return (res); -} - -/* * Return true if the page table level lvl supports the superpage for * the context ctx. 
*/ @@ -209,26 +193,6 @@ domain_is_sp_lvl(struct dmar_domain *domain, int lvl) } iommu_gaddr_t -pglvl_page_size(int total_pglvl, int lvl) -{ - int rlvl; - static const iommu_gaddr_t pg_sz[] = { - (iommu_gaddr_t)DMAR_PAGE_SIZE, - (iommu_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT, - (iommu_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT), - (iommu_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT), - (iommu_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT), - (iommu_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT) - }; - - KASSERT(lvl >= 0 && lvl < total_pglvl, - ("total %d lvl %d", total_pglvl, lvl)); - rlvl = total_pglvl - lvl - 1; - KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl)); - return (pg_sz[rlvl]); -} - -iommu_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl) { @@ -243,7 +207,7 @@ calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size, int am; for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) { - isize = 1ULL << (am + DMAR_PAGE_SHIFT); + isize = 1ULL << (am + IOMMU_PAGE_SHIFT); if ((base & (isize - 1)) == 0 && size >= isize) break; if (am == 0) @@ -253,113 +217,9 @@ calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size, return (am); } -iommu_haddr_t dmar_high; int haw; int dmar_tbl_pagecnt; -vm_page_t -dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) -{ - vm_page_t m; - int zeroed, aflags; - - zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0; - aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | - ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL : - VM_ALLOC_NOWAIT); - for (;;) { - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WLOCK(obj); - m = vm_page_lookup(obj, idx); - if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) { - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WUNLOCK(obj); - break; - } - m = vm_page_alloc_contig(obj, idx, aflags, 1, 0, - dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WUNLOCK(obj); - if (m != NULL) { - if (zeroed && (m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - atomic_add_int(&dmar_tbl_pagecnt, 1); - break; - } - if ((flags & IOMMU_PGF_WAITOK) == 0) - break; - } - return (m); -} - -void -dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags) -{ - vm_page_t m; - - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WLOCK(obj); - m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT); - if (m != NULL) { - vm_page_free(m); - atomic_subtract_int(&dmar_tbl_pagecnt, 1); - } - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WUNLOCK(obj); -} - -void * -dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, - struct sf_buf **sf) -{ - vm_page_t m; - bool allocated; - - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WLOCK(obj); - m = vm_page_lookup(obj, idx); - if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) { - m = dmar_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL); - allocated = true; - } else - allocated = false; - if (m == NULL) { - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WUNLOCK(obj); - return (NULL); - } - /* Sleepable allocations cannot fail. */ - if ((flags & IOMMU_PGF_WAITOK) != 0) - VM_OBJECT_WUNLOCK(obj); - sched_pin(); - *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK) - == 0 ? 
SFB_NOWAIT : 0)); - if (*sf == NULL) { - sched_unpin(); - if (allocated) { - VM_OBJECT_ASSERT_WLOCKED(obj); - dmar_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL); - } - if ((flags & IOMMU_PGF_OBJL) == 0) - VM_OBJECT_WUNLOCK(obj); - return (NULL); - } - if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == - (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) - VM_OBJECT_WLOCK(obj); - else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0) - VM_OBJECT_WUNLOCK(obj); - return ((void *)sf_buf_kva(*sf)); -} - -void -dmar_unmap_pgtbl(struct sf_buf *sf) -{ - - sf_buf_free(sf); - sched_unpin(); -} - static void dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz) { @@ -374,7 +234,7 @@ dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz) } void -dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst) +dmar_flush_pte_to_ram(struct dmar_unit *unit, iommu_pte_t *dst) { dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); @@ -647,7 +507,6 @@ dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id) DMAR_UNLOCK(dmar); } -int dmar_batch_coalesce = 100; struct timespec dmar_hw_timeout = { .tv_sec = 0, .tv_nsec = 1000000 @@ -686,14 +545,6 @@ dmar_timeout_sysctl(SYSCTL_HANDLER_ARGS) return (error); } -static SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE, - NULL, ""); -SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD, - &dmar_tbl_pagecnt, 0, - "Count of pages used for DMAR pagetables"); -SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN, - &dmar_batch_coalesce, 0, - "Number of qi batches between interrupt"); SYSCTL_PROC(_hw_iommu_dmar, OID_AUTO, timeout, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, dmar_timeout_sysctl, "QU", diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c new file mode 100644 index 000000000000..2011c632f770 --- /dev/null +++ b/sys/x86/iommu/iommu_utils.c @@ -0,0 +1,751 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_acpi.h" +#if defined(__amd64__) +#define DEV_APIC +#else +#include "opt_apic.h" +#endif + +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/memdesc.h> +#include <sys/mutex.h> +#include <sys/sf_buf.h> +#include <sys/sysctl.h> +#include <sys/proc.h> +#include <sys/sched.h> +#include <sys/rman.h> +#include <sys/rwlock.h> +#include <sys/taskqueue.h> +#include <sys/tree.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> +#include <machine/atomic.h> +#include <machine/bus.h> +#include <machine/cpu.h> +#include <x86/include/busdma_impl.h> +#include <dev/iommu/busdma_iommu.h> +#include <dev/iommu/iommu.h> +#include <x86/iommu/x86_iommu.h> +#include <x86/iommu/iommu_intrmap.h> +#ifdef DEV_APIC +#include "pcib_if.h" +#include <machine/intr_machdep.h> +#include <x86/apicreg.h> +#include <x86/apicvar.h> +#endif + +vm_page_t +iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) +{ + vm_page_t m; + int zeroed, aflags; + + zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0; + aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | + ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL : + VM_ALLOC_NOWAIT); + for (;;) { + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WLOCK(obj); + m = vm_page_lookup(obj, idx); + if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) { + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + break; + } + m = vm_page_alloc_contig(obj, idx, aflags, 1, 0, + iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + if (m != NULL) { + if (zeroed && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + atomic_add_int(&iommu_tbl_pagecnt, 1); + break; + } + if ((flags & IOMMU_PGF_WAITOK) == 0) + break; + } + return (m); +} + +void +iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags) +{ + vm_page_t m; + + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WLOCK(obj); + m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT); + if (m != NULL) { + vm_page_free(m); + atomic_subtract_int(&iommu_tbl_pagecnt, 1); + } + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); +} + +void * +iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, + struct sf_buf **sf) +{ + vm_page_t m; + bool allocated; + + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WLOCK(obj); + m = vm_page_lookup(obj, idx); + if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) { + m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL); + allocated = true; + } else + allocated = false; + if (m == NULL) { + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + return (NULL); + } + /* Sleepable allocations cannot fail. */ + if ((flags & IOMMU_PGF_WAITOK) != 0) + VM_OBJECT_WUNLOCK(obj); + sched_pin(); + *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK) + == 0 ? 
SFB_NOWAIT : 0)); + if (*sf == NULL) { + sched_unpin(); + if (allocated) { + VM_OBJECT_ASSERT_WLOCKED(obj); + iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL); + } + if ((flags & IOMMU_PGF_OBJL) == 0) + VM_OBJECT_WUNLOCK(obj); + return (NULL); + } + if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == + (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) + VM_OBJECT_WLOCK(obj); + else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0) + VM_OBJECT_WUNLOCK(obj); + return ((void *)sf_buf_kva(*sf)); +} + +void +iommu_unmap_pgtbl(struct sf_buf *sf) +{ + + sf_buf_free(sf); + sched_unpin(); +} + +iommu_haddr_t iommu_high; +int iommu_tbl_pagecnt; + +SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, ""); +SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD, + &iommu_tbl_pagecnt, 0, + "Count of pages used for IOMMU pagetables"); + +int iommu_qi_batch_coalesce = 100; +SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN, + &iommu_qi_batch_coalesce, 0, + "Number of qi batches between interrupt"); + +static struct iommu_unit * +x86_no_iommu_find(device_t dev, bool verbose) +{ + return (NULL); +} + +static int +x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) +{ + return (EOPNOTSUPP); +} + +static int +x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, + u_int cookie, uint64_t *addr, uint32_t *data) +{ + return (EOPNOTSUPP); +} + +static int +x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie) +{ + return (0); +} + +static int +x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, + bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, + uint32_t *lo) +{ + return (EOPNOTSUPP); +} + +static int +x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) +{ + return (0); +} + +static struct x86_iommu x86_no_iommu = { + .find = x86_no_iommu_find, + .alloc_msi_intr = x86_no_iommu_alloc_msi_intr, + .map_msi_intr = x86_no_iommu_map_msi_intr, + .unmap_msi_intr = x86_no_iommu_unmap_msi_intr, + .map_ioapic_intr = x86_no_iommu_map_ioapic_intr, + .unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr, +}; + +static struct x86_iommu *x86_iommu = &x86_no_iommu; + +void +set_x86_iommu(struct x86_iommu *x) +{ + MPASS(x86_iommu == &x86_no_iommu); + x86_iommu = x; +} + +struct x86_iommu * +get_x86_iommu(void) +{ + return (x86_iommu); +} + +void +iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, + bool cansleep) +{ + x86_iommu->domain_unload_entry(entry, free, cansleep); +} + +void +iommu_domain_unload(struct iommu_domain *iodom, + struct iommu_map_entries_tailq *entries, bool cansleep) +{ + x86_iommu->domain_unload(iodom, entries, cansleep); +} + +struct iommu_ctx * +iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, + bool id_mapped, bool rmrr_init) +{ + return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init)); +} + +void +iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context) +{ + x86_iommu->free_ctx_locked(iommu, context); +} + +void +iommu_free_ctx(struct iommu_ctx *context) +{ + x86_iommu->free_ctx(context); +} + +struct iommu_unit * +iommu_find(device_t dev, bool verbose) +{ + return (x86_iommu->find(dev, verbose)); +} + +int +iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) +{ + return (x86_iommu->alloc_msi_intr(src, cookies, count)); +} + +int +iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, + uint64_t *addr, uint32_t *data) +{ + return (x86_iommu->map_msi_intr(src, cpu, vector, cookie, + addr, data)); +} + 
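
These wrappers are the whole dispatch layer: a single ops vector defaults to x86_no_iommu (every call answers EOPNOTSUPP or NULL), a hardware driver replaces it exactly once through set_x86_iommu(), and individual callbacks can also be filled in later, as dmar_init_qi() earlier in this patch does for the QI hooks via get_x86_iommu(). A minimal userland sketch of that registration pattern follows; the demo_* names are illustrative only and are not the kernel API.

#include <assert.h>
#include <errno.h>
#include <stdio.h>

struct demo_iommu_ops {
        int (*map_msi)(unsigned int cpu, unsigned int vector);
};

static int
demo_no_iommu_map_msi(unsigned int cpu, unsigned int vector)
{
        (void)cpu;
        (void)vector;
        return (EOPNOTSUPP);            /* same answer x86_no_iommu gives */
}

static struct demo_iommu_ops demo_no_iommu = {
        .map_msi = demo_no_iommu_map_msi,
};
static struct demo_iommu_ops *demo_ops = &demo_no_iommu;

static void
demo_set_iommu_ops(struct demo_iommu_ops *ops)
{
        /* Like set_x86_iommu(): only the no-op default may be replaced. */
        assert(demo_ops == &demo_no_iommu);
        demo_ops = ops;
}

/* Thin wrapper, the analogue of iommu_map_msi_intr() above. */
static int
demo_iommu_map_msi(unsigned int cpu, unsigned int vector)
{
        return (demo_ops->map_msi(cpu, vector));
}

static int
demo_hw_map_msi(unsigned int cpu, unsigned int vector)
{
        printf("MSI mapped: cpu %u vector %u\n", cpu, vector);
        return (0);
}

int
main(void)
{
        static struct demo_iommu_ops demo_hw = { .map_msi = demo_hw_map_msi };

        demo_set_iommu_ops(&demo_hw);   /* hardware driver attach */
        return (demo_iommu_map_msi(1, 48));
}
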
+int +iommu_unmap_msi_intr(device_t src, u_int cookie) +{ + return (x86_iommu->unmap_msi_intr(src, cookie)); +} + +int +iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, + bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo) +{ + return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge, + activehi, irq, cookie, hi, lo)); +} + +int +iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) +{ + return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie)); +} + +void +iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit) +{ + x86_iommu->unit_pre_instantiate_ctx(unit); +} + +#define IOMMU2X86C(iommu) (x86_iommu->get_x86_common(iommu)) + +static bool +iommu_qi_seq_processed(struct iommu_unit *unit, + const struct iommu_qi_genseq *pseq) +{ + struct x86_unit_common *x86c; + u_int gen; + + x86c = IOMMU2X86C(unit); + gen = x86c->inv_waitd_gen; + return (pseq->gen < gen || + (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw)); +} + +void +iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq, + bool emit_wait) +{ + struct x86_unit_common *x86c; + struct iommu_qi_genseq gsec; + uint32_t seq; + + KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); + IOMMU_ASSERT_LOCKED(unit); + x86c = IOMMU2X86C(unit); + + if (x86c->inv_waitd_seq == 0xffffffff) { + gsec.gen = x86c->inv_waitd_gen; + gsec.seq = x86c->inv_waitd_seq; + x86_iommu->qi_ensure(unit, 1); + x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false, + true, false); + x86_iommu->qi_advance_tail(unit); + while (!iommu_qi_seq_processed(unit, &gsec)) + cpu_spinwait(); + x86c->inv_waitd_gen++; + x86c->inv_waitd_seq = 1; + } + seq = x86c->inv_waitd_seq++; + pseq->gen = x86c->inv_waitd_gen; + pseq->seq = seq; + if (emit_wait) { + x86_iommu->qi_ensure(unit, 1); + x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false); + } +} + +/* + * To avoid missed wakeups, callers must increment the unit's waiters count + * before advancing the tail past the wait descriptor. + */ +void +iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq * + gseq, bool nowait) +{ + struct x86_unit_common *x86c; + + IOMMU_ASSERT_LOCKED(unit); + x86c = IOMMU2X86C(unit); + + KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__)); + while (!iommu_qi_seq_processed(unit, gseq)) { + if (cold || nowait) { + cpu_spinwait(); + } else { + msleep(&x86c->inv_seq_waiters, &unit->lock, 0, + "dmarse", hz); + } + } + x86c->inv_seq_waiters--; +} + +/* + * The caller must not be using the entry's dmamap_link field. + */ +void +iommu_qi_invalidate_locked(struct iommu_domain *domain, + struct iommu_map_entry *entry, bool emit_wait) +{ + struct iommu_unit *unit; + struct x86_unit_common *x86c; + + unit = domain->iommu; + x86c = IOMMU2X86C(unit); + IOMMU_ASSERT_LOCKED(unit); + + x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end - + entry->start, &entry->gseq, emit_wait); + + /* + * To avoid a data race in dmar_qi_task(), the entry's gseq must be + * initialized before the entry is added to the TLB flush list, and the + * entry must be added to that list before the tail is advanced. More + * precisely, the tail must not be advanced past the wait descriptor + * that will generate the interrupt that schedules dmar_qi_task() for + * execution before the entry is added to the list. While an earlier + * call to dmar_qi_ensure() might have advanced the tail, it will not + * advance it past the wait descriptor. 
+ * + * See the definition of struct dmar_unit for more information on + * synchronization. + */ + entry->tlb_flush_next = NULL; + atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail-> + tlb_flush_next, (uintptr_t)entry); + x86c->tlb_flush_tail = entry; + + x86_iommu->qi_advance_tail(unit); +} + +void +iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base, + iommu_gaddr_t size, bool cansleep) +{ + struct iommu_unit *unit; + struct iommu_qi_genseq gseq; + + unit = domain->iommu; + IOMMU_LOCK(unit); + x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true); + + /* + * To avoid a missed wakeup in iommu_qi_task(), the unit's + * waiters count must be incremented before the tail is + * advanced. + */ + IOMMU2X86C(unit)->inv_seq_waiters++; + + x86_iommu->qi_advance_tail(unit); + iommu_qi_wait_for_seq(unit, &gseq, !cansleep); + IOMMU_UNLOCK(unit); +} + +void +iommu_qi_drain_tlb_flush(struct iommu_unit *unit) +{ + struct x86_unit_common *x86c; + struct iommu_map_entry *entry, *head; + + x86c = IOMMU2X86C(unit); + for (head = x86c->tlb_flush_head;; head = entry) { + entry = (struct iommu_map_entry *) + atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); + if (entry == NULL || + !iommu_qi_seq_processed(unit, &entry->gseq)) + break; + x86c->tlb_flush_head = entry; + iommu_gas_free_entry(head); + if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) + iommu_gas_free_region(entry); + else + iommu_gas_free_space(entry); + } +} + +void +iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task) +{ + struct x86_unit_common *x86c; + u_int qi_sz; + + x86c = IOMMU2X86C(unit); + + x86c->tlb_flush_head = x86c->tlb_flush_tail = + iommu_gas_alloc_entry(NULL, 0); + TASK_INIT(&x86c->qi_task, 0, qi_task, unit); + x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK, + taskqueue_thread_enqueue, &x86c->qi_taskqueue); + taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV, + "iommu%d qi taskq", unit->unit); + + x86c->inv_waitd_gen = 0; + x86c->inv_waitd_seq = 1; + + qi_sz = 3; + TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz); + if (qi_sz > x86c->qi_buf_maxsz) + qi_sz = x86c->qi_buf_maxsz; + x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; + /* Reserve one descriptor to prevent wraparound. */ + x86c->inv_queue_avail = x86c->inv_queue_size - + x86c->qi_cmd_sz; + + /* + * The invalidation queue reads by DMARs/AMDIOMMUs are always + * coherent. + */ + x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size, + M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + x86c->inv_waitd_seq_hw_phys = pmap_kextract( + (vm_offset_t)&x86c->inv_waitd_seq_hw); +} + +void +iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)( + struct iommu_unit *)) +{ + struct x86_unit_common *x86c; + struct iommu_qi_genseq gseq; + + x86c = IOMMU2X86C(unit); + + taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task); + taskqueue_free(x86c->qi_taskqueue); + x86c->qi_taskqueue = NULL; + + IOMMU_LOCK(unit); + /* quisce */ + x86_iommu->qi_ensure(unit, 1); + iommu_qi_emit_wait_seq(unit, &gseq, true); + /* See iommu_qi_invalidate_locked(). 
*/ + x86c->inv_seq_waiters++; + x86_iommu->qi_advance_tail(unit); + iommu_qi_wait_for_seq(unit, &gseq, false); + /* only after the quisce, disable queue */ + disable_qi(unit); + KASSERT(x86c->inv_seq_waiters == 0, + ("iommu%d: waiters on disabled queue", unit->unit)); + IOMMU_UNLOCK(unit); + + kmem_free(x86c->inv_queue, x86c->inv_queue_size); + x86c->inv_queue = NULL; + x86c->inv_queue_size = 0; +} + +int +iommu_alloc_irq(struct iommu_unit *unit, int idx) +{ + device_t dev, pcib; + struct iommu_msi_data *dmd; + uint64_t msi_addr; + uint32_t msi_data; + int error; + + MPASS(idx >= 0 || idx < IOMMU_MAX_MSI); + + dev = unit->dev; + dmd = &IOMMU2X86C(unit)->intrs[idx]; + pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */ + error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq); + if (error != 0) { + device_printf(dev, "cannot allocate %s interrupt, %d\n", + dmd->name, error); + goto err1; + } + error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid, + dmd->irq, 1); + if (error != 0) { + device_printf(dev, "cannot set %s interrupt resource, %d\n", + dmd->name, error); + goto err2; + } + dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &dmd->irq_rid, RF_ACTIVE); + if (dmd->irq_res == NULL) { + device_printf(dev, + "cannot allocate resource for %s interrupt\n", dmd->name); + error = ENXIO; + goto err3; + } + error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC, + dmd->handler, NULL, unit, &dmd->intr_handle); + if (error != 0) { + device_printf(dev, "cannot setup %s interrupt, %d\n", + dmd->name, error); + goto err4; + } + bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name); + error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data); + if (error != 0) { + device_printf(dev, "cannot map %s interrupt, %d\n", + dmd->name, error); + goto err5; + } + + dmd->msi_data = msi_data; + dmd->msi_addr = msi_addr; + + return (0); + +err5: + bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); +err4: + bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); +err3: + bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); +err2: + PCIB_RELEASE_MSIX(pcib, dev, dmd->irq); + dmd->irq = -1; +err1: + return (error); +} + +void +iommu_release_intr(struct iommu_unit *unit, int idx) +{ + device_t dev; + struct iommu_msi_data *dmd; + + MPASS(idx >= 0 || idx < IOMMU_MAX_MSI); + + dmd = &IOMMU2X86C(unit)->intrs[idx]; + if (dmd->handler == NULL || dmd->irq == -1) + return; + dev = unit->dev; + + bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); + bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); + bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); + PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)), + dev, dmd->irq); + dmd->irq = -1; +} + +void +iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev) +{ + bus_addr_t maxaddr; + + maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR); + ctx->tag->common.impl = &bus_dma_iommu_impl; + ctx->tag->common.boundary = 0; + ctx->tag->common.lowaddr = maxaddr; + ctx->tag->common.highaddr = maxaddr; + ctx->tag->common.maxsize = maxaddr; + ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED; + ctx->tag->common.maxsegsz = maxaddr; + ctx->tag->ctx = ctx; + ctx->tag->owner = dev; +} + +void +iommu_domain_free_entry(struct iommu_map_entry *entry, bool free) +{ + if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) + iommu_gas_free_region(entry); + else + iommu_gas_free_space(entry); + if (free) + iommu_gas_free_entry(entry); + else + entry->flags = 0; +} + +/* + * Index of the pte for the guest 
address base in the page table at + * the level lvl. + */ +int +pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl) +{ + + base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) * + IOMMU_NPTEPGSHIFT; + return (base & IOMMU_PTEMASK); +} + +/* + * Returns the page index of the page table page in the page table + * object, which maps the given address base at the page table level + * lvl. + */ +vm_pindex_t +pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl) +{ + vm_pindex_t idx, pidx; + int i; + + KASSERT(lvl >= 0 && lvl < pglvl, + ("wrong lvl %d %d", pglvl, lvl)); + + for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) { + idx = pglvl_pgtbl_pte_off(pglvl, base, i) + + pidx * IOMMU_NPTEPG + 1; + } + return (idx); +} + +/* + * Calculate the total amount of page table pages needed to map the + * whole bus address space on the context with the selected agaw. + */ +vm_pindex_t +pglvl_max_pages(int pglvl) +{ + vm_pindex_t res; + int i; + + for (res = 0, i = pglvl; i > 0; i--) { + res *= IOMMU_NPTEPG; + res++; + } + return (res); +} + +iommu_gaddr_t +pglvl_page_size(int total_pglvl, int lvl) +{ + int rlvl; + static const iommu_gaddr_t pg_sz[] = { + (iommu_gaddr_t)IOMMU_PAGE_SIZE, + (iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT, + (iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT), + (iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT), + (iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT), + (iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT), + (iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT), + }; + + KASSERT(lvl >= 0 && lvl < total_pglvl, + ("total %d lvl %d", total_pglvl, lvl)); + rlvl = total_pglvl - lvl - 1; + KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl)); + return (pg_sz[rlvl]); +} diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h new file mode 100644 index 000000000000..a1ed5c71c513 --- /dev/null +++ b/sys/x86/iommu/x86_iommu.h @@ -0,0 +1,196 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2013-2015, 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef __X86_IOMMU_X86_IOMMU_H +#define __X86_IOMMU_X86_IOMMU_H + +/* Both Intel and AMD are not too crazy to have different sizes. */ +typedef struct iommu_pte { + uint64_t pte; +} iommu_pte_t; + +#define IOMMU_PAGE_SIZE PAGE_SIZE +#define IOMMU_PAGE_MASK (IOMMU_PAGE_SIZE - 1) +#define IOMMU_PAGE_SHIFT PAGE_SHIFT +#define IOMMU_NPTEPG (IOMMU_PAGE_SIZE / sizeof(iommu_pte_t)) +#define IOMMU_NPTEPGSHIFT 9 +#define IOMMU_PTEMASK (IOMMU_NPTEPG - 1) + +struct sf_buf; +struct vm_object; + +struct vm_page *iommu_pgalloc(struct vm_object *obj, vm_pindex_t idx, + int flags); +void iommu_pgfree(struct vm_object *obj, vm_pindex_t idx, int flags); +void *iommu_map_pgtbl(struct vm_object *obj, vm_pindex_t idx, int flags, + struct sf_buf **sf); +void iommu_unmap_pgtbl(struct sf_buf *sf); + +extern iommu_haddr_t iommu_high; +extern int iommu_tbl_pagecnt; +extern int iommu_qi_batch_coalesce; + +SYSCTL_DECL(_hw_iommu); + +struct x86_unit_common; + +struct x86_iommu { + struct x86_unit_common *(*get_x86_common)(struct + iommu_unit *iommu); + void (*unit_pre_instantiate_ctx)(struct iommu_unit *iommu); + void (*qi_ensure)(struct iommu_unit *unit, int descr_count); + void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq, + bool, bool, bool); + void (*qi_advance_tail)(struct iommu_unit *unit); + void (*qi_invalidate_emit)(struct iommu_domain *idomain, + iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq * + pseq, bool emit_wait); + void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free, + bool cansleep); + void (*domain_unload)(struct iommu_domain *iodom, + struct iommu_map_entries_tailq *entries, bool cansleep); + struct iommu_ctx *(*get_ctx)(struct iommu_unit *iommu, + device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init); + void (*free_ctx_locked)(struct iommu_unit *iommu, + struct iommu_ctx *context); + void (*free_ctx)(struct iommu_ctx *context); + struct iommu_unit *(*find)(device_t dev, bool verbose); + int (*alloc_msi_intr)(device_t src, u_int *cookies, u_int count); + int (*map_msi_intr)(device_t src, u_int cpu, u_int vector, + u_int cookie, uint64_t *addr, uint32_t *data); + int (*unmap_msi_intr)(device_t src, u_int cookie); + int (*map_ioapic_intr)(u_int ioapic_id, u_int cpu, u_int vector, + bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, + uint32_t *lo); + int (*unmap_ioapic_intr)(u_int ioapic_id, u_int *cookie); +}; +void set_x86_iommu(struct x86_iommu *); +struct x86_iommu *get_x86_iommu(void); + +struct iommu_msi_data { + int irq; + int irq_rid; + struct resource *irq_res; + void *intr_handle; + int (*handler)(void *); + int msi_data_reg; + int msi_addr_reg; + int msi_uaddr_reg; + uint64_t msi_addr; + uint32_t msi_data; + void (*enable_intr)(struct iommu_unit *); + void (*disable_intr)(struct iommu_unit *); + const char *name; +}; + +#define IOMMU_MAX_MSI 3 + +struct x86_unit_common { + uint32_t qi_buf_maxsz; + uint32_t qi_cmd_sz; + + char *inv_queue; + vm_size_t inv_queue_size; + uint32_t inv_queue_avail; + uint32_t inv_queue_tail; + + /* + * Hw writes there on completion of wait descriptor + * processing. Intel writes 4 bytes, while AMD does the + * 8-bytes write. Due to little-endian, and use of 4-byte + * sequence numbers, the difference does not matter for us. 
+ */ + volatile uint64_t inv_waitd_seq_hw; + + uint64_t inv_waitd_seq_hw_phys; + uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ + u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ + u_int inv_seq_waiters; /* count of waiters for seq */ + u_int inv_queue_full; /* informational counter */ + + /* + * Delayed freeing of map entries queue processing: + * + * tlb_flush_head and tlb_flush_tail are used to implement a FIFO + * queue that supports concurrent dequeues and enqueues. However, + * there can only be a single dequeuer (accessing tlb_flush_head) and + * a single enqueuer (accessing tlb_flush_tail) at a time. Since the + * unit's qi_task is the only dequeuer, it can access tlb_flush_head + * without any locking. In contrast, there may be multiple enqueuers, + * so the enqueuers acquire the iommu unit lock to serialize their + * accesses to tlb_flush_tail. + * + * In this FIFO queue implementation, the key to enabling concurrent + * dequeues and enqueues is that the dequeuer never needs to access + * tlb_flush_tail and the enqueuer never needs to access + * tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail + * are never NULL, so neither a dequeuer nor an enqueuer ever needs to + * update both. Instead, tlb_flush_head always points to a "zombie" + * struct, which previously held the last dequeued item. Thus, the + * zombie's next field actually points to the struct holding the first + * item in the queue. When an item is dequeued, the current zombie is + * finally freed, and the struct that held the just dequeued item + * becomes the new zombie. When the queue is empty, tlb_flush_tail + * also points to the zombie. + */ + struct iommu_map_entry *tlb_flush_head; + struct iommu_map_entry *tlb_flush_tail; + struct task qi_task; + struct taskqueue *qi_taskqueue; + + struct iommu_msi_data intrs[IOMMU_MAX_MSI]; +}; + +void iommu_domain_free_entry(struct iommu_map_entry *entry, bool free); + +void iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq * + pseq, bool emit_wait); +void iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct + iommu_qi_genseq *gseq, bool nowait); +void iommu_qi_drain_tlb_flush(struct iommu_unit *unit); +void iommu_qi_invalidate_locked(struct iommu_domain *domain, + struct iommu_map_entry *entry, bool emit_wait); +void iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base, + iommu_gaddr_t size, bool cansleep); +void iommu_qi_common_init(struct iommu_unit *unit, task_fn_t taskfunc); +void iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)( + struct iommu_unit *)); + +int iommu_alloc_irq(struct iommu_unit *unit, int idx); +void iommu_release_intr(struct iommu_unit *unit, int idx); + +void iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev); + +int pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl); +vm_pindex_t pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl); +vm_pindex_t pglvl_max_pages(int pglvl); +iommu_gaddr_t pglvl_page_size(int total_pglvl, int lvl); + +#endif |
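
The tlb_flush_head/tlb_flush_tail comment above describes a hand-off between many enqueuers (serialized by the unit lock) and the single qi task. The following compilable userland sketch of that "zombie head" queue uses C11 atomics in place of atomic_store_rel_ptr()/atomic_load_acq_ptr() and is not kernel code; flushq_enqueue() mirrors what iommu_qi_invalidate_locked() does while holding the unit lock, and flushq_drain() what iommu_qi_drain_tlb_flush() does from the qi task.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct entry {
        _Atomic(struct entry *) next;   /* tlb_flush_next */
        bool done;                      /* stands in for the gseq check */
};

struct flushq {
        struct entry *head;     /* the zombie; touched only by the drainer */
        struct entry *tail;     /* touched only by enqueuers, under a lock */
};

static void
flushq_init(struct flushq *q)
{
        struct entry *zombie = calloc(1, sizeof(*zombie));

        atomic_init(&zombie->next, NULL);
        q->head = q->tail = zombie;
}

/* Enqueue, done while holding the enqueuers' lock (the unit lock). */
static void
flushq_enqueue(struct flushq *q, struct entry *e)
{
        atomic_init(&e->next, NULL);
        /* Publish the entry before moving the tail; release pairs with acquire. */
        atomic_store_explicit(&q->tail->next, e, memory_order_release);
        q->tail = e;
}

/* Drain, done by the single consumer; no lock needed for head. */
static void
flushq_drain(struct flushq *q)
{
        struct entry *e, *head;

        for (head = q->head;; head = e) {
                e = atomic_load_explicit(&head->next, memory_order_acquire);
                if (e == NULL || !e->done)
                        break;
                q->head = e;            /* e becomes the new zombie */
                free(head);             /* the previous zombie is released */
        }
}

int
main(void)
{
        struct flushq q;
        struct entry *e = calloc(1, sizeof(*e));

        flushq_init(&q);
        e->done = true;         /* pretend its wait descriptor completed */
        flushq_enqueue(&q, e);
        flushq_drain(&q);       /* frees the initial zombie, e remains as zombie */
        free(q.head);
        return (0);
}
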
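
The pglvl_* helpers encode the page-table geometry now shared by both drivers: with 4 KB pages and 512 PTEs per page, a 4-level table maps 512 GB, 1 GB, 2 MB and 4 KB per PTE at levels 0 through 3, and pglvl_max_pages(4) is 1 + 512 + 512^2 + 512^3 = 134480385 table pages. A small standalone check of that arithmetic; the constants are assumed to mirror IOMMU_PAGE_SHIFT and IOMMU_NPTEPGSHIFT and the program is a sketch, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define PG_SHIFT        12
#define NPTEPGSHIFT     9
#define NPTEPG          (1 << NPTEPGSHIFT)

int
main(void)
{
        uint64_t base = 0x123456789000ULL;      /* arbitrary bus address */
        uint64_t pages, size;
        int pglvl = 4, lvl, i, shift;

        /* Bytes mapped by one PTE and the PTE index of base, per level. */
        for (lvl = 0; lvl < pglvl; lvl++) {
                shift = PG_SHIFT + (pglvl - lvl - 1) * NPTEPGSHIFT;
                size = (uint64_t)1 << shift;
                printf("lvl %d: pte maps %ju bytes, pte_off %ju\n", lvl,
                    (uintmax_t)size,
                    (uintmax_t)((base >> shift) & (NPTEPG - 1)));
        }

        /* Total table pages: 1 + 512 + 512^2 + 512^3 = 134480385. */
        for (pages = 0, i = pglvl; i > 0; i--)
                pages = pages * NPTEPG + 1;
        printf("max pages %ju\n", (uintmax_t)pages);
        return (0);
}
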