diff options
Diffstat (limited to 'sys/amd64/vmm/io/ppt.c')
-rw-r--r-- | sys/amd64/vmm/io/ppt.c | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c new file mode 100644 index 000000000000..547a14e6464e --- /dev/null +++ b/sys/amd64/vmm/io/ppt.c @@ -0,0 +1,702 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/pciio.h> +#include <sys/rman.h> +#include <sys/smp.h> +#include <sys/sysctl.h> + +#include <dev/pci/pcivar.h> +#include <dev/pci/pcireg.h> + +#include <machine/resource.h> + +#include <machine/vmm.h> +#include <machine/vmm_dev.h> + +#include "vmm_lapic.h" +#include "vmm_ktr.h" + +#include "iommu.h" +#include "ppt.h" + +/* XXX locking */ + +#define MAX_MSIMSGS 32 + +/* + * If the MSI-X table is located in the middle of a BAR then that MMIO + * region gets split into two segments - one segment above the MSI-X table + * and the other segment below the MSI-X table - with a hole in place of + * the MSI-X table so accesses to it can be trapped and emulated. + * + * So, allocate a MMIO segment for each BAR register + 1 additional segment. + */ +#define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1) + +MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); + +struct pptintr_arg { /* pptintr(pptintr_arg) */ + struct pptdev *pptdev; + uint64_t addr; + uint64_t msg_data; +}; + +struct pptseg { + vm_paddr_t gpa; + size_t len; + int wired; +}; + +struct pptdev { + device_t dev; + struct vm *vm; /* owner of this device */ + TAILQ_ENTRY(pptdev) next; + struct pptseg mmio[MAX_MMIOSEGS]; + struct { + int num_msgs; /* guest state */ + + int startrid; /* host state */ + struct resource *res[MAX_MSIMSGS]; + void *cookie[MAX_MSIMSGS]; + struct pptintr_arg arg[MAX_MSIMSGS]; + } msi; + + struct { + int num_msgs; + int startrid; + int msix_table_rid; + int msix_pba_rid; + struct resource *msix_table_res; + struct resource *msix_pba_res; + struct resource **res; + void **cookie; + struct pptintr_arg *arg; + } msix; +}; + +SYSCTL_DECL(_hw_vmm); +SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "bhyve passthru devices"); + +static int num_pptdevs; +SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0, + "number of pci passthru devices"); + +static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list); + +static int +ppt_probe(device_t dev) +{ + int bus, slot, func; + struct pci_devinfo *dinfo; + + dinfo = (struct pci_devinfo *)device_get_ivars(dev); + + bus = pci_get_bus(dev); + slot = pci_get_slot(dev); + func = pci_get_function(dev); + + /* + * To qualify as a pci passthrough device a device must: + * - be allowed by administrator to be used in this role + * - be an endpoint device + */ + if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL) + return (ENXIO); + else if (vmm_is_pptdev(bus, slot, func)) + return (0); + else + /* + * Returning BUS_PROBE_NOWILDCARD here matches devices that the + * SR-IOV infrastructure specified as "ppt" passthrough devices. + * All normal devices that did not have "ppt" specified as their + * driver will not be matched by this. + */ + return (BUS_PROBE_NOWILDCARD); +} + +static int +ppt_attach(device_t dev) +{ + struct pptdev *ppt; + + ppt = device_get_softc(dev); + + iommu_remove_device(iommu_host_domain(), pci_get_rid(dev)); + num_pptdevs++; + TAILQ_INSERT_TAIL(&pptdev_list, ppt, next); + ppt->dev = dev; + + if (bootverbose) + device_printf(dev, "attached\n"); + + return (0); +} + +static int +ppt_detach(device_t dev) +{ + struct pptdev *ppt; + + ppt = device_get_softc(dev); + + if (ppt->vm != NULL) + return (EBUSY); + num_pptdevs--; + TAILQ_REMOVE(&pptdev_list, ppt, next); + pci_disable_busmaster(dev); + iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); + + return (0); +} + +static device_method_t ppt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ppt_probe), + DEVMETHOD(device_attach, ppt_attach), + DEVMETHOD(device_detach, ppt_detach), + {0, 0} +}; + +static devclass_t ppt_devclass; +DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev)); +DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); + +static struct pptdev * +ppt_find(int bus, int slot, int func) +{ + device_t dev; + struct pptdev *ppt; + int b, s, f; + + TAILQ_FOREACH(ppt, &pptdev_list, next) { + dev = ppt->dev; + b = pci_get_bus(dev); + s = pci_get_slot(dev); + f = pci_get_function(dev); + if (bus == b && slot == s && func == f) + return (ppt); + } + return (NULL); +} + +static void +ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) +{ + int i; + struct pptseg *seg; + + for (i = 0; i < MAX_MMIOSEGS; i++) { + seg = &ppt->mmio[i]; + if (seg->len == 0) + continue; + (void)vm_unmap_mmio(vm, seg->gpa, seg->len); + bzero(seg, sizeof(struct pptseg)); + } +} + +static void +ppt_teardown_msi(struct pptdev *ppt) +{ + int i, rid; + void *cookie; + struct resource *res; + + if (ppt->msi.num_msgs == 0) + return; + + for (i = 0; i < ppt->msi.num_msgs; i++) { + rid = ppt->msi.startrid + i; + res = ppt->msi.res[i]; + cookie = ppt->msi.cookie[i]; + + if (cookie != NULL) + bus_teardown_intr(ppt->dev, res, cookie); + + if (res != NULL) + bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); + + ppt->msi.res[i] = NULL; + ppt->msi.cookie[i] = NULL; + } + + if (ppt->msi.startrid == 1) + pci_release_msi(ppt->dev); + + ppt->msi.num_msgs = 0; +} + +static void +ppt_teardown_msix_intr(struct pptdev *ppt, int idx) +{ + int rid; + struct resource *res; + void *cookie; + + rid = ppt->msix.startrid + idx; + res = ppt->msix.res[idx]; + cookie = ppt->msix.cookie[idx]; + + if (cookie != NULL) + bus_teardown_intr(ppt->dev, res, cookie); + + if (res != NULL) + bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); + + ppt->msix.res[idx] = NULL; + ppt->msix.cookie[idx] = NULL; +} + +static void +ppt_teardown_msix(struct pptdev *ppt) +{ + int i; + + if (ppt->msix.num_msgs == 0) + return; + + for (i = 0; i < ppt->msix.num_msgs; i++) + ppt_teardown_msix_intr(ppt, i); + + free(ppt->msix.res, M_PPTMSIX); + free(ppt->msix.cookie, M_PPTMSIX); + free(ppt->msix.arg, M_PPTMSIX); + + pci_release_msi(ppt->dev); + + if (ppt->msix.msix_table_res) { + bus_release_resource(ppt->dev, SYS_RES_MEMORY, + ppt->msix.msix_table_rid, + ppt->msix.msix_table_res); + ppt->msix.msix_table_res = NULL; + ppt->msix.msix_table_rid = 0; + } + if (ppt->msix.msix_pba_res) { + bus_release_resource(ppt->dev, SYS_RES_MEMORY, + ppt->msix.msix_pba_rid, + ppt->msix.msix_pba_res); + ppt->msix.msix_pba_res = NULL; + ppt->msix.msix_pba_rid = 0; + } + + ppt->msix.num_msgs = 0; +} + +int +ppt_avail_devices(void) +{ + + return (num_pptdevs); +} + +int +ppt_assigned_devices(struct vm *vm) +{ + struct pptdev *ppt; + int num; + + num = 0; + TAILQ_FOREACH(ppt, &pptdev_list, next) { + if (ppt->vm == vm) + num++; + } + return (num); +} + +bool +ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) +{ + int i; + struct pptdev *ppt; + struct pptseg *seg; + + TAILQ_FOREACH(ppt, &pptdev_list, next) { + if (ppt->vm != vm) + continue; + + for (i = 0; i < MAX_MMIOSEGS; i++) { + seg = &ppt->mmio[i]; + if (seg->len == 0) + continue; + if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) + return (true); + } + } + + return (false); +} + +static void +ppt_pci_reset(device_t dev) +{ + + if (pcie_flr(dev, + max(pcie_get_max_completion_timeout(dev) / 1000, 10), true)) + return; + + pci_power_reset(dev); +} + +int +ppt_assign_device(struct vm *vm, int bus, int slot, int func) +{ + struct pptdev *ppt; + + ppt = ppt_find(bus, slot, func); + if (ppt != NULL) { + /* + * If this device is owned by a different VM then we + * cannot change its owner. + */ + if (ppt->vm != NULL && ppt->vm != vm) + return (EBUSY); + + pci_save_state(ppt->dev); + ppt_pci_reset(ppt->dev); + pci_restore_state(ppt->dev); + ppt->vm = vm; + iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + return (0); + } + return (ENOENT); +} + +int +ppt_unassign_device(struct vm *vm, int bus, int slot, int func) +{ + struct pptdev *ppt; + + ppt = ppt_find(bus, slot, func); + if (ppt != NULL) { + /* + * If this device is not owned by this 'vm' then bail out. + */ + if (ppt->vm != vm) + return (EBUSY); + + pci_save_state(ppt->dev); + ppt_pci_reset(ppt->dev); + pci_restore_state(ppt->dev); + ppt_unmap_mmio(vm, ppt); + ppt_teardown_msi(ppt); + ppt_teardown_msix(ppt); + iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + ppt->vm = NULL; + return (0); + } + return (ENOENT); +} + +int +ppt_unassign_all(struct vm *vm) +{ + struct pptdev *ppt; + int bus, slot, func; + device_t dev; + + TAILQ_FOREACH(ppt, &pptdev_list, next) { + if (ppt->vm == vm) { + dev = ppt->dev; + bus = pci_get_bus(dev); + slot = pci_get_slot(dev); + func = pci_get_function(dev); + vm_unassign_pptdev(vm, bus, slot, func); + } + } + + return (0); +} + +int +ppt_map_mmio(struct vm *vm, int bus, int slot, int func, + vm_paddr_t gpa, size_t len, vm_paddr_t hpa) +{ + int i, error; + struct pptseg *seg; + struct pptdev *ppt; + + ppt = ppt_find(bus, slot, func); + if (ppt != NULL) { + if (ppt->vm != vm) + return (EBUSY); + + for (i = 0; i < MAX_MMIOSEGS; i++) { + seg = &ppt->mmio[i]; + if (seg->len == 0) { + error = vm_map_mmio(vm, gpa, len, hpa); + if (error == 0) { + seg->gpa = gpa; + seg->len = len; + } + return (error); + } + } + return (ENOSPC); + } + return (ENOENT); +} + +static int +pptintr(void *arg) +{ + struct pptdev *ppt; + struct pptintr_arg *pptarg; + + pptarg = arg; + ppt = pptarg->pptdev; + + if (ppt->vm != NULL) + lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data); + else { + /* + * XXX + * This is not expected to happen - panic? + */ + } + + /* + * For legacy interrupts give other filters a chance in case + * the interrupt was not generated by the passthrough device. + */ + if (ppt->msi.startrid == 0) + return (FILTER_STRAY); + else + return (FILTER_HANDLED); +} + +int +ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, + uint64_t addr, uint64_t msg, int numvec) +{ + int i, rid, flags; + int msi_count, startrid, error, tmp; + struct pptdev *ppt; + + if (numvec < 0 || numvec > MAX_MSIMSGS) + return (EINVAL); + + ppt = ppt_find(bus, slot, func); + if (ppt == NULL) + return (ENOENT); + if (ppt->vm != vm) /* Make sure we own this device */ + return (EBUSY); + + /* Free any allocated resources */ + ppt_teardown_msi(ppt); + + if (numvec == 0) /* nothing more to do */ + return (0); + + flags = RF_ACTIVE; + msi_count = pci_msi_count(ppt->dev); + if (msi_count == 0) { + startrid = 0; /* legacy interrupt */ + msi_count = 1; + flags |= RF_SHAREABLE; + } else + startrid = 1; /* MSI */ + + /* + * The device must be capable of supporting the number of vectors + * the guest wants to allocate. + */ + if (numvec > msi_count) + return (EINVAL); + + /* + * Make sure that we can allocate all the MSI vectors that are needed + * by the guest. + */ + if (startrid == 1) { + tmp = numvec; + error = pci_alloc_msi(ppt->dev, &tmp); + if (error) + return (error); + else if (tmp != numvec) { + pci_release_msi(ppt->dev); + return (ENOSPC); + } else { + /* success */ + } + } + + ppt->msi.startrid = startrid; + + /* + * Allocate the irq resource and attach it to the interrupt handler. + */ + for (i = 0; i < numvec; i++) { + ppt->msi.num_msgs = i + 1; + ppt->msi.cookie[i] = NULL; + + rid = startrid + i; + ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, + &rid, flags); + if (ppt->msi.res[i] == NULL) + break; + + ppt->msi.arg[i].pptdev = ppt; + ppt->msi.arg[i].addr = addr; + ppt->msi.arg[i].msg_data = msg + i; + + error = bus_setup_intr(ppt->dev, ppt->msi.res[i], + INTR_TYPE_NET | INTR_MPSAFE, + pptintr, NULL, &ppt->msi.arg[i], + &ppt->msi.cookie[i]); + if (error != 0) + break; + } + + if (i < numvec) { + ppt_teardown_msi(ppt); + return (ENXIO); + } + + return (0); +} + +int +ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, + int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) +{ + struct pptdev *ppt; + struct pci_devinfo *dinfo; + int numvec, alloced, rid, error; + size_t res_size, cookie_size, arg_size; + + ppt = ppt_find(bus, slot, func); + if (ppt == NULL) + return (ENOENT); + if (ppt->vm != vm) /* Make sure we own this device */ + return (EBUSY); + + dinfo = device_get_ivars(ppt->dev); + if (!dinfo) + return (ENXIO); + + /* + * First-time configuration: + * Allocate the MSI-X table + * Allocate the IRQ resources + * Set up some variables in ppt->msix + */ + if (ppt->msix.num_msgs == 0) { + numvec = pci_msix_count(ppt->dev); + if (numvec <= 0) + return (EINVAL); + + ppt->msix.startrid = 1; + ppt->msix.num_msgs = numvec; + + res_size = numvec * sizeof(ppt->msix.res[0]); + cookie_size = numvec * sizeof(ppt->msix.cookie[0]); + arg_size = numvec * sizeof(ppt->msix.arg[0]); + + ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO); + ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, + M_WAITOK | M_ZERO); + ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO); + + rid = dinfo->cfg.msix.msix_table_bar; + ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); + + if (ppt->msix.msix_table_res == NULL) { + ppt_teardown_msix(ppt); + return (ENOSPC); + } + ppt->msix.msix_table_rid = rid; + + if (dinfo->cfg.msix.msix_table_bar != + dinfo->cfg.msix.msix_pba_bar) { + rid = dinfo->cfg.msix.msix_pba_bar; + ppt->msix.msix_pba_res = bus_alloc_resource_any( + ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + + if (ppt->msix.msix_pba_res == NULL) { + ppt_teardown_msix(ppt); + return (ENOSPC); + } + ppt->msix.msix_pba_rid = rid; + } + + alloced = numvec; + error = pci_alloc_msix(ppt->dev, &alloced); + if (error || alloced != numvec) { + ppt_teardown_msix(ppt); + return (error == 0 ? ENOSPC: error); + } + } + + if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { + /* Tear down the IRQ if it's already set up */ + ppt_teardown_msix_intr(ppt, idx); + + /* Allocate the IRQ resource */ + ppt->msix.cookie[idx] = NULL; + rid = ppt->msix.startrid + idx; + ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, + &rid, RF_ACTIVE); + if (ppt->msix.res[idx] == NULL) + return (ENXIO); + + ppt->msix.arg[idx].pptdev = ppt; + ppt->msix.arg[idx].addr = addr; + ppt->msix.arg[idx].msg_data = msg; + + /* Setup the MSI-X interrupt */ + error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], + INTR_TYPE_NET | INTR_MPSAFE, + pptintr, NULL, &ppt->msix.arg[idx], + &ppt->msix.cookie[idx]); + + if (error != 0) { + bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); + ppt->msix.cookie[idx] = NULL; + ppt->msix.res[idx] = NULL; + return (ENXIO); + } + } else { + /* Masked, tear it down if it's already been set up */ + ppt_teardown_msix_intr(ppt, idx); + } + + return (0); +} |