-rw-r--r--  share/man/man4/pvscsi.4              |   74
-rw-r--r--  sys/amd64/conf/GENERIC               |    1
-rw-r--r--  sys/conf/files.amd64                 |    1
-rw-r--r--  sys/conf/files.i386                  |    1
-rw-r--r--  sys/dev/vmware/pvscsi/LICENSE        |   51
-rw-r--r--  sys/dev/vmware/pvscsi/pvscsi.c       | 1804
-rw-r--r--  sys/dev/vmware/pvscsi/pvscsi.h       |  215
-rw-r--r--  sys/i386/conf/GENERIC                |    1
-rw-r--r--  sys/modules/vmware/Makefile          |    2
-rw-r--r--  sys/modules/vmware/pvscsi/Makefile   |   10
10 files changed, 2159 insertions(+), 1 deletion(-)
diff --git a/share/man/man4/pvscsi.4 b/share/man/man4/pvscsi.4
new file mode 100644
index 000000000000..1e9b08f10250
--- /dev/null
+++ b/share/man/man4/pvscsi.4
@@ -0,0 +1,74 @@
+.\" Copyright (c) 2018 VMware, Inc.
+.\"
+.\" SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
+.\"
+.\" $FreeBSD$
+.Dd December 5, 2018
+.Dt PVSCSI 4
+.Os
+.Sh NAME
+.Nm pvscsi
+.Nd VMware Paravirtual SCSI Controller
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device pci"
+.Cd "device scbus"
+.Cd "device pvscsi"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+pvscsi_load="YES"
+.Ed
+.Pp
+The following tunables are settable from the
+.Xr loader 8 :
+.Bl -ohang
+.It Va hw.pvscsi.request_ring_pages
+controls how many pages are allocated for the device request ring.
+A non-positive value causes the driver to choose the value based on device
+capabilities.
+A positive value uses that many pages, up to a maximum of 32.
+The default is 0.
+.It Va hw.pvscsi.max_queue_depth
+controls the queue size for the adapter.
+A non-positive value causes the driver to choose the value based on the number
+of request ring pages.
+A positive value sets the queue size, up to the maximum allowed by the number
+of request ring pages.
+The default is 0.
+.It Va hw.pvscsi.use_msg
+a nonzero value enables the use of the PVSCSI message queue, which allows
+disks to be added and removed without a manual rescan.
+The default is 1.
+.It Va hw.pvscsi.use_msi
+a nonzero value enables the use of MSI interrupts.
+The default is 1.
+.It Va hw.pvscsi.use_msix
+a nonzero value enables the use of MSI-X interrupts.
+The default is 1.
+.It Va hw.pvscsi.use_req_call_threshold
+a nonzero value enables the request call threshold functionality, which defers
+kicking the device for read/write I/O until the number of outstanding requests
+reaches the threshold advertised by the device.
+The default is 1.
+.El
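+.Pp
+For example, to use four request ring pages and cap the queue depth at 64
+(illustrative values), place the following lines in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+hw.pvscsi.request_ring_pages="4"
+hw.pvscsi.max_queue_depth="64"
+.Ed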
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for the VMware Paravirtual SCSI Controller (PVSCSI)
+in VMware virtual machines.
+.Sh SEE ALSO
+.Xr cam 4 ,
+.Xr da 4
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Fx 13.0 .
+.Sh AUTHORS
+.An Vishal Bhakta Aq Mt vbhakta@vmware.com .
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 30246adb4857..8f667a805068 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -152,6 +152,7 @@ device sym # NCR/Symbios Logic
device trm # Tekram DC395U/UW/F DC315U adapters
device isci # Intel C600 SAS controller
device ocs_fc # Emulex FC adapters
+device pvscsi # VMware PVSCSI
# ATA/SCSI peripherals
device scbus # SCSI bus (required for ATA/SCSI)
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 5cc582ecace2..294b8878df5a 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -345,6 +345,7 @@ dev/vmware/vmci/vmci_kernel_if.c optional vmci
dev/vmware/vmci/vmci_qpair.c optional vmci
dev/vmware/vmci/vmci_queue_pair.c optional vmci
dev/vmware/vmci/vmci_resource.c optional vmci
+dev/vmware/pvscsi/pvscsi.c optional pvscsi
dev/vmd/vmd.c optional vmd
dev/vmd/vmd_bus.c optional vmd_bus
dev/wbwd/wbwd.c optional wbwd
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index eced33566b43..33d2f953857a 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -162,6 +162,7 @@ dev/vmware/vmci/vmci_kernel_if.c optional vmci
dev/vmware/vmci/vmci_qpair.c optional vmci
dev/vmware/vmci/vmci_queue_pair.c optional vmci
dev/vmware/vmci/vmci_resource.c optional vmci
+dev/vmware/pvscsi/pvscsi.c optional pvscsi
dev/acpi_support/acpi_wmi_if.m standard
dev/wbwd/wbwd.c optional wbwd
i386/acpica/acpi_machdep.c optional acpi
diff --git a/sys/dev/vmware/pvscsi/LICENSE b/sys/dev/vmware/pvscsi/LICENSE
new file mode 100644
index 000000000000..a736cc1324a7
--- /dev/null
+++ b/sys/dev/vmware/pvscsi/LICENSE
@@ -0,0 +1,51 @@
+$FreeBSD$
+
+These files are provided under a dual BSD-2 Clause/GPLv2 license. When
+using or redistributing this file, you may do so under either license.
+
+BSD-2 Clause License
+
+Copyright (c) 2018 VMware, Inc.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+GPL License Summary
+
+Copyright (c) 2018 VMware, Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of version 2 of the GNU General Public License as
+published by the Free Software Foundation.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+The full GNU General Public License is included in this distribution
+in the file called LICENSE.GPL.
diff --git a/sys/dev/vmware/pvscsi/pvscsi.c b/sys/dev/vmware/pvscsi/pvscsi.c
new file mode 100644
index 000000000000..f7181ecdf37b
--- /dev/null
+++ b/sys/dev/vmware/pvscsi/pvscsi.c
@@ -0,0 +1,1804 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+#include <cam/cam_debug.h>
+#include <cam/cam_sim.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/scsi/scsi_message.h>
+
+#include "pvscsi.h"
+
+#define PVSCSI_DEFAULT_NUM_PAGES_REQ_RING 8
+#define PVSCSI_SENSE_LENGTH 256
+
+MALLOC_DECLARE(M_PVSCSI);
+MALLOC_DEFINE(M_PVSCSI, "pvscsi", "PVSCSI memory");
+
+#ifdef PVSCSI_DEBUG_LOGGING
+#define DEBUG_PRINTF(level, dev, fmt, ...) \
+ do { \
+ if (pvscsi_log_level >= (level)) { \
+ device_printf((dev), (fmt), ##__VA_ARGS__); \
+ } \
+	} while (0)
+#else
+#define DEBUG_PRINTF(level, dev, fmt, ...)
+#endif /* PVSCSI_DEBUG_LOGGING */
+
+#define ccb_pvscsi_hcb spriv_ptr0
+#define ccb_pvscsi_sc spriv_ptr1
+
+struct pvscsi_softc;
+static timeout_t pvscsi_timeout;
+struct pvscsi_hcb;
+struct pvscsi_dma;
+
+static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
+ uint32_t offset);
+static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
+ uint32_t val);
+static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
+static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
+ uint32_t val);
+static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
+static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
+static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
+static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
+ uint32_t len);
+static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
+static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
+static void pvscsi_setup_rings(struct pvscsi_softc *sc);
+static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
+static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);
+
+static void pvscsi_timeout(void *arg);
+static void pvscsi_freeze(struct pvscsi_softc *sc);
+static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
+static void pvscsi_bus_reset(struct pvscsi_softc *sc);
+static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
+static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
+ union ccb *ccb);
+
+static void pvscsi_process_completion(struct pvscsi_softc *sc,
+ struct pvscsi_ring_cmp_desc *e);
+static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
+static void pvscsi_process_msg(struct pvscsi_softc *sc,
+ struct pvscsi_ring_msg_desc *e);
+static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);
+
+static void pvscsi_intr_locked(struct pvscsi_softc *sc);
+static void pvscsi_intr(void *xsc);
+static void pvscsi_poll(struct cam_sim *sim);
+
+static void pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg,
+ int error);
+static void pvscsi_action(struct cam_sim *sim, union ccb *ccb);
+
+static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
+ struct pvscsi_hcb *hcb);
+static inline struct pvscsi_hcb *pvscsi_context_to_hcb(struct pvscsi_softc *sc,
+    uint64_t context);
+static struct pvscsi_hcb *pvscsi_hcb_get(struct pvscsi_softc *sc);
+static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);
+
+static void pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg,
+ int error);
+static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
+static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
+ bus_size_t size, bus_size_t alignment);
+static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
+ struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
+static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
+ uint32_t hcbs_allocated);
+static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
+static void pvscsi_free_rings(struct pvscsi_softc *sc);
+static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
+static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
+static int pvscsi_setup_interrupts(struct pvscsi_softc *sc);
+static void pvscsi_free_all(struct pvscsi_softc *sc);
+
+static int pvscsi_attach(device_t dev);
+static int pvscsi_detach(device_t dev);
+static int pvscsi_probe(device_t dev);
+static int pvscsi_shutdown(device_t dev);
+static int pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value);
+
+#ifdef PVSCSI_DEBUG_LOGGING
+static int pvscsi_log_level = 0;
+static SYSCTL_NODE(_hw, OID_AUTO, pvscsi, CTLFLAG_RD, 0,
+ "PVSCSI driver parameters");
+SYSCTL_INT(_hw_pvscsi, OID_AUTO, log_level, CTLFLAG_RWTUN, &pvscsi_log_level,
+ 0, "PVSCSI debug log level");
+#endif
+
+static int pvscsi_request_ring_pages = 0;
+TUNABLE_INT("hw.pvscsi.request_ring_pages", &pvscsi_request_ring_pages);
+
+static int pvscsi_use_msg = 1;
+TUNABLE_INT("hw.pvscsi.use_msg", &pvscsi_use_msg);
+
+static int pvscsi_use_msi = 1;
+TUNABLE_INT("hw.pvscsi.use_msi", &pvscsi_use_msi);
+
+static int pvscsi_use_msix = 1;
+TUNABLE_INT("hw.pvscsi.use_msix", &pvscsi_use_msix);
+
+static int pvscsi_use_req_call_threshold = 1;
+TUNABLE_INT("hw.pvscsi.use_req_call_threshold", &pvscsi_use_req_call_threshold);
+
+static int pvscsi_max_queue_depth = 0;
+TUNABLE_INT("hw.pvscsi.max_queue_depth", &pvscsi_max_queue_depth);
+
+struct pvscsi_sg_list {
+ struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
+};
+
+#define PVSCSI_ABORT_TIMEOUT 2
+#define PVSCSI_RESET_TIMEOUT 10
+
+#define PVSCSI_HCB_NONE 0
+#define PVSCSI_HCB_ABORT 1
+#define PVSCSI_HCB_DEVICE_RESET 2
+#define PVSCSI_HCB_BUS_RESET 3
+
+struct pvscsi_hcb {
+ union ccb *ccb;
+ struct pvscsi_ring_req_desc *e;
+ int recovery;
+ SLIST_ENTRY(pvscsi_hcb) links;
+
+ struct callout callout;
+ bus_dmamap_t dma_map;
+ void *sense_buffer;
+ bus_addr_t sense_buffer_paddr;
+ struct pvscsi_sg_list *sg_list;
+ bus_addr_t sg_list_paddr;
+};
+
+struct pvscsi_dma
+{
+ bus_dma_tag_t tag;
+ bus_dmamap_t map;
+ void *vaddr;
+ bus_addr_t paddr;
+ bus_size_t size;
+};
+
+struct pvscsi_softc {
+ device_t dev;
+ struct mtx lock;
+ struct cam_sim *sim;
+ struct cam_path *bus_path;
+ int frozen;
+ struct pvscsi_rings_state *rings_state;
+ struct pvscsi_ring_req_desc *req_ring;
+ struct pvscsi_ring_cmp_desc *cmp_ring;
+ struct pvscsi_ring_msg_desc *msg_ring;
+ uint32_t hcb_cnt;
+ struct pvscsi_hcb *hcbs;
+ SLIST_HEAD(, pvscsi_hcb) free_list;
+ bus_dma_tag_t parent_dmat;
+ bus_dma_tag_t buffer_dmat;
+
+ bool use_msg;
+ uint32_t max_targets;
+ int mm_rid;
+ struct resource *mm_res;
+ int irq_id;
+ struct resource *irq_res;
+ void *irq_handler;
+ int use_req_call_threshold;
+ int use_msi_or_msix;
+
+ uint64_t rings_state_ppn;
+ uint32_t req_ring_num_pages;
+ uint64_t req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
+ uint32_t cmp_ring_num_pages;
+ uint64_t cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
+ uint32_t msg_ring_num_pages;
+ uint64_t msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];
+
+ struct pvscsi_dma rings_state_dma;
+ struct pvscsi_dma req_ring_dma;
+ struct pvscsi_dma cmp_ring_dma;
+ struct pvscsi_dma msg_ring_dma;
+
+ struct pvscsi_dma sg_list_dma;
+ struct pvscsi_dma sense_buffer_dma;
+};
+
+static int
+pvscsi_get_tunable(struct pvscsi_softc *sc, char *name, int value)
+{
+ char cfg[64];
+
+ snprintf(cfg, sizeof(cfg), "hw.pvscsi.%d.%s", device_get_unit(sc->dev),
+ name);
+ TUNABLE_INT_FETCH(cfg, &value);
+
+ return (value);
+}
+
+static void
+pvscsi_freeze(struct pvscsi_softc *sc)
+{
+
+ if (!sc->frozen) {
+ xpt_freeze_simq(sc->sim, 1);
+ sc->frozen = 1;
+ }
+}
+
+static inline uint32_t
+pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
+{
+
+ return (bus_read_4(sc->mm_res, offset));
+}
+
+static inline void
+pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
+{
+
+ bus_write_4(sc->mm_res, offset, val);
+}
+
+static inline uint32_t
+pvscsi_read_intr_status(struct pvscsi_softc *sc)
+{
+
+ return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
+}
+
+static inline void
+pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
+{
+
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
+}
+
+static inline void
+pvscsi_intr_enable(struct pvscsi_softc *sc)
+{
+ uint32_t mask;
+
+ mask = PVSCSI_INTR_CMPL_MASK;
+ if (sc->use_msg) {
+ mask |= PVSCSI_INTR_MSG_MASK;
+ }
+
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
+}
+
+static inline void
+pvscsi_intr_disable(struct pvscsi_softc *sc)
+{
+
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
+}
+
+static void
+pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
+{
+ struct pvscsi_rings_state *s;
+
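+	/*
+	 * For read/write I/O, the register kick (an expensive VM exit) is
+	 * skipped while request call threshold is in use and fewer than
+	 * req_call_threshold requests are outstanding.  Non-R/W commands
+	 * always kick the device.
+	 */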
+ if (cdb0 == READ_6 || cdb0 == READ_10 ||
+ cdb0 == READ_12 || cdb0 == READ_16 ||
+ cdb0 == WRITE_6 || cdb0 == WRITE_10 ||
+ cdb0 == WRITE_12 || cdb0 == WRITE_16) {
+ s = sc->rings_state;
+
+ if (!sc->use_req_call_threshold ||
+ (s->req_prod_idx - s->req_cons_idx) >=
+ s->req_call_threshold) {
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
+ }
+ } else {
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
+ }
+}
+
+static void
+pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
+ uint32_t len)
+{
+ uint32_t *data_ptr;
+ int i;
+
+ KASSERT(len % sizeof(uint32_t) == 0,
+ ("command size not a multiple of 4"));
+
+ data_ptr = data;
+ len /= sizeof(uint32_t);
+
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
+ for (i = 0; i < len; ++i) {
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
+ data_ptr[i]);
+ }
+}
+
+static inline uint64_t
+pvscsi_hcb_to_context(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
+{
+
+ /* Offset by 1 because context must not be 0 */
+ return (hcb - sc->hcbs + 1);
+}
+
+static inline struct pvscsi_hcb *
+pvscsi_context_to_hcb(struct pvscsi_softc *sc, uint64_t context)
+{
+
+ return (sc->hcbs + (context - 1));
+}
+
+static struct pvscsi_hcb *
+pvscsi_hcb_get(struct pvscsi_softc *sc)
+{
+ struct pvscsi_hcb *hcb;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ hcb = SLIST_FIRST(&sc->free_list);
+ if (hcb) {
+ SLIST_REMOVE_HEAD(&sc->free_list, links);
+ }
+
+ return (hcb);
+}
+
+static void
+pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
+{
+
+ mtx_assert(&sc->lock, MA_OWNED);
+ hcb->ccb = NULL;
+ hcb->e = NULL;
+ hcb->recovery = PVSCSI_HCB_NONE;
+ SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
+}
+
+static uint32_t
+pvscsi_get_max_targets(struct pvscsi_softc *sc)
+{
+ uint32_t max_targets;
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);
+
+ max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
+
+ if (max_targets == ~0) {
+ max_targets = 16;
+ }
+
+ return (max_targets);
+}
+
+static int
+pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
+{
+ uint32_t status;
+ struct pvscsi_cmd_desc_setup_req_call cmd;
+
+	if (!pvscsi_get_tunable(sc, "use_req_call_threshold",
+ pvscsi_use_req_call_threshold)) {
+ return (0);
+ }
+
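+	/*
+	 * Probe for device support: COMMAND_STATUS reads back -1 when the
+	 * device does not implement SETUP_REQCALLTHRESHOLD.
+	 */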
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
+ PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
+ status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
+
+ if (status != -1) {
+ bzero(&cmd, sizeof(cmd));
+ cmd.enable = enable;
+ pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
+ &cmd, sizeof(cmd));
+ status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
+
+ return (status != 0);
+ } else {
+ return (0);
+ }
+}
+
+static void
+pvscsi_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+ bus_addr_t *dest;
+
+ KASSERT(nseg == 1, ("more than one segment"));
+
+ dest = arg;
+
+ if (!error) {
+ *dest = segs->ds_addr;
+ }
+}
+
+static void
+pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
+{
+
+ if (dma->tag != NULL) {
+ if (dma->paddr != 0) {
+ bus_dmamap_unload(dma->tag, dma->map);
+ }
+
+ if (dma->vaddr != NULL) {
+ bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
+ }
+
+ bus_dma_tag_destroy(dma->tag);
+ }
+
+ bzero(dma, sizeof(*dma));
+}
+
+static int
+pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
+ bus_size_t size, bus_size_t alignment)
+{
+ int error;
+
+ bzero(dma, sizeof(*dma));
+
+ error = bus_dma_tag_create(sc->parent_dmat, alignment, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size,
+ BUS_DMA_ALLOCNOW, NULL, NULL, &dma->tag);
+ if (error) {
+ device_printf(sc->dev, "error creating dma tag, error %d\n",
+ error);
+ goto fail;
+ }
+
+ error = bus_dmamem_alloc(dma->tag, &dma->vaddr,
+ BUS_DMA_NOWAIT | BUS_DMA_ZERO, &dma->map);
+ if (error) {
+ device_printf(sc->dev, "error allocating dma mem, error %d\n",
+ error);
+ goto fail;
+ }
+
+ error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
+ pvscsi_dma_cb, &dma->paddr, BUS_DMA_NOWAIT);
+ if (error) {
+		device_printf(sc->dev, "error mapping dma mem, error %d\n",
+ error);
+ goto fail;
+ }
+
+ dma->size = size;
+
+fail:
+ if (error) {
+ pvscsi_dma_free(sc, dma);
+ }
+ return (error);
+}
+
+static int
+pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
+ uint64_t *ppn_list, uint32_t num_pages)
+{
+ int error;
+ uint32_t i;
+ uint64_t ppn;
+
+ error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
+ if (error) {
+ device_printf(sc->dev, "Error allocating pages, error %d\n",
+ error);
+ return (error);
+ }
+
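+	/* The allocation is contiguous, so the PPNs are consecutive. */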
+ ppn = dma->paddr >> PAGE_SHIFT;
+ for (i = 0; i < num_pages; i++) {
+ ppn_list[i] = ppn + i;
+ }
+
+ return (0);
+}
+
+static void
+pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
+{
+ int i;
+ int lock_owned;
+ struct pvscsi_hcb *hcb;
+
+ lock_owned = mtx_owned(&sc->lock);
+
+ if (lock_owned) {
+ mtx_unlock(&sc->lock);
+ }
+ for (i = 0; i < hcbs_allocated; ++i) {
+ hcb = sc->hcbs + i;
+ callout_drain(&hcb->callout);
+	}
+ if (lock_owned) {
+ mtx_lock(&sc->lock);
+ }
+
+ for (i = 0; i < hcbs_allocated; ++i) {
+ hcb = sc->hcbs + i;
+ bus_dmamap_destroy(sc->buffer_dmat, hcb->dma_map);
+	}
+
+ pvscsi_dma_free(sc, &sc->sense_buffer_dma);
+ pvscsi_dma_free(sc, &sc->sg_list_dma);
+}
+
+static int
+pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
+{
+ int i;
+ int error;
+ struct pvscsi_hcb *hcb;
+
+ i = 0;
+
+ error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
+ sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
+ if (error) {
+ device_printf(sc->dev,
+		    "Error allocating sg list DMA memory, error %d\n", error);
+ goto fail;
+ }
+
+ error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
+ PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
+ if (error) {
+ device_printf(sc->dev,
+		    "Error allocating sense buffer DMA memory, error %d\n", error);
+ goto fail;
+ }
+
+ for (i = 0; i < sc->hcb_cnt; ++i) {
+ hcb = sc->hcbs + i;
+
+ error = bus_dmamap_create(sc->buffer_dmat, 0, &hcb->dma_map);
+ if (error) {
+ device_printf(sc->dev,
+ "Error creating dma map for hcb %d, error %d\n",
+ i, error);
+ goto fail;
+ }
+
+ hcb->sense_buffer =
+ (void *)((caddr_t)sc->sense_buffer_dma.vaddr +
+ PVSCSI_SENSE_LENGTH * i);
+ hcb->sense_buffer_paddr =
+ sc->sense_buffer_dma.paddr + PVSCSI_SENSE_LENGTH * i;
+
+ hcb->sg_list =
+ (struct pvscsi_sg_list *)((caddr_t)sc->sg_list_dma.vaddr +
+ sizeof(struct pvscsi_sg_list) * i);
+ hcb->sg_list_paddr =
+ sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;
+
+ callout_init_mtx(&hcb->callout, &sc->lock, 0);
+ }
+
+ SLIST_INIT(&sc->free_list);
+ for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
+ hcb = sc->hcbs + i;
+ SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
+ }
+
+fail:
+ if (error) {
+ pvscsi_dma_free_per_hcb(sc, i);
+ }
+
+ return (error);
+}
+
+static void
+pvscsi_free_rings(struct pvscsi_softc *sc)
+{
+
+ pvscsi_dma_free(sc, &sc->rings_state_dma);
+ pvscsi_dma_free(sc, &sc->req_ring_dma);
+ pvscsi_dma_free(sc, &sc->cmp_ring_dma);
+ if (sc->use_msg) {
+ pvscsi_dma_free(sc, &sc->msg_ring_dma);
+ }
+}
+
+static int
+pvscsi_allocate_rings(struct pvscsi_softc *sc)
+{
+ int error;
+
+ error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
+ &sc->rings_state_ppn, 1);
+ if (error) {
+ device_printf(sc->dev,
+ "Error allocating rings state, error = %d\n", error);
+ goto fail;
+ }
+ sc->rings_state = sc->rings_state_dma.vaddr;
+
+ error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
+ sc->req_ring_num_pages);
+ if (error) {
+ device_printf(sc->dev,
+ "Error allocating req ring pages, error = %d\n", error);
+ goto fail;
+ }
+ sc->req_ring = sc->req_ring_dma.vaddr;
+
+ error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
+ sc->cmp_ring_num_pages);
+ if (error) {
+ device_printf(sc->dev,
+ "Error allocating cmp ring pages, error = %d\n", error);
+ goto fail;
+ }
+ sc->cmp_ring = sc->cmp_ring_dma.vaddr;
+
+ sc->msg_ring = NULL;
+ if (sc->use_msg) {
+ error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
+ sc->msg_ring_ppn, sc->msg_ring_num_pages);
+ if (error) {
+ device_printf(sc->dev,
+			    "Error allocating msg ring pages, error = %d\n",
+ error);
+ goto fail;
+ }
+ sc->msg_ring = sc->msg_ring_dma.vaddr;
+ }
+
+ DEBUG_PRINTF(1, sc->dev, "rings_state: %p\n", sc->rings_state);
+ DEBUG_PRINTF(1, sc->dev, "req_ring: %p - %u pages\n", sc->req_ring,
+ sc->req_ring_num_pages);
+ DEBUG_PRINTF(1, sc->dev, "cmp_ring: %p - %u pages\n", sc->cmp_ring,
+ sc->cmp_ring_num_pages);
+ DEBUG_PRINTF(1, sc->dev, "msg_ring: %p - %u pages\n", sc->msg_ring,
+ sc->msg_ring_num_pages);
+
+fail:
+ if (error) {
+ pvscsi_free_rings(sc);
+ }
+ return (error);
+}
+
+static void
+pvscsi_setup_rings(struct pvscsi_softc *sc)
+{
+ struct pvscsi_cmd_desc_setup_rings cmd;
+ uint32_t i;
+
+ bzero(&cmd, sizeof(cmd));
+
+ cmd.rings_state_ppn = sc->rings_state_ppn;
+
+ cmd.req_ring_num_pages = sc->req_ring_num_pages;
+ for (i = 0; i < sc->req_ring_num_pages; ++i) {
+ cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
+ }
+
+ cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
+ for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
+ cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
+ }
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
+}
+
+static int
+pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
+{
+ uint32_t status;
+
+ pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
+ PVSCSI_CMD_SETUP_MSG_RING);
+ status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
+
+ return (status != -1);
+}
+
+static void
+pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
+{
+ struct pvscsi_cmd_desc_setup_msg_ring cmd;
+ uint32_t i;
+
+ KASSERT(sc->use_msg, ("msg is not being used"));
+
+ bzero(&cmd, sizeof(cmd));
+
+ cmd.num_pages = sc->msg_ring_num_pages;
+ for (i = 0; i < sc->msg_ring_num_pages; ++i) {
+ cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
+ }
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
+}
+
+static void
+pvscsi_adapter_reset(struct pvscsi_softc *sc)
+{
+ uint32_t val;
+
+ device_printf(sc->dev, "Adapter Reset\n");
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
+ val = pvscsi_read_intr_status(sc);
+
+ DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
+}
+
+static void
+pvscsi_bus_reset(struct pvscsi_softc *sc)
+{
+
+ device_printf(sc->dev, "Bus Reset\n");
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
+ pvscsi_process_cmp_ring(sc);
+
+ DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
+}
+
+static void
+pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
+{
+ struct pvscsi_cmd_desc_reset_device cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.target = target;
+
+ device_printf(sc->dev, "Device reset for target %u\n", target);
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof cmd);
+ pvscsi_process_cmp_ring(sc);
+
+ DEBUG_PRINTF(2, sc->dev, "device reset done\n");
+}
+
+static void
+pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, union ccb *ccb)
+{
+ struct pvscsi_cmd_desc_abort_cmd cmd;
+ struct pvscsi_hcb *hcb;
+ uint64_t context;
+
+ pvscsi_process_cmp_ring(sc);
+
+ hcb = ccb->ccb_h.ccb_pvscsi_hcb;
+
+ if (hcb != NULL) {
+ context = pvscsi_hcb_to_context(sc, hcb);
+
+ memset(&cmd, 0, sizeof cmd);
+ cmd.target = target;
+ cmd.context = context;
+
+ device_printf(sc->dev, "Abort for target %u context %llx\n",
+ target, (unsigned long long)context);
+
+ pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
+ pvscsi_process_cmp_ring(sc);
+
+ DEBUG_PRINTF(2, sc->dev, "abort done\n");
+ } else {
+ DEBUG_PRINTF(1, sc->dev,
+ "Target %u ccb %p not found for abort\n", target, ccb);
+ }
+}
+
+static int
+pvscsi_probe(device_t dev)
+{
+
+ if (pci_get_vendor(dev) == PCI_VENDOR_ID_VMWARE &&
+ pci_get_device(dev) == PCI_DEVICE_ID_VMWARE_PVSCSI) {
+ device_set_desc(dev, "VMware Paravirtual SCSI Controller");
+ return (BUS_PROBE_DEFAULT);
+ }
+ return (ENXIO);
+}
+
+static int
+pvscsi_shutdown(device_t dev)
+{
+
+ return (0);
+}
+
+static void
+pvscsi_timeout(void *arg)
+{
+ struct pvscsi_hcb *hcb;
+ struct pvscsi_softc *sc;
+ union ccb *ccb;
+
+ hcb = arg;
+ ccb = hcb->ccb;
+
+ if (ccb == NULL) {
+ /* Already completed */
+ return;
+ }
+
+ sc = ccb->ccb_h.ccb_pvscsi_sc;
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ device_printf(sc->dev, "Command timed out hcb=%p ccb=%p.\n", hcb, ccb);
+
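+	/*
+	 * Escalate recovery on successive timeouts: first abort the command,
+	 * then reset the device, the bus, and finally the adapter.
+	 */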
+ switch (hcb->recovery) {
+ case PVSCSI_HCB_NONE:
+ hcb->recovery = PVSCSI_HCB_ABORT;
+ pvscsi_abort(sc, ccb->ccb_h.target_id, ccb);
+ callout_reset_sbt(&hcb->callout, PVSCSI_ABORT_TIMEOUT * SBT_1S,
+ 0, pvscsi_timeout, hcb, 0);
+ break;
+ case PVSCSI_HCB_ABORT:
+ hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
+ pvscsi_freeze(sc);
+ pvscsi_device_reset(sc, ccb->ccb_h.target_id);
+ callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
+ 0, pvscsi_timeout, hcb, 0);
+ break;
+ case PVSCSI_HCB_DEVICE_RESET:
+ hcb->recovery = PVSCSI_HCB_BUS_RESET;
+ pvscsi_freeze(sc);
+ pvscsi_bus_reset(sc);
+ callout_reset_sbt(&hcb->callout, PVSCSI_RESET_TIMEOUT * SBT_1S,
+ 0, pvscsi_timeout, hcb, 0);
+ break;
+ case PVSCSI_HCB_BUS_RESET:
+ pvscsi_freeze(sc);
+ pvscsi_adapter_reset(sc);
+ break;
+	}
+}
+
+static void
+pvscsi_process_completion(struct pvscsi_softc *sc,
+ struct pvscsi_ring_cmp_desc *e)
+{
+ struct pvscsi_hcb *hcb;
+ union ccb *ccb;
+ uint32_t status;
+ uint32_t btstat;
+ uint32_t sdstat;
+ bus_dmasync_op_t op;
+
+ hcb = pvscsi_context_to_hcb(sc, e->context);
+
+ callout_stop(&hcb->callout);
+
+ ccb = hcb->ccb;
+
+ btstat = e->host_status;
+ sdstat = e->scsi_status;
+
+ ccb->csio.scsi_status = sdstat;
+ ccb->csio.resid = ccb->csio.dxfer_len - e->data_len;
+
+ if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
+ if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
+ op = BUS_DMASYNC_POSTREAD;
+ } else {
+ op = BUS_DMASYNC_POSTWRITE;
+ }
+ bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
+ bus_dmamap_unload(sc->buffer_dmat, hcb->dma_map);
+ }
+
+ if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_STATUS_OK) {
+ DEBUG_PRINTF(3, sc->dev,
+ "completing command context %llx success\n",
+ (unsigned long long)e->context);
+ ccb->csio.resid = 0;
+ status = CAM_REQ_CMP;
+ } else {
+ switch (btstat) {
+ case BTSTAT_SUCCESS:
+ case BTSTAT_LINKED_COMMAND_COMPLETED:
+ case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
+ switch (sdstat) {
+ case SCSI_STATUS_OK:
+ ccb->csio.resid = 0;
+ status = CAM_REQ_CMP;
+ break;
+ case SCSI_STATUS_CHECK_COND:
+ status = CAM_SCSI_STATUS_ERROR;
+
+ if (ccb->csio.sense_len != 0) {
+ status |= CAM_AUTOSNS_VALID;
+
+ memset(&ccb->csio.sense_data, 0,
+ sizeof(ccb->csio.sense_data));
+ memcpy(&ccb->csio.sense_data,
+ hcb->sense_buffer,
+ MIN(ccb->csio.sense_len,
+ e->sense_len));
+ }
+ break;
+ case SCSI_STATUS_BUSY:
+ case SCSI_STATUS_QUEUE_FULL:
+ status = CAM_REQUEUE_REQ;
+ break;
+ case SCSI_STATUS_CMD_TERMINATED:
+ case SCSI_STATUS_TASK_ABORTED:
+ status = CAM_REQ_ABORTED;
+ break;
+ default:
+ DEBUG_PRINTF(1, sc->dev,
+ "ccb: %p sdstat=0x%x\n", ccb, sdstat);
+ status = CAM_SCSI_STATUS_ERROR;
+ break;
+ }
+ break;
+ case BTSTAT_SELTIMEO:
+ status = CAM_SEL_TIMEOUT;
+ break;
+ case BTSTAT_DATARUN:
+ case BTSTAT_DATA_UNDERRUN:
+ status = CAM_DATA_RUN_ERR;
+ break;
+ case BTSTAT_ABORTQUEUE:
+ case BTSTAT_HATIMEOUT:
+ status = CAM_REQUEUE_REQ;
+ break;
+ case BTSTAT_NORESPONSE:
+ case BTSTAT_SENTRST:
+ case BTSTAT_RECVRST:
+ case BTSTAT_BUSRESET:
+ status = CAM_SCSI_BUS_RESET;
+ break;
+ case BTSTAT_SCSIPARITY:
+ status = CAM_UNCOR_PARITY;
+ break;
+ case BTSTAT_BUSFREE:
+ status = CAM_UNEXP_BUSFREE;
+ break;
+ case BTSTAT_INVPHASE:
+ status = CAM_SEQUENCE_FAIL;
+ break;
+ case BTSTAT_SENSFAILED:
+ status = CAM_AUTOSENSE_FAIL;
+ break;
+ case BTSTAT_LUNMISMATCH:
+ case BTSTAT_TAGREJECT:
+ case BTSTAT_DISCONNECT:
+ case BTSTAT_BADMSG:
+ case BTSTAT_INVPARAM:
+ status = CAM_REQ_CMP_ERR;
+ break;
+ case BTSTAT_HASOFTWARE:
+ case BTSTAT_HAHARDWARE:
+ status = CAM_NO_HBA;
+ break;
+ default:
+ device_printf(sc->dev, "unknown hba status: 0x%x\n",
+ btstat);
+ status = CAM_NO_HBA;
+ break;
+ }
+
+ DEBUG_PRINTF(3, sc->dev,
+ "completing command context %llx btstat %x sdstat %x - status %x\n",
+ (unsigned long long)e->context, btstat, sdstat, status);
+ }
+
+ ccb->ccb_h.ccb_pvscsi_hcb = NULL;
+ ccb->ccb_h.ccb_pvscsi_sc = NULL;
+ pvscsi_hcb_put(sc, hcb);
+
+ ccb->ccb_h.status =
+ status | (ccb->ccb_h.status & ~(CAM_STATUS_MASK | CAM_SIM_QUEUED));
+
+ if (sc->frozen) {
+ ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
+ sc->frozen = 0;
+ }
+
+ if (status != CAM_REQ_CMP) {
+ ccb->ccb_h.status |= CAM_DEV_QFRZN;
+ xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
+ }
+ xpt_done(ccb);
+}
+
+static void
+pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
+{
+ struct pvscsi_ring_cmp_desc *ring;
+ struct pvscsi_rings_state *s;
+ struct pvscsi_ring_cmp_desc *e;
+ uint32_t mask;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ s = sc->rings_state;
+ ring = sc->cmp_ring;
+ mask = MASK(s->cmp_num_entries_log2);
+
+ while (s->cmp_cons_idx != s->cmp_prod_idx) {
+ e = ring + (s->cmp_cons_idx & mask);
+
+ pvscsi_process_completion(sc, e);
+
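+		/*
+		 * Finish reading the descriptor before the index update
+		 * releases its slot back to the device.
+		 */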
+ mb();
+ s->cmp_cons_idx++;
+ }
+}
+
+static void
+pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
+{
+	struct pvscsi_ring_msg_dev_status_changed *desc;
+	union ccb *ccb;
+
+ switch (e->type) {
+ case PVSCSI_MSG_DEV_ADDED:
+ case PVSCSI_MSG_DEV_REMOVED: {
+ desc = (struct pvscsi_ring_msg_dev_status_changed *)e;
+
+ device_printf(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
+ desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
+ desc->bus, desc->target, desc->lun[1]);
+
+ ccb = xpt_alloc_ccb_nowait();
+ if (ccb == NULL) {
+ device_printf(sc->dev,
+ "Error allocating CCB for dev change.\n");
+ break;
+ }
+
+ if (xpt_create_path(&ccb->ccb_h.path, NULL,
+ cam_sim_path(sc->sim), desc->target, desc->lun[1])
+ != CAM_REQ_CMP) {
+ device_printf(sc->dev,
+ "Error creating path for dev change.\n");
+ xpt_free_ccb(ccb);
+ break;
+ }
+
+ xpt_rescan(ccb);
+ } break;
+ default:
+ device_printf(sc->dev, "Unknown msg type 0x%x\n", e->type);
+	}
+}
+
+static void
+pvscsi_process_msg_ring(struct pvscsi_softc *sc)
+{
+ struct pvscsi_ring_msg_desc *ring;
+ struct pvscsi_rings_state *s;
+ struct pvscsi_ring_msg_desc *e;
+ uint32_t mask;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ s = sc->rings_state;
+ ring = sc->msg_ring;
+ mask = MASK(s->msg_num_entries_log2);
+
+ while (s->msg_cons_idx != s->msg_prod_idx) {
+ e = ring + (s->msg_cons_idx & mask);
+
+ pvscsi_process_msg(sc, e);
+
+ mb();
+ s->msg_cons_idx++;
+ }
+}
+
+static void
+pvscsi_intr_locked(struct pvscsi_softc *sc)
+{
+ uint32_t val;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ val = pvscsi_read_intr_status(sc);
+
+ if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
+ pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
+ pvscsi_process_cmp_ring(sc);
+ if (sc->use_msg) {
+ pvscsi_process_msg_ring(sc);
+ }
+ }
+}
+
+static void
+pvscsi_intr(void *xsc)
+{
+ struct pvscsi_softc *sc;
+
+ sc = xsc;
+
+ mtx_assert(&sc->lock, MA_NOTOWNED);
+
+ mtx_lock(&sc->lock);
+	pvscsi_intr_locked(sc);
+ mtx_unlock(&sc->lock);
+}
+
+static void
+pvscsi_poll(struct cam_sim *sim)
+{
+ struct pvscsi_softc *sc;
+
+ sc = cam_sim_softc(sim);
+
+ mtx_assert(&sc->lock, MA_OWNED);
+ pvscsi_intr_locked(sc);
+}
+
+static void
+pvscsi_execute_ccb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+ struct pvscsi_hcb *hcb;
+ struct pvscsi_ring_req_desc *e;
+ union ccb *ccb;
+ struct pvscsi_softc *sc;
+ struct pvscsi_rings_state *s;
+ uint8_t cdb0;
+ bus_dmasync_op_t op;
+
+ hcb = arg;
+ ccb = hcb->ccb;
+ e = hcb->e;
+ sc = ccb->ccb_h.ccb_pvscsi_sc;
+ s = sc->rings_state;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ if (error) {
+ device_printf(sc->dev, "pvscsi_execute_ccb error %d\n", error);
+
+ if (error == EFBIG) {
+ ccb->ccb_h.status = CAM_REQ_TOO_BIG;
+ } else {
+ ccb->ccb_h.status = CAM_REQ_CMP_ERR;
+ }
+
+ pvscsi_hcb_put(sc, hcb);
+ xpt_done(ccb);
+ return;
+ }
+
+ e->flags = 0;
+ op = 0;
+ switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
+ case CAM_DIR_NONE:
+ e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
+ break;
+ case CAM_DIR_IN:
+ e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
+ op = BUS_DMASYNC_PREREAD;
+ break;
+ case CAM_DIR_OUT:
+ e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
+ op = BUS_DMASYNC_PREWRITE;
+ break;
+ case CAM_DIR_BOTH:
+ /* TODO: does this need handling? */
+ break;
+ }
+
+ if (nseg != 0) {
+ if (nseg > 1) {
+ int i;
+ struct pvscsi_sg_element *sge;
+
+ KASSERT(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
+ ("too many sg segments"));
+
+ sge = hcb->sg_list->sge;
+ e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
+
+ for (i = 0; i < nseg; ++i) {
+ sge[i].addr = segs[i].ds_addr;
+ sge[i].length = segs[i].ds_len;
+ sge[i].flags = 0;
+ }
+
+ e->data_addr = hcb->sg_list_paddr;
+ } else {
+ e->data_addr = segs->ds_addr;
+ }
+
+ bus_dmamap_sync(sc->buffer_dmat, hcb->dma_map, op);
+ } else {
+ e->data_addr = 0;
+ }
+
+ cdb0 = e->cdb[0];
+ ccb->ccb_h.status |= CAM_SIM_QUEUED;
+
+ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
+ callout_reset_sbt(&hcb->callout, ccb->ccb_h.timeout * SBT_1MS,
+ 0, pvscsi_timeout, hcb, 0);
+ }
+
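+	/*
+	 * Make sure the request descriptor is fully written before the
+	 * producer index update publishes it to the device.
+	 */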
+ mb();
+ s->req_prod_idx++;
+ pvscsi_kick_io(sc, cdb0);
+}
+
+static void
+pvscsi_action(struct cam_sim *sim, union ccb *ccb)
+{
+ struct pvscsi_softc *sc;
+ struct ccb_hdr *ccb_h;
+
+ sc = cam_sim_softc(sim);
+ ccb_h = &ccb->ccb_h;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ switch (ccb_h->func_code) {
+ case XPT_SCSI_IO:
+ {
+ struct ccb_scsiio *csio;
+ uint32_t req_num_entries_log2;
+ struct pvscsi_ring_req_desc *ring;
+ struct pvscsi_ring_req_desc *e;
+ struct pvscsi_rings_state *s;
+ struct pvscsi_hcb *hcb;
+
+ csio = &ccb->csio;
+ ring = sc->req_ring;
+ s = sc->rings_state;
+
+ hcb = NULL;
+
+ /*
+ * Check if it was completed already (such as aborted
+ * by upper layers)
+ */
+ if ((ccb_h->status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
+ xpt_done(ccb);
+ return;
+ }
+
+ req_num_entries_log2 = s->req_num_entries_log2;
+
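+		/*
+		 * The completion ring mirrors the request ring, so cap the
+		 * number of outstanding requests at the ring size.
+		 */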
+ if (s->req_prod_idx - s->cmp_cons_idx >=
+ (1 << req_num_entries_log2)) {
+ device_printf(sc->dev,
+ "Not enough room on completion ring.\n");
+ pvscsi_freeze(sc);
+ ccb_h->status = CAM_REQUEUE_REQ;
+ goto finish_ccb;
+ }
+
+ hcb = pvscsi_hcb_get(sc);
+ if (hcb == NULL) {
+ device_printf(sc->dev, "No free hcbs.\n");
+ pvscsi_freeze(sc);
+ ccb_h->status = CAM_REQUEUE_REQ;
+ goto finish_ccb;
+ }
+
+ hcb->ccb = ccb;
+ ccb_h->ccb_pvscsi_hcb = hcb;
+ ccb_h->ccb_pvscsi_sc = sc;
+
+ if (csio->cdb_len > sizeof(e->cdb)) {
+ DEBUG_PRINTF(2, sc->dev, "cdb length %u too large\n",
+ csio->cdb_len);
+ ccb_h->status = CAM_REQ_INVALID;
+ goto finish_ccb;
+ }
+
+ if (ccb_h->flags & CAM_CDB_PHYS) {
+ DEBUG_PRINTF(2, sc->dev,
+ "CAM_CDB_PHYS not implemented\n");
+ ccb_h->status = CAM_REQ_INVALID;
+ goto finish_ccb;
+ }
+
+ e = ring + (s->req_prod_idx & MASK(req_num_entries_log2));
+
+ e->bus = cam_sim_bus(sim);
+ e->target = ccb_h->target_id;
+ memset(e->lun, 0, sizeof(e->lun));
+ e->lun[1] = ccb_h->target_lun;
+ e->data_addr = 0;
+ e->data_len = csio->dxfer_len;
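+		/* Hint to the hypervisor which vCPU queued this request. */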
+ e->vcpu_hint = curcpu;
+
+ e->cdb_len = csio->cdb_len;
+ memcpy(e->cdb, scsiio_cdb_ptr(csio), csio->cdb_len);
+
+ e->sense_addr = 0;
+ e->sense_len = csio->sense_len;
+ if (e->sense_len > 0) {
+ e->sense_addr = hcb->sense_buffer_paddr;
+ }
+
+ e->tag = MSG_SIMPLE_Q_TAG;
+ if (ccb_h->flags & CAM_TAG_ACTION_VALID) {
+ e->tag = csio->tag_action;
+ }
+
+ e->context = pvscsi_hcb_to_context(sc, hcb);
+ hcb->e = e;
+
+ DEBUG_PRINTF(3, sc->dev,
+ " queuing command %02x context %llx\n", e->cdb[0],
+ (unsigned long long)e->context);
+ bus_dmamap_load_ccb(sc->buffer_dmat, hcb->dma_map, ccb,
+ pvscsi_execute_ccb, hcb, 0);
+ break;
+
+finish_ccb:
+ if (hcb != NULL) {
+ pvscsi_hcb_put(sc, hcb);
+ }
+ xpt_done(ccb);
+ } break;
+ case XPT_ABORT:
+ {
+ struct pvscsi_hcb *abort_hcb;
+ union ccb *abort_ccb;
+
+ abort_ccb = ccb->cab.abort_ccb;
+ abort_hcb = abort_ccb->ccb_h.ccb_pvscsi_hcb;
+
+ if (abort_hcb->ccb != NULL && abort_hcb->ccb == abort_ccb) {
+ if (abort_ccb->ccb_h.func_code == XPT_SCSI_IO) {
+ pvscsi_abort(sc, ccb_h->target_id, abort_ccb);
+ ccb_h->status = CAM_REQ_CMP;
+ } else {
+ ccb_h->status = CAM_UA_ABORT;
+ }
+ } else {
+ device_printf(sc->dev,
+ "Could not find hcb for ccb %p (tgt %u)\n",
+ ccb, ccb_h->target_id);
+ ccb_h->status = CAM_REQ_CMP;
+ }
+ xpt_done(ccb);
+ } break;
+ case XPT_RESET_DEV:
+ {
+ pvscsi_device_reset(sc, ccb_h->target_id);
+ ccb_h->status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ } break;
+ case XPT_RESET_BUS:
+ {
+ pvscsi_bus_reset(sc);
+ ccb_h->status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ } break;
+ case XPT_PATH_INQ:
+ {
+ struct ccb_pathinq *cpi;
+
+ cpi = &ccb->cpi;
+
+ cpi->version_num = 1;
+ cpi->hba_inquiry = PI_TAG_ABLE;
+ cpi->target_sprt = 0;
+ cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED;
+ cpi->hba_eng_cnt = 0;
+ /* cpi->vuhba_flags = 0; */
+ cpi->max_target = sc->max_targets;
+ cpi->max_lun = 0;
+ cpi->async_flags = 0;
+ cpi->hpath_id = 0;
+ cpi->unit_number = cam_sim_unit(sim);
+ cpi->bus_id = cam_sim_bus(sim);
+ cpi->initiator_id = 7;
+ cpi->base_transfer_speed = 750000;
+ strlcpy(cpi->sim_vid, "VMware", SIM_IDLEN);
+ strlcpy(cpi->hba_vid, "VMware", HBA_IDLEN);
+ strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
+ cpi->maxio = PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE;
+ cpi->protocol = PROTO_SCSI;
+ cpi->protocol_version = SCSI_REV_SPC2;
+ cpi->transport = XPORT_SAS;
+ cpi->transport_version = 0;
+
+ ccb_h->status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ } break;
+ case XPT_GET_TRAN_SETTINGS:
+ {
+ struct ccb_trans_settings *cts;
+
+ cts = &ccb->cts;
+
+ cts->protocol = PROTO_SCSI;
+ cts->protocol_version = SCSI_REV_SPC2;
+ cts->transport = XPORT_SAS;
+ cts->transport_version = 0;
+
+ cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
+ cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
+
+ ccb_h->status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ } break;
+ case XPT_CALC_GEOMETRY:
+ {
+ cam_calc_geometry(&ccb->ccg, 1);
+ xpt_done(ccb);
+ } break;
+ default:
+ ccb_h->status = CAM_REQ_INVALID;
+ xpt_done(ccb);
+ break;
+ }
+}
+
+static void
+pvscsi_free_interrupts(struct pvscsi_softc *sc)
+{
+
+ if (sc->irq_handler != NULL) {
+ bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handler);
+ }
+ if (sc->irq_res != NULL) {
+ bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_id,
+ sc->irq_res);
+ }
+ if (sc->use_msi_or_msix) {
+ pci_release_msi(sc->dev);
+ }
+}
+
+static int
+pvscsi_setup_interrupts(struct pvscsi_softc *sc)
+{
+ int error;
+ int flags;
+ int use_msix;
+ int use_msi;
+ int count;
+
+ sc->use_msi_or_msix = 0;
+
+ use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
+ use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);
+
+ if (use_msix && pci_msix_count(sc->dev) > 0) {
+ count = 1;
+ if (pci_alloc_msix(sc->dev, &count) == 0 && count == 1) {
+ sc->use_msi_or_msix = 1;
+ device_printf(sc->dev, "Interrupt: MSI-X\n");
+ } else {
+ pci_release_msi(sc->dev);
+ }
+ }
+
+ if (sc->use_msi_or_msix == 0 && use_msi && pci_msi_count(sc->dev) > 0) {
+ count = 1;
+ if (pci_alloc_msi(sc->dev, &count) == 0 && count == 1) {
+ sc->use_msi_or_msix = 1;
+ device_printf(sc->dev, "Interrupt: MSI\n");
+ } else {
+ pci_release_msi(sc->dev);
+ }
+ }
+
+ flags = RF_ACTIVE;
+ if (sc->use_msi_or_msix) {
+ sc->irq_id = 1;
+ } else {
+ device_printf(sc->dev, "Interrupt: INT\n");
+ sc->irq_id = 0;
+ flags |= RF_SHAREABLE;
+ }
+
+ sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_id,
+ flags);
+ if (sc->irq_res == NULL) {
+ device_printf(sc->dev, "IRQ allocation failed\n");
+ if (sc->use_msi_or_msix) {
+ pci_release_msi(sc->dev);
+ }
+ return (ENXIO);
+ }
+
+ error = bus_setup_intr(sc->dev, sc->irq_res,
+ INTR_TYPE_CAM | INTR_MPSAFE, NULL, pvscsi_intr, sc,
+ &sc->irq_handler);
+ if (error) {
+ device_printf(sc->dev, "IRQ handler setup failed\n");
+ pvscsi_free_interrupts(sc);
+ return (error);
+ }
+
+ return (0);
+}
+
+static void
+pvscsi_free_all(struct pvscsi_softc *sc)
+{
+
+ if (sc->sim) {
+ int32_t status;
+
+ if (sc->bus_path) {
+ xpt_free_path(sc->bus_path);
+ }
+
+ status = xpt_bus_deregister(cam_sim_path(sc->sim));
+ if (status != CAM_REQ_CMP) {
+ device_printf(sc->dev,
+ "Error deregistering bus, status=%d\n", status);
+ }
+
+ cam_sim_free(sc->sim, TRUE);
+ }
+
+ pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);
+
+ if (sc->hcbs) {
+ free(sc->hcbs, M_PVSCSI);
+ }
+
+ pvscsi_free_rings(sc);
+
+ pvscsi_free_interrupts(sc);
+
+ if (sc->buffer_dmat != NULL) {
+ bus_dma_tag_destroy(sc->buffer_dmat);
+ }
+
+ if (sc->parent_dmat != NULL) {
+ bus_dma_tag_destroy(sc->parent_dmat);
+ }
+
+ if (sc->mm_res != NULL) {
+ bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->mm_rid,
+ sc->mm_res);
+ }
+}
+
+static int
+pvscsi_attach(device_t dev)
+{
+ struct pvscsi_softc *sc;
+ int rid;
+ int barid;
+ int error;
+ int max_queue_depth;
+ int adapter_queue_size;
+ struct cam_devq *devq;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+
+ mtx_init(&sc->lock, "pvscsi", NULL, MTX_DEF);
+
+ pci_enable_busmaster(dev);
+
+ sc->mm_rid = -1;
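+	/* Use the first memory BAR found to map the device registers. */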
+ for (barid = 0; barid <= PCIR_MAX_BAR_0; ++barid) {
+ rid = PCIR_BAR(barid);
+
+ sc->mm_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
+ RF_ACTIVE);
+ if (sc->mm_res != NULL) {
+ sc->mm_rid = rid;
+ break;
+ }
+ }
+
+ if (sc->mm_res == NULL) {
+ device_printf(dev, "could not map device memory\n");
+ return (ENXIO);
+ }
+
+ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
+ BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
+ &sc->parent_dmat);
+ if (error) {
+ device_printf(dev, "parent dma tag create failure, error %d\n",
+ error);
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ error = bus_dma_tag_create(sc->parent_dmat, 1, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+ PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT * PAGE_SIZE,
+ PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT, PAGE_SIZE, BUS_DMA_ALLOCNOW,
+ NULL, NULL, &sc->buffer_dmat);
+ if (error) {
+		device_printf(dev, "buffer dma tag create failure, error %d\n",
+ error);
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ error = pvscsi_setup_interrupts(sc);
+ if (error) {
+ device_printf(dev, "Interrupt setup failed\n");
+ pvscsi_free_all(sc);
+ return (error);
+ }
+
+ sc->max_targets = pvscsi_get_max_targets(sc);
+
+ sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
+ pvscsi_hw_supports_msg(sc);
+ sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;
+
+ sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
+ pvscsi_request_ring_pages);
+ if (sc->req_ring_num_pages <= 0) {
+ if (sc->max_targets <= 16) {
+ sc->req_ring_num_pages =
+ PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
+ } else {
+ sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
+ }
+ } else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
+ sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
+ }
+ sc->cmp_ring_num_pages = sc->req_ring_num_pages;
+
+ max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
+ pvscsi_max_queue_depth);
+
+ adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
+ sizeof(struct pvscsi_ring_req_desc);
+ if (max_queue_depth > 0) {
+ adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
+ }
+ adapter_queue_size = MIN(adapter_queue_size,
+ PVSCSI_MAX_REQ_QUEUE_DEPTH);
+
+ device_printf(sc->dev, "Use Msg: %d\n", sc->use_msg);
+ device_printf(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
+ device_printf(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
+ device_printf(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
+ device_printf(sc->dev, "Queue size: %d\n", adapter_queue_size);
+
+ if (pvscsi_allocate_rings(sc)) {
+ device_printf(dev, "ring allocation failed\n");
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ sc->hcb_cnt = adapter_queue_size;
+ sc->hcbs = malloc(sc->hcb_cnt * sizeof(*sc->hcbs), M_PVSCSI,
+ M_NOWAIT | M_ZERO);
+ if (sc->hcbs == NULL) {
+ device_printf(dev, "error allocating hcb array\n");
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ if (pvscsi_dma_alloc_per_hcb(sc)) {
+ device_printf(dev, "error allocating per hcb dma memory\n");
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ pvscsi_adapter_reset(sc);
+
+ devq = cam_simq_alloc(adapter_queue_size);
+ if (devq == NULL) {
+ device_printf(dev, "cam devq alloc failed\n");
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ sc->sim = cam_sim_alloc(pvscsi_action, pvscsi_poll, "pvscsi", sc,
+ device_get_unit(dev), &sc->lock, 1, adapter_queue_size, devq);
+ if (sc->sim == NULL) {
+ device_printf(dev, "cam sim alloc failed\n");
+ cam_simq_free(devq);
+ pvscsi_free_all(sc);
+ return (ENXIO);
+ }
+
+ mtx_lock(&sc->lock);
+
+ if (xpt_bus_register(sc->sim, dev, 0) != CAM_SUCCESS) {
+ device_printf(dev, "xpt bus register failed\n");
+ pvscsi_free_all(sc);
+ mtx_unlock(&sc->lock);
+ return (ENXIO);
+ }
+
+ if (xpt_create_path(&sc->bus_path, NULL, cam_sim_path(sc->sim),
+ CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+ device_printf(dev, "xpt create path failed\n");
+ pvscsi_free_all(sc);
+ mtx_unlock(&sc->lock);
+ return (ENXIO);
+ }
+
+ pvscsi_setup_rings(sc);
+ if (sc->use_msg) {
+ pvscsi_setup_msg_ring(sc);
+ }
+
+ sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);
+
+ pvscsi_intr_enable(sc);
+
+ mtx_unlock(&sc->lock);
+
+ return (0);
+}
+
+static int
+pvscsi_detach(device_t dev)
+{
+ struct pvscsi_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ pvscsi_intr_disable(sc);
+ pvscsi_adapter_reset(sc);
+
+ if (sc->irq_handler != NULL) {
+ bus_teardown_intr(dev, sc->irq_res, sc->irq_handler);
+ }
+
+ mtx_lock(&sc->lock);
+ pvscsi_free_all(sc);
+ mtx_unlock(&sc->lock);
+
+ mtx_destroy(&sc->lock);
+
+ return (0);
+}
+
+static device_method_t pvscsi_methods[] = {
+ DEVMETHOD(device_probe, pvscsi_probe),
+ DEVMETHOD(device_shutdown, pvscsi_shutdown),
+ DEVMETHOD(device_attach, pvscsi_attach),
+ DEVMETHOD(device_detach, pvscsi_detach),
+ DEVMETHOD_END
+};
+
+static driver_t pvscsi_driver = {
+ "pvscsi", pvscsi_methods, sizeof(struct pvscsi_softc)
+};
+
+static devclass_t pvscsi_devclass;
+DRIVER_MODULE(pvscsi, pci, pvscsi_driver, pvscsi_devclass, 0, 0);
+
+MODULE_DEPEND(pvscsi, pci, 1, 1, 1);
+MODULE_DEPEND(pvscsi, cam, 1, 1, 1);
diff --git a/sys/dev/vmware/pvscsi/pvscsi.h b/sys/dev/vmware/pvscsi/pvscsi.h
new file mode 100644
index 000000000000..9f56823e9688
--- /dev/null
+++ b/sys/dev/vmware/pvscsi/pvscsi.h
@@ -0,0 +1,215 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PVSCSI_H_
+#define _PVSCSI_H_
+
+#define MASK(v) ((1 << (v)) - 1)
+
+#define PCI_VENDOR_ID_VMWARE 0x15ad
+#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07c0
+
+enum pvscsi_reg_offset {
+ PVSCSI_REG_OFFSET_COMMAND = 0x0000,
+ PVSCSI_REG_OFFSET_COMMAND_DATA = 0x0004,
+ PVSCSI_REG_OFFSET_COMMAND_STATUS = 0x0008,
+ PVSCSI_REG_OFFSET_LAST_STS_0 = 0x0100,
+ PVSCSI_REG_OFFSET_LAST_STS_1 = 0x0104,
+ PVSCSI_REG_OFFSET_LAST_STS_2 = 0x0108,
+ PVSCSI_REG_OFFSET_LAST_STS_3 = 0x010c,
+ PVSCSI_REG_OFFSET_INTR_STATUS = 0x100c,
+ PVSCSI_REG_OFFSET_INTR_MASK = 0x2010,
+ PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
+ PVSCSI_REG_OFFSET_DEBUG = 0x3018,
+ PVSCSI_REG_OFFSET_KICK_RW_IO = 0x4018,
+};
+
+enum pvscsi_commands {
+ PVSCSI_CMD_FIRST = 0,
+
+ PVSCSI_CMD_ADAPTER_RESET = 1,
+ PVSCSI_CMD_ISSUE_SCSI = 2,
+ PVSCSI_CMD_SETUP_RINGS = 3,
+ PVSCSI_CMD_RESET_BUS = 4,
+ PVSCSI_CMD_RESET_DEVICE = 5,
+ PVSCSI_CMD_ABORT_CMD = 6,
+ PVSCSI_CMD_CONFIG = 7,
+ PVSCSI_CMD_SETUP_MSG_RING = 8,
+ PVSCSI_CMD_DEVICE_UNPLUG = 9,
+ PVSCSI_CMD_SETUP_REQCALLTHRESHOLD = 10,
+ PVSCSI_CMD_GET_MAX_TARGETS = 11,
+
+ PVSCSI_CMD_LAST = 12,
+};
+
+struct pvscsi_cmd_desc_reset_device {
+ uint32_t target;
+ uint8_t lun[8];
+};
+
+struct pvscsi_cmd_desc_abort_cmd {
+ uint64_t context;
+ uint32_t target;
+ uint32_t pad;
+};
+
+#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 32
+#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES 16
+
+struct pvscsi_cmd_desc_setup_rings {
+ uint32_t req_ring_num_pages;
+ uint32_t cmp_ring_num_pages;
+ uint64_t rings_state_ppn;
+ uint64_t req_ring_ppns[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+ uint64_t cmp_ring_ppns[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+};
+
+struct pvscsi_cmd_desc_setup_msg_ring {
+ uint32_t num_pages;
+ uint32_t pad_;
+ uint64_t ring_ppns[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
+};
+
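+/*
+ * Ring state page shared with the device: producer/consumer indices for
+ * the request, completion, and message rings.
+ */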
+struct pvscsi_rings_state {
+ uint32_t req_prod_idx;
+ uint32_t req_cons_idx;
+ uint32_t req_num_entries_log2;
+ uint32_t cmp_prod_idx;
+ uint32_t cmp_cons_idx;
+ uint32_t cmp_num_entries_log2;
+ uint32_t req_call_threshold;
+ uint8_t _pad[100];
+ uint32_t msg_prod_idx;
+ uint32_t msg_cons_idx;
+ uint32_t msg_num_entries_log2;
+};
+
+#define PVSCSI_FLAG_CMD_WITH_SG_LIST (1 << 0)
+#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB (1 << 1)
+#define PVSCSI_FLAG_CMD_DIR_NONE (1 << 2)
+#define PVSCSI_FLAG_CMD_DIR_TOHOST (1 << 3)
+#define PVSCSI_FLAG_CMD_DIR_TODEVICE (1 << 4)
+
+#define PVSCSI_FLAG_RESERVED_MASK (~MASK(5))
+
+#define PVSCSI_INTR_CMPL_0 (1 << 0)
+#define PVSCSI_INTR_CMPL_1 (1 << 1)
+#define PVSCSI_INTR_CMPL_MASK MASK(2)
+
+#define PVSCSI_INTR_MSG_0 (1 << 2)
+#define PVSCSI_INTR_MSG_1 (1 << 3)
+#define PVSCSI_INTR_MSG_MASK (MASK(2) << 2)
+
+#define PVSCSI_INTR_ALL_SUPPORTED MASK(4)
+
+struct pvscsi_ring_req_desc {
+ uint64_t context;
+ uint64_t data_addr;
+ uint64_t data_len;
+ uint64_t sense_addr;
+ uint32_t sense_len;
+ uint32_t flags;
+ uint8_t cdb[16];
+ uint8_t cdb_len;
+ uint8_t lun[8];
+ uint8_t tag;
+ uint8_t bus;
+ uint8_t target;
+ uint8_t vcpu_hint;
+ uint8_t unused[59];
+};
+
+struct pvscsi_ring_cmp_desc {
+ uint64_t context;
+ uint64_t data_len;
+ uint32_t sense_len;
+ uint16_t host_status;
+ uint16_t scsi_status;
+ uint32_t _pad[2];
+};
+
+#define PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT 128
+#define PVSCSI_MAX_NUM_SG_SEGMENTS 128
+#define PVSCSI_SGE_FLAG_CHAIN_ELEMENT (1 << 0)
+
+struct pvscsi_sg_element {
+ uint64_t addr;
+ uint32_t length;
+ uint32_t flags;
+};
+
+enum pvscsi_msg_type {
+ PVSCSI_MSG_DEV_ADDED = 0,
+ PVSCSI_MSG_DEV_REMOVED = 1,
+ PVSCSI_MSG_LAST = 2,
+};
+
+struct pvscsi_ring_msg_desc {
+ uint32_t type;
+ uint32_t args[31];
+};
+
+struct pvscsi_ring_msg_dev_status_changed {
+ uint32_t type;
+ uint32_t bus;
+ uint32_t target;
+ uint8_t lun[8];
+ uint32_t pad[27];
+};
+
+struct pvscsi_cmd_desc_setup_req_call {
+ uint32_t enable;
+};
+
+#define PVSCSI_MAX_NUM_PAGES_REQ_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_CMP_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_MSG_RING PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
+
+#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
+ (PAGE_SIZE / sizeof(struct pvscsi_ring_req_desc))
+#define PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE \
+	(PAGE_SIZE / sizeof(struct pvscsi_ring_cmp_desc))
+#define PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE \
+ (PAGE_SIZE / sizeof(struct pvscsi_ring_msg_desc))
+
+#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
+ (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
+#define PVSCSI_MAX_CMP_QUEUE_DEPTH \
+ (PVSCSI_MAX_NUM_PAGES_CMP_RING * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE)
+#define PVSCSI_MAX_QUEUE_DEPTH \
+ MAX(PVSCSI_MAX_REQ_QUEUE_DEPTH, PVSCSI_MAX_CMP_QUEUE_DEPTH)
+
+enum pvscsi_host_status {
+ BTSTAT_SUCCESS = 0x00,
+ BTSTAT_LINKED_COMMAND_COMPLETED = 0x0a,
+ BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
+ BTSTAT_DATA_UNDERRUN = 0x0c,
+ BTSTAT_SELTIMEO = 0x11,
+ BTSTAT_DATARUN = 0x12,
+ BTSTAT_BUSFREE = 0x13,
+ BTSTAT_INVPHASE = 0x14,
+ BTSTAT_INVCODE = 0x15,
+ BTSTAT_INVOPCODE = 0x16,
+ BTSTAT_LUNMISMATCH = 0x17,
+ BTSTAT_INVPARAM = 0x1a,
+ BTSTAT_SENSFAILED = 0x1b,
+ BTSTAT_TAGREJECT = 0x1c,
+ BTSTAT_BADMSG = 0x1d,
+ BTSTAT_HAHARDWARE = 0x20,
+ BTSTAT_NORESPONSE = 0x21,
+ BTSTAT_SENTRST = 0x22,
+ BTSTAT_RECVRST = 0x23,
+ BTSTAT_DISCONNECT = 0x24,
+ BTSTAT_BUSRESET = 0x25,
+ BTSTAT_ABORTQUEUE = 0x26,
+ BTSTAT_HASOFTWARE = 0x27,
+ BTSTAT_HATIMEOUT = 0x30,
+ BTSTAT_SCSIPARITY = 0x34,
+};
+
+#endif /* !_PVSCSI_H_ */
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 0aef50a08ea0..6445c5104e77 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -138,6 +138,7 @@ device mpr # LSI-Logic MPT-Fusion 3
device sym # NCR/Symbios Logic
device trm # Tekram DC395U/UW/F DC315U adapters
device isci # Intel C600 SAS controller
+device pvscsi # VMware PVSCSI
# ATA/SCSI peripherals
device scbus # SCSI bus (required for ATA/SCSI)
diff --git a/sys/modules/vmware/Makefile b/sys/modules/vmware/Makefile
index c02196645064..02442eb96bc4 100644
--- a/sys/modules/vmware/Makefile
+++ b/sys/modules/vmware/Makefile
@@ -23,6 +23,6 @@
# SUCH DAMAGE.
#
-SUBDIR= vmci vmxnet3
+SUBDIR= pvscsi vmci vmxnet3
.include <bsd.subdir.mk>
diff --git a/sys/modules/vmware/pvscsi/Makefile b/sys/modules/vmware/pvscsi/Makefile
new file mode 100644
index 000000000000..6a4e8af952e1
--- /dev/null
+++ b/sys/modules/vmware/pvscsi/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/dev/vmware/pvscsi
+
+KMOD= pvscsi
+SRCS= pvscsi.c
+SRCS+= device_if.h bus_if.h pci_if.h
+SRCS+= opt_cam.h
+
+.include <bsd.kmod.mk>