about | summary | refs | log | tree | commit | diff
path: root/sys/dev/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/nvme')
-rw-r--r-- sys/dev/nvme/nvme.c 4
-rw-r--r-- sys/dev/nvme/nvme.h 34
-rw-r--r-- sys/dev/nvme/nvme_ctrlr.c 28
-rw-r--r-- sys/dev/nvme/nvme_ns.c 32
-rw-r--r-- sys/dev/nvme/nvme_pci.c 48
-rw-r--r-- sys/dev/nvme/nvme_private.h 12
-rw-r--r-- sys/dev/nvme/nvme_sim.c 32
7 files changed, 135 insertions, 55 deletions
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index ead91f0d01fe..d119f9877aaa 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -51,7 +51,7 @@ int32_t nvme_retry_count;
MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
static void
-nvme_init(void)
+nvme_init(void *dummy __unused)
{
uint32_t i;
@@ -62,7 +62,7 @@ nvme_init(void)
SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
static void
-nvme_uninit(void)
+nvme_uninit(void *dummy __unused)
{
}
diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h
index 17c5cdb4db87..8f7a7fbda14c 100644
--- a/sys/dev/nvme/nvme.h
+++ b/sys/dev/nvme/nvme.h
@@ -1507,9 +1507,7 @@ struct nvme_namespace_data {
uint8_t eui64[8];
/** lba format support */
- uint32_t lbaf[16];
-
- uint8_t reserved7[192];
+ uint32_t lbaf[64];
uint8_t vendor_specific[3712];
} __packed __aligned(4);
@@ -1912,6 +1910,7 @@ void nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sbuf);
void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
#ifdef _KERNEL
+#include <sys/disk.h>
struct bio;
struct thread;
@@ -1930,8 +1929,11 @@ typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
typedef void (*nvme_cons_fail_fn_t)(void *);
enum nvme_namespace_flags {
- NVME_NS_DEALLOCATE_SUPPORTED = 0x1,
- NVME_NS_FLUSH_SUPPORTED = 0x2,
+ NVME_NS_DEALLOCATE_SUPPORTED = 0x01, /* values are bits OR-ed into ns->flags */
+ NVME_NS_FLUSH_SUPPORTED = 0x02,
+ NVME_NS_ADDED = 0x04, /* set once nvme_ns_construct() has succeeded */
+ NVME_NS_CHANGED = 0x08, /* set while a changed-namespace AER is handled */
+ NVME_NS_GONE = 0x10, /* set when identify data reports nsze == 0 */
};
int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
@@ -1997,6 +1999,24 @@ nvme_ctrlr_has_dataset_mgmt(const struct nvme_controller_data *cd)
return (NVMEV(NVME_CTRLR_DATA_ONCS_DSM, cd->oncs) != 0);
}
+/*
+ * Copy the NVMe serial number to sn (at least DISK_IDENT_SIZE bytes large),
+ * NUL-terminating it and replacing bytes < 0x20 or >= 0x80 with spaces.
+ */
+static inline void
+nvme_cdata_get_disk_ident(const struct nvme_controller_data *cdata, uint8_t *sn)
+{
+ _Static_assert(NVME_SERIAL_NUMBER_LENGTH < DISK_IDENT_SIZE,
+ "NVME serial number too big for disk ident"); /* '<' keeps room for the NUL */
+
+ memmove(sn, cdata->sn, NVME_SERIAL_NUMBER_LENGTH);
+ sn[NVME_SERIAL_NUMBER_LENGTH] = '\0';
+ for (int i = 0; sn[i] != '\0'; i++) { /* stops early at an embedded NUL */
+ if (sn[i] < 0x20 || sn[i] >= 0x80)
+ sn[i] = ' ';
+ }
+}
+
/* Namespace helper functions */
uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns);
@@ -2155,8 +2175,6 @@ static inline
void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused)
{
#if _BYTE_ORDER != _LITTLE_ENDIAN
- int i;
-
s->nsze = le64toh(s->nsze);
s->ncap = le64toh(s->ncap);
s->nuse = le64toh(s->nuse);
@@ -2175,7 +2193,7 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused)
s->anagrpid = le32toh(s->anagrpid);
s->nvmsetid = le16toh(s->nvmsetid);
s->endgid = le16toh(s->endgid);
- for (i = 0; i < 16; i++)
+ for (unsigned i = 0; i < nitems(s->lbaf); i++)
s->lbaf[i] = le32toh(s->lbaf[i]);
#endif
}
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 3a1894bf754d..41542d24c107 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1153,7 +1153,7 @@ nvme_ctrlr_aer_task(void *arg, int pending)
mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0);
mtx_unlock(&aer->mtx);
- if (aer->log_page_size != (uint32_t)-1) {
+ if (aer->log_page_size == (uint32_t)-1) {
/*
* If the log page fetch for some reason completed with an
* error, don't pass log page data to the consumers. In
@@ -1216,10 +1216,20 @@ nvme_ctrlr_aer_task(void *arg, int pending)
} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
struct nvme_ns_list *nsl =
(struct nvme_ns_list *)aer->log_page_buffer;
+ struct nvme_controller *ctrlr = aer->ctrlr;
+
for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
+ struct nvme_namespace *ns;
+ uint32_t id = nsl->ns[i];
+
if (nsl->ns[i] > NVME_MAX_NAMESPACES)
break;
- nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+
+ ns = &ctrlr->ns[id - 1];
+ ns->flags |= NVME_NS_CHANGED;
+ nvme_ns_construct(ns, id, ctrlr);
+ nvme_notify_ns(ctrlr, id);
+ ns->flags &= ~NVME_NS_CHANGED;
}
}
@@ -1495,6 +1505,11 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
case NVME_GET_CONTROLLER_DATA:
memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
break;
+ case DIOCGIDENT: {
+ uint8_t *sn = arg;
+ nvme_cdata_get_disk_ident(&ctrlr->cdata, sn);
+ break;
+ }
/* Linux Compatible (see nvme_linux.h) */
case NVME_IOCTL_ID:
td->td_retval[0] = 0xfffffffful;
@@ -1738,9 +1753,14 @@ noadminq:
bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
rman_get_rid(ctrlr->res), ctrlr->res);
- if (ctrlr->bar4_resource != NULL) {
+ if (ctrlr->msix_table_resource != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ ctrlr->msix_table_resource_id, ctrlr->msix_table_resource);
+ }
+
+ if (ctrlr->msix_pba_resource != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY,
- ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+ ctrlr->msix_pba_resource_id, ctrlr->msix_pba_resource);
}
bus_release_resource(dev, SYS_RES_MEMORY,
diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c
index e84d2066930e..4ebcc03c4f04 100644
--- a/sys/dev/nvme/nvme_ns.c
+++ b/sys/dev/nvme/nvme_ns.c
@@ -45,7 +45,7 @@
#include "nvme_private.h"
#include "nvme_linux.h"
-static void nvme_bio_child_inbed(struct bio *parent, int bio_error);
+static void nvme_bio_child_inbed(struct bio *parent, int abio_error);
static void nvme_bio_child_done(void *arg,
const struct nvme_completion *cpl);
static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size,
@@ -78,7 +78,7 @@ nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
break;
case NVME_PASSTHROUGH_CMD:
pt = (struct nvme_pt_command *)arg;
- return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id,
+ return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id,
1 /* is_user_buffer */, 0 /* is_admin_cmd */));
case NVME_GET_NSID:
{
@@ -88,6 +88,11 @@ nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
gnsid->nsid = ns->id;
break;
}
+ case DIOCGIDENT: {
+ uint8_t *sn = arg;
+ nvme_cdata_get_disk_ident(&ctrlr->cdata, sn);
+ break;
+ }
case DIOCGMEDIASIZE:
*(off_t *)arg = (off_t)nvme_ns_get_size(ns);
break;
@@ -137,10 +142,6 @@ nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl)
{
struct bio *bp = arg;
- /*
- * TODO: add more extensive translation of NVMe status codes
- * to different bio error codes (i.e. EIO, EINVAL, etc.)
- */
if (nvme_completion_is_error(cpl)) {
bp->bio_error = EIO;
bp->bio_flags |= BIO_ERROR;
@@ -274,14 +275,14 @@ nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
}
static void
-nvme_bio_child_inbed(struct bio *parent, int bio_error)
+nvme_bio_child_inbed(struct bio *parent, int abio_error)
{
struct nvme_completion parent_cpl;
int children, inbed;
- if (bio_error != 0) {
+ if (abio_error != 0) {
parent->bio_flags |= BIO_ERROR;
- parent->bio_error = bio_error;
+ parent->bio_error = abio_error;
}
/*
@@ -308,12 +309,12 @@ nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
{
struct bio *child = arg;
struct bio *parent;
- int bio_error;
+ int abio_error;
parent = child->bio_parent;
g_destroy_bio(child);
- bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
- nvme_bio_child_inbed(parent, bio_error);
+ abio_error = nvme_completion_is_error(cpl) ? EIO : 0;
+ nvme_bio_child_inbed(parent, abio_error);
}
static uint32_t
@@ -557,8 +558,10 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
* standard says the entire id will be zeros, so this is a
* cheap way to test for that.
*/
- if (ns->data.nsze == 0)
- return (ENXIO);
+ if (ns->data.nsze == 0) {
+ ns->flags |= NVME_NS_GONE;
+ return ((ns->flags & NVME_NS_ADDED) ? 0 : ENXIO);
+ }
flbas_fmt = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, ns->data.flbas);
@@ -622,6 +625,7 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%d",
device_get_nameunit(ctrlr->dev), ns->id);
ns->cdev->si_flags |= SI_UNMAPPED;
+ ns->flags |= NVME_NS_ADDED;
return (0);
}
diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c
index c07a68d2f0dc..cecb05ca0a92 100644
--- a/sys/dev/nvme/nvme_pci.c
+++ b/sys/dev/nvme/nvme_pci.c
@@ -152,11 +152,15 @@ static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{
ctrlr->resource_id = PCIR_BAR(0);
+ ctrlr->msix_table_resource_id = -1;
+ ctrlr->msix_table_resource = NULL;
+ ctrlr->msix_pba_resource_id = -1;
+ ctrlr->msix_pba_resource = NULL;
ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
&ctrlr->resource_id, RF_ACTIVE);
- if(ctrlr->resource == NULL) {
+ if (ctrlr->resource == NULL) {
nvme_printf(ctrlr, "unable to allocate pci resource\n");
return (ENOMEM);
}
@@ -166,15 +170,32 @@ nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
/*
- * The NVMe spec allows for the MSI-X table to be placed behind
- * BAR 4/5, separate from the control/doorbell registers. Always
- * try to map this bar, because it must be mapped prior to calling
- * pci_alloc_msix(). If the table isn't behind BAR 4/5,
- * bus_alloc_resource() will just return NULL which is OK.
+ * The NVMe spec allows for the MSI-X tables to be placed behind
+ * BAR 4 and/or 5, separate from the control/doorbell registers.
*/
- ctrlr->bar4_resource_id = PCIR_BAR(4);
- ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
- &ctrlr->bar4_resource_id, RF_ACTIVE);
+
+ ctrlr->msix_table_resource_id = pci_msix_table_bar(ctrlr->dev);
+ ctrlr->msix_pba_resource_id = pci_msix_pba_bar(ctrlr->dev);
+
+ if (ctrlr->msix_table_resource_id >= 0 &&
+ ctrlr->msix_table_resource_id != ctrlr->resource_id) {
+ ctrlr->msix_table_resource = bus_alloc_resource_any(ctrlr->dev,
+ SYS_RES_MEMORY, &ctrlr->msix_table_resource_id, RF_ACTIVE);
+ if (ctrlr->msix_table_resource == NULL) {
+ nvme_printf(ctrlr, "unable to allocate msi-x table resource\n");
+ return (ENOMEM);
+ }
+ }
+ if (ctrlr->msix_pba_resource_id >= 0 &&
+ ctrlr->msix_pba_resource_id != ctrlr->resource_id &&
+ ctrlr->msix_pba_resource_id != ctrlr->msix_table_resource_id) {
+ ctrlr->msix_pba_resource = bus_alloc_resource_any(ctrlr->dev,
+ SYS_RES_MEMORY, &ctrlr->msix_pba_resource_id, RF_ACTIVE);
+ if (ctrlr->msix_pba_resource == NULL) {
+ nvme_printf(ctrlr, "unable to allocate msi-x pba resource\n");
+ return (ENOMEM);
+ }
+ }
return (0);
}
@@ -200,9 +221,14 @@ bad:
ctrlr->resource_id, ctrlr->resource);
}
- if (ctrlr->bar4_resource != NULL) {
+ if (ctrlr->msix_table_resource != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ ctrlr->msix_table_resource_id, ctrlr->msix_table_resource);
+ }
+
+ if (ctrlr->msix_pba_resource != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY,
- ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+ ctrlr->msix_pba_resource_id, ctrlr->msix_pba_resource);
}
if (ctrlr->tag)
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 52f9e12f8f9a..a425a6a5ad62 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -235,8 +235,10 @@ struct nvme_controller {
* separate from the control registers which are in BAR 0/1. These
* members track the mapping of BAR 4/5 for that reason.
*/
- int bar4_resource_id;
- struct resource *bar4_resource;
+ int msix_table_resource_id;
+ struct resource *msix_table_resource;
+ int msix_pba_resource_id;
+ struct resource *msix_pba_resource;
int msi_count;
uint32_t enable_aborts;
@@ -463,13 +465,13 @@ static __inline void
nvme_completion_poll(struct nvme_completion_poll_status *status)
{
int timeout = ticks + 10 * hz;
- sbintime_t delta_t = SBT_1US;
+ sbintime_t delta = SBT_1US; /* pause length; starts at SBT_1US and grows below */
while (!atomic_load_acq_int(&status->done)) {
if (timeout - ticks < 0)
panic("NVME polled command failed to complete within 10s.");
- pause_sbt("nvme", delta_t, 0, C_PREL(1));
- delta_t = min(SBT_1MS, delta_t * 3 / 2);
+ pause_sbt("nvme", delta, 0, C_PREL(1));
+ delta = min(SBT_1MS, delta + delta / 2); /* grow by half per pass, capped at SBT_1MS */
}
}
diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c
index a06774a64761..e015fbe4d072 100644
--- a/sys/dev/nvme/nvme_sim.c
+++ b/sys/dev/nvme/nvme_sim.c
@@ -352,25 +352,35 @@ static void *
nvme_sim_ns_change(struct nvme_namespace *ns, void *sc_arg)
{
struct nvme_sim_softc *sc = sc_arg;
+ struct cam_path *tmppath;
union ccb *ccb;
+ if (xpt_create_path(&tmppath, /*periph*/NULL,
+ cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) {
+ printf("unable to create path for rescan\n");
+ return (NULL);
+ }
+ /* If it's gone, then signal that and leave. */
+ if (ns->flags & NVME_NS_GONE) {
+ xpt_async(AC_LOST_DEVICE, tmppath, NULL);
+ xpt_free_path(tmppath);
+ return (sc_arg);
+ }
+
ccb = xpt_alloc_ccb_nowait();
if (ccb == NULL) {
+ /* No CCB will take over tmppath on this path, so free it here. */
+ xpt_free_path(tmppath);
printf("unable to alloc CCB for rescan\n");
return (NULL);
}
+ ccb->ccb_h.path = tmppath;
/*
- * We map the NVMe namespace idea onto the CAM unit LUN. For
- * each new namespace, we create a new CAM path for it. We then
- * rescan the path to get it to enumerate.
+ * We map the NVMe namespace idea onto the CAM unit LUN. For each new
+ * namespace, scan or rescan the path to enumerate it. tmppath freed at
+ * end of scan.
*/
- if (xpt_create_path(&ccb->ccb_h.path, /*periph*/NULL,
- cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) {
- printf("unable to create path for rescan\n");
- xpt_free_ccb(ccb);
- return (NULL);
- }
xpt_rescan(ccb);
return (sc_arg);
@@ -391,7 +401,7 @@ nvme_sim_controller_fail(void *ctrlr_arg)
struct nvme_consumer *consumer_cookie;
static void
-nvme_sim_init(void)
+nvme_sim_init(void *dummy __unused)
{
if (nvme_use_nvd)
return;
@@ -404,7 +414,7 @@ SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY,
nvme_sim_init, NULL);
static void
-nvme_sim_uninit(void)
+nvme_sim_uninit(void *dummy __unused)
{
if (nvme_use_nvd)
return;