diff options
Diffstat (limited to 'sys/dev/nvme')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | sys/dev/nvme/nvme.c | 4 |
| -rw-r--r-- | sys/dev/nvme/nvme.h | 34 |
| -rw-r--r-- | sys/dev/nvme/nvme_ctrlr.c | 28 |
| -rw-r--r-- | sys/dev/nvme/nvme_ns.c | 32 |
| -rw-r--r-- | sys/dev/nvme/nvme_pci.c | 48 |
| -rw-r--r-- | sys/dev/nvme/nvme_private.h | 12 |
| -rw-r--r-- | sys/dev/nvme/nvme_sim.c | 32 |
7 files changed, 135 insertions, 55 deletions
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index ead91f0d01fe..d119f9877aaa 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -51,7 +51,7 @@ int32_t nvme_retry_count; MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations"); static void -nvme_init(void) +nvme_init(void *dummy __unused) { uint32_t i; @@ -62,7 +62,7 @@ nvme_init(void) SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL); static void -nvme_uninit(void) +nvme_uninit(void *dummy __unused) { } diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h index 17c5cdb4db87..8f7a7fbda14c 100644 --- a/sys/dev/nvme/nvme.h +++ b/sys/dev/nvme/nvme.h @@ -1507,9 +1507,7 @@ struct nvme_namespace_data { uint8_t eui64[8]; /** lba format support */ - uint32_t lbaf[16]; - - uint8_t reserved7[192]; + uint32_t lbaf[64]; uint8_t vendor_specific[3712]; } __packed __aligned(4); @@ -1912,6 +1910,7 @@ void nvme_sc_sbuf(const struct nvme_completion *cpl, struct sbuf *sbuf); void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen); #ifdef _KERNEL +#include <sys/disk.h> struct bio; struct thread; @@ -1930,8 +1929,11 @@ typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *, typedef void (*nvme_cons_fail_fn_t)(void *); enum nvme_namespace_flags { - NVME_NS_DEALLOCATE_SUPPORTED = 0x1, - NVME_NS_FLUSH_SUPPORTED = 0x2, + NVME_NS_DEALLOCATE_SUPPORTED = 0x01, + NVME_NS_FLUSH_SUPPORTED = 0x02, + NVME_NS_ADDED = 0x04, + NVME_NS_CHANGED = 0x08, + NVME_NS_GONE = 0x10, }; int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, @@ -1997,6 +1999,24 @@ nvme_ctrlr_has_dataset_mgmt(const struct nvme_controller_data *cd) return (NVMEV(NVME_CTRLR_DATA_ONCS_DSM, cd->oncs) != 0); } +/* + * Copy the NVME device's serial number to the provided buffer, which must be + * at least DISK_IDENT_SIZE bytes large. 
+ */ +static inline void +nvme_cdata_get_disk_ident(const struct nvme_controller_data *cdata, uint8_t *sn) +{ + _Static_assert(NVME_SERIAL_NUMBER_LENGTH < DISK_IDENT_SIZE, + "NVME serial number too big for disk ident"); + + memmove(sn, cdata->sn, NVME_SERIAL_NUMBER_LENGTH); + sn[NVME_SERIAL_NUMBER_LENGTH] = '\0'; + for (int i = 0; sn[i] != '\0'; i++) { + if (sn[i] < 0x20 || sn[i] >= 0x80) + sn[i] = ' '; + } +} + /* Namespace helper functions */ uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns); uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns); @@ -2155,8 +2175,6 @@ static inline void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused) { #if _BYTE_ORDER != _LITTLE_ENDIAN - int i; - s->nsze = le64toh(s->nsze); s->ncap = le64toh(s->ncap); s->nuse = le64toh(s->nuse); @@ -2175,7 +2193,7 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused) s->anagrpid = le32toh(s->anagrpid); s->nvmsetid = le16toh(s->nvmsetid); s->endgid = le16toh(s->endgid); - for (i = 0; i < 16; i++) + for (unsigned i = 0; i < nitems(s->lbaf); i++) s->lbaf[i] = le32toh(s->lbaf[i]); #endif } diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 3a1894bf754d..41542d24c107 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -1153,7 +1153,7 @@ nvme_ctrlr_aer_task(void *arg, int pending) mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0); mtx_unlock(&aer->mtx); - if (aer->log_page_size != (uint32_t)-1) { + if (aer->log_page_size == (uint32_t)-1) { /* * If the log page fetch for some reason completed with an * error, don't pass log page data to the consumers. 
In @@ -1216,10 +1216,20 @@ nvme_ctrlr_aer_task(void *arg, int pending) } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) { struct nvme_ns_list *nsl = (struct nvme_ns_list *)aer->log_page_buffer; + struct nvme_controller *ctrlr = aer->ctrlr; + for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) { + struct nvme_namespace *ns; + uint32_t id = nsl->ns[i]; + if (nsl->ns[i] > NVME_MAX_NAMESPACES) break; - nvme_notify_ns(aer->ctrlr, nsl->ns[i]); + + ns = &ctrlr->ns[id - 1]; + ns->flags |= NVME_NS_CHANGED; + nvme_ns_construct(ns, id, ctrlr); + nvme_notify_ns(ctrlr, id); + ns->flags &= ~NVME_NS_CHANGED; } } @@ -1495,6 +1505,11 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, case NVME_GET_CONTROLLER_DATA: memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); break; + case DIOCGIDENT: { + uint8_t *sn = arg; + nvme_cdata_get_disk_ident(&ctrlr->cdata, sn); + break; + } /* Linux Compatible (see nvme_linux.h) */ case NVME_IOCTL_ID: td->td_retval[0] = 0xfffffffful; @@ -1738,9 +1753,14 @@ noadminq: bus_release_resource(ctrlr->dev, SYS_RES_IRQ, rman_get_rid(ctrlr->res), ctrlr->res); - if (ctrlr->bar4_resource != NULL) { + if (ctrlr->msix_table_resource != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, + ctrlr->msix_table_resource_id, ctrlr->msix_table_resource); + } + + if (ctrlr->msix_pba_resource != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - ctrlr->bar4_resource_id, ctrlr->bar4_resource); + ctrlr->msix_pba_resource_id, ctrlr->msix_pba_resource); } bus_release_resource(dev, SYS_RES_MEMORY, diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index e84d2066930e..4ebcc03c4f04 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -45,7 +45,7 @@ #include "nvme_private.h" #include "nvme_linux.h" -static void nvme_bio_child_inbed(struct bio *parent, int bio_error); +static void nvme_bio_child_inbed(struct bio *parent, int abio_error); static void nvme_bio_child_done(void *arg, const struct nvme_completion *cpl); 
static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, @@ -78,7 +78,7 @@ nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, break; case NVME_PASSTHROUGH_CMD: pt = (struct nvme_pt_command *)arg; - return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id, + return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id, 1 /* is_user_buffer */, 0 /* is_admin_cmd */)); case NVME_GET_NSID: { @@ -88,6 +88,11 @@ nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, gnsid->nsid = ns->id; break; } + case DIOCGIDENT: { + uint8_t *sn = arg; + nvme_cdata_get_disk_ident(&ctrlr->cdata, sn); + break; + } case DIOCGMEDIASIZE: *(off_t *)arg = (off_t)nvme_ns_get_size(ns); break; @@ -137,10 +142,6 @@ nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl) { struct bio *bp = arg; - /* - * TODO: add more extensive translation of NVMe status codes - * to different bio error codes (i.e. EIO, EINVAL, etc.) - */ if (nvme_completion_is_error(cpl)) { bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; @@ -274,14 +275,14 @@ nvme_ns_bio_done(void *arg, const struct nvme_completion *status) } static void -nvme_bio_child_inbed(struct bio *parent, int bio_error) +nvme_bio_child_inbed(struct bio *parent, int abio_error) { struct nvme_completion parent_cpl; int children, inbed; - if (bio_error != 0) { + if (abio_error != 0) { parent->bio_flags |= BIO_ERROR; - parent->bio_error = bio_error; + parent->bio_error = abio_error; } /* @@ -308,12 +309,12 @@ nvme_bio_child_done(void *arg, const struct nvme_completion *cpl) { struct bio *child = arg; struct bio *parent; - int bio_error; + int abio_error; parent = child->bio_parent; g_destroy_bio(child); - bio_error = nvme_completion_is_error(cpl) ? EIO : 0; - nvme_bio_child_inbed(parent, bio_error); + abio_error = nvme_completion_is_error(cpl) ? 
EIO : 0; + nvme_bio_child_inbed(parent, abio_error); } static uint32_t @@ -557,8 +558,10 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, * standard says the entire id will be zeros, so this is a * cheap way to test for that. */ - if (ns->data.nsze == 0) - return (ENXIO); + if (ns->data.nsze == 0) { + ns->flags |= NVME_NS_GONE; + return ((ns->flags & NVME_NS_ADDED) ? 0 : ENXIO); + } flbas_fmt = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, ns->data.flbas); @@ -622,6 +625,7 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%d", device_get_nameunit(ctrlr->dev), ns->id); ns->cdev->si_flags |= SI_UNMAPPED; + ns->flags |= NVME_NS_ADDED; return (0); } diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c index c07a68d2f0dc..cecb05ca0a92 100644 --- a/sys/dev/nvme/nvme_pci.c +++ b/sys/dev/nvme/nvme_pci.c @@ -152,11 +152,15 @@ static int nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) { ctrlr->resource_id = PCIR_BAR(0); + ctrlr->msix_table_resource_id = -1; + ctrlr->msix_table_resource = NULL; + ctrlr->msix_pba_resource_id = -1; + ctrlr->msix_pba_resource = NULL; ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY, &ctrlr->resource_id, RF_ACTIVE); - if(ctrlr->resource == NULL) { + if (ctrlr->resource == NULL) { nvme_printf(ctrlr, "unable to allocate pci resource\n"); return (ENOMEM); } @@ -166,15 +170,32 @@ nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle; /* - * The NVMe spec allows for the MSI-X table to be placed behind - * BAR 4/5, separate from the control/doorbell registers. Always - * try to map this bar, because it must be mapped prior to calling - * pci_alloc_msix(). If the table isn't behind BAR 4/5, - * bus_alloc_resource() will just return NULL which is OK. + * The NVMe spec allows for the MSI-X tables to be placed behind + * BAR 4 and/or 5, separate from the control/doorbell registers. 
*/ - ctrlr->bar4_resource_id = PCIR_BAR(4); - ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY, - &ctrlr->bar4_resource_id, RF_ACTIVE); + + ctrlr->msix_table_resource_id = pci_msix_table_bar(ctrlr->dev); + ctrlr->msix_pba_resource_id = pci_msix_pba_bar(ctrlr->dev); + + if (ctrlr->msix_table_resource_id >= 0 && + ctrlr->msix_table_resource_id != ctrlr->resource_id) { + ctrlr->msix_table_resource = bus_alloc_resource_any(ctrlr->dev, + SYS_RES_MEMORY, &ctrlr->msix_table_resource_id, RF_ACTIVE); + if (ctrlr->msix_table_resource == NULL) { + nvme_printf(ctrlr, "unable to allocate msi-x table resource\n"); + return (ENOMEM); + } + } + if (ctrlr->msix_pba_resource_id >= 0 && + ctrlr->msix_pba_resource_id != ctrlr->resource_id && + ctrlr->msix_pba_resource_id != ctrlr->msix_table_resource_id) { + ctrlr->msix_pba_resource = bus_alloc_resource_any(ctrlr->dev, + SYS_RES_MEMORY, &ctrlr->msix_pba_resource_id, RF_ACTIVE); + if (ctrlr->msix_pba_resource == NULL) { + nvme_printf(ctrlr, "unable to allocate msi-x pba resource\n"); + return (ENOMEM); + } + } return (0); } @@ -200,9 +221,14 @@ bad: ctrlr->resource_id, ctrlr->resource); } - if (ctrlr->bar4_resource != NULL) { + if (ctrlr->msix_table_resource != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, + ctrlr->msix_table_resource_id, ctrlr->msix_table_resource); + } + + if (ctrlr->msix_pba_resource != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - ctrlr->bar4_resource_id, ctrlr->bar4_resource); + ctrlr->msix_pba_resource_id, ctrlr->msix_pba_resource); } if (ctrlr->tag) diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 52f9e12f8f9a..a425a6a5ad62 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -235,8 +235,10 @@ struct nvme_controller { * separate from the control registers which are in BAR 0/1. These * members track the mapping of BAR 4/5 for that reason. 
*/ - int bar4_resource_id; - struct resource *bar4_resource; + int msix_table_resource_id; + struct resource *msix_table_resource; + int msix_pba_resource_id; + struct resource *msix_pba_resource; int msi_count; uint32_t enable_aborts; @@ -463,13 +465,13 @@ static __inline void nvme_completion_poll(struct nvme_completion_poll_status *status) { int timeout = ticks + 10 * hz; - sbintime_t delta_t = SBT_1US; + sbintime_t delta = SBT_1US; while (!atomic_load_acq_int(&status->done)) { if (timeout - ticks < 0) panic("NVME polled command failed to complete within 10s."); - pause_sbt("nvme", delta_t, 0, C_PREL(1)); - delta_t = min(SBT_1MS, delta_t * 3 / 2); + pause_sbt("nvme", delta, 0, C_PREL(1)); + delta = min(SBT_1MS, delta + delta / 2); } } diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c index a06774a64761..e015fbe4d072 100644 --- a/sys/dev/nvme/nvme_sim.c +++ b/sys/dev/nvme/nvme_sim.c @@ -352,25 +352,35 @@ static void * nvme_sim_ns_change(struct nvme_namespace *ns, void *sc_arg) { struct nvme_sim_softc *sc = sc_arg; + struct cam_path *tmppath; union ccb *ccb; + if (xpt_create_path(&tmppath, /*periph*/NULL, + cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) { + printf("unable to create path for rescan\n"); + return (NULL); + } + /* + * If it's gone, then signal that and leave. + */ + if (ns->flags & NVME_NS_GONE) { + xpt_async(AC_LOST_DEVICE, tmppath, NULL); + xpt_free_path(tmppath); + return (sc_arg); + } + ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return (NULL); } + ccb->ccb_h.path = tmppath; /* - * We map the NVMe namespace idea onto the CAM unit LUN. For - * each new namespace, we create a new CAM path for it. We then - * rescan the path to get it to enumerate. + * We map the NVMe namespace idea onto the CAM unit LUN. For each new + * namespace, scan or rescan the path to enumerate it. tmppath freed at + * end of scan. 
*/ - if (xpt_create_path(&ccb->ccb_h.path, /*periph*/NULL, - cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) { - printf("unable to create path for rescan\n"); - xpt_free_ccb(ccb); - return (NULL); - } xpt_rescan(ccb); return (sc_arg); @@ -391,7 +401,7 @@ nvme_sim_controller_fail(void *ctrlr_arg) struct nvme_consumer *consumer_cookie; static void -nvme_sim_init(void) +nvme_sim_init(void *dummy __unused) { if (nvme_use_nvd) return; @@ -404,7 +414,7 @@ SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY, nvme_sim_init, NULL); static void -nvme_sim_uninit(void) +nvme_sim_uninit(void *dummy __unused) { if (nvme_use_nvd) return; |
