diff options
Diffstat (limited to 'sys/dev/nvme')
| -rw-r--r-- | sys/dev/nvme/nvme_ahci.c | 5 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_ctrlr.c | 96 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_ctrlr_cmd.c | 12 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_pci.c | 42 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_private.h | 46 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_qpair.c | 9 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_sim.c | 18 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_sysctl.c | 2 |
8 files changed, 176 insertions, 54 deletions
diff --git a/sys/dev/nvme/nvme_ahci.c b/sys/dev/nvme/nvme_ahci.c index b06661226d34..8be3887d835e 100644 --- a/sys/dev/nvme/nvme_ahci.c +++ b/sys/dev/nvme/nvme_ahci.c @@ -43,7 +43,7 @@ static device_method_t nvme_ahci_methods[] = { DEVMETHOD(device_attach, nvme_ahci_attach), DEVMETHOD(device_detach, nvme_ahci_detach), DEVMETHOD(device_shutdown, nvme_shutdown), - { 0, 0 } + DEVMETHOD_END }; static driver_t nvme_ahci_driver = { @@ -77,9 +77,6 @@ nvme_ahci_attach(device_t dev) ret = ENOMEM; goto bad; } - ctrlr->bus_tag = rman_get_bustag(ctrlr->resource); - ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource); - ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle; /* Allocate and setup IRQ */ ctrlr->rid = 0; diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 7e1a3f02f326..753a8b380a75 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -782,6 +782,47 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) } static void +nvme_ctrlr_configure_apst(struct nvme_controller *ctrlr) +{ + struct nvme_completion_poll_status status; + uint64_t *data; + int data_size, i, read_size; + bool enable, error = true; + + if (TUNABLE_BOOL_FETCH("hw.nvme.apst_enable", &enable) == 0 || + ctrlr->cdata.apsta == 0) + return; + + data_size = 32 * sizeof(*data); + data = malloc(data_size, M_NVME, M_WAITOK | M_ZERO); + + if (getenv_array("hw.nvme.apst_data", data, data_size, + &read_size, sizeof(*data), GETENV_UNSIGNED) != 0) { + for (i = 0; i < read_size / sizeof(*data); ++i) + data[i] = htole64(data[i]); + } else { + status.done = 0; + nvme_ctrlr_cmd_get_feature(ctrlr, + NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION, 0, + data, data_size, nvme_completion_poll_cb, &status); + nvme_completion_poll(&status); + if (nvme_completion_is_error(&status.cpl)) + goto out; + } + + status.done = 0; + nvme_ctrlr_cmd_set_feature(ctrlr, + NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION, enable, 0, 0, + 0, 0, data, data_size, nvme_completion_poll_cb, &status); + 
nvme_completion_poll(&status); + error = nvme_completion_is_error(&status.cpl); +out: + if (error && bootverbose) + nvme_printf(ctrlr, "failed to configure APST\n"); + free(data, M_NVME); +} + +static void nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr) { ctrlr->int_coal_time = 0; @@ -1047,6 +1088,7 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting) } nvme_ctrlr_configure_aer(ctrlr); + nvme_ctrlr_configure_apst(ctrlr); nvme_ctrlr_configure_int_coalescing(ctrlr); for (i = 0; i < ctrlr->num_io_queues; i++) @@ -1304,29 +1346,51 @@ nvme_ctrlr_shared_handler(void *arg) #define NVME_MAX_PAGES (int)(1024 / sizeof(vm_page_t)) static int +nvme_page_count(vm_offset_t start, size_t len) +{ + return atop(round_page(start + len) - trunc_page(start)); +} + +static int nvme_user_ioctl_req(vm_offset_t addr, size_t len, bool is_read, - vm_page_t *upages, int max_pages, int *npagesp, struct nvme_request **req, + vm_page_t **upages, int max_pages, int *npagesp, struct nvme_request **req, nvme_cb_fn_t cb_fn, void *cb_arg) { vm_prot_t prot = VM_PROT_READ; - int err; + int err, npages; + vm_page_t *upages_us; + + upages_us = *upages; + npages = nvme_page_count(addr, len); + if (npages > atop(maxphys)) + return (EINVAL); + if (npages > max_pages) + upages_us = malloc(npages * sizeof(vm_page_t), M_NVME, + M_ZERO | M_WAITOK); if (is_read) prot |= VM_PROT_WRITE; /* Device will write to host memory */ err = vm_fault_hold_pages(&curproc->p_vmspace->vm_map, - addr, len, prot, upages, max_pages, npagesp); - if (err != 0) + addr, len, prot, upages_us, npages, npagesp); + if (err != 0) { + if (*upages != upages_us) + free(upages_us, M_NVME); return (err); + } *req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg); - (*req)->payload = memdesc_vmpages(upages, len, addr & PAGE_MASK); + (*req)->payload = memdesc_vmpages(upages_us, len, addr & PAGE_MASK); (*req)->payload_valid = true; + if (*upages != upages_us) + *upages = upages_us; return (0); } static void 
-nvme_user_ioctl_free(vm_page_t *pages, int npage) +nvme_user_ioctl_free(vm_page_t *pages, int npage, bool freeit) { vm_page_unhold_pages(pages, npage); + if (freeit) + free(pages, M_NVME); } static void @@ -1358,7 +1422,8 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, struct mtx *mtx; int ret = 0; int npages = 0; - vm_page_t upages[NVME_MAX_PAGES]; + vm_page_t upages_small[NVME_MAX_PAGES]; + vm_page_t *upages = upages_small; if (pt->len > 0) { if (pt->len > ctrlr->max_xfer_size) { @@ -1369,7 +1434,7 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, } if (is_user) { ret = nvme_user_ioctl_req((vm_offset_t)pt->buf, pt->len, - pt->is_read, upages, nitems(upages), &npages, &req, + pt->is_read, &upages, nitems(upages_small), &npages, &req, nvme_pt_done, pt); if (ret != 0) return (ret); @@ -1407,7 +1472,7 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, mtx_unlock(mtx); if (npages > 0) - nvme_user_ioctl_free(upages, npages); + nvme_user_ioctl_free(upages, npages, upages != upages_small); return (ret); } @@ -1435,7 +1500,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, struct mtx *mtx; int ret = 0; int npages = 0; - vm_page_t upages[NVME_MAX_PAGES]; + vm_page_t upages_small[NVME_MAX_PAGES]; + vm_page_t *upages = upages_small; /* * We don't support metadata. 
@@ -1452,8 +1518,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, } if (is_user) { ret = nvme_user_ioctl_req(npc->addr, npc->data_len, - npc->opcode & 0x1, upages, nitems(upages), &npages, - &req, nvme_npc_done, npc); + npc->opcode & 0x1, &upages, nitems(upages_small), + &npages, &req, nvme_npc_done, npc); if (ret != 0) return (ret); } else @@ -1491,7 +1557,7 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr, mtx_unlock(mtx); if (npages > 0) - nvme_user_ioctl_free(upages, npages); + nvme_user_ioctl_free(upages, npages, upages != upages_small); return (ret); } @@ -1835,8 +1901,10 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, struct nvme_request *req) { struct nvme_qpair *qpair; + int32_t ioq; - qpair = &ctrlr->ioq[QP(ctrlr, curcpu)]; + ioq = req->ioq == NVME_IOQ_DEFAULT ? QP(ctrlr, curcpu) : req->ioq; + qpair = &ctrlr->ioq[ioq]; nvme_qpair_submit_request(qpair, req); } diff --git a/sys/dev/nvme/nvme_ctrlr_cmd.c b/sys/dev/nvme/nvme_ctrlr_cmd.c index 5a44ed425acb..1a48a058edd8 100644 --- a/sys/dev/nvme/nvme_ctrlr_cmd.c +++ b/sys/dev/nvme/nvme_ctrlr_cmd.c @@ -171,7 +171,11 @@ nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature, struct nvme_request *req; struct nvme_command *cmd; - req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg); + if (payload != NULL) + req = nvme_allocate_request_vaddr(payload, payload_size, + M_WAITOK, cb_fn, cb_arg); + else + req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg); cmd = &req->cmd; cmd->opc = NVME_OPC_SET_FEATURES; @@ -193,7 +197,11 @@ nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature, struct nvme_request *req; struct nvme_command *cmd; - req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg); + if (payload != NULL) + req = nvme_allocate_request_vaddr(payload, payload_size, + M_WAITOK, cb_fn, cb_arg); + else + req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg); cmd = &req->cmd; cmd->opc = NVME_OPC_GET_FEATURES; diff 
--git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c index cecb05ca0a92..55cba580d6ca 100644 --- a/sys/dev/nvme/nvme_pci.c +++ b/sys/dev/nvme/nvme_pci.c @@ -54,7 +54,7 @@ static device_method_t nvme_pci_methods[] = { DEVMETHOD(device_suspend, nvme_pci_suspend), DEVMETHOD(device_resume, nvme_pci_resume), DEVMETHOD(device_shutdown, nvme_shutdown), - { 0, 0 } + DEVMETHOD_END }; static driver_t nvme_pci_driver = { @@ -151,24 +151,28 @@ nvme_pci_probe (device_t device) static int nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) { + int error; + ctrlr->resource_id = PCIR_BAR(0); ctrlr->msix_table_resource_id = -1; ctrlr->msix_table_resource = NULL; ctrlr->msix_pba_resource_id = -1; ctrlr->msix_pba_resource = NULL; + /* + * Using RF_ACTIVE will set the Memory Space bit in the PCI command register. + * The remaining BARs will get mapped in before they've been programmed with + * an address. To avoid this we'll not set this flag and instead call + * bus_activate_resource() after all the BARs have been programmed. + */ ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY, - &ctrlr->resource_id, RF_ACTIVE); + &ctrlr->resource_id, 0); if (ctrlr->resource == NULL) { nvme_printf(ctrlr, "unable to allocate pci resource\n"); return (ENOMEM); } - ctrlr->bus_tag = rman_get_bustag(ctrlr->resource); - ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource); - ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle; - /* * The NVMe spec allows for the MSI-X tables to be placed behind * BAR 4 and/or 5, separate from the control/doorbell registers. 
@@ -180,7 +184,7 @@ nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) if (ctrlr->msix_table_resource_id >= 0 && ctrlr->msix_table_resource_id != ctrlr->resource_id) { ctrlr->msix_table_resource = bus_alloc_resource_any(ctrlr->dev, - SYS_RES_MEMORY, &ctrlr->msix_table_resource_id, RF_ACTIVE); + SYS_RES_MEMORY, &ctrlr->msix_table_resource_id, 0); if (ctrlr->msix_table_resource == NULL) { nvme_printf(ctrlr, "unable to allocate msi-x table resource\n"); return (ENOMEM); @@ -190,13 +194,35 @@ nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) ctrlr->msix_pba_resource_id != ctrlr->resource_id && ctrlr->msix_pba_resource_id != ctrlr->msix_table_resource_id) { ctrlr->msix_pba_resource = bus_alloc_resource_any(ctrlr->dev, - SYS_RES_MEMORY, &ctrlr->msix_pba_resource_id, RF_ACTIVE); + SYS_RES_MEMORY, &ctrlr->msix_pba_resource_id, 0); if (ctrlr->msix_pba_resource == NULL) { nvme_printf(ctrlr, "unable to allocate msi-x pba resource\n"); return (ENOMEM); } } + error = bus_activate_resource(ctrlr->dev, ctrlr->resource); + if (error) { + nvme_printf(ctrlr, "unable to activate pci resource: %d\n", error); + return (error); + } + if (ctrlr->msix_table_resource != NULL) { + error = bus_activate_resource(ctrlr->dev, ctrlr->msix_table_resource); + if (error) { + nvme_printf(ctrlr, "unable to activate msi-x table resource: %d\n", + error); + return (error); + } + } + if (ctrlr->msix_pba_resource != NULL) { + error = bus_activate_resource(ctrlr->dev, ctrlr->msix_pba_resource); + if (error) { + nvme_printf(ctrlr, "unable to activate msi-x pba resource: %d\n", + error); + return (error); + } + } + return (0); } diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 8837275e2ed5..32c8cf91c1db 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -112,7 +112,9 @@ struct nvme_request { struct memdesc payload; nvme_cb_fn_t cb_fn; void *cb_arg; - int32_t retries; + int16_t retries; + uint16_t ioq; +#define NVME_IOQ_DEFAULT 0xffff bool 
payload_valid; bool timeout; bool spare[2]; /* Future use */ @@ -223,8 +225,6 @@ struct nvme_controller { #define QUIRK_INTEL_ALIGNMENT 4 /* Pre NVMe 1.3 performance alignment */ #define QUIRK_AHCI 8 /* Attached via AHCI redirect */ - bus_space_tag_t bus_tag; - bus_space_handle_t bus_handle; int resource_id; struct resource *resource; @@ -284,8 +284,6 @@ struct nvme_controller { struct nvme_qpair adminq; struct nvme_qpair *ioq; - struct nvme_registers *regs; - struct nvme_controller_data cdata; struct nvme_namespace ns[NVME_MAX_NAMESPACES]; @@ -298,8 +296,8 @@ struct nvme_controller { struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS]; uint32_t is_resetting; - u_int fail_on_reset; + bool fail_on_reset; bool is_failed; bool is_failed_admin; bool is_dying; @@ -328,20 +326,17 @@ struct nvme_controller { offsetof(struct nvme_registers, reg) #define nvme_mmio_read_4(sc, reg) \ - bus_space_read_4((sc)->bus_tag, (sc)->bus_handle, \ - nvme_mmio_offsetof(reg)) + bus_read_4((sc)->resource, nvme_mmio_offsetof(reg)) #define nvme_mmio_write_4(sc, reg, val) \ - bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ - nvme_mmio_offsetof(reg), val) + bus_write_4((sc)->resource, nvme_mmio_offsetof(reg), val) #define nvme_mmio_write_8(sc, reg, val) \ do { \ - bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ - nvme_mmio_offsetof(reg), val & 0xFFFFFFFF); \ - bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ - nvme_mmio_offsetof(reg)+4, \ - (val & 0xFFFFFFFF00000000ULL) >> 32); \ + bus_write_4((sc)->resource, nvme_mmio_offsetof(reg), \ + (val) & 0xFFFFFFFF); \ + bus_write_4((sc)->resource, nvme_mmio_offsetof(reg) + 4, \ + ((val) & 0xFFFFFFFF00000000ULL) >> 32); \ } while (0); #define nvme_printf(ctrlr, fmt, args...) 
\ @@ -491,6 +486,7 @@ _nvme_allocate_request(const int how, nvme_cb_fn_t cb_fn, void *cb_arg) req = malloc(sizeof(*req), M_NVME, how | M_ZERO); if (req != NULL) { + req->ioq = NVME_IOQ_DEFAULT; req->cb_fn = cb_fn; req->cb_arg = cb_arg; req->timeout = true; @@ -499,11 +495,13 @@ _nvme_allocate_request(const int how, nvme_cb_fn_t cb_fn, void *cb_arg) } static __inline struct nvme_request * -nvme_allocate_request_vaddr(void *payload, uint32_t payload_size, +nvme_allocate_request_vaddr(void *payload, size_t payload_size, const int how, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; + KASSERT(payload_size <= UINT32_MAX, + ("payload size %zu exceeds maximum", payload_size)); req = _nvme_allocate_request(how, cb_fn, cb_arg); if (req != NULL) { req->payload = memdesc_vaddr(payload, payload_size); @@ -551,6 +549,22 @@ nvme_allocate_request_ccb(union ccb *ccb, const int how, nvme_cb_fn_t cb_fn, #define nvme_free_request(req) free(req, M_NVME) +static __inline void +nvme_request_set_ioq(struct nvme_controller *ctrlr, struct nvme_request *req, + uint16_t ioq) +{ + /* + * Note: NVMe queues are numbered 1-65535. The ioq here is numbered + * 0-65534 to avoid off-by-one bugs, with 65535 being reserved for + * DEFAULT. 
+ */ + KASSERT(ioq == NVME_IOQ_DEFAULT || ioq < ctrlr->num_io_queues, + ("ioq %d out of range 0..%d", ioq, ctrlr->num_io_queues)); + if (ioq < 0 || ioq >= ctrlr->num_io_queues) + ioq = NVME_IOQ_DEFAULT; + req->ioq = ioq; +} + void nvme_notify_async(struct nvme_controller *ctrlr, const struct nvme_completion *async_cpl, uint32_t log_page_id, void *log_page_buffer, diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index 4f2c44da3b4f..e31bf818ed35 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -476,8 +476,8 @@ _nvme_qpair_process_completions(struct nvme_qpair *qpair) } if (done) { - bus_space_write_4(qpair->ctrlr->bus_tag, qpair->ctrlr->bus_handle, - qpair->cq_hdbl_off, qpair->cq_head); + bus_write_4(qpair->ctrlr->resource, qpair->cq_hdbl_off, + qpair->cq_head); } return (done); @@ -1068,8 +1068,7 @@ nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr) bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - bus_space_write_4(ctrlr->bus_tag, ctrlr->bus_handle, - qpair->sq_tdbl_off, qpair->sq_tail); + bus_write_4(ctrlr->resource, qpair->sq_tdbl_off, qpair->sq_tail); qpair->num_cmds++; } @@ -1209,7 +1208,7 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) static void nvme_qpair_enable(struct nvme_qpair *qpair) { - bool is_admin __unused = qpair == &qpair->ctrlr->adminq; + bool is_admin __diagused = qpair == &qpair->ctrlr->adminq; if (mtx_initialized(&qpair->recovery)) mtx_assert(&qpair->recovery, MA_OWNED); diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c index 8b861cf3aa71..b9f09c8d1f61 100644 --- a/sys/dev/nvme/nvme_sim.c +++ b/sys/dev/nvme/nvme_sim.c @@ -343,7 +343,7 @@ nvme_sim_attach(device_t dev) sc->s_ctrlr = ctrlr; sc->s_sim = cam_sim_alloc(nvme_sim_action, nvme_sim_poll, - "nvme", sc, device_get_unit(dev), + "nvme", sc, device_get_unit(ctrlr->dev), NULL, max_trans, max_trans, devq); if (sc->s_sim == NULL) { 
device_printf(dev, "Failed to allocate a sim\n"); @@ -406,6 +406,16 @@ nvme_sim_ns_added(device_t dev, struct nvme_namespace *ns) union ccb *ccb; /* + * If we have no namespaces, then we both do not attach the nvme_sim_ns + * device. And then get a ns changed AER as well to tell us about it + * (which is how we get here). If there's no device attached, then + * there's nothing to do. sc->s_sim will be NULL as well (since it + * only gets set when we attach). + */ + if (!device_is_attached(dev)) + return (0); + + /* * We map the NVMe namespace idea onto the CAM unit LUN. For each new * namespace, scan or rescan the path to enumerate it. */ @@ -428,7 +438,7 @@ nvme_sim_ns_removed(device_t dev, struct nvme_namespace *ns) if (xpt_create_path(&tmppath, /*periph*/NULL, cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) { - printf("unable to create path for rescan\n"); + printf("unable to create path for ns removal\n"); return (ENOMEM); } xpt_async(AC_LOST_DEVICE, tmppath, NULL); @@ -476,7 +486,7 @@ static device_method_t nvme_sim_methods[] = { DEVMETHOD(nvme_ns_changed, nvme_sim_ns_changed), DEVMETHOD(nvme_controller_failed, nvme_sim_controller_failed), DEVMETHOD(nvme_handle_aen, nvme_sim_handle_aen), - { 0, 0 } + DEVMETHOD_END }; static driver_t nvme_sim_driver = { @@ -486,4 +496,4 @@ static driver_t nvme_sim_driver = { }; DRIVER_MODULE(nvme_sim, nvme, nvme_sim_driver, NULL, NULL); -MODULE_VERSION(nvme_shim, 1); +MODULE_VERSION(nvme_sim, 1); diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c index 50d19e730a16..1b64ebddb9b2 100644 --- a/sys/dev/nvme/nvme_sysctl.c +++ b/sys/dev/nvme/nvme_sysctl.c @@ -425,7 +425,7 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr) CTLFLAG_RD, &ctrlr->cap_hi, 0, "Hi 32-bits of capacities for the drive"); - SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "fail_on_reset", + SYSCTL_ADD_BOOL(ctrlr_ctx, ctrlr_list, OID_AUTO, "fail_on_reset", CTLFLAG_RD, &ctrlr->fail_on_reset, 0, "Pretend the next reset fails and 
fail the controller");
