Diffstat (limited to 'sys/cam/ctl/ctl.c')
-rw-r--r-- sys/cam/ctl/ctl.c | 1308
1 file changed, 1195 insertions(+), 113 deletions(-)
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index 5230a2694544..b1367cd79fab 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -83,6 +83,7 @@ #include <cam/ctl/ctl_ha.h> #include <cam/ctl/ctl_private.h> #include <cam/ctl/ctl_debug.h> +#include <cam/ctl/ctl_nvme_all.h> #include <cam/ctl/ctl_scsi_all.h> #include <cam/ctl/ctl_error.h> @@ -447,6 +448,8 @@ static int ctl_scsiio_lun_check(struct ctl_lun *lun, static void ctl_failover_lun(union ctl_io *io); static void ctl_scsiio_precheck(struct ctl_scsiio *ctsio); static int ctl_scsiio(struct ctl_scsiio *ctsio); +static void ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio); +static int ctl_nvmeio(struct ctl_nvmeio *ctnio); static int ctl_target_reset(union ctl_io *io); static void ctl_do_lun_reset(struct ctl_lun *lun, uint32_t initidx, @@ -528,6 +531,38 @@ static moduledata_t ctl_moduledata = { DECLARE_MODULE(ctl, ctl_moduledata, SI_SUB_CONFIGURE, SI_ORDER_THIRD); MODULE_VERSION(ctl, 1); +static void +ctl_be_move_done(union ctl_io *io, bool samethr) +{ + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + io->scsiio.be_move_done(io, samethr); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + io->nvmeio.be_move_done(io, samethr); + break; + default: + __assert_unreachable(); + } +} + +static void +ctl_continue_io(union ctl_io *io) +{ + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + io->scsiio.io_cont(io); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + io->nvmeio.io_cont(io); + break; + default: + __assert_unreachable(); + } +} + static struct ctl_frontend ha_frontend = { .name = "ha", @@ -585,6 +620,8 @@ ctl_ha_datamove(union ctl_io *io) uint32_t sg_entries_sent; int do_sg_copy, i, j; + CTL_IO_ASSERT(io, SCSI); + memset(&msg.dt, 0, sizeof(msg.dt)); msg.hdr.msg_type = CTL_MSG_DATAMOVE; msg.hdr.original_sc = io->io_hdr.remote_io; @@ -601,32 +638,32 @@ ctl_ha_datamove(union ctl_io *io) * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries, * then we need to break this up into multiple transfers. */ - if (io->scsiio.kern_sg_entries == 0) { + if (ctl_kern_sg_entries(io) == 0) { msg.dt.kern_sg_entries = 1; #if 0 if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) { - msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; + msg.dt.sg_list[0].addr = ctl_kern_data_ptr(io); } else { /* XXX KDM use busdma here! 
*/ msg.dt.sg_list[0].addr = - (void *)vtophys(io->scsiio.kern_data_ptr); + (void *)vtophys(ctl_kern_data_ptr(io)); } #else KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0, ("HA does not support BUS_ADDR")); - msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; + msg.dt.sg_list[0].addr = ctl_kern_data_ptr(io); #endif - msg.dt.sg_list[0].len = io->scsiio.kern_data_len; + msg.dt.sg_list[0].len = ctl_kern_data_len(io); do_sg_copy = 0; } else { - msg.dt.kern_sg_entries = io->scsiio.kern_sg_entries; + msg.dt.kern_sg_entries = ctl_kern_sg_entries(io); do_sg_copy = 1; } - msg.dt.kern_data_len = io->scsiio.kern_data_len; - msg.dt.kern_total_len = io->scsiio.kern_total_len; - msg.dt.kern_data_resid = io->scsiio.kern_data_resid; - msg.dt.kern_rel_offset = io->scsiio.kern_rel_offset; + msg.dt.kern_data_len = ctl_kern_data_len(io); + msg.dt.kern_total_len = ctl_kern_total_len(io); + msg.dt.kern_data_resid = ctl_kern_data_resid(io); + msg.dt.kern_rel_offset = ctl_kern_rel_offset(io); msg.dt.sg_sequence = 0; /* @@ -640,7 +677,7 @@ ctl_ha_datamove(union ctl_io *io) sizeof(msg.dt.sg_list[0])), msg.dt.kern_sg_entries - sg_entries_sent); if (do_sg_copy != 0) { - sgl = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; + sgl = (struct ctl_sg_entry *)ctl_kern_data_ptr(io); for (i = sg_entries_sent, j = 0; i < msg.dt.cur_sg_entries; i++, j++) { #if 0 @@ -1496,6 +1533,8 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) /* XXX KDM do something here */ break; } + CTL_IO_ASSERT(io, SCSI); + io->io_hdr.msg_type = CTL_MSG_DATAMOVE; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; /* @@ -1569,6 +1608,8 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) * back to the initiator. */ io = msg->hdr.serializing_sc; + CTL_IO_ASSERT(io, SCSI); + io->io_hdr.msg_type = CTL_MSG_DATAMOVE_DONE; io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; @@ -2410,6 +2451,8 @@ ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num, union ctl_io *io = (union ctl_io *)ioh; struct ctl_ooa_entry *entry; + CTL_IO_ASSERT(io, SCSI); + /* * If we've got more than we can fit, just count the * remaining entries. 
@@ -2644,12 +2687,6 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, } entries = malloc(ooa_hdr->alloc_len, M_CTL, M_WAITOK | M_ZERO); - if (entries == NULL) { - printf("%s: could not allocate %d bytes for OOA " - "dump\n", __func__, ooa_hdr->alloc_len); - retval = ENOMEM; - break; - } mtx_lock(&softc->ctl_lock); if ((ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) == 0 && @@ -3174,6 +3211,23 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, retval = fe->ioctl(dev, cmd, addr, flag, td); break; } + case CTL_NVMF: { + struct ctl_nvmf *cn; + struct ctl_frontend *fe; + + cn = (struct ctl_nvmf *)addr; + + fe = ctl_frontend_find("nvmf"); + if (fe == NULL) { + cn->status = CTL_NVMF_ERROR; + snprintf(cn->error_str, sizeof(cn->error_str), + "Frontend \"nvmf\" not found."); + break; + } + + retval = fe->ioctl(dev, cmd, addr, flag, td); + break; + } case CTL_PORT_REQ: { struct ctl_req *req; struct ctl_frontend *fe; @@ -4576,7 +4630,7 @@ fail: ctl_tpc_lun_init(lun); if (lun->flags & CTL_LUN_REMOVABLE) { lun->prevent = malloc((CTL_MAX_INITIATORS + 31) / 32 * 4, - M_CTL, M_WAITOK); + M_CTL, M_WAITOK | M_ZERO); } /* @@ -4913,6 +4967,91 @@ ctl_lun_capacity_changed(struct ctl_be_lun *be_lun) } } +void +ctl_lun_nsdata_ids(struct ctl_be_lun *be_lun, + struct nvme_namespace_data *nsdata) +{ + struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; + struct scsi_vpd_id_descriptor *idd; + + if (lun->lun_devid == NULL) + return; + + idd = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_naa); + if (idd != NULL) { + if (idd->length == 16) { + memcpy(nsdata->nguid, idd->identifier, 16); + return; + } + if (idd->length == 8) { + memcpy(nsdata->eui64, idd->identifier, 8); + return; + } + } + + idd = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_eui64); + if (idd != NULL) { + if (idd->length == 8) { + memcpy(nsdata->eui64, idd->identifier, 8); + return; + } + } +} + +void +ctl_lun_nvme_ids(struct ctl_be_lun *be_lun, void *data) +{ + struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; + struct scsi_vpd_id_descriptor *naa, *eui64, *uuid; + char *p; + + memset(data, 0, 4096); + + if (lun->lun_devid == NULL) + return; + + naa = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_naa); + eui64 = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_eui64); + uuid = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_uuid); + + p = data; + + /* EUI64 */ + if ((naa != NULL && naa->length == 8) || eui64 != NULL) { + *p++ = 1; + *p++ = 8; + p += 2; + if (naa != NULL && naa->length == 8) + memcpy(p, naa->identifier, 8); + else + memcpy(p, eui64->identifier, 8); + p += 8; + } + + /* NGUID */ + if (naa != NULL && naa->length == 16) { + *p++ = 2; + *p++ = 16; + p += 2; + memcpy(p, naa->identifier, 16); + p += 16; + } + + /* UUID */ + if (uuid != NULL) { + *p++ = 3; + *p++ = uuid->length; + p += 2; + memcpy(p, uuid->identifier, uuid->length); + p += uuid->length; + } +} + /* * Backend "memory move is complete" callback for requests that never * make it down to say RAIDCore's configuration code. 
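The descriptor list that ctl_lun_nvme_ids() builds above is the payload of the NVMe Identify Namespace Identification Descriptor list (CNS 03h): each entry is one NIDT type byte (1 = EUI-64, 2 = NGUID, 3 = UUID), one NIDL length byte, two reserved bytes, and then the identifier itself, and the leading memset() leaves a zero NIDT after the last entry, which terminates the list. As an illustration only (this sketch is not part of the commit), a userland consumer could walk that 4096-byte buffer like this:

#include <stdio.h>

static void
print_ns_id_descriptors(const unsigned char *buf, size_t len)
{
	size_t off = 0;

	/* Each entry: NIDT, NIDL, two reserved bytes, then NIDL ID bytes. */
	while (off + 4 <= len) {
		unsigned nidt = buf[off];	/* descriptor type */
		unsigned nidl = buf[off + 1];	/* identifier length */

		if (nidt == 0)
			break;		/* zeroed tail: end of list */
		if (off + 4 + nidl > len)
			break;		/* truncated entry: stop */

		switch (nidt) {
		case 0x01:
			printf("EUI-64 (%u bytes)\n", nidl);
			break;
		case 0x02:
			printf("NGUID (%u bytes)\n", nidl);
			break;
		case 0x03:
			printf("UUID (%u bytes)\n", nidl);
			break;
		default:
			printf("unknown NIDT %#x (%u bytes)\n", nidt, nidl);
			break;
		}
		off += 4 + nidl;
	}
}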
@@ -4923,8 +5062,6 @@ ctl_config_move_done(union ctl_io *io, bool samethr) int retval; CTL_DEBUG_PRINT(("ctl_config_move_done\n")); - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); if (ctl_debug & CTL_DEBUG_CDB_DATA) ctl_data_print(io); @@ -4938,7 +5075,7 @@ ctl_config_move_done(union ctl_io *io, bool samethr) * we'll need to know how to clean them up here as well. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) - free(io->scsiio.kern_data_ptr, M_CTL); + free(ctl_kern_data_ptr(io), M_CTL); ctl_done(io); retval = CTL_RETVAL_COMPLETE; } else { @@ -4959,7 +5096,17 @@ ctl_config_move_done(union ctl_io *io, bool samethr) * XXX KDM call ctl_scsiio() again for now, and check flag * bits to see whether we're allocated or not. */ - retval = ctl_scsiio(&io->scsiio); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + retval = ctl_scsiio(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + retval = ctl_nvmeio(&io->nvmeio); + break; + default: + __assert_unreachable(); + } } return (retval); } @@ -4983,7 +5130,7 @@ ctl_data_submit_done(union ctl_io *io) (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - io->scsiio.io_cont(io); + ctl_continue_io(io); return; } ctl_done(io); @@ -5010,7 +5157,7 @@ ctl_config_write_done(union ctl_io *io) (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - io->scsiio.io_cont(io); + ctl_continue_io(io); return; } /* @@ -5019,7 +5166,7 @@ ctl_config_write_done(union ctl_io *io) * no data, like start/stop unit, we need to check here. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) - buf = io->scsiio.kern_data_ptr; + buf = ctl_kern_data_ptr(io); else buf = NULL; ctl_done(io); @@ -5039,7 +5186,7 @@ ctl_config_read_done(union ctl_io *io) ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) - buf = io->scsiio.kern_data_ptr; + buf = ctl_kern_data_ptr(io); else buf = NULL; ctl_done(io); @@ -5054,7 +5201,7 @@ ctl_config_read_done(union ctl_io *io) * the I/O just yet. */ if (io->io_hdr.flags & CTL_FLAG_IO_CONT) { - io->scsiio.io_cont(io); + ctl_continue_io(io); return; } @@ -5439,7 +5586,7 @@ ctl_read_buffer(struct ctl_scsiio *ctsio) } else { if (lun->write_buffer == NULL) { lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, - M_CTL, M_WAITOK); + M_CTL, M_WAITOK | M_ZERO); } ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; } @@ -5478,21 +5625,24 @@ ctl_write_buffer(struct ctl_scsiio *ctsio) return (CTL_RETVAL_COMPLETE); } + if (lun->write_buffer == NULL) { + lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, + M_CTL, M_WAITOK | M_ZERO); + } + /* - * If we've got a kernel request that hasn't been malloced yet, - * malloc it and tell the caller the data buffer is here. + * If this kernel request hasn't started yet, initialize the data + * buffer to the correct region of the LUN's write buffer. Note that + * this doesn't set CTL_FLAG_ALLOCATED since this points into a + * persistent buffer belonging to the LUN rather than a buffer + * dedicated to this request. 
*/ - if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { - if (lun->write_buffer == NULL) { - lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, - M_CTL, M_WAITOK); - } + if (ctsio->kern_data_ptr == NULL) { ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; - ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); @@ -5512,6 +5662,8 @@ ctl_write_same_cont(union ctl_io *io) struct ctl_lba_len_flags *lbalen; int retval; + CTL_IO_ASSERT(io, SCSI); + ctsio = &io->scsiio; ctsio->io_hdr.status = CTL_STATUS_NONE; lbalen = (struct ctl_lba_len_flags *) @@ -5857,6 +6009,8 @@ ctl_do_mode_select(union ctl_io *io) uint16_t *len_left, *len_used; int retval, i; + CTL_IO_ASSERT(io, SCSI); + ctsio = &io->scsiio; page_index = NULL; page_len = 0; @@ -7302,34 +7456,26 @@ ctl_report_supported_opcodes(struct ctl_scsiio *ctsio) break; case RSO_OPTIONS_OC: if (ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) { - ctl_set_invalid_field(/*ctsio*/ ctsio, - /*sks_valid*/ 1, - /*command*/ 1, - /*field*/ 2, - /*bit_valid*/ 1, - /*bit*/ 2); - ctl_done((union ctl_io *)ctsio); - return (CTL_RETVAL_COMPLETE); + goto invalid_options; } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; case RSO_OPTIONS_OC_SA: if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) == 0 || service_action >= 32) { - ctl_set_invalid_field(/*ctsio*/ ctsio, - /*sks_valid*/ 1, - /*command*/ 1, - /*field*/ 2, - /*bit_valid*/ 1, - /*bit*/ 2); - ctl_done((union ctl_io *)ctsio); - return (CTL_RETVAL_COMPLETE); + goto invalid_options; } - /* FALLTHROUGH */ + total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; + break; case RSO_OPTIONS_OC_ASA: + if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) != 0 && + service_action >= 32) { + goto invalid_options; + } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; default: +invalid_options: ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, @@ -8770,6 +8916,8 @@ ctl_cnw_cont(union ctl_io *io) struct ctl_lba_len_flags *lbalen; int retval; + CTL_IO_ASSERT(io, SCSI); + ctsio = &io->scsiio; ctsio->io_hdr.status = CTL_STATUS_NONE; ctsio->io_hdr.flags &= ~CTL_FLAG_IO_CONT; @@ -9145,14 +9293,8 @@ ctl_request_sense(struct ctl_scsiio *ctsio) sense_ptr = (struct scsi_sense_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; ctsio->kern_rel_offset = 0; - - /* - * struct scsi_sense_data, which is currently set to 256 bytes, is - * larger than the largest allowed value for the length field in the - * REQUEST SENSE CDB, which is 252 bytes as of SPC-4. - */ - ctsio->kern_data_len = cdb->length; - ctsio->kern_total_len = cdb->length; + ctsio->kern_data_len = ctsio->kern_total_len = + MIN(cdb->length, sizeof(*sense_ptr)); /* * If we don't have a LUN, we don't have any pending sense. @@ -10554,14 +10696,732 @@ ctl_read_toc(struct ctl_scsiio *ctsio) } /* + * For NVMe commands, parse the LBA and length. 
+ */ +static bool +ctl_nvme_get_lba_len(struct ctl_nvmeio *ctnio, uint64_t *lba, uint32_t *len) +{ + CTL_IO_ASSERT(ctnio, NVME); + + switch (ctnio->cmd.opc) { + case NVME_OPC_WRITE: + case NVME_OPC_READ: + case NVME_OPC_WRITE_UNCORRECTABLE: + case NVME_OPC_COMPARE: + case NVME_OPC_WRITE_ZEROES: + case NVME_OPC_VERIFY: + *lba = (uint64_t)le32toh(ctnio->cmd.cdw11) << 32 | + le32toh(ctnio->cmd.cdw10); + *len = (le32toh(ctnio->cmd.cdw12) & 0xffff) + 1; + return (true); + default: + *lba = 0; + *len = 0; + return (false); + } +} + +static bool +ctl_nvme_fua(struct ctl_nvmeio *ctnio) +{ + return ((le32toh(ctnio->cmd.cdw12) & (1U << 30)) != 0); +} + +int +ctl_nvme_identify(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + size_t len; + int retval; + uint8_t cns; + + CTL_DEBUG_PRINT(("ctl_nvme_identify\n")); + + CTL_IO_ASSERT(ctnio, NVME_ADMIN); + MPASS(ctnio->cmd.opc == NVME_OPC_IDENTIFY); + + /* + * The data buffer for Identify is always 4096 bytes, see + * 5.51.1 in NVMe base specification 1.4. + */ + len = 4096; + + ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK); + ctnio->kern_data_len = len; + ctnio->kern_total_len = len; + ctnio->kern_rel_offset = 0; + ctnio->kern_sg_entries = 0; + + ctl_nvme_set_success(ctnio); + ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED; + ctnio->be_move_done = ctl_config_move_done; + + /* + * If we don't have a LUN, return an empty result for CNS == 0. + */ + if (lun == NULL) { + cns = le32toh(ctnio->cmd.cdw10) & 0xff; + switch (cns) { + case 0: + memset(ctnio->kern_data_ptr, 0, len); + ctl_datamove((union ctl_io *)ctnio); + break; + default: + ctl_nvme_set_invalid_field(ctnio); + break; + } + return (CTL_RETVAL_COMPLETE); + } + + retval = lun->backend->config_read((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_flush(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_flush\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_FLUSH); + + /* + * NVMe flushes always flush the entire namespace, not an LBA + * range. + */ + retval = lun->backend->config_write((union ctl_io *)ctnio); + + return (retval); +} + +int +ctl_nvme_read_write(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int flags, retval; + bool isread; + + CTL_DEBUG_PRINT(("ctl_nvme_read_write: command: %#x\n", + ctnio->cmd.opc)); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_WRITE || + ctnio->cmd.opc == NVME_OPC_READ); + + flags = 0; + isread = ctnio->cmd.opc == NVME_OPC_READ; + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. + */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + /* + * Set FUA and/or DPO if caches are disabled. + * + * For a read this may not be quite correct for the block + * backend as any earlier writes to the LBA range should be + * flushed to backing store as part of the read. 
+ */ + if (ctl_nvme_fua(ctnio)) { + flags |= CTL_LLF_FUA; + if (isread) + flags |= CTL_LLF_DPO; + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags; + + ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize; + ctnio->kern_rel_offset = 0; + + CTL_DEBUG_PRINT(("ctl_nvme_read_write: calling data_submit()\n")); + + retval = lun->backend->data_submit((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_write_uncorrectable\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_UNCORRECTABLE); + + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. + */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = 0; + retval = lun->backend->config_write((union ctl_io *)ctnio); + + return (retval); +} + +int +ctl_nvme_compare(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int flags; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_compare\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_COMPARE); + + flags = 0; + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + if (ctl_nvme_fua(ctnio)) + flags |= CTL_LLF_FUA; + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. + */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = CTL_LLF_COMPARE | flags; + ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize; + ctnio->kern_rel_offset = 0; + + CTL_DEBUG_PRINT(("ctl_nvme_compare: calling data_submit()\n")); + retval = lun->backend->data_submit((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_write_zeroes\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_ZEROES); + + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. 
+ */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = 0; + retval = lun->backend->config_write((union ctl_io *)ctnio); + + return (retval); +} + +int +ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct nvme_dsm_range *r; + uint64_t lba; + uint32_t len, num_blocks; + u_int i, ranges; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_dataset_management\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_DATASET_MANAGEMENT); + + /* The NR field in CDW10 is 0's based. */ + ranges = (le32toh(ctnio->cmd.cdw10) & 0xff) + 1; + len = ranges * sizeof(struct nvme_dsm_range); + + /* + * If we've got a kernel request that hasn't been malloced yet, + * malloc it and tell the caller the data buffer is here. + */ + if ((ctnio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { + ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK); + ctnio->kern_data_len = len; + ctnio->kern_total_len = len; + ctnio->kern_rel_offset = 0; + ctnio->kern_sg_entries = 0; + ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED; + ctnio->be_move_done = ctl_config_move_done; + ctl_datamove((union ctl_io *)ctnio); + + return (CTL_RETVAL_COMPLETE); + } + + /* + * Require a flat buffer of the correct size. + */ + if (ctnio->kern_sg_entries > 0 || + ctnio->kern_total_len - ctnio->kern_data_resid != len) + return (CTL_RETVAL_ERROR); + + /* + * Verify that none of the ranges are out of bounds. + */ + r = (struct nvme_dsm_range *)ctnio->kern_data_ptr; + for (i = 0; i < ranges; i++) { + lba = le64toh(r[i].starting_lba); + num_blocks = le32toh(r[i].length); + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + } + + CTL_DEBUG_PRINT(("ctl_nvme_dataset_management: calling config_write()\n")); + retval = lun->backend->config_write((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_verify(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int flags; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_verify\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_VERIFY); + + flags = 0; + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + if (ctl_nvme_fua(ctnio)) + flags |= CTL_LLF_FUA; + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. 
+ */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = CTL_LLF_VERIFY | flags; + ctnio->kern_total_len = 0; + ctnio->kern_rel_offset = 0; + + CTL_DEBUG_PRINT(("ctl_nvme_verify: calling data_submit()\n")); + retval = lun->backend->data_submit((union ctl_io *)ctnio); + return (retval); +} + +static const struct ctl_nvme_cmd_entry * +ctl_nvme_get_cmd_entry(struct ctl_nvmeio *ctnio) +{ + const struct ctl_nvme_cmd_entry *entry; + + switch (ctnio->io_hdr.io_type) { + case CTL_IO_NVME: + entry = &nvme_nvm_cmd_table[ctnio->cmd.opc]; + break; + case CTL_IO_NVME_ADMIN: + entry = &nvme_admin_cmd_table[ctnio->cmd.opc]; + break; + default: + __assert_unreachable(); + } + return (entry); +} + +static const struct ctl_nvme_cmd_entry * +ctl_nvme_validate_command(struct ctl_nvmeio *ctnio) +{ + const struct ctl_nvme_cmd_entry *entry; + + entry = ctl_nvme_get_cmd_entry(ctnio); + if (entry->execute == NULL) { + ctl_nvme_set_invalid_opcode(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + + /* Validate fused commands: Compare must be first, Write second. */ + switch (NVMEV(NVME_CMD_FUSE, ctnio->cmd.fuse)) { + case NVME_FUSE_NORMAL: + break; + case NVME_FUSE_FIRST: + if (ctnio->io_hdr.io_type != CTL_IO_NVME || + ctnio->cmd.opc != NVME_OPC_COMPARE) { + ctl_nvme_set_invalid_field(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + break; + case NVME_FUSE_SECOND: + if (ctnio->io_hdr.io_type != CTL_IO_NVME || + ctnio->cmd.opc != NVME_OPC_WRITE) { + ctl_nvme_set_invalid_field(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + break; + default: + ctl_nvme_set_invalid_field(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + + return (entry); +} + +/* + * This is a simpler version of ctl_scsiio_lun_check that fails + * requests on a LUN without active media. + * + * Returns true if the command has been completed with an error. + */ +static bool +ctl_nvmeio_lun_check(struct ctl_lun *lun, + const struct ctl_nvme_cmd_entry *entry, struct ctl_nvmeio *ctnio) +{ + mtx_assert(&lun->lun_lock, MA_OWNED); + + if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) == 0) { + if ((lun->flags & (CTL_LUN_EJECTED | CTL_LUN_NO_MEDIA | + CTL_LUN_STOPPED)) != 0) { + ctl_nvme_set_namespace_not_ready(ctnio); + return (true); + } + } + + return (false); +} + +/* + * Check for blockage against the OOA (Order Of Arrival) queue. + * Assumptions: + * - pending_io is generally either incoming, or on the blocked queue + * - starting I/O is the I/O we want to start the check with. + */ +static ctl_action +ctl_nvme_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, + union ctl_io **starting_io, union ctl_io **aborted_io) +{ + union ctl_io *ooa_io = *starting_io; + + CTL_IO_ASSERT(pending_io, NVME, NVME_ADMIN); + + mtx_assert(&lun->lun_lock, MA_OWNED); + + *aborted_io = NULL; + + /* + * Aborted commands are not going to be executed and may even + * not report completion, so we don't care about their order. + * Let them complete ASAP to clean the OOA queue. + */ + if (__predict_false(pending_io->io_hdr.flags & CTL_FLAG_ABORT)) + return (CTL_ACTION_PASS); + + /* + * NVMe has rather simple command ordering requirements. 
In + * particular, there is no requirement on the controller to + * enforce a specific order for overlapping LBAs. The only + * constraint is that fused operations (Compare and Write), + * must be completed as a unit. + * + * To support fused operations, the following strategy is used: + * - the first half of a fused command is not enqueued to rtr + * until the second half is enqueued + * - the second half of a fused command blocks on the first + * half of a fuse command + * - subsequent commands block on the second half of the + * fused command + */ + + /* + * Is the previously submitted command the first half of a + * fused operation? + */ + if (ooa_io != NULL && + NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == NVME_FUSE_FIRST) { + /* + * If this is the second half, enqueue the first half + * and block the second half on the first half. + */ + if (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse) == + NVME_FUSE_SECOND) { + /* + * XXX: Do we need to wait for other rtr requests + * to drain so this is truly atomic? + */ + return (CTL_ACTION_FUSED); + } + + /* Abort the first half. */ + ctl_nvme_set_missing_fused_command(&ooa_io->nvmeio); + *aborted_io = ooa_io; + } else { + switch (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse)) { + case NVME_FUSE_FIRST: + /* First half, wait for the second half. */ + return (CTL_ACTION_SKIP); + case NVME_FUSE_SECOND: + /* Second half without a matching first half, abort. */ + ctl_nvme_set_missing_fused_command(&pending_io->nvmeio); + *aborted_io = pending_io; + return (CTL_ACTION_SKIP); + } + } + + /* + * Scan the OOA queue looking for the most recent second half + * of a fused op. + */ + for (; ooa_io != NULL; + ooa_io = (union ctl_io *)LIST_NEXT(&ooa_io->io_hdr, ooa_links)) { + if (NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == + NVME_FUSE_SECOND) { + *starting_io = ooa_io; + return (CTL_ACTION_BLOCK); + } + } + + *starting_io = NULL; + return (CTL_ACTION_PASS); +} + +static void +ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio) +{ + struct ctl_softc *softc = CTL_SOFTC(ctnio); + struct ctl_lun *lun; + const struct ctl_nvme_cmd_entry *entry; + union ctl_io *bio, *aborted_io; + uint32_t targ_lun; + + lun = NULL; + targ_lun = ctnio->io_hdr.nexus.targ_mapped_lun; + if (targ_lun < ctl_max_luns) + lun = softc->ctl_luns[targ_lun]; + if (lun != NULL) { + /* + * If the LUN is invalid, pretend that it doesn't exist. + * It will go away as soon as all pending I/O has been + * completed. + */ + mtx_lock(&lun->lun_lock); + if (lun->flags & CTL_LUN_DISABLED) { + mtx_unlock(&lun->lun_lock); + lun = NULL; + } + } + CTL_LUN(ctnio) = lun; + if (lun != NULL) { + CTL_BACKEND_LUN(ctnio) = lun->be_lun; + + /* + * Every I/O goes into the OOA queue for a particular LUN, + * and stays there until completion. + */ +#ifdef CTL_TIME_IO + if (LIST_EMPTY(&lun->ooa_queue)) + lun->idle_time += getsbinuptime() - lun->last_busy; +#endif + LIST_INSERT_HEAD(&lun->ooa_queue, &ctnio->io_hdr, ooa_links); + } + + /* Get command entry and return error if it is unsupported. */ + entry = ctl_nvme_validate_command(ctnio); + if (entry == NULL) { + if (lun) + mtx_unlock(&lun->lun_lock); + return; + } + + ctnio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK; + ctnio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK; + + /* All NVMe commands other than IDENTIFY require a LUN. 
*/ + if (lun == NULL) { + if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) { + ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + ctl_enqueue_rtr((union ctl_io *)ctnio); + return; + } + + ctl_nvme_set_invalid_namespace(ctnio); + ctl_done((union ctl_io *)ctnio); + CTL_DEBUG_PRINT(("ctl_nvmeio_precheck: bailing out due to invalid LUN\n")); + return; + } else { + /* + * NVMe namespaces can only be backed by T_DIRECT LUNs. + */ + if (lun->be_lun->lun_type != T_DIRECT) { + mtx_unlock(&lun->lun_lock); + ctl_nvme_set_invalid_namespace(ctnio); + ctl_done((union ctl_io *)ctnio); + return; + } + } + + if (ctl_nvmeio_lun_check(lun, entry, ctnio) != 0) { + mtx_unlock(&lun->lun_lock); + ctl_done((union ctl_io *)ctnio); + return; + } + + bio = (union ctl_io *)LIST_NEXT(&ctnio->io_hdr, ooa_links); + switch (ctl_nvme_check_ooa(lun, (union ctl_io *)ctnio, &bio, + &aborted_io)) { + case CTL_ACTION_PASS: + ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + mtx_unlock(&lun->lun_lock); + ctl_enqueue_rtr((union ctl_io *)ctnio); + break; + case CTL_ACTION_FUSED: + /* Block the second half on the first half. */ + ctnio->io_hdr.blocker = bio; + TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr, + blocked_links); + + /* Pass the first half. */ + bio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + mtx_unlock(&lun->lun_lock); + ctl_enqueue_rtr(bio); + break; + case CTL_ACTION_SKIP: + mtx_unlock(&lun->lun_lock); + break; + case CTL_ACTION_BLOCK: + ctnio->io_hdr.blocker = bio; + TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr, + blocked_links); + mtx_unlock(&lun->lun_lock); + break; + default: + __assert_unreachable(); + } + if (aborted_io != NULL) + ctl_done(aborted_io); +} + +static int +ctl_nvmeio(struct ctl_nvmeio *ctnio) +{ + const struct ctl_nvme_cmd_entry *entry; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvmeio %s opc=%02X\n", + ctnio->io_hdr.io_type == CTL_IO_NVME ? "nvm" : "admin", + ctnio->cmd.opc)); + + entry = ctl_nvme_get_cmd_entry(ctnio); + MPASS(entry != NULL); + + /* + * If this I/O has been aborted, just send it straight to + * ctl_done() without executing it. + */ + if (ctnio->io_hdr.flags & CTL_FLAG_ABORT) { + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + /* + * All the checks should have been handled by ctl_nvmeio_precheck(). + * We should be clear now to just execute the I/O. + */ + retval = entry->execute(ctnio); + + return (retval); +} + +/* * For known CDB types, parse the LBA and length. */ static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len) { - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); + CTL_IO_ASSERT(io, SCSI); switch (io->scsiio.cdb[0]) { case COMPARE_AND_WRITE: { @@ -10741,8 +11601,7 @@ ctl_extent_check_unmap(union ctl_io *io, uint64_t lba2, uint64_t len2) uint64_t lba; uint32_t len; - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); + CTL_IO_ASSERT(io, SCSI); /* If not UNMAP -- go other way. 
*/ if (io->scsiio.cdb[0] != UNMAP) @@ -10812,6 +11671,8 @@ static ctl_action ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io, const uint8_t *serialize_row, union ctl_io *ooa_io) { + CTL_IO_ASSERT(pending_io, SCSI); + CTL_IO_ASSERT(ooa_io, SCSI); /* * The initiator attempted multiple untagged commands at the same @@ -10920,6 +11781,8 @@ ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, const uint8_t *serialize_row; ctl_action action; + CTL_IO_ASSERT(pending_io, SCSI); + mtx_assert(&lun->lun_lock, MA_OWNED); /* @@ -10969,7 +11832,7 @@ ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, * we know for sure that the blocker I/O does no longer count. */ static void -ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) +ctl_scsi_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) { struct ctl_softc *softc = lun->ctl_softc; union ctl_io *bio, *obio; @@ -10977,6 +11840,8 @@ ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) union ctl_ha_msg msg_info; ctl_action action; + CTL_IO_ASSERT(io, SCSI); + mtx_assert(&lun->lun_lock, MA_OWNED); if (io->io_hdr.blocker == NULL) @@ -11062,6 +11927,72 @@ error: } } +static void +ctl_nvme_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) +{ + union ctl_io *bio; + const struct ctl_nvme_cmd_entry *entry; + + CTL_IO_ASSERT(io, NVME, NVME_ADMIN); + + mtx_assert(&lun->lun_lock, MA_OWNED); + + if (io->io_hdr.blocker == NULL) + return; + + /* + * If this is the second half of a fused operation, it should + * be the only io on the blocked list. If the first half + * failed, complete the second half with an appropriate error. + */ + bio = io->io_hdr.blocker; + if (NVMEV(NVME_CMD_FUSE, io->nvmeio.cmd.fuse) == NVME_FUSE_SECOND) { + MPASS(io == + (union ctl_io *)TAILQ_FIRST(&bio->io_hdr.blocked_queue)); + MPASS(TAILQ_NEXT(&io->io_hdr, blocked_links) == NULL); + + TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr, + blocked_links); + if (bio->io_hdr.status != CTL_SUCCESS) { + ctl_nvme_set_failed_fused_command(&io->nvmeio); + ctl_done(io); + return; + } + } else { + /* + * This must be a command that was blocked on the + * second half of a fused operation. + */ + MPASS(NVMEV(NVME_CMD_FUSE, bio->nvmeio.cmd.fuse) == + NVME_FUSE_SECOND); + TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr, + blocked_links); + } + + entry = ctl_nvme_get_cmd_entry(&io->nvmeio); + if (ctl_nvmeio_lun_check(lun, entry, &io->nvmeio) != 0) { + ctl_done(io); + return; + } + + io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + ctl_enqueue_rtr(io); +} + +static void +ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) +{ + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + return (ctl_scsi_try_unblock_io(lun, io, skip)); + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + return (ctl_nvme_try_unblock_io(lun, io, skip)); + default: + __assert_unreachable(); + } +} + /* * Try to unblock I/Os blocked by the specified I/O. 
* @@ -11217,6 +12148,8 @@ bailout: static void ctl_failover_io(union ctl_io *io, int have_lock) { + CTL_IO_ASSERT(io, SCSI); + ctl_set_busy(&io->scsiio); ctl_done(io); } @@ -11773,6 +12706,7 @@ ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id, */ LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) { union ctl_io *xio = (union ctl_io *)xioh; + if ((targ_port == UINT32_MAX || targ_port == xioh->nexus.targ_port) && (init_id == UINT32_MAX || @@ -11784,6 +12718,7 @@ ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id, if (!other_sc && !(lun->flags & CTL_LUN_PRIMARY_SC)) { union ctl_ha_msg msg_info; + CTL_IO_ASSERT(xio, SCSI); msg_info.hdr.nexus = xioh->nexus; msg_info.task.task_action = CTL_TASK_ABORT_TASK; msg_info.task.tag_num = xio->scsiio.tag_num; @@ -11924,6 +12859,8 @@ ctl_abort_task(union ctl_io *io) */ LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) { union ctl_io *xio = (union ctl_io *)xioh; + + CTL_IO_ASSERT(xio, SCSI); if ((xioh->nexus.targ_port != io->io_hdr.nexus.targ_port) || (xioh->nexus.initid != io->io_hdr.nexus.initid) || (xioh->flags & CTL_FLAG_ABORT)) @@ -11995,6 +12932,8 @@ ctl_query_task(union ctl_io *io, int task_set) mtx_unlock(&softc->ctl_lock); LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) { union ctl_io *xio = (union ctl_io *)xioh; + + CTL_IO_ASSERT(xio, SCSI); if ((xioh->nexus.targ_port != io->io_hdr.nexus.targ_port) || (xioh->nexus.initid != io->io_hdr.nexus.initid) || (xioh->flags & CTL_FLAG_ABORT)) @@ -12108,6 +13047,8 @@ ctl_handle_isc(union ctl_io *io) const struct ctl_cmd_entry *entry; uint32_t targ_lun; + CTL_IO_ASSERT(io, SCSI); + targ_lun = io->io_hdr.nexus.targ_mapped_lun; switch (io->io_hdr.msg_type) { case CTL_MSG_SERIALIZE: @@ -12243,6 +13184,8 @@ ctl_inject_error(struct ctl_lun *lun, union ctl_io *io) { struct ctl_error_desc *desc, *desc2; + CTL_IO_ASSERT(io, SCSI); + mtx_assert(&lun->lun_lock, MA_OWNED); STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) { @@ -12321,12 +13264,7 @@ ctl_datamove_done_process(union ctl_io *io) { #ifdef CTL_TIME_IO struct bintime cur_bt; -#endif - - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); -#ifdef CTL_TIME_IO getbinuptime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); bintime_add(&io->io_hdr.dma_bt, &cur_bt); @@ -12336,13 +13274,36 @@ ctl_datamove_done_process(union ctl_io *io) if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, - /*retry_count*/ io->io_hdr.port_status); - } else if (io->scsiio.kern_data_resid != 0 && + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, + /*retry_count*/ io->io_hdr.port_status); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + if (io->io_hdr.flags & CTL_FLAG_ABORT) + ctl_nvme_set_command_aborted(&io->nvmeio); + else + ctl_nvme_set_data_transfer_error(&io->nvmeio); + break; + default: + __assert_unreachable(); + } + } else if (ctl_kern_data_resid(io) != 0 && (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - ctl_set_invalid_field_ciu(&io->scsiio); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + ctl_set_invalid_field_ciu(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + 
ctl_nvme_set_data_transfer_error(&io->nvmeio); + break; + default: + __assert_unreachable(); + } } else if (ctl_debug & CTL_DEBUG_CDB_DATA) ctl_data_print(io); } @@ -12352,7 +13313,7 @@ ctl_datamove_done(union ctl_io *io, bool samethr) { ctl_datamove_done_process(io); - io->scsiio.be_move_done(io, samethr); + ctl_be_move_done(io, samethr); } void @@ -12365,7 +13326,7 @@ ctl_datamove(union ctl_io *io) CTL_DEBUG_PRINT(("ctl_datamove\n")); /* No data transferred yet. Frontend must update this when done. */ - io->scsiio.kern_data_resid = io->scsiio.kern_data_len; + ctl_set_kern_data_resid(io, ctl_kern_data_len(io)); #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); @@ -12398,20 +13359,33 @@ ctl_datamove(union ctl_io *io) * the data move. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { - printf("ctl_datamove: tag 0x%jx on (%u:%u:%u) aborted\n", - io->scsiio.tag_num, io->io_hdr.nexus.initid, - io->io_hdr.nexus.targ_port, - io->io_hdr.nexus.targ_lun); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + printf("ctl_datamove: tag 0x%jx on (%u:%u:%u) aborted\n", + io->scsiio.tag_num, io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + printf("ctl_datamove: CID 0x%x on (%u:%u:%u) aborted\n", + le16toh(io->nvmeio.cmd.cid), + io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun); + break; + default: + __assert_unreachable(); + } io->io_hdr.port_status = 31337; ctl_datamove_done_process(io); - io->scsiio.be_move_done(io, true); + ctl_be_move_done(io, true); return; } /* Don't confuse frontend with zero length data move. */ - if (io->scsiio.kern_data_len == 0) { + if (ctl_kern_data_len(io) == 0) { ctl_datamove_done_process(io); - io->scsiio.be_move_done(io, true); + ctl_be_move_done(io, true); return; } @@ -12427,6 +13401,8 @@ ctl_send_datamove_done(union ctl_io *io, int have_lock) struct bintime cur_bt; #endif + CTL_IO_ASSERT(io, SCSI); + memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_DATAMOVE_DONE; msg.hdr.original_sc = io; @@ -12469,6 +13445,7 @@ ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq) uint32_t i; io = rq->context; + CTL_IO_ASSERT(io, SCSI); if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA write failed with error %d", __func__, @@ -12513,6 +13490,8 @@ ctl_datamove_remote_write(union ctl_io *io) int retval; void (*fe_datamove)(union ctl_io *io); + CTL_IO_ASSERT(io, SCSI); + /* * - Get the data from the host/HBA into local memory. * - DMA memory from the local controller to the remote controller. @@ -12541,6 +13520,8 @@ ctl_datamove_remote_dm_read_cb(union ctl_io *io, bool samethr) { uint32_t i; + CTL_IO_ASSERT(io, SCSI); + for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(CTL_LSGLT(io)[i].addr, M_CTL); free(CTL_RSGL(io), M_CTL); @@ -12563,6 +13544,7 @@ ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq) void (*fe_datamove)(union ctl_io *io); io = rq->context; + CTL_IO_ASSERT(io, SCSI); if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA read failed with error %d\n", __func__, @@ -12597,6 +13579,8 @@ ctl_datamove_remote_sgl_setup(union ctl_io *io) int retval; int i; + CTL_IO_ASSERT(io, SCSI); + retval = 0; local_sglist = CTL_LSGL(io); len_to_go = io->scsiio.kern_data_len; @@ -12634,6 +13618,8 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command, rq = ctl_dt_req_alloc(); + CTL_IO_ASSERT(io, SCSI); + /* * If we failed to allocate the request, and if the DMA didn't fail * anyway, set busy status. 
This is just a resource allocation @@ -12767,7 +13753,7 @@ ctl_datamove_remote_read(union ctl_io *io) * datamove done message, or call the callback with an * error if there is a problem. */ - for (i = 0; i < io->scsiio.kern_sg_entries; i++) + for (i = 0; i < ctl_kern_sg_entries(io); i++) free(CTL_LSGLT(io)[i].addr, M_CTL); free(CTL_RSGL(io), M_CTL); CTL_RSGL(io) = NULL; @@ -12785,6 +13771,7 @@ ctl_datamove_remote_read(union ctl_io *io) static void ctl_datamove_remote(union ctl_io *io) { + CTL_IO_ASSERT(io, SCSI); mtx_assert(&((struct ctl_softc *)CTL_SOFTC(io))->ctl_lock, MA_NOTOWNED); @@ -12837,7 +13824,7 @@ ctl_process_done(union ctl_io *io) char path_str[64]; struct sbuf sb; - ctl_scsi_path_string(io, path_str, sizeof(path_str)); + ctl_scsi_path_string(&io->io_hdr, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); ctl_io_sbuf(io, &sb); @@ -12851,6 +13838,8 @@ ctl_process_done(union ctl_io *io) switch (io->io_hdr.io_type) { case CTL_IO_SCSI: + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: break; case CTL_IO_TASK: if (ctl_debug & CTL_DEBUG_INFO) @@ -12881,6 +13870,8 @@ ctl_process_done(union ctl_io *io) uint8_t mrie = lun->MODE_IE.mrie; uint8_t per = ((lun->MODE_RWER.byte3 & SMS_RWER_PER) || (lun->MODE_VER.byte3 & SMS_VER_PER)); + + CTL_IO_ASSERT(io, SCSI); if (((mrie == SIEP_MRIE_REC_COND && per) || mrie == SIEP_MRIE_REC_UNCOND || mrie == SIEP_MRIE_NO_SENSE) && @@ -12914,7 +13905,9 @@ ctl_process_done(union ctl_io *io) * XXX KDM should we also track I/O latency? */ if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS && - io->io_hdr.io_type == CTL_IO_SCSI) { + (io->io_hdr.io_type == CTL_IO_SCSI || + io->io_hdr.io_type == CTL_IO_NVME || + io->io_hdr.io_type == CTL_IO_NVME_ADMIN)) { int type; #ifdef CTL_TIME_IO struct bintime bt; @@ -12931,7 +13924,7 @@ ctl_process_done(union ctl_io *io) else type = CTL_STATS_NO_IO; - lun->stats.bytes[type] += io->scsiio.kern_total_len; + lun->stats.bytes[type] += ctl_kern_total_len(io); lun->stats.operations[type] ++; lun->stats.dmas[type] += io->io_hdr.num_dmas; #ifdef CTL_TIME_IO @@ -12940,7 +13933,7 @@ ctl_process_done(union ctl_io *io) #endif mtx_lock(&port->port_lock); - port->stats.bytes[type] += io->scsiio.kern_total_len; + port->stats.bytes[type] += ctl_kern_total_len(io); port->stats.operations[type] ++; port->stats.dmas[type] += io->io_hdr.num_dmas; #ifdef CTL_TIME_IO @@ -12984,8 +13977,19 @@ bailout: * properly. The FETD is responsible for freeing the I/O and doing * whatever it needs to do to clean up its state. */ - if (io->io_hdr.flags & CTL_FLAG_ABORT) - ctl_set_task_aborted(&io->scsiio); + if (io->io_hdr.flags & CTL_FLAG_ABORT) { + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + ctl_set_task_aborted(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + ctl_nvme_set_command_aborted(&io->nvmeio); + break; + default: + __assert_unreachable(); + } + } /* * If enabled, print command error status. 
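A recurring pattern in this change is worth calling out: the open-coded KASSERTs on io_hdr.io_type are replaced by CTL_IO_ASSERT(), whose definition lives in ctl_io.h rather than in this file. Judging from the assertions being removed in this diff, a single-type variant would amount to the hypothetical sketch below; the real macro additionally accepts a second type so shared NVMe paths can write CTL_IO_ASSERT(io, NVME, NVME_ADMIN).

/*
 * Hypothetical one-type sketch of CTL_IO_ASSERT(); reconstructed from
 * the KASSERT calls this commit deletes, not copied from ctl_io.h.
 */
#define	CTL_IO_ASSERT_ONE(io, T)					\
	KASSERT((io)->io_hdr.io_type == CTL_IO_##T,			\
	    ("%s: unexpected I/O type %x", __func__,			\
	    (io)->io_hdr.io_type))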
@@ -13027,6 +14031,7 @@ ctl_queue_sense(union ctl_io *io) uint32_t initidx, p, targ_lun; CTL_DEBUG_PRINT(("ctl_queue_sense\n")); + CTL_IO_ASSERT(io, SCSI); targ_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun); @@ -13074,7 +14079,22 @@ ctl_queue(union ctl_io *io) { struct ctl_port *port = CTL_PORT(io); - CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0])); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + case CTL_IO_TASK: + CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0])); + break; + case CTL_IO_NVME: + CTL_DEBUG_PRINT(("ctl_queue nvme nvm cmd=%02X\n", + io->nvmeio.cmd.opc)); + break; + case CTL_IO_NVME_ADMIN: + CTL_DEBUG_PRINT(("ctl_queue nvme admin cmd=%02X\n", + io->nvmeio.cmd.opc)); + break; + default: + break; + } #ifdef CTL_TIME_IO io->io_hdr.start_time = time_uptime; @@ -13088,6 +14108,8 @@ ctl_queue(union ctl_io *io) switch (io->io_hdr.io_type) { case CTL_IO_SCSI: case CTL_IO_TASK: + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: if (ctl_debug & CTL_DEBUG_CDB) ctl_io_print(io); ctl_enqueue_incoming(io); @@ -13127,6 +14149,12 @@ ctl_run(union ctl_io *io) ctl_io_print(io); ctl_run_task(io); break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + if (ctl_debug & CTL_DEBUG_CDB) + ctl_io_print(io); + ctl_nvmeio_precheck(&io->nvmeio); + break; default: printf("ctl_run: unknown I/O type %d\n", io->io_hdr.io_type); return (EINVAL); @@ -13170,22 +14198,54 @@ ctl_done(union ctl_io *io) */ #if 0 if (io->io_hdr.flags & CTL_FLAG_ALREADY_DONE) { - printf("%s: type %d msg %d cdb %x iptl: " - "%u:%u:%u tag 0x%04x " - "flag %#x status %x\n", - __func__, - io->io_hdr.io_type, - io->io_hdr.msg_type, - io->scsiio.cdb[0], - io->io_hdr.nexus.initid, - io->io_hdr.nexus.targ_port, - io->io_hdr.nexus.targ_lun, - (io->io_hdr.io_type == - CTL_IO_TASK) ? - io->taskio.tag_num : - io->scsiio.tag_num, - io->io_hdr.flags, - io->io_hdr.status); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + case CTL_IO_TASK: + printf("%s: type %d msg %d cdb %x iptl: " + "%u:%u:%u tag 0x%04lx " + "flag %#x status %x\n", + __func__, + io->io_hdr.io_type, + io->io_hdr.msg_type, + io->scsiio.cdb[0], + io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun, + (io->io_hdr.io_type == CTL_IO_TASK) ? 
+ io->taskio.tag_num : + io->scsiio.tag_num, + io->io_hdr.flags, + io->io_hdr.status); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + printf("%s: type %d msg %d opc %x iptl: " + "%u:%u:%u cid 0x%04x " + "flag %#x status %x\n", + __func__, + io->io_hdr.io_type, + io->io_hdr.msg_type, + io->nvmeio.cmd.opc, + io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun, + io->nvmeio.cmd.cid, + io->io_hdr.flags, + io->io_hdr.status); + break; + default: + printf("%s: type %d msg %d iptl: " + "%u:%u:%u flag %#x status %x\n", + __func__, + io->io_hdr.io_type, + io->io_hdr.msg_type, + io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun, + io->io_hdr.flags, + io->io_hdr.status); + break; + } } else io->io_hdr.flags |= CTL_FLAG_ALREADY_DONE; #endif @@ -13264,19 +14324,41 @@ ctl_work_thread(void *arg) if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->incoming_queue, links); mtx_unlock(&thr->queue_lock); - if (io->io_hdr.io_type == CTL_IO_TASK) + switch (io->io_hdr.io_type) { + case CTL_IO_TASK: ctl_run_task(io); - else + break; + case CTL_IO_SCSI: ctl_scsiio_precheck(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + ctl_nvmeio_precheck(&io->nvmeio); + break; + default: + __assert_unreachable(); + } continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->rtr_queue, links); mtx_unlock(&thr->queue_lock); - retval = ctl_scsiio(&io->scsiio); - if (retval != CTL_RETVAL_COMPLETE) - CTL_DEBUG_PRINT(("ctl_scsiio failed\n")); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + retval = ctl_scsiio(&io->scsiio); + if (retval != CTL_RETVAL_COMPLETE) + CTL_DEBUG_PRINT(("ctl_scsiio failed\n")); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + retval = ctl_nvmeio(&io->nvmeio); + if (retval != CTL_RETVAL_COMPLETE) + CTL_DEBUG_PRINT(("ctl_nvmeio failed\n")); + break; + default: + __assert_unreachable(); + } continue; }
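Taken together, the additions above give NVMe I/O the same life cycle as SCSI I/O: a frontend hands a CTL_IO_NVME or CTL_IO_NVME_ADMIN request to ctl_run() or ctl_queue(), ctl_nvmeio_precheck() resolves the LUN and serializes fused pairs against the OOA queue, ctl_nvmeio() executes the command entry, and data moves through ctl_datamove() before completion. A rough sketch of a minimal submission follows; the cmd fields, io_type values, and entry points are taken from this diff, while the I/O pool argument, the nexus values, and the namespace-to-LUN mapping are illustrative assumptions, not code from any real frontend.

/* Sketch: submit a one-block NVMe READ of LBA 0 through the new path. */
static void
example_submit_nvme_read(struct ctl_port *port, void *io_pool)
{
	union ctl_io *io;

	io = ctl_alloc_io(io_pool);	/* frontend's CTL I/O pool (assumed) */
	ctl_zero_io(io);

	/* An NVM I/O command; admin commands use CTL_IO_NVME_ADMIN. */
	io->io_hdr.io_type = CTL_IO_NVME;
	io->io_hdr.nexus.initid = 0;
	io->io_hdr.nexus.targ_port = port->targ_port;
	io->io_hdr.nexus.targ_lun = 0;	/* e.g. NSID 1 mapped to LUN 0 */

	/* SLBA in CDW10/11, zero-based NLB in CDW12, as parsed above. */
	io->nvmeio.cmd.opc = NVME_OPC_READ;
	io->nvmeio.cmd.cdw10 = htole32(0);	/* SLBA, low 32 bits */
	io->nvmeio.cmd.cdw11 = htole32(0);	/* SLBA, high 32 bits */
	io->nvmeio.cmd.cdw12 = htole32(0);	/* NLB = 0, i.e. one block */

	ctl_run(io);	/* enters ctl_nvmeio_precheck() per this change */
}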