Diffstat (limited to 'sys/cam/ctl/ctl.c')
-rw-r--r-- sys/cam/ctl/ctl.c | 1308
1 file changed, 1195 insertions(+), 113 deletions(-)
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index 5230a2694544..b1367cd79fab 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -83,6 +83,7 @@ #include <cam/ctl/ctl_ha.h> #include <cam/ctl/ctl_private.h> #include <cam/ctl/ctl_debug.h> +#include <cam/ctl/ctl_nvme_all.h> #include <cam/ctl/ctl_scsi_all.h> #include <cam/ctl/ctl_error.h> @@ -447,6 +448,8 @@ static int ctl_scsiio_lun_check(struct ctl_lun *lun, static void ctl_failover_lun(union ctl_io *io); static void ctl_scsiio_precheck(struct ctl_scsiio *ctsio); static int ctl_scsiio(struct ctl_scsiio *ctsio); +static void ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio); +static int ctl_nvmeio(struct ctl_nvmeio *ctnio); static int ctl_target_reset(union ctl_io *io); static void ctl_do_lun_reset(struct ctl_lun *lun, uint32_t initidx, @@ -528,6 +531,38 @@ static moduledata_t ctl_moduledata = { DECLARE_MODULE(ctl, ctl_moduledata, SI_SUB_CONFIGURE, SI_ORDER_THIRD); MODULE_VERSION(ctl, 1); +static void +ctl_be_move_done(union ctl_io *io, bool samethr) +{ + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + io->scsiio.be_move_done(io, samethr); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + io->nvmeio.be_move_done(io, samethr); + break; + default: + __assert_unreachable(); + } +} + +static void +ctl_continue_io(union ctl_io *io) +{ + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + io->scsiio.io_cont(io); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + io->nvmeio.io_cont(io); + break; + default: + __assert_unreachable(); + } +} + static struct ctl_frontend ha_frontend = { .name = "ha", @@ -585,6 +620,8 @@ ctl_ha_datamove(union ctl_io *io) uint32_t sg_entries_sent; int do_sg_copy, i, j; + CTL_IO_ASSERT(io, SCSI); + memset(&msg.dt, 0, sizeof(msg.dt)); msg.hdr.msg_type = CTL_MSG_DATAMOVE; msg.hdr.original_sc = io->io_hdr.remote_io; @@ -601,32 +638,32 @@ ctl_ha_datamove(union ctl_io *io) * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries, * then we need to break this up into multiple transfers. */ - if (io->scsiio.kern_sg_entries == 0) { + if (ctl_kern_sg_entries(io) == 0) { msg.dt.kern_sg_entries = 1; #if 0 if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) { - msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; + msg.dt.sg_list[0].addr = ctl_kern_data_ptr(io); } else { /* XXX KDM use busdma here! 
*/ msg.dt.sg_list[0].addr = - (void *)vtophys(io->scsiio.kern_data_ptr); + (void *)vtophys(ctl_kern_data_ptr(io)); } #else KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0, ("HA does not support BUS_ADDR")); - msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; + msg.dt.sg_list[0].addr = ctl_kern_data_ptr(io); #endif - msg.dt.sg_list[0].len = io->scsiio.kern_data_len; + msg.dt.sg_list[0].len = ctl_kern_data_len(io); do_sg_copy = 0; } else { - msg.dt.kern_sg_entries = io->scsiio.kern_sg_entries; + msg.dt.kern_sg_entries = ctl_kern_sg_entries(io); do_sg_copy = 1; } - msg.dt.kern_data_len = io->scsiio.kern_data_len; - msg.dt.kern_total_len = io->scsiio.kern_total_len; - msg.dt.kern_data_resid = io->scsiio.kern_data_resid; - msg.dt.kern_rel_offset = io->scsiio.kern_rel_offset; + msg.dt.kern_data_len = ctl_kern_data_len(io); + msg.dt.kern_total_len = ctl_kern_total_len(io); + msg.dt.kern_data_resid = ctl_kern_data_resid(io); + msg.dt.kern_rel_offset = ctl_kern_rel_offset(io); msg.dt.sg_sequence = 0; /* @@ -640,7 +677,7 @@ ctl_ha_datamove(union ctl_io *io) sizeof(msg.dt.sg_list[0])), msg.dt.kern_sg_entries - sg_entries_sent); if (do_sg_copy != 0) { - sgl = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; + sgl = (struct ctl_sg_entry *)ctl_kern_data_ptr(io); for (i = sg_entries_sent, j = 0; i < msg.dt.cur_sg_entries; i++, j++) { #if 0 @@ -1496,6 +1533,8 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) /* XXX KDM do something here */ break; } + CTL_IO_ASSERT(io, SCSI); + io->io_hdr.msg_type = CTL_MSG_DATAMOVE; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; /* @@ -1569,6 +1608,8 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) * back to the initiator. */ io = msg->hdr.serializing_sc; + CTL_IO_ASSERT(io, SCSI); + io->io_hdr.msg_type = CTL_MSG_DATAMOVE_DONE; io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; @@ -2410,6 +2451,8 @@ ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num, union ctl_io *io = (union ctl_io *)ioh; struct ctl_ooa_entry *entry; + CTL_IO_ASSERT(io, SCSI); + /* * If we've got more than we can fit, just count the * remaining entries. 
@@ -2644,12 +2687,6 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, } entries = malloc(ooa_hdr->alloc_len, M_CTL, M_WAITOK | M_ZERO); - if (entries == NULL) { - printf("%s: could not allocate %d bytes for OOA " - "dump\n", __func__, ooa_hdr->alloc_len); - retval = ENOMEM; - break; - } mtx_lock(&softc->ctl_lock); if ((ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) == 0 && @@ -3174,6 +3211,23 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, retval = fe->ioctl(dev, cmd, addr, flag, td); break; } + case CTL_NVMF: { + struct ctl_nvmf *cn; + struct ctl_frontend *fe; + + cn = (struct ctl_nvmf *)addr; + + fe = ctl_frontend_find("nvmf"); + if (fe == NULL) { + cn->status = CTL_NVMF_ERROR; + snprintf(cn->error_str, sizeof(cn->error_str), + "Frontend \"nvmf\" not found."); + break; + } + + retval = fe->ioctl(dev, cmd, addr, flag, td); + break; + } case CTL_PORT_REQ: { struct ctl_req *req; struct ctl_frontend *fe; @@ -4576,7 +4630,7 @@ fail: ctl_tpc_lun_init(lun); if (lun->flags & CTL_LUN_REMOVABLE) { lun->prevent = malloc((CTL_MAX_INITIATORS + 31) / 32 * 4, - M_CTL, M_WAITOK); + M_CTL, M_WAITOK | M_ZERO); } /* @@ -4913,6 +4967,91 @@ ctl_lun_capacity_changed(struct ctl_be_lun *be_lun) } } +void +ctl_lun_nsdata_ids(struct ctl_be_lun *be_lun, + struct nvme_namespace_data *nsdata) +{ + struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; + struct scsi_vpd_id_descriptor *idd; + + if (lun->lun_devid == NULL) + return; + + idd = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_naa); + if (idd != NULL) { + if (idd->length == 16) { + memcpy(nsdata->nguid, idd->identifier, 16); + return; + } + if (idd->length == 8) { + memcpy(nsdata->eui64, idd->identifier, 8); + return; + } + } + + idd = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_eui64); + if (idd != NULL) { + if (idd->length == 8) { + memcpy(nsdata->eui64, idd->identifier, 8); + return; + } + } +} + +void +ctl_lun_nvme_ids(struct ctl_be_lun *be_lun, void *data) +{ + struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; + struct scsi_vpd_id_descriptor *naa, *eui64, *uuid; + char *p; + + memset(data, 0, 4096); + + if (lun->lun_devid == NULL) + return; + + naa = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_naa); + eui64 = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_eui64); + uuid = scsi_get_devid_desc((struct scsi_vpd_id_descriptor *) + lun->lun_devid->data, lun->lun_devid->len, scsi_devid_is_lun_uuid); + + p = data; + + /* EUI64 */ + if ((naa != NULL && naa->length == 8) || eui64 != NULL) { + *p++ = 1; + *p++ = 8; + p += 2; + if (naa != NULL && naa->length == 8) + memcpy(p, naa->identifier, 8); + else + memcpy(p, eui64->identifier, 8); + p += 8; + } + + /* NGUID */ + if (naa != NULL && naa->length == 16) { + *p++ = 2; + *p++ = 16; + p += 2; + memcpy(p, naa->identifier, 16); + p += 16; + } + + /* UUID */ + if (uuid != NULL) { + *p++ = 3; + *p++ = uuid->length; + p += 2; + memcpy(p, uuid->identifier, uuid->length); + p += uuid->length; + } +} + /* * Backend "memory move is complete" callback for requests that never * make it down to say RAIDCore's configuration code. 
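The descriptor list that ctl_lun_nvme_ids() builds above is the payload of the NVMe Identify Namespace Identification Descriptor list (CNS 03h): each entry is one NIDT type byte (1 = EUI-64, 2 = NGUID, 3 = UUID), one NIDL length byte, two reserved bytes, and then the identifier itself, and the leading memset() leaves a zero NIDT after the last entry, which terminates the list. As an illustration only (this sketch is not part of the commit), a userland consumer could walk that 4096-byte buffer like this:

#include <stdio.h>

static void
print_ns_id_descriptors(const unsigned char *buf, size_t len)
{
	size_t off = 0;

	/* Each entry: NIDT, NIDL, two reserved bytes, then NIDL ID bytes. */
	while (off + 4 <= len) {
		unsigned nidt = buf[off];	/* descriptor type */
		unsigned nidl = buf[off + 1];	/* identifier length */

		if (nidt == 0)
			break;		/* zeroed tail: end of list */
		if (off + 4 + nidl > len)
			break;		/* truncated entry: stop */

		switch (nidt) {
		case 0x01:
			printf("EUI-64 (%u bytes)\n", nidl);
			break;
		case 0x02:
			printf("NGUID (%u bytes)\n", nidl);
			break;
		case 0x03:
			printf("UUID (%u bytes)\n", nidl);
			break;
		default:
			printf("unknown NIDT %#x (%u bytes)\n", nidt, nidl);
			break;
		}
		off += 4 + nidl;
	}
}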
@@ -4923,8 +5062,6 @@ ctl_config_move_done(union ctl_io *io, bool samethr) int retval; CTL_DEBUG_PRINT(("ctl_config_move_done\n")); - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); if (ctl_debug & CTL_DEBUG_CDB_DATA) ctl_data_print(io); @@ -4938,7 +5075,7 @@ ctl_config_move_done(union ctl_io *io, bool samethr) * we'll need to know how to clean them up here as well. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) - free(io->scsiio.kern_data_ptr, M_CTL); + free(ctl_kern_data_ptr(io), M_CTL); ctl_done(io); retval = CTL_RETVAL_COMPLETE; } else { @@ -4959,7 +5096,17 @@ ctl_config_move_done(union ctl_io *io, bool samethr) * XXX KDM call ctl_scsiio() again for now, and check flag * bits to see whether we're allocated or not. */ - retval = ctl_scsiio(&io->scsiio); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + retval = ctl_scsiio(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + retval = ctl_nvmeio(&io->nvmeio); + break; + default: + __assert_unreachable(); + } } return (retval); } @@ -4983,7 +5130,7 @@ ctl_data_submit_done(union ctl_io *io) (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - io->scsiio.io_cont(io); + ctl_continue_io(io); return; } ctl_done(io); @@ -5010,7 +5157,7 @@ ctl_config_write_done(union ctl_io *io) (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - io->scsiio.io_cont(io); + ctl_continue_io(io); return; } /* @@ -5019,7 +5166,7 @@ ctl_config_write_done(union ctl_io *io) * no data, like start/stop unit, we need to check here. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) - buf = io->scsiio.kern_data_ptr; + buf = ctl_kern_data_ptr(io); else buf = NULL; ctl_done(io); @@ -5039,7 +5186,7 @@ ctl_config_read_done(union ctl_io *io) ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) - buf = io->scsiio.kern_data_ptr; + buf = ctl_kern_data_ptr(io); else buf = NULL; ctl_done(io); @@ -5054,7 +5201,7 @@ ctl_config_read_done(union ctl_io *io) * the I/O just yet. */ if (io->io_hdr.flags & CTL_FLAG_IO_CONT) { - io->scsiio.io_cont(io); + ctl_continue_io(io); return; } @@ -5439,7 +5586,7 @@ ctl_read_buffer(struct ctl_scsiio *ctsio) } else { if (lun->write_buffer == NULL) { lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, - M_CTL, M_WAITOK); + M_CTL, M_WAITOK | M_ZERO); } ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; } @@ -5478,21 +5625,24 @@ ctl_write_buffer(struct ctl_scsiio *ctsio) return (CTL_RETVAL_COMPLETE); } + if (lun->write_buffer == NULL) { + lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, + M_CTL, M_WAITOK | M_ZERO); + } + /* - * If we've got a kernel request that hasn't been malloced yet, - * malloc it and tell the caller the data buffer is here. + * If this kernel request hasn't started yet, initialize the data + * buffer to the correct region of the LUN's write buffer. Note that + * this doesn't set CTL_FLAG_ALLOCATED since this points into a + * persistent buffer belonging to the LUN rather than a buffer + * dedicated to this request. 
*/ - if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { - if (lun->write_buffer == NULL) { - lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, - M_CTL, M_WAITOK); - } + if (ctsio->kern_data_ptr == NULL) { ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; - ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); @@ -5512,6 +5662,8 @@ ctl_write_same_cont(union ctl_io *io) struct ctl_lba_len_flags *lbalen; int retval; + CTL_IO_ASSERT(io, SCSI); + ctsio = &io->scsiio; ctsio->io_hdr.status = CTL_STATUS_NONE; lbalen = (struct ctl_lba_len_flags *) @@ -5857,6 +6009,8 @@ ctl_do_mode_select(union ctl_io *io) uint16_t *len_left, *len_used; int retval, i; + CTL_IO_ASSERT(io, SCSI); + ctsio = &io->scsiio; page_index = NULL; page_len = 0; @@ -7302,34 +7456,26 @@ ctl_report_supported_opcodes(struct ctl_scsiio *ctsio) break; case RSO_OPTIONS_OC: if (ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) { - ctl_set_invalid_field(/*ctsio*/ ctsio, - /*sks_valid*/ 1, - /*command*/ 1, - /*field*/ 2, - /*bit_valid*/ 1, - /*bit*/ 2); - ctl_done((union ctl_io *)ctsio); - return (CTL_RETVAL_COMPLETE); + goto invalid_options; } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; case RSO_OPTIONS_OC_SA: if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) == 0 || service_action >= 32) { - ctl_set_invalid_field(/*ctsio*/ ctsio, - /*sks_valid*/ 1, - /*command*/ 1, - /*field*/ 2, - /*bit_valid*/ 1, - /*bit*/ 2); - ctl_done((union ctl_io *)ctsio); - return (CTL_RETVAL_COMPLETE); + goto invalid_options; } - /* FALLTHROUGH */ + total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; + break; case RSO_OPTIONS_OC_ASA: + if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) != 0 && + service_action >= 32) { + goto invalid_options; + } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; default: +invalid_options: ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, @@ -8770,6 +8916,8 @@ ctl_cnw_cont(union ctl_io *io) struct ctl_lba_len_flags *lbalen; int retval; + CTL_IO_ASSERT(io, SCSI); + ctsio = &io->scsiio; ctsio->io_hdr.status = CTL_STATUS_NONE; ctsio->io_hdr.flags &= ~CTL_FLAG_IO_CONT; @@ -9145,14 +9293,8 @@ ctl_request_sense(struct ctl_scsiio *ctsio) sense_ptr = (struct scsi_sense_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; ctsio->kern_rel_offset = 0; - - /* - * struct scsi_sense_data, which is currently set to 256 bytes, is - * larger than the largest allowed value for the length field in the - * REQUEST SENSE CDB, which is 252 bytes as of SPC-4. - */ - ctsio->kern_data_len = cdb->length; - ctsio->kern_total_len = cdb->length; + ctsio->kern_data_len = ctsio->kern_total_len = + MIN(cdb->length, sizeof(*sense_ptr)); /* * If we don't have a LUN, we don't have any pending sense. @@ -10554,14 +10696,732 @@ ctl_read_toc(struct ctl_scsiio *ctsio) } /* + * For NVMe commands, parse the LBA and length. 
+ */ +static bool +ctl_nvme_get_lba_len(struct ctl_nvmeio *ctnio, uint64_t *lba, uint32_t *len) +{ + CTL_IO_ASSERT(ctnio, NVME); + + switch (ctnio->cmd.opc) { + case NVME_OPC_WRITE: + case NVME_OPC_READ: + case NVME_OPC_WRITE_UNCORRECTABLE: + case NVME_OPC_COMPARE: + case NVME_OPC_WRITE_ZEROES: + case NVME_OPC_VERIFY: + *lba = (uint64_t)le32toh(ctnio->cmd.cdw11) << 32 | + le32toh(ctnio->cmd.cdw10); + *len = (le32toh(ctnio->cmd.cdw12) & 0xffff) + 1; + return (true); + default: + *lba = 0; + *len = 0; + return (false); + } +} + +static bool +ctl_nvme_fua(struct ctl_nvmeio *ctnio) +{ + return ((le32toh(ctnio->cmd.cdw12) & (1U << 30)) != 0); +} + +int +ctl_nvme_identify(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + size_t len; + int retval; + uint8_t cns; + + CTL_DEBUG_PRINT(("ctl_nvme_identify\n")); + + CTL_IO_ASSERT(ctnio, NVME_ADMIN); + MPASS(ctnio->cmd.opc == NVME_OPC_IDENTIFY); + + /* + * The data buffer for Identify is always 4096 bytes, see + * 5.51.1 in NVMe base specification 1.4. + */ + len = 4096; + + ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK); + ctnio->kern_data_len = len; + ctnio->kern_total_len = len; + ctnio->kern_rel_offset = 0; + ctnio->kern_sg_entries = 0; + + ctl_nvme_set_success(ctnio); + ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED; + ctnio->be_move_done = ctl_config_move_done; + + /* + * If we don't have a LUN, return an empty result for CNS == 0. + */ + if (lun == NULL) { + cns = le32toh(ctnio->cmd.cdw10) & 0xff; + switch (cns) { + case 0: + memset(ctnio->kern_data_ptr, 0, len); + ctl_datamove((union ctl_io *)ctnio); + break; + default: + ctl_nvme_set_invalid_field(ctnio); + break; + } + return (CTL_RETVAL_COMPLETE); + } + + retval = lun->backend->config_read((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_flush(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_flush\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_FLUSH); + + /* + * NVMe flushes always flush the entire namespace, not an LBA + * range. + */ + retval = lun->backend->config_write((union ctl_io *)ctnio); + + return (retval); +} + +int +ctl_nvme_read_write(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int flags, retval; + bool isread; + + CTL_DEBUG_PRINT(("ctl_nvme_read_write: command: %#x\n", + ctnio->cmd.opc)); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_WRITE || + ctnio->cmd.opc == NVME_OPC_READ); + + flags = 0; + isread = ctnio->cmd.opc == NVME_OPC_READ; + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. + */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + /* + * Set FUA and/or DPO if caches are disabled. + * + * For a read this may not be quite correct for the block + * backend as any earlier writes to the LBA range should be + * flushed to backing store as part of the read. 
+ */ + if (ctl_nvme_fua(ctnio)) { + flags |= CTL_LLF_FUA; + if (isread) + flags |= CTL_LLF_DPO; + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags; + + ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize; + ctnio->kern_rel_offset = 0; + + CTL_DEBUG_PRINT(("ctl_nvme_read_write: calling data_submit()\n")); + + retval = lun->backend->data_submit((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_write_uncorrectable\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_UNCORRECTABLE); + + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. + */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = 0; + retval = lun->backend->config_write((union ctl_io *)ctnio); + + return (retval); +} + +int +ctl_nvme_compare(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int flags; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_compare\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_COMPARE); + + flags = 0; + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + if (ctl_nvme_fua(ctnio)) + flags |= CTL_LLF_FUA; + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. + */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = CTL_LLF_COMPARE | flags; + ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize; + ctnio->kern_rel_offset = 0; + + CTL_DEBUG_PRINT(("ctl_nvme_compare: calling data_submit()\n")); + retval = lun->backend->data_submit((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_write_zeroes\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_ZEROES); + + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. 
+ */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = 0; + retval = lun->backend->config_write((union ctl_io *)ctnio); + + return (retval); +} + +int +ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct nvme_dsm_range *r; + uint64_t lba; + uint32_t len, num_blocks; + u_int i, ranges; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_dataset_management\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_DATASET_MANAGEMENT); + + /* The NR field in CDW10 is 0's based. */ + ranges = (le32toh(ctnio->cmd.cdw10) & 0xff) + 1; + len = ranges * sizeof(struct nvme_dsm_range); + + /* + * If we've got a kernel request that hasn't been malloced yet, + * malloc it and tell the caller the data buffer is here. + */ + if ((ctnio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { + ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK); + ctnio->kern_data_len = len; + ctnio->kern_total_len = len; + ctnio->kern_rel_offset = 0; + ctnio->kern_sg_entries = 0; + ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED; + ctnio->be_move_done = ctl_config_move_done; + ctl_datamove((union ctl_io *)ctnio); + + return (CTL_RETVAL_COMPLETE); + } + + /* + * Require a flat buffer of the correct size. + */ + if (ctnio->kern_sg_entries > 0 || + ctnio->kern_total_len - ctnio->kern_data_resid != len) + return (CTL_RETVAL_ERROR); + + /* + * Verify that none of the ranges are out of bounds. + */ + r = (struct nvme_dsm_range *)ctnio->kern_data_ptr; + for (i = 0; i < ranges; i++) { + lba = le64toh(r[i].starting_lba); + num_blocks = le32toh(r[i].length); + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + } + + CTL_DEBUG_PRINT(("ctl_nvme_dataset_management: calling config_write()\n")); + retval = lun->backend->config_write((union ctl_io *)ctnio); + return (retval); +} + +int +ctl_nvme_verify(struct ctl_nvmeio *ctnio) +{ + struct ctl_lun *lun = CTL_LUN(ctnio); + struct ctl_lba_len_flags *lbalen; + uint64_t lba; + uint32_t num_blocks; + int flags; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvme_verify\n")); + + CTL_IO_ASSERT(ctnio, NVME); + MPASS(ctnio->cmd.opc == NVME_OPC_VERIFY); + + flags = 0; + ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks); + if (ctl_nvme_fua(ctnio)) + flags |= CTL_LLF_FUA; + + /* + * The first check is to make sure we're in bounds, the second + * check is to catch wrap-around problems. If the lba + num blocks + * is less than the lba, then we've wrapped around and the block + * range is invalid anyway. 
+ */ + if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) + || ((lba + num_blocks) < lba)) { + ctl_nvme_set_lba_out_of_range(ctnio); + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + lbalen = (struct ctl_lba_len_flags *) + &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; + lbalen->lba = lba; + lbalen->len = num_blocks; + lbalen->flags = CTL_LLF_VERIFY | flags; + ctnio->kern_total_len = 0; + ctnio->kern_rel_offset = 0; + + CTL_DEBUG_PRINT(("ctl_nvme_verify: calling data_submit()\n")); + retval = lun->backend->data_submit((union ctl_io *)ctnio); + return (retval); +} + +static const struct ctl_nvme_cmd_entry * +ctl_nvme_get_cmd_entry(struct ctl_nvmeio *ctnio) +{ + const struct ctl_nvme_cmd_entry *entry; + + switch (ctnio->io_hdr.io_type) { + case CTL_IO_NVME: + entry = &nvme_nvm_cmd_table[ctnio->cmd.opc]; + break; + case CTL_IO_NVME_ADMIN: + entry = &nvme_admin_cmd_table[ctnio->cmd.opc]; + break; + default: + __assert_unreachable(); + } + return (entry); +} + +static const struct ctl_nvme_cmd_entry * +ctl_nvme_validate_command(struct ctl_nvmeio *ctnio) +{ + const struct ctl_nvme_cmd_entry *entry; + + entry = ctl_nvme_get_cmd_entry(ctnio); + if (entry->execute == NULL) { + ctl_nvme_set_invalid_opcode(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + + /* Validate fused commands: Compare must be first, Write second. */ + switch (NVMEV(NVME_CMD_FUSE, ctnio->cmd.fuse)) { + case NVME_FUSE_NORMAL: + break; + case NVME_FUSE_FIRST: + if (ctnio->io_hdr.io_type != CTL_IO_NVME || + ctnio->cmd.opc != NVME_OPC_COMPARE) { + ctl_nvme_set_invalid_field(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + break; + case NVME_FUSE_SECOND: + if (ctnio->io_hdr.io_type != CTL_IO_NVME || + ctnio->cmd.opc != NVME_OPC_WRITE) { + ctl_nvme_set_invalid_field(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + break; + default: + ctl_nvme_set_invalid_field(ctnio); + ctl_done((union ctl_io *)ctnio); + return (NULL); + } + + return (entry); +} + +/* + * This is a simpler version of ctl_scsiio_lun_check that fails + * requests on a LUN without active media. + * + * Returns true if the command has been completed with an error. + */ +static bool +ctl_nvmeio_lun_check(struct ctl_lun *lun, + const struct ctl_nvme_cmd_entry *entry, struct ctl_nvmeio *ctnio) +{ + mtx_assert(&lun->lun_lock, MA_OWNED); + + if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) == 0) { + if ((lun->flags & (CTL_LUN_EJECTED | CTL_LUN_NO_MEDIA | + CTL_LUN_STOPPED)) != 0) { + ctl_nvme_set_namespace_not_ready(ctnio); + return (true); + } + } + + return (false); +} + +/* + * Check for blockage against the OOA (Order Of Arrival) queue. + * Assumptions: + * - pending_io is generally either incoming, or on the blocked queue + * - starting I/O is the I/O we want to start the check with. + */ +static ctl_action +ctl_nvme_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, + union ctl_io **starting_io, union ctl_io **aborted_io) +{ + union ctl_io *ooa_io = *starting_io; + + CTL_IO_ASSERT(pending_io, NVME, NVME_ADMIN); + + mtx_assert(&lun->lun_lock, MA_OWNED); + + *aborted_io = NULL; + + /* + * Aborted commands are not going to be executed and may even + * not report completion, so we don't care about their order. + * Let them complete ASAP to clean the OOA queue. + */ + if (__predict_false(pending_io->io_hdr.flags & CTL_FLAG_ABORT)) + return (CTL_ACTION_PASS); + + /* + * NVMe has rather simple command ordering requirements. 
In + * particular, there is no requirement on the controller to + * enforce a specific order for overlapping LBAs. The only + * constraint is that fused operations (Compare and Write), + * must be completed as a unit. + * + * To support fused operations, the following strategy is used: + * - the first half of a fused command is not enqueued to rtr + * until the second half is enqueued + * - the second half of a fused command blocks on the first + * half of a fuse command + * - subsequent commands block on the second half of the + * fused command + */ + + /* + * Is the previously submitted command the first half of a + * fused operation? + */ + if (ooa_io != NULL && + NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == NVME_FUSE_FIRST) { + /* + * If this is the second half, enqueue the first half + * and block the second half on the first half. + */ + if (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse) == + NVME_FUSE_SECOND) { + /* + * XXX: Do we need to wait for other rtr requests + * to drain so this is truly atomic? + */ + return (CTL_ACTION_FUSED); + } + + /* Abort the first half. */ + ctl_nvme_set_missing_fused_command(&ooa_io->nvmeio); + *aborted_io = ooa_io; + } else { + switch (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse)) { + case NVME_FUSE_FIRST: + /* First half, wait for the second half. */ + return (CTL_ACTION_SKIP); + case NVME_FUSE_SECOND: + /* Second half without a matching first half, abort. */ + ctl_nvme_set_missing_fused_command(&pending_io->nvmeio); + *aborted_io = pending_io; + return (CTL_ACTION_SKIP); + } + } + + /* + * Scan the OOA queue looking for the most recent second half + * of a fused op. + */ + for (; ooa_io != NULL; + ooa_io = (union ctl_io *)LIST_NEXT(&ooa_io->io_hdr, ooa_links)) { + if (NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == + NVME_FUSE_SECOND) { + *starting_io = ooa_io; + return (CTL_ACTION_BLOCK); + } + } + + *starting_io = NULL; + return (CTL_ACTION_PASS); +} + +static void +ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio) +{ + struct ctl_softc *softc = CTL_SOFTC(ctnio); + struct ctl_lun *lun; + const struct ctl_nvme_cmd_entry *entry; + union ctl_io *bio, *aborted_io; + uint32_t targ_lun; + + lun = NULL; + targ_lun = ctnio->io_hdr.nexus.targ_mapped_lun; + if (targ_lun < ctl_max_luns) + lun = softc->ctl_luns[targ_lun]; + if (lun != NULL) { + /* + * If the LUN is invalid, pretend that it doesn't exist. + * It will go away as soon as all pending I/O has been + * completed. + */ + mtx_lock(&lun->lun_lock); + if (lun->flags & CTL_LUN_DISABLED) { + mtx_unlock(&lun->lun_lock); + lun = NULL; + } + } + CTL_LUN(ctnio) = lun; + if (lun != NULL) { + CTL_BACKEND_LUN(ctnio) = lun->be_lun; + + /* + * Every I/O goes into the OOA queue for a particular LUN, + * and stays there until completion. + */ +#ifdef CTL_TIME_IO + if (LIST_EMPTY(&lun->ooa_queue)) + lun->idle_time += getsbinuptime() - lun->last_busy; +#endif + LIST_INSERT_HEAD(&lun->ooa_queue, &ctnio->io_hdr, ooa_links); + } + + /* Get command entry and return error if it is unsupported. */ + entry = ctl_nvme_validate_command(ctnio); + if (entry == NULL) { + if (lun) + mtx_unlock(&lun->lun_lock); + return; + } + + ctnio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK; + ctnio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK; + + /* All NVMe commands other than IDENTIFY require a LUN. 
*/ + if (lun == NULL) { + if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) { + ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + ctl_enqueue_rtr((union ctl_io *)ctnio); + return; + } + + ctl_nvme_set_invalid_namespace(ctnio); + ctl_done((union ctl_io *)ctnio); + CTL_DEBUG_PRINT(("ctl_nvmeio_precheck: bailing out due to invalid LUN\n")); + return; + } else { + /* + * NVMe namespaces can only be backed by T_DIRECT LUNs. + */ + if (lun->be_lun->lun_type != T_DIRECT) { + mtx_unlock(&lun->lun_lock); + ctl_nvme_set_invalid_namespace(ctnio); + ctl_done((union ctl_io *)ctnio); + return; + } + } + + if (ctl_nvmeio_lun_check(lun, entry, ctnio) != 0) { + mtx_unlock(&lun->lun_lock); + ctl_done((union ctl_io *)ctnio); + return; + } + + bio = (union ctl_io *)LIST_NEXT(&ctnio->io_hdr, ooa_links); + switch (ctl_nvme_check_ooa(lun, (union ctl_io *)ctnio, &bio, + &aborted_io)) { + case CTL_ACTION_PASS: + ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + mtx_unlock(&lun->lun_lock); + ctl_enqueue_rtr((union ctl_io *)ctnio); + break; + case CTL_ACTION_FUSED: + /* Block the second half on the first half. */ + ctnio->io_hdr.blocker = bio; + TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr, + blocked_links); + + /* Pass the first half. */ + bio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + mtx_unlock(&lun->lun_lock); + ctl_enqueue_rtr(bio); + break; + case CTL_ACTION_SKIP: + mtx_unlock(&lun->lun_lock); + break; + case CTL_ACTION_BLOCK: + ctnio->io_hdr.blocker = bio; + TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr, + blocked_links); + mtx_unlock(&lun->lun_lock); + break; + default: + __assert_unreachable(); + } + if (aborted_io != NULL) + ctl_done(aborted_io); +} + +static int +ctl_nvmeio(struct ctl_nvmeio *ctnio) +{ + const struct ctl_nvme_cmd_entry *entry; + int retval; + + CTL_DEBUG_PRINT(("ctl_nvmeio %s opc=%02X\n", + ctnio->io_hdr.io_type == CTL_IO_NVME ? "nvm" : "admin", + ctnio->cmd.opc)); + + entry = ctl_nvme_get_cmd_entry(ctnio); + MPASS(entry != NULL); + + /* + * If this I/O has been aborted, just send it straight to + * ctl_done() without executing it. + */ + if (ctnio->io_hdr.flags & CTL_FLAG_ABORT) { + ctl_done((union ctl_io *)ctnio); + return (CTL_RETVAL_COMPLETE); + } + + /* + * All the checks should have been handled by ctl_nvmeio_precheck(). + * We should be clear now to just execute the I/O. + */ + retval = entry->execute(ctnio); + + return (retval); +} + +/* * For known CDB types, parse the LBA and length. */ static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len) { - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); + CTL_IO_ASSERT(io, SCSI); switch (io->scsiio.cdb[0]) { case COMPARE_AND_WRITE: { @@ -10741,8 +11601,7 @@ ctl_extent_check_unmap(union ctl_io *io, uint64_t lba2, uint64_t len2) uint64_t lba; uint32_t len; - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); + CTL_IO_ASSERT(io, SCSI); /* If not UNMAP -- go other way. 
*/ if (io->scsiio.cdb[0] != UNMAP) @@ -10812,6 +11671,8 @@ static ctl_action ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io, const uint8_t *serialize_row, union ctl_io *ooa_io) { + CTL_IO_ASSERT(pending_io, SCSI); + CTL_IO_ASSERT(ooa_io, SCSI); /* * The initiator attempted multiple untagged commands at the same @@ -10920,6 +11781,8 @@ ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, const uint8_t *serialize_row; ctl_action action; + CTL_IO_ASSERT(pending_io, SCSI); + mtx_assert(&lun->lun_lock, MA_OWNED); /* @@ -10969,7 +11832,7 @@ ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, * we know for sure that the blocker I/O does no longer count. */ static void -ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) +ctl_scsi_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) { struct ctl_softc *softc = lun->ctl_softc; union ctl_io *bio, *obio; @@ -10977,6 +11840,8 @@ ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) union ctl_ha_msg msg_info; ctl_action action; + CTL_IO_ASSERT(io, SCSI); + mtx_assert(&lun->lun_lock, MA_OWNED); if (io->io_hdr.blocker == NULL) @@ -11062,6 +11927,72 @@ error: } } +static void +ctl_nvme_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) +{ + union ctl_io *bio; + const struct ctl_nvme_cmd_entry *entry; + + CTL_IO_ASSERT(io, NVME, NVME_ADMIN); + + mtx_assert(&lun->lun_lock, MA_OWNED); + + if (io->io_hdr.blocker == NULL) + return; + + /* + * If this is the second half of a fused operation, it should + * be the only io on the blocked list. If the first half + * failed, complete the second half with an appropriate error. + */ + bio = io->io_hdr.blocker; + if (NVMEV(NVME_CMD_FUSE, io->nvmeio.cmd.fuse) == NVME_FUSE_SECOND) { + MPASS(io == + (union ctl_io *)TAILQ_FIRST(&bio->io_hdr.blocked_queue)); + MPASS(TAILQ_NEXT(&io->io_hdr, blocked_links) == NULL); + + TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr, + blocked_links); + if (bio->io_hdr.status != CTL_SUCCESS) { + ctl_nvme_set_failed_fused_command(&io->nvmeio); + ctl_done(io); + return; + } + } else { + /* + * This must be a command that was blocked on the + * second half of a fused operation. + */ + MPASS(NVMEV(NVME_CMD_FUSE, bio->nvmeio.cmd.fuse) == + NVME_FUSE_SECOND); + TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr, + blocked_links); + } + + entry = ctl_nvme_get_cmd_entry(&io->nvmeio); + if (ctl_nvmeio_lun_check(lun, entry, &io->nvmeio) != 0) { + ctl_done(io); + return; + } + + io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; + ctl_enqueue_rtr(io); +} + +static void +ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip) +{ + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + return (ctl_scsi_try_unblock_io(lun, io, skip)); + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + return (ctl_nvme_try_unblock_io(lun, io, skip)); + default: + __assert_unreachable(); + } +} + /* * Try to unblock I/Os blocked by the specified I/O. 
* @@ -11217,6 +12148,8 @@ bailout: static void ctl_failover_io(union ctl_io *io, int have_lock) { + CTL_IO_ASSERT(io, SCSI); + ctl_set_busy(&io->scsiio); ctl_done(io); } @@ -11773,6 +12706,7 @@ ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id, */ LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) { union ctl_io *xio = (union ctl_io *)xioh; + if ((targ_port == UINT32_MAX || targ_port == xioh->nexus.targ_port) && (init_id == UINT32_MAX || @@ -11784,6 +12718,7 @@ ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id, if (!other_sc && !(lun->flags & CTL_LUN_PRIMARY_SC)) { union ctl_ha_msg msg_info; + CTL_IO_ASSERT(xio, SCSI); msg_info.hdr.nexus = xioh->nexus; msg_info.task.task_action = CTL_TASK_ABORT_TASK; msg_info.task.tag_num = xio->scsiio.tag_num; @@ -11924,6 +12859,8 @@ ctl_abort_task(union ctl_io *io) */ LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) { union ctl_io *xio = (union ctl_io *)xioh; + + CTL_IO_ASSERT(xio, SCSI); if ((xioh->nexus.targ_port != io->io_hdr.nexus.targ_port) || (xioh->nexus.initid != io->io_hdr.nexus.initid) || (xioh->flags & CTL_FLAG_ABORT)) @@ -11995,6 +12932,8 @@ ctl_query_task(union ctl_io *io, int task_set) mtx_unlock(&softc->ctl_lock); LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) { union ctl_io *xio = (union ctl_io *)xioh; + + CTL_IO_ASSERT(xio, SCSI); if ((xioh->nexus.targ_port != io->io_hdr.nexus.targ_port) || (xioh->nexus.initid != io->io_hdr.nexus.initid) || (xioh->flags & CTL_FLAG_ABORT)) @@ -12108,6 +13047,8 @@ ctl_handle_isc(union ctl_io *io) const struct ctl_cmd_entry *entry; uint32_t targ_lun; + CTL_IO_ASSERT(io, SCSI); + targ_lun = io->io_hdr.nexus.targ_mapped_lun; switch (io->io_hdr.msg_type) { case CTL_MSG_SERIALIZE: @@ -12243,6 +13184,8 @@ ctl_inject_error(struct ctl_lun *lun, union ctl_io *io) { struct ctl_error_desc *desc, *desc2; + CTL_IO_ASSERT(io, SCSI); + mtx_assert(&lun->lun_lock, MA_OWNED); STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) { @@ -12321,12 +13264,7 @@ ctl_datamove_done_process(union ctl_io *io) { #ifdef CTL_TIME_IO struct bintime cur_bt; -#endif - - KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, - ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); -#ifdef CTL_TIME_IO getbinuptime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); bintime_add(&io->io_hdr.dma_bt, &cur_bt); @@ -12336,13 +13274,36 @@ ctl_datamove_done_process(union ctl_io *io) if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, - /*retry_count*/ io->io_hdr.port_status); - } else if (io->scsiio.kern_data_resid != 0 && + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, + /*retry_count*/ io->io_hdr.port_status); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + if (io->io_hdr.flags & CTL_FLAG_ABORT) + ctl_nvme_set_command_aborted(&io->nvmeio); + else + ctl_nvme_set_data_transfer_error(&io->nvmeio); + break; + default: + __assert_unreachable(); + } + } else if (ctl_kern_data_resid(io) != 0 && (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { - ctl_set_invalid_field_ciu(&io->scsiio); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + ctl_set_invalid_field_ciu(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + 
ctl_nvme_set_data_transfer_error(&io->nvmeio); + break; + default: + __assert_unreachable(); + } } else if (ctl_debug & CTL_DEBUG_CDB_DATA) ctl_data_print(io); } @@ -12352,7 +13313,7 @@ ctl_datamove_done(union ctl_io *io, bool samethr) { ctl_datamove_done_process(io); - io->scsiio.be_move_done(io, samethr); + ctl_be_move_done(io, samethr); } void @@ -12365,7 +13326,7 @@ ctl_datamove(union ctl_io *io) CTL_DEBUG_PRINT(("ctl_datamove\n")); /* No data transferred yet. Frontend must update this when done. */ - io->scsiio.kern_data_resid = io->scsiio.kern_data_len; + ctl_set_kern_data_resid(io, ctl_kern_data_len(io)); #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); @@ -12398,20 +13359,33 @@ ctl_datamove(union ctl_io *io) * the data move. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { - printf("ctl_datamove: tag 0x%jx on (%u:%u:%u) aborted\n", - io->scsiio.tag_num, io->io_hdr.nexus.initid, - io->io_hdr.nexus.targ_port, - io->io_hdr.nexus.targ_lun); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + printf("ctl_datamove: tag 0x%jx on (%u:%u:%u) aborted\n", + io->scsiio.tag_num, io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + printf("ctl_datamove: CID 0x%x on (%u:%u:%u) aborted\n", + le16toh(io->nvmeio.cmd.cid), + io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun); + break; + default: + __assert_unreachable(); + } io->io_hdr.port_status = 31337; ctl_datamove_done_process(io); - io->scsiio.be_move_done(io, true); + ctl_be_move_done(io, true); return; } /* Don't confuse frontend with zero length data move. */ - if (io->scsiio.kern_data_len == 0) { + if (ctl_kern_data_len(io) == 0) { ctl_datamove_done_process(io); - io->scsiio.be_move_done(io, true); + ctl_be_move_done(io, true); return; } @@ -12427,6 +13401,8 @@ ctl_send_datamove_done(union ctl_io *io, int have_lock) struct bintime cur_bt; #endif + CTL_IO_ASSERT(io, SCSI); + memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_DATAMOVE_DONE; msg.hdr.original_sc = io; @@ -12469,6 +13445,7 @@ ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq) uint32_t i; io = rq->context; + CTL_IO_ASSERT(io, SCSI); if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA write failed with error %d", __func__, @@ -12513,6 +13490,8 @@ ctl_datamove_remote_write(union ctl_io *io) int retval; void (*fe_datamove)(union ctl_io *io); + CTL_IO_ASSERT(io, SCSI); + /* * - Get the data from the host/HBA into local memory. * - DMA memory from the local controller to the remote controller. @@ -12541,6 +13520,8 @@ ctl_datamove_remote_dm_read_cb(union ctl_io *io, bool samethr) { uint32_t i; + CTL_IO_ASSERT(io, SCSI); + for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(CTL_LSGLT(io)[i].addr, M_CTL); free(CTL_RSGL(io), M_CTL); @@ -12563,6 +13544,7 @@ ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq) void (*fe_datamove)(union ctl_io *io); io = rq->context; + CTL_IO_ASSERT(io, SCSI); if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA read failed with error %d\n", __func__, @@ -12597,6 +13579,8 @@ ctl_datamove_remote_sgl_setup(union ctl_io *io) int retval; int i; + CTL_IO_ASSERT(io, SCSI); + retval = 0; local_sglist = CTL_LSGL(io); len_to_go = io->scsiio.kern_data_len; @@ -12634,6 +13618,8 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command, rq = ctl_dt_req_alloc(); + CTL_IO_ASSERT(io, SCSI); + /* * If we failed to allocate the request, and if the DMA didn't fail * anyway, set busy status. 
This is just a resource allocation @@ -12767,7 +13753,7 @@ ctl_datamove_remote_read(union ctl_io *io) * datamove done message, or call the callback with an * error if there is a problem. */ - for (i = 0; i < io->scsiio.kern_sg_entries; i++) + for (i = 0; i < ctl_kern_sg_entries(io); i++) free(CTL_LSGLT(io)[i].addr, M_CTL); free(CTL_RSGL(io), M_CTL); CTL_RSGL(io) = NULL; @@ -12785,6 +13771,7 @@ ctl_datamove_remote_read(union ctl_io *io) static void ctl_datamove_remote(union ctl_io *io) { + CTL_IO_ASSERT(io, SCSI); mtx_assert(&((struct ctl_softc *)CTL_SOFTC(io))->ctl_lock, MA_NOTOWNED); @@ -12837,7 +13824,7 @@ ctl_process_done(union ctl_io *io) char path_str[64]; struct sbuf sb; - ctl_scsi_path_string(io, path_str, sizeof(path_str)); + ctl_scsi_path_string(&io->io_hdr, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); ctl_io_sbuf(io, &sb); @@ -12851,6 +13838,8 @@ ctl_process_done(union ctl_io *io) switch (io->io_hdr.io_type) { case CTL_IO_SCSI: + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: break; case CTL_IO_TASK: if (ctl_debug & CTL_DEBUG_INFO) @@ -12881,6 +13870,8 @@ ctl_process_done(union ctl_io *io) uint8_t mrie = lun->MODE_IE.mrie; uint8_t per = ((lun->MODE_RWER.byte3 & SMS_RWER_PER) || (lun->MODE_VER.byte3 & SMS_VER_PER)); + + CTL_IO_ASSERT(io, SCSI); if (((mrie == SIEP_MRIE_REC_COND && per) || mrie == SIEP_MRIE_REC_UNCOND || mrie == SIEP_MRIE_NO_SENSE) && @@ -12914,7 +13905,9 @@ ctl_process_done(union ctl_io *io) * XXX KDM should we also track I/O latency? */ if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS && - io->io_hdr.io_type == CTL_IO_SCSI) { + (io->io_hdr.io_type == CTL_IO_SCSI || + io->io_hdr.io_type == CTL_IO_NVME || + io->io_hdr.io_type == CTL_IO_NVME_ADMIN)) { int type; #ifdef CTL_TIME_IO struct bintime bt; @@ -12931,7 +13924,7 @@ ctl_process_done(union ctl_io *io) else type = CTL_STATS_NO_IO; - lun->stats.bytes[type] += io->scsiio.kern_total_len; + lun->stats.bytes[type] += ctl_kern_total_len(io); lun->stats.operations[type] ++; lun->stats.dmas[type] += io->io_hdr.num_dmas; #ifdef CTL_TIME_IO @@ -12940,7 +13933,7 @@ ctl_process_done(union ctl_io *io) #endif mtx_lock(&port->port_lock); - port->stats.bytes[type] += io->scsiio.kern_total_len; + port->stats.bytes[type] += ctl_kern_total_len(io); port->stats.operations[type] ++; port->stats.dmas[type] += io->io_hdr.num_dmas; #ifdef CTL_TIME_IO @@ -12984,8 +13977,19 @@ bailout: * properly. The FETD is responsible for freeing the I/O and doing * whatever it needs to do to clean up its state. */ - if (io->io_hdr.flags & CTL_FLAG_ABORT) - ctl_set_task_aborted(&io->scsiio); + if (io->io_hdr.flags & CTL_FLAG_ABORT) { + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + ctl_set_task_aborted(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + ctl_nvme_set_command_aborted(&io->nvmeio); + break; + default: + __assert_unreachable(); + } + } /* * If enabled, print command error status. 
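A recurring pattern in this change is worth calling out: the open-coded KASSERTs on io_hdr.io_type are replaced by CTL_IO_ASSERT(), whose definition lives in ctl_io.h rather than in this file. Judging from the assertions being removed in this diff, a single-type variant would amount to the hypothetical sketch below; the real macro additionally accepts a second type so shared NVMe paths can write CTL_IO_ASSERT(io, NVME, NVME_ADMIN).

/*
 * Hypothetical one-type sketch of CTL_IO_ASSERT(); reconstructed from
 * the KASSERT calls this commit deletes, not copied from ctl_io.h.
 */
#define	CTL_IO_ASSERT_ONE(io, T)					\
	KASSERT((io)->io_hdr.io_type == CTL_IO_##T,			\
	    ("%s: unexpected I/O type %x", __func__,			\
	    (io)->io_hdr.io_type))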
@@ -13027,6 +14031,7 @@ ctl_queue_sense(union ctl_io *io) uint32_t initidx, p, targ_lun; CTL_DEBUG_PRINT(("ctl_queue_sense\n")); + CTL_IO_ASSERT(io, SCSI); targ_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun); @@ -13074,7 +14079,22 @@ ctl_queue(union ctl_io *io) { struct ctl_port *port = CTL_PORT(io); - CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0])); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + case CTL_IO_TASK: + CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0])); + break; + case CTL_IO_NVME: + CTL_DEBUG_PRINT(("ctl_queue nvme nvm cmd=%02X\n", + io->nvmeio.cmd.opc)); + break; + case CTL_IO_NVME_ADMIN: + CTL_DEBUG_PRINT(("ctl_queue nvme admin cmd=%02X\n", + io->nvmeio.cmd.opc)); + break; + default: + break; + } #ifdef CTL_TIME_IO io->io_hdr.start_time = time_uptime; @@ -13088,6 +14108,8 @@ ctl_queue(union ctl_io *io) switch (io->io_hdr.io_type) { case CTL_IO_SCSI: case CTL_IO_TASK: + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: if (ctl_debug & CTL_DEBUG_CDB) ctl_io_print(io); ctl_enqueue_incoming(io); @@ -13127,6 +14149,12 @@ ctl_run(union ctl_io *io) ctl_io_print(io); ctl_run_task(io); break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + if (ctl_debug & CTL_DEBUG_CDB) + ctl_io_print(io); + ctl_nvmeio_precheck(&io->nvmeio); + break; default: printf("ctl_run: unknown I/O type %d\n", io->io_hdr.io_type); return (EINVAL); @@ -13170,22 +14198,54 @@ ctl_done(union ctl_io *io) */ #if 0 if (io->io_hdr.flags & CTL_FLAG_ALREADY_DONE) { - printf("%s: type %d msg %d cdb %x iptl: " - "%u:%u:%u tag 0x%04x " - "flag %#x status %x\n", - __func__, - io->io_hdr.io_type, - io->io_hdr.msg_type, - io->scsiio.cdb[0], - io->io_hdr.nexus.initid, - io->io_hdr.nexus.targ_port, - io->io_hdr.nexus.targ_lun, - (io->io_hdr.io_type == - CTL_IO_TASK) ? - io->taskio.tag_num : - io->scsiio.tag_num, - io->io_hdr.flags, - io->io_hdr.status); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + case CTL_IO_TASK: + printf("%s: type %d msg %d cdb %x iptl: " + "%u:%u:%u tag 0x%04lx " + "flag %#x status %x\n", + __func__, + io->io_hdr.io_type, + io->io_hdr.msg_type, + io->scsiio.cdb[0], + io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun, + (io->io_hdr.io_type == CTL_IO_TASK) ? 
+ io->taskio.tag_num : + io->scsiio.tag_num, + io->io_hdr.flags, + io->io_hdr.status); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + printf("%s: type %d msg %d opc %x iptl: " + "%u:%u:%u cid 0x%04x " + "flag %#x status %x\n", + __func__, + io->io_hdr.io_type, + io->io_hdr.msg_type, + io->nvmeio.cmd.opc, + io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun, + io->nvmeio.cmd.cid, + io->io_hdr.flags, + io->io_hdr.status); + break; + default: + printf("%s: type %d msg %d iptl: " + "%u:%u:%u flag %#x status %x\n", + __func__, + io->io_hdr.io_type, + io->io_hdr.msg_type, + io->io_hdr.nexus.initid, + io->io_hdr.nexus.targ_port, + io->io_hdr.nexus.targ_lun, + io->io_hdr.flags, + io->io_hdr.status); + break; + } } else io->io_hdr.flags |= CTL_FLAG_ALREADY_DONE; #endif @@ -13264,19 +14324,41 @@ ctl_work_thread(void *arg) if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->incoming_queue, links); mtx_unlock(&thr->queue_lock); - if (io->io_hdr.io_type == CTL_IO_TASK) + switch (io->io_hdr.io_type) { + case CTL_IO_TASK: ctl_run_task(io); - else + break; + case CTL_IO_SCSI: ctl_scsiio_precheck(&io->scsiio); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + ctl_nvmeio_precheck(&io->nvmeio); + break; + default: + __assert_unreachable(); + } continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->rtr_queue, links); mtx_unlock(&thr->queue_lock); - retval = ctl_scsiio(&io->scsiio); - if (retval != CTL_RETVAL_COMPLETE) - CTL_DEBUG_PRINT(("ctl_scsiio failed\n")); + switch (io->io_hdr.io_type) { + case CTL_IO_SCSI: + retval = ctl_scsiio(&io->scsiio); + if (retval != CTL_RETVAL_COMPLETE) + CTL_DEBUG_PRINT(("ctl_scsiio failed\n")); + break; + case CTL_IO_NVME: + case CTL_IO_NVME_ADMIN: + retval = ctl_nvmeio(&io->nvmeio); + if (retval != CTL_RETVAL_COMPLETE) + CTL_DEBUG_PRINT(("ctl_nvmeio failed\n")); + break; + default: + __assert_unreachable(); + } continue; }
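Taken together, the additions above give NVMe I/O the same life cycle as SCSI I/O: a frontend hands a CTL_IO_NVME or CTL_IO_NVME_ADMIN request to ctl_run() or ctl_queue(), ctl_nvmeio_precheck() resolves the LUN and serializes fused pairs against the OOA queue, ctl_nvmeio() executes the command entry, and data moves through ctl_datamove() before completion. A rough sketch of a minimal submission follows; the cmd fields, io_type values, and entry points are taken from this diff, while the I/O pool argument, the nexus values, and the namespace-to-LUN mapping are illustrative assumptions, not code from any real frontend.

/* Sketch: submit a one-block NVMe READ of LBA 0 through the new path. */
static void
example_submit_nvme_read(struct ctl_port *port, void *io_pool)
{
	union ctl_io *io;

	io = ctl_alloc_io(io_pool);	/* frontend's CTL I/O pool (assumed) */
	ctl_zero_io(io);

	/* An NVM I/O command; admin commands use CTL_IO_NVME_ADMIN. */
	io->io_hdr.io_type = CTL_IO_NVME;
	io->io_hdr.nexus.initid = 0;
	io->io_hdr.nexus.targ_port = port->targ_port;
	io->io_hdr.nexus.targ_lun = 0;	/* e.g. NSID 1 mapped to LUN 0 */

	/* SLBA in CDW10/11, zero-based NLB in CDW12, as parsed above. */
	io->nvmeio.cmd.opc = NVME_OPC_READ;
	io->nvmeio.cmd.cdw10 = htole32(0);	/* SLBA, low 32 bits */
	io->nvmeio.cmd.cdw11 = htole32(0);	/* SLBA, high 32 bits */
	io->nvmeio.cmd.cdw12 = htole32(0);	/* NLB = 0, i.e. one block */

	ctl_run(io);	/* enters ctl_nvmeio_precheck() per this change */
}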