aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVitaliy Gusev <gusev.vitaliy@gmail.com>2022-06-23 18:46:06 +0000
committerJohn Baldwin <jhb@FreeBSD.org>2023-01-26 19:29:17 +0000
commitb22b31b380e8810f59ada8e60fb87dcddc585054 (patch)
treed0fd418d6cc10b16361138f0add9e87dfe96e31d
parent2c86bac0a8e0b462444a9c9f641032c33c41c640 (diff)
downloadsrc-b22b31b380e8810f59ada8e60fb87dcddc585054.tar.gz
src-b22b31b380e8810f59ada8e60fb87dcddc585054.zip
bhyve: Snapshot impovements for 'blockif' backend
When pausing a block I/O device model as part of suspending a VM, wait for all active block I/O requests to finish before saving snapshot data. This avoids having to save information about in-flight requests both in the block_if layer and in storage device models. For the AHCI device model, the queues are now guaranteed to be idle when taking a snapshot, so remove the code to save queue state and rely on the initial state in a resumed VM having all queues already idle. This will also simplify adding NVMe snapshot support in the future. Reviewed by: jhb Sponsored by: vStack Differential Revision: https://reviews.freebsd.org/D26267 (cherry picked from commit cd9618bdb274375139080ee4e33ccbdc980513f3)
-rw-r--r--usr.sbin/bhyve/block_if.c105
-rw-r--r--usr.sbin/bhyve/block_if.h4
-rw-r--r--usr.sbin/bhyve/pci_ahci.c142
3 files changed, 15 insertions, 236 deletions
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index 8ed95300a13c..2ca6da0469ac 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -109,11 +109,9 @@ struct blockif_ctxt {
int bc_psectoff;
int bc_closing;
int bc_paused;
- int bc_work_count;
pthread_t bc_btid[BLOCKIF_NUMTHR];
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
- pthread_cond_t bc_paused_cond;
pthread_cond_t bc_work_done_cond;
blockif_resize_cb *bc_resize_cb;
void *bc_resize_cb_arg;
@@ -350,6 +348,12 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
(*br->br_callback)(br, err);
}
+static inline bool
+blockif_empty(const struct blockif_ctxt *bc)
+{
+ return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq));
+}
+
static void *
blockif_thr(void *arg)
{
@@ -367,30 +371,21 @@ blockif_thr(void *arg)
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
- bc->bc_work_count++;
-
- /* We cannot process work if the interface is paused */
- while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
+ while (blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
- bc->bc_work_count--;
-
- /* If none of the workers are busy, notify the main thread */
- if (bc->bc_work_count == 0)
+ /* If none to work, notify the main thread */
+ if (blockif_empty(bc))
pthread_cond_broadcast(&bc->bc_work_done_cond);
/* Check ctxt status here to see if exit requested */
if (bc->bc_closing)
break;
- /* Make all worker threads wait here if the device is paused */
- while (bc->bc_paused)
- pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
-
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
}
pthread_mutex_unlock(&bc->bc_mtx);
@@ -623,8 +618,6 @@ blockif_open(nvlist_t *nvl, const char *ident)
pthread_mutex_init(&bc->bc_mtx, NULL);
pthread_cond_init(&bc->bc_cond, NULL);
bc->bc_paused = 0;
- bc->bc_work_count = 0;
- pthread_cond_init(&bc->bc_paused_cond, NULL);
pthread_cond_init(&bc->bc_work_done_cond, NULL);
TAILQ_INIT(&bc->bc_freeq);
TAILQ_INIT(&bc->bc_pendq);
@@ -724,6 +717,7 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
err = 0;
pthread_mutex_lock(&bc->bc_mtx);
+ assert(!bc->bc_paused);
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
/*
* Enqueue and inform the block i/o thread
@@ -748,7 +742,6 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
int
blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (blockif_request(bc, breq, BOP_READ));
}
@@ -756,7 +749,6 @@ blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
int
blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (blockif_request(bc, breq, BOP_WRITE));
}
@@ -764,7 +756,6 @@ blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
int
blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (blockif_request(bc, breq, BOP_FLUSH));
}
@@ -772,7 +763,6 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
int
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (blockif_request(bc, breq, BOP_DELETE));
}
@@ -942,7 +932,6 @@ blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
off_t
blockif_size(struct blockif_ctxt *bc)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_size);
}
@@ -950,7 +939,6 @@ blockif_size(struct blockif_ctxt *bc)
int
blockif_sectsz(struct blockif_ctxt *bc)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_sectsz);
}
@@ -958,7 +946,6 @@ blockif_sectsz(struct blockif_ctxt *bc)
void
blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
*size = bc->bc_psectsz;
*off = bc->bc_psectoff;
@@ -967,7 +954,6 @@ blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
int
blockif_queuesz(struct blockif_ctxt *bc)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (BLOCKIF_MAXREQ - 1);
}
@@ -975,7 +961,6 @@ blockif_queuesz(struct blockif_ctxt *bc)
int
blockif_is_ro(struct blockif_ctxt *bc)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_rdonly);
}
@@ -983,7 +968,6 @@ blockif_is_ro(struct blockif_ctxt *bc)
int
blockif_candelete(struct blockif_ctxt *bc)
{
-
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}
@@ -999,7 +983,7 @@ blockif_pause(struct blockif_ctxt *bc)
bc->bc_paused = 1;
/* The interface is paused. Wait for workers to finish their work */
- while (bc->bc_work_count)
+ while (!blockif_empty(bc))
pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
pthread_mutex_unlock(&bc->bc_mtx);
@@ -1016,71 +1000,6 @@ blockif_resume(struct blockif_ctxt *bc)
pthread_mutex_lock(&bc->bc_mtx);
bc->bc_paused = 0;
- /* resume the threads waiting for paused */
- pthread_cond_broadcast(&bc->bc_paused_cond);
- /* kick the threads after restore */
- pthread_cond_broadcast(&bc->bc_cond);
- pthread_mutex_unlock(&bc->bc_mtx);
-}
-
-int
-blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
-{
- int i;
- struct iovec *iov;
- int ret;
-
- SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
-
- /*
- * XXX: The callback and parameter must be filled by the virtualized
- * device that uses the interface, during its init; we're not touching
- * them here.
- */
-
- /* Snapshot the iovecs. */
- for (i = 0; i < br->br_iovcnt; i++) {
- iov = &br->br_iov[i];
-
- SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
-
- /* We assume the iov is a guest-mapped address. */
- SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
- false, meta, ret, done);
- }
-
-done:
- return (ret);
-}
-
-int
-blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
-{
- int ret;
-
- if (bc->bc_paused == 0) {
- fprintf(stderr, "%s: Snapshot failed: "
- "interface not paused.\r\n", __func__);
- return (ENXIO);
- }
-
- pthread_mutex_lock(&bc->bc_mtx);
-
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);
-
-done:
pthread_mutex_unlock(&bc->bc_mtx);
- return (ret);
}
-#endif
+#endif /* BHYVE_SNAPSHOT */
diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h
index 0407ff43cf94..cf3acc05c98f 100644
--- a/usr.sbin/bhyve/block_if.h
+++ b/usr.sbin/bhyve/block_if.h
@@ -87,10 +87,6 @@ int blockif_close(struct blockif_ctxt *bc);
#ifdef BHYVE_SNAPSHOT
void blockif_pause(struct blockif_ctxt *bc);
void blockif_resume(struct blockif_ctxt *bc);
-int blockif_snapshot_req(struct blockif_req *br,
- struct vm_snapshot_meta *meta);
-int blockif_snapshot(struct blockif_ctxt *bc,
- struct vm_snapshot_meta *meta);
#endif
#endif /* _BLOCK_IF_H_ */
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index f63ebd842757..94bf553d7590 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -2561,113 +2561,13 @@ open_fail:
#ifdef BHYVE_SNAPSHOT
static int
-pci_ahci_snapshot_save_queues(struct ahci_port *port,
- struct vm_snapshot_meta *meta)
-{
- int ret;
- int idx;
- struct ahci_ioreq *ioreq;
-
- STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
- idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
- SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
- }
-
- idx = -1;
- SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-
- TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
- idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
- SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-
- /*
- * Snapshot only the busy requests; other requests are
- * not valid.
- */
- ret = blockif_snapshot_req(&ioreq->io_req, meta);
- if (ret != 0) {
- fprintf(stderr, "%s: failed to snapshot req\r\n",
- __func__);
- goto done;
- }
- }
-
- idx = -1;
- SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-
-done:
- return (ret);
-}
-
-static int
-pci_ahci_snapshot_restore_queues(struct ahci_port *port,
- struct vm_snapshot_meta *meta)
-{
- int ret;
- int idx;
- struct ahci_ioreq *ioreq;
-
- /* Empty the free queue before restoring. */
- while (!STAILQ_EMPTY(&port->iofhd))
- STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
-
- /* Restore the free queue. */
- while (1) {
- SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
- if (idx == -1)
- break;
-
- STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
- }
-
- /* Restore the busy queue. */
- while (1) {
- SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
- if (idx == -1)
- break;
-
- ioreq = &port->ioreq[idx];
- TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
-
- /*
- * Restore only the busy requests; other requests are
- * not valid.
- */
- ret = blockif_snapshot_req(&ioreq->io_req, meta);
- if (ret != 0) {
- fprintf(stderr, "%s: failed to restore request\r\n",
- __func__);
- goto done;
- }
-
- /* Re-enqueue the requests in the block interface. */
- if (ioreq->readop)
- ret = blockif_read(port->bctx, &ioreq->io_req);
- else
- ret = blockif_write(port->bctx, &ioreq->io_req);
-
- if (ret != 0) {
- fprintf(stderr,
- "%s: failed to re-enqueue request\r\n",
- __func__);
- goto done;
- }
- }
-
-done:
- return (ret);
-}
-
-static int
pci_ahci_snapshot(struct vm_snapshot_meta *meta)
{
- int i, j, ret;
+ int i, ret;
void *bctx;
struct pci_devinst *pi;
struct pci_ahci_softc *sc;
struct ahci_port *port;
- struct ahci_cmd_hdr *hdr;
- struct ahci_ioreq *ioreq;
pi = meta->dev_data;
sc = pi->pi_arg;
@@ -2751,43 +2651,7 @@ pci_ahci_snapshot(struct vm_snapshot_meta *meta)
SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
- for (j = 0; j < port->ioqsz; j++) {
- ioreq = &port->ioreq[j];
-
- /* blockif_req snapshot done only for busy requests. */
- hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
- ioreq->slot * AHCI_CL_SIZE);
- SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
- 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
- false, meta, ret, done);
-
- SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
- SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
- }
-
- /* Perform save / restore specific operations. */
- if (meta->op == VM_SNAPSHOT_SAVE) {
- ret = pci_ahci_snapshot_save_queues(port, meta);
- if (ret != 0)
- goto done;
- } else if (meta->op == VM_SNAPSHOT_RESTORE) {
- ret = pci_ahci_snapshot_restore_queues(port, meta);
- if (ret != 0)
- goto done;
- } else {
- ret = EINVAL;
- goto done;
- }
-
- ret = blockif_snapshot(port->bctx, meta);
- if (ret != 0) {
- fprintf(stderr, "%s: failed to restore blockif\r\n",
- __func__);
- goto done;
- }
+ assert(TAILQ_EMPTY(&port->iobhd));
}
done:
@@ -2833,7 +2697,7 @@ pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
return (0);
}
-#endif
+#endif /* BHYVE_SNAPSHOT */
/*
* Use separate emulation names to distinguish drive and atapi devices