aboutsummaryrefslogtreecommitdiff
path: root/module/zfs/zil.c
diff options
context:
space:
mode:
author Arun KV <65647132+arun-kv@users.noreply.github.com> 2021-09-13 20:02:39 +0000
committer Tony Hutter <hutter2@llnl.gov> 2021-09-14 22:45:30 +0000
commit bb80b4649af50f1e45ce920b1165bed6880a4252 (patch)
tree 4e2a661f5d8620d099215da16d162eb31673f2c5 /module/zfs/zil.c
parent 7816a6b85b456fb8aa69afb835246c5af1797889 (diff)
download src-bb80b4649af50f1e45ce920b1165bed6880a4252.tar.gz
src-bb80b4649af50f1e45ce920b1165bed6880a4252.zip
Fixed data integrity issue when underlying disk returns error
Errors in zil_lwb_write_done() are not propagated to zil_lwb_flush_vdevs_done(), which can result in zil_commit_impl() not returning an error to applications even when zfs was not able to write data to the disk.

Remove the ZIO_FLAG_DONT_PROPAGATE flag from zio_rewrite() to allow errors to propagate, and consolidate the error handling for flush and write errors to a single location (rather than having error handling split between the "write done" and "flush done" handlers).

Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Signed-off-by: Arun KV <arun.kv@datacore.com>
Closes #12391
Closes #12443
Diffstat (limited to 'module/zfs/zil.c')
-rw-r--r-- module/zfs/zil.c 34
1 files changed, 31 insertions, 3 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 2eeb4fa4fe42..640e805d093a 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -1178,6 +1178,20 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL;
+ /*
+ * We expect any ZIO errors from child ZIOs to have been
+ * propagated "up" to this specific LWB's root ZIO, in
+ * order for this error handling to work correctly. This
+ * includes ZIO errors from either this LWB's write or
+ * flush, as well as any errors from other dependent LWBs
+ * (e.g. a root LWB ZIO that might be a child of this LWB).
+ *
+ * With that said, it's important to note that LWB flush
+ * errors are not propagated up to the LWB root ZIO.
+ * This is incorrect behavior, and results in VDEV flush
+ * errors not being handled correctly here. See the
+ * comment above the call to "zio_flush" for details.
+ */
zcw->zcw_zio_error = zio->io_error;
@@ -1251,6 +1265,12 @@ zil_lwb_write_done(zio_t *zio)
* nodes. We avoid calling zio_flush() since there isn't any
* good reason for doing so, after the lwb block failed to be
* written out.
+ *
+ * Additionally, we don't perform any further error handling at
+ * this point (e.g. setting "zcw_zio_error" appropriately), as
+ * we expect that to occur in "zil_lwb_flush_vdevs_done" (thus,
+ * we expect any error seen here, to have been propagated to
+ * that function).
*/
if (zio->io_error != 0) {
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL)
@@ -1281,8 +1301,17 @@ zil_lwb_write_done(zio_t *zio)
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
- if (vd != NULL)
+ if (vd != NULL) {
+ /*
+ * The "ZIO_FLAG_DONT_PROPAGATE" is currently
+ * always used within "zio_flush". This means,
+ * any errors when flushing the vdev(s), will
+ * (unfortunately) not be handled correctly,
+ * since these "zio_flush" errors will not be
+ * propagated up to "zil_lwb_flush_vdevs_done".
+ */
zio_flush(lwb->lwb_root_zio, vd);
+ }
kmem_free(zv, sizeof (*zv));
}
}
@@ -1399,8 +1428,7 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
- prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
- ZIO_FLAG_FASTWRITE, &zb);
+ prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
lwb->lwb_state = LWB_STATE_OPENED;