aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPoul-Henning Kamp <phk@FreeBSD.org>2004-03-10 20:41:09 +0000
committerPoul-Henning Kamp <phk@FreeBSD.org>2004-03-10 20:41:09 +0000
commit7a6b2b64294749875e4dc9ae49feac24c1d862e5 (patch)
tree59ebe2a7ed44e7865042f27309af6372e0e6c948
parent724e52cd0dd6494889b65466e9350632f4d6eaa8 (diff)
downloadsrc-7a6b2b64294749875e4dc9ae49feac24c1d862e5.tar.gz
src-7a6b2b64294749875e4dc9ae49feac24c1d862e5.zip
Fix a long-standing deadlock issue with vnode backed md(4) devices:
On vnode-backed md(4) devices over a certain, currently undetermined size relative to the buffer cache, our "lemming-syncer" can provoke a buffer starvation which puts the md thread to sleep on wdrain. This generally tends to grind the entire system to a stop because the event that is supposed to wake up the thread will not happen until a fair bit of the piled up I/O requests in the system finish, and since a lot of those are on an md(4) vnode-backed device which is currently waiting on wdrain until a fair amount of the piled up ... you get the picture. The cure is to issue all VOP_WRITE()s on the vnode backing the device with IO_SYNC. In addition to more closely emulating a real disk device with a non-lying write-cache, this makes the writes exempt from rate-limiting (which is there to avoid starving the buffer cache) and consequently prevents the deadlock. Unfortunately performance takes a hit. Add an "async" option to give people who know what they are doing the old behaviour.
Notes
Notes: svn path=/head/; revision=126821
-rw-r--r--sbin/mdconfig/mdconfig.83
-rw-r--r--sbin/mdconfig/mdconfig.c6
-rw-r--r--sys/dev/md/md.c8
-rw-r--r--sys/sys/mdioctl.h1
4 files changed, 14 insertions, 4 deletions
diff --git a/sbin/mdconfig/mdconfig.8 b/sbin/mdconfig/mdconfig.8
index 0b97ddf1c120..4b2f8e246861 100644
--- a/sbin/mdconfig/mdconfig.8
+++ b/sbin/mdconfig/mdconfig.8
@@ -141,6 +141,9 @@ other devices.
.It Fl o Oo Cm no Oc Ns Ar option
Set or reset options.
.Bl -tag -width indent
+.It Oo Cm no Oc Ns Cm async
+For vnode backed devices: avoid IO_SYNC for increased performance but
+at the risk of deadlocking the entire kernel.
.It Oo Cm no Oc Ns Cm reserve
Allocate and reserve all needed storage from the start, rather than as needed.
.It Oo Cm no Oc Ns Cm cluster
diff --git a/sbin/mdconfig/mdconfig.c b/sbin/mdconfig/mdconfig.c
index 6296da799268..b23e903831ab 100644
--- a/sbin/mdconfig/mdconfig.c
+++ b/sbin/mdconfig/mdconfig.c
@@ -117,7 +117,11 @@ main(int argc, char **argv)
case 'o':
if (cmdline != 2)
usage();
- if (!strcmp(optarg, "cluster"))
+ if (!strcmp(optarg, "async"))
+ mdio.md_options |= MD_ASYNC;
+ else if (!strcmp(optarg, "noasync"))
+ mdio.md_options &= ~MD_ASYNC;
+ else if (!strcmp(optarg, "cluster"))
mdio.md_options |= MD_CLUSTER;
else if (!strcmp(optarg, "nocluster"))
mdio.md_options &= ~MD_CLUSTER;
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 0d3d9189cd49..6e345dada727 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -503,13 +503,15 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
if (bp->bio_cmd == BIO_READ) {
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
+ VOP_UNLOCK(sc->vnode, 0, curthread);
} else {
(void) vn_start_write(sc->vnode, &mp, V_WAIT);
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
- error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
+ error = VOP_WRITE(sc->vnode, &auio,
+ sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred);
+ VOP_UNLOCK(sc->vnode, 0, curthread);
vn_finished_write(mp);
}
- VOP_UNLOCK(sc->vnode, 0, curthread);
bp->bio_resid = auio.uio_resid;
return (error);
}
@@ -938,7 +940,7 @@ mdcreate_vnode(struct md_ioctl *mdio, struct thread *td)
if (mdio->md_fwheads != 0)
sc->fwheads = mdio->md_fwheads;
sc->type = MD_VNODE;
- sc->flags = mdio->md_options & MD_FORCE;
+ sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC);
if (!(flags & FWRITE))
sc->flags |= MD_READONLY;
sc->secsize = DEV_BSIZE;
diff --git a/sys/sys/mdioctl.h b/sys/sys/mdioctl.h
index 3ba206b6aa9a..861619b3b8cb 100644
--- a/sys/sys/mdioctl.h
+++ b/sys/sys/mdioctl.h
@@ -90,5 +90,6 @@ struct md_ioctl {
#define MD_READONLY 0x08 /* Readonly mode */
#define MD_COMPRESS 0x10 /* Compression mode */
#define MD_FORCE 0x20 /* Don't try to prevent foot-shooting */
+#define MD_ASYNC 0x40 /* Asynchronous mode: vnode writes omit IO_SYNC */
#endif /* _SYS_MDIOCTL_H_*/