aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rick Macklem <rmacklem@FreeBSD.org> 2021-07-16 22:01:03 +0000
committer Rick Macklem <rmacklem@FreeBSD.org> 2021-08-03 23:50:33 +0000
commit 9fb6e613373cf90bb25e7cf5b40a0a7f82c941f6 (patch)
tree 01527c3b7e5af1110c678f86a8df346228911b6d
parent 46b4c26b5b01e48894d5d0979a4bfc6bc8725a4a (diff)
download src-9fb6e613373c.tar.gz
src-9fb6e613373c.zip
nfsd: Add sysctl to set maximum I/O size up to 1Mbyte
Since MAXPHYS now allows the FreeBSD NFS client to do 1Mbyte I/O operations, add a sysctl called vfs.nfsd.srvmaxio so that the maximum NFS server I/O size can be set up to 1Mbyte. The Linux NFS client can also do 1Mbyte I/O operations.

The default of 128Kbytes for the maximum I/O size has not been changed for two reasons:
- kern.ipc.maxsockbuf must be increased to support 1Mbyte I/O
- The limited benchmarking I can do actually shows a drop in I/O rate when the I/O size is above 256Kbytes.
However, daveb@spectralogic.com reports seeing an increase in I/O rate for the 1Mbyte I/O size vs 128Kbytes using a Linux client.

(cherry picked from commit ee29e6f31111ea3cd490248987f2f2ec412eb0bb)
-rw-r--r-- sys/fs/nfs/nfs.h 2
-rw-r--r-- sys/fs/nfs/nfs_commonport.c 7
-rw-r--r-- sys/fs/nfs/nfs_commonsubs.c 5
-rw-r--r-- sys/fs/nfs/nfsproto.h 1
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdport.c 81
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdserv.c 12
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdstate.c 11
7 files changed, 104 insertions, 15 deletions
diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h
index 272b8dbfee22..1a29a7e1d6ec 100644
--- a/sys/fs/nfs/nfs.h
+++ b/sys/fs/nfs/nfs.h
@@ -156,7 +156,7 @@
(t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0)
#define NFS_SRVMAXDATA(n) \
(((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? \
- NFS_SRVMAXIO : NFS_V2MAXDATA)
+ nfs_srvmaxio : NFS_V2MAXDATA)
#define NFS64BITSSET 0xffffffffffffffffull
#define NFS64BITSMINUS1 0xfffffffffffffffeull
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index 47038980c640..8a100749fc32 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -76,6 +76,7 @@ void (*nfsd_call_servertimer)(void) = NULL;
void (*ncl_call_invalcaches)(struct vnode *) = NULL;
vop_advlock_t *nfs_advlock_p = NULL;
vop_reclaim_t *nfs_reclaim_p = NULL;
+uint32_t nfs_srvmaxio = NFS_SRVMAXIO;
int nfs_pnfsio(task_fn_t *, void *);
@@ -303,11 +304,11 @@ nfsvno_getfs(struct nfsfsinfo *sip, int isdgram)
if (isdgram)
pref = NFS_MAXDGRAMDATA;
else
- pref = NFS_SRVMAXIO;
- sip->fs_rtmax = NFS_SRVMAXIO;
+ pref = nfs_srvmaxio;
+ sip->fs_rtmax = nfs_srvmaxio;
sip->fs_rtpref = pref;
sip->fs_rtmult = NFS_FABLKSIZE;
- sip->fs_wtmax = NFS_SRVMAXIO;
+ sip->fs_wtmax = nfs_srvmaxio;
sip->fs_wtpref = pref;
sip->fs_wtmult = NFS_FABLKSIZE;
sip->fs_dtpref = pref;
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index 1bdc13123aac..817d89284091 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -85,6 +85,7 @@ extern volatile int nfsrv_devidcnt;
extern int nfscl_debuglevel;
extern struct nfsdevicehead nfsrv_devidhead;
extern struct nfsstatsv1 nfsstatsv1;
+extern uint32_t nfs_srvmaxio;
SYSCTL_DECL(_vfs_nfs);
SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW,
@@ -2201,7 +2202,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
attrsum += NFSX_UNSIGNED;
i = fxdr_unsigned(int, *tl);
- if (compare && !(*retcmpp) && i != NFS_SRVMAXIO)
+ if (compare && !(*retcmpp) && i != nfs_srvmaxio)
*retcmpp = NFSERR_NOTSAME;
break;
default:
@@ -3012,7 +3013,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
case NFSATTRBIT_LAYOUTALIGNMENT:
case NFSATTRBIT_LAYOUTBLKSIZE:
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
- *tl = txdr_unsigned(NFS_SRVMAXIO);
+ *tl = txdr_unsigned(nfs_srvmaxio);
retnum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_XATTRSUPPORT:
diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h
index 62d86c3a4593..13e146154805 100644
--- a/sys/fs/nfs/nfsproto.h
+++ b/sys/fs/nfs/nfsproto.h
@@ -73,7 +73,6 @@
*/
#define NFS_MAXPKTHDR 404
#define NFS_MAXXDR 4096
-#define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR)
#define NFS_MINPACKET 20
#define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */
#define NFSV4_MINORVERSION 0 /* V4 Minor version */
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 7bcbc738d61b..efe9aac7a136 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -76,6 +76,9 @@ extern struct nfsdontlisthead nfsrv_dontlisthead;
extern volatile int nfsrv_dontlistlen;
extern volatile int nfsrv_devidcnt;
extern int nfsrv_maxpnfsmirror;
+extern uint32_t nfs_srvmaxio;
+extern int nfs_bufpackets;
+extern u_long sb_max_adj;
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
NFSDLOCKMUTEX;
NFSSTATESPINLOCK;
@@ -195,6 +198,84 @@ SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
+/*
+ * Handler for vfs.nfsd.srvmaxio. nfs_srvmaxio can only be increased and only
+ * when the nfsd threads are not running. The setting must be a power of 2,
+ * with the current limit of 1Mbyte. A rejected setting returns EINVAL.
+ */
+static int
+sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ u_int newsrvmaxio;
+ uint64_t tval;
+
+ newsrvmaxio = nfs_srvmaxio;
+ error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (newsrvmaxio == nfs_srvmaxio)
+ return (0);
+ if (newsrvmaxio < nfs_srvmaxio) {
+ printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
+ return (EINVAL);
+ }
+ if (newsrvmaxio > 1048576) {
+ printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
+ return (EINVAL);
+ }
+ if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
+ printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
+ return (EINVAL);
+ }
+
+ /*
+ * Check that kern.ipc.maxsockbuf is large enough for
+ * newsrvmaxio, given the setting of vfs.nfs.bufpackets.
+ */
+ if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
+ sb_max_adj) {
+ /*
+ * Suggest vfs.nfs.bufpackets * maximum RPC message for
+ * sb_max_adj.
+ */
+ tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
+
+ /*
+ * Convert suggested sb_max_adj value to a suggested
+ * sb_max value, which is what is set via kern.ipc.maxsockbuf.
+ * Perform the inverse calculation of (from uipc_sockbuf.c):
+ * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
+ * (MSIZE + MCLBYTES);
+ * XXX If the calculation of sb_max_adj from sb_max changes,
+ * this calculation must be changed as well.
+ */
+ tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */
+ tval += MCLBYTES - 1; /* Round up divide. */
+ tval /= MCLBYTES;
+ printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
+ "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
+ newsrvmaxio);
+ return (EINVAL);
+ }
+
+ NFSD_LOCK();
+ if (newnfs_numnfsd != 0) {
+ NFSD_UNLOCK();
+ printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
+ "threads are running\n");
+ return (EINVAL);
+ }
+
+
+ nfs_srvmaxio = newsrvmaxio;
+ NFSD_UNLOCK();
+ return (0);
+}
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
+ CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
+ sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
+
#define MAX_REORDERED_RPC 16
#define NUM_HEURISTIC 1031
#define NHUSE_INIT 64
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index e564a6a48b79..0ba3472b4ff9 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -66,6 +66,7 @@ extern u_long sb_max_adj;
extern int nfsrv_pnfsatime;
extern int nfsrv_maxpnfsmirror;
extern int nfs_maxcopyrange;
+extern uint32_t nfs_srvmaxio;
static int nfs_async = 0;
SYSCTL_DECL(_vfs_nfsd);
@@ -1023,7 +1024,7 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram,
lop->lo_end = NFS64BITSSET;
}
- if (retlen > NFS_SRVMAXIO || retlen < 0)
+ if (retlen > nfs_srvmaxio || retlen < 0)
nd->nd_repstat = EIO;
if (vnode_vtype(vp) != VREG && !nd->nd_repstat) {
if (nd->nd_flag & ND_NFSV3)
@@ -4417,6 +4418,7 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
struct nfsdsession *sep = NULL;
uint32_t rdmacnt;
struct thread *p = curthread;
+ static bool do_printf = true;
if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
goto nfsmout;
@@ -4438,12 +4440,16 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++);
if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) {
sep->sess_maxreq = sb_max_adj - NFS_MAXXDR;
- printf("Consider increasing kern.ipc.maxsockbuf\n");
+ if (do_printf)
+ printf("Consider increasing kern.ipc.maxsockbuf\n");
+ do_printf = false;
}
sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++);
if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) {
sep->sess_maxresp = sb_max_adj - NFS_MAXXDR;
- printf("Consider increasing kern.ipc.maxsockbuf\n");
+ if (do_printf)
+ printf("Consider increasing kern.ipc.maxsockbuf\n");
+ do_printf = false;
}
sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++);
sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++);
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index 01280c8e49c6..750eda2027ec 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -42,6 +42,7 @@ struct nfsv4lock nfsv4rootfs_lock;
time_t nfsdev_time = 0;
int nfsrv_layouthashsize;
volatile int nfsrv_layoutcnt = 0;
+extern uint32_t nfs_srvmaxio;
extern int newnfs_numnfsd;
extern struct nfsstatsv1 nfsstatsv1;
@@ -6898,7 +6899,7 @@ nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
/* Set the stripe size to the maximum I/O size. */
- *tl++ = txdr_unsigned(NFS_SRVMAXIO & NFSFLAYUTIL_STRIPE_MASK);
+ *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
*tl++ = 0; /* 1st stripe index. */
pattern_offset = 0;
txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */
@@ -7964,13 +7965,13 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
*tl++ = txdr_unsigned(2); /* Two NFS Versions. */
*tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
*tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
- *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */
- *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */
+ *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
+ *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
*tl++ = newnfs_true; /* Tightly coupled. */
*tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
*tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
- *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */
- *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */
+ *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
+ *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
*tl = newnfs_true; /* Tightly coupled. */
ds->nfsdev_hostnamelen = strlen(dnshost);