author     Rick Macklem <rmacklem@FreeBSD.org>    2014-12-23 00:47:46 +0000
committer  Rick Macklem <rmacklem@FreeBSD.org>    2014-12-23 00:47:46 +0000
commit     c15882f09100c13fc41902bd2620242a424a83be (patch)
tree       ed6528cb0f84dfe08b65ff02e6fda5247450f4fb /sys/nfsclient
parent     6d514f104ec744210e4cd2e815fcf5384d405c14 (diff)
Remove the old NFS client and server from head,
which means that the NFSCLIENT and NFSSERVER kernel options will no longer
work. This commit only removes the kernel components. Removal of unused code
in the user utilities will be done later. This commit does not include an
addition to UPDATING, but that will be committed in a few minutes.

Discussed on:	freebsd-fs
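For reference, a custom kernel configuration that still carried the removed
options would move to the options used by the new (default) NFS
implementation. A minimal sketch of that config change, assuming a
GENERIC-derived config file; NFSCL and NFSD are the new client and server
options and are shown here only as illustration, not as part of this commit:

	# old options, no longer functional after this commit
	options 	NFSCLIENT	# old NFS client
	options 	NFSSERVER	# old NFS server

	# new NFS implementation (already the default in head)
	options 	NFSCL		# new NFS client
	options 	NFSD		# new NFS server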
Notes: svn path=/head/; revision=276096
Diffstat (limited to 'sys/nfsclient')
-rw-r--r--  sys/nfsclient/nfs_bio.c       1794
-rw-r--r--  sys/nfsclient/nfs_kdtrace.c    542
-rw-r--r--  sys/nfsclient/nfs_krpc.c       887
-rw-r--r--  sys/nfsclient/nfs_nfsiod.c     346
-rw-r--r--  sys/nfsclient/nfs_node.c       276
-rw-r--r--  sys/nfsclient/nfs_subs.c      1140
-rw-r--r--  sys/nfsclient/nfs_vfsops.c    1582
-rw-r--r--  sys/nfsclient/nfs_vnops.c     3544
8 files changed, 0 insertions, 10111 deletions
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
deleted file mode 100644
index 1a5b15639d18..000000000000
--- a/sys/nfsclient/nfs_bio.c
+++ /dev/null
@@ -1,1794 +0,0 @@
-/*-
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/mount.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/vmmeter.h>
-#include <sys/vnode.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
-#include <vm/vm_pager.h>
-#include <vm/vnode_pager.h>
-
-#include <nfs/nfsproto.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsmount.h>
-#include <nfsclient/nfsnode.h>
-#include <nfs/nfs_kdtrace.h>
-
-static struct buf *nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size,
- struct thread *td);
-static int nfs_directio_write(struct vnode *vp, struct uio *uiop,
- struct ucred *cred, int ioflag);
-
-extern int nfs_directio_enable;
-extern int nfs_directio_allow_mmap;
-
-/*
- * Vnode op for VM getpages.
- */
-int
-nfs_getpages(struct vop_getpages_args *ap)
-{
- int i, error, nextoff, size, toff, count, npages;
- struct uio uio;
- struct iovec iov;
- vm_offset_t kva;
- struct buf *bp;
- struct vnode *vp;
- struct thread *td;
- struct ucred *cred;
- struct nfsmount *nmp;
- vm_object_t object;
- vm_page_t *pages;
- struct nfsnode *np;
-
- vp = ap->a_vp;
- np = VTONFS(vp);
- td = curthread; /* XXX */
- cred = curthread->td_ucred; /* XXX */
- nmp = VFSTONFS(vp->v_mount);
- pages = ap->a_m;
- count = ap->a_count;
-
- if ((object = vp->v_object) == NULL) {
- nfs_printf("nfs_getpages: called with non-merged cache vnode??\n");
- return (VM_PAGER_ERROR);
- }
-
- if (nfs_directio_enable && !nfs_directio_allow_mmap) {
- mtx_lock(&np->n_mtx);
- if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
- mtx_unlock(&np->n_mtx);
- nfs_printf("nfs_getpages: called on non-cacheable vnode??\n");
- return (VM_PAGER_ERROR);
- } else
- mtx_unlock(&np->n_mtx);
- }
-
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
- mtx_unlock(&nmp->nm_mtx);
- /* We'll never get here for v4, because we always have fsinfo */
- (void)nfs_fsinfo(nmp, vp, cred, td);
- } else
- mtx_unlock(&nmp->nm_mtx);
-
- npages = btoc(count);
-
- /*
- * Since the caller has busied the requested page, that page's valid
- * field will not be changed by other threads.
- */
- vm_page_assert_xbusied(pages[ap->a_reqpage]);
-
- /*
- * If the requested page is partially valid, just return it and
- * allow the pager to zero-out the blanks. Partially valid pages
- * can only occur at the file EOF.
- */
- if (pages[ap->a_reqpage]->valid != 0) {
- vm_pager_free_nonreq(object, pages, ap->a_reqpage, npages);
- return (VM_PAGER_OK);
- }
-
- /*
- * We use only the kva address for the buffer, but this is extremely
- * convenient and fast.
- */
- bp = getpbuf(&nfs_pbuf_freecnt);
-
- kva = (vm_offset_t) bp->b_data;
- pmap_qenter(kva, pages, npages);
- PCPU_INC(cnt.v_vnodein);
- PCPU_ADD(cnt.v_vnodepgsin, npages);
-
- iov.iov_base = (caddr_t) kva;
- iov.iov_len = count;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
- uio.uio_resid = count;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = UIO_READ;
- uio.uio_td = td;
-
- error = (nmp->nm_rpcops->nr_readrpc)(vp, &uio, cred);
- pmap_qremove(kva, npages);
-
- relpbuf(bp, &nfs_pbuf_freecnt);
-
- if (error && (uio.uio_resid == count)) {
- nfs_printf("nfs_getpages: error %d\n", error);
- vm_pager_free_nonreq(object, pages, ap->a_reqpage, npages);
- return (VM_PAGER_ERROR);
- }
-
- /*
- * Calculate the number of bytes read and validate only that number
- * of bytes. Note that due to pending writes, size may be 0. This
- * does not mean that the remaining data is invalid!
- */
-
- size = count - uio.uio_resid;
- VM_OBJECT_WLOCK(object);
- for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
- vm_page_t m;
- nextoff = toff + PAGE_SIZE;
- m = pages[i];
-
- if (nextoff <= size) {
- /*
- * Read operation filled an entire page
- */
- m->valid = VM_PAGE_BITS_ALL;
- KASSERT(m->dirty == 0,
- ("nfs_getpages: page %p is dirty", m));
- } else if (size > toff) {
- /*
- * Read operation filled a partial page.
- */
- m->valid = 0;
- vm_page_set_valid_range(m, 0, size - toff);
- KASSERT(m->dirty == 0,
- ("nfs_getpages: page %p is dirty", m));
- } else {
- /*
- * Read operation was short. If no error
- * occurred we may have hit a zero-fill
- * section. We leave valid set to 0, and page
- * is freed by vm_page_readahead_finish() if
- * its index is not equal to requested, or
- * page is zeroed and set valid by
- * vm_pager_get_pages() for requested page.
- */
- ;
- }
- if (i != ap->a_reqpage)
- vm_page_readahead_finish(m);
- }
- VM_OBJECT_WUNLOCK(object);
- return (0);
-}
-
-/*
- * Vnode op for VM putpages.
- */
-int
-nfs_putpages(struct vop_putpages_args *ap)
-{
- struct uio uio;
- struct iovec iov;
- vm_offset_t kva;
- struct buf *bp;
- int iomode, must_commit, i, error, npages, count;
- off_t offset;
- int *rtvals;
- struct vnode *vp;
- struct thread *td;
- struct ucred *cred;
- struct nfsmount *nmp;
- struct nfsnode *np;
- vm_page_t *pages;
-
- vp = ap->a_vp;
- np = VTONFS(vp);
- td = curthread; /* XXX */
- /* Set the cred to n_writecred for the write rpcs. */
- if (np->n_writecred != NULL)
- cred = crhold(np->n_writecred);
- else
- cred = crhold(curthread->td_ucred); /* XXX */
- nmp = VFSTONFS(vp->v_mount);
- pages = ap->a_m;
- count = ap->a_count;
- rtvals = ap->a_rtvals;
- npages = btoc(count);
- offset = IDX_TO_OFF(pages[0]->pindex);
-
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
- mtx_unlock(&nmp->nm_mtx);
- (void)nfs_fsinfo(nmp, vp, cred, td);
- } else
- mtx_unlock(&nmp->nm_mtx);
-
- mtx_lock(&np->n_mtx);
- if (nfs_directio_enable && !nfs_directio_allow_mmap &&
- (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
- mtx_unlock(&np->n_mtx);
- nfs_printf("nfs_putpages: called on noncache-able vnode??\n");
- mtx_lock(&np->n_mtx);
- }
-
- for (i = 0; i < npages; i++)
- rtvals[i] = VM_PAGER_ERROR;
-
- /*
- * When putting pages, do not extend file past EOF.
- */
- if (offset + count > np->n_size) {
- count = np->n_size - offset;
- if (count < 0)
- count = 0;
- }
- mtx_unlock(&np->n_mtx);
-
- /*
- * We use only the kva address for the buffer, but this is extremely
- * convenient and fast.
- */
- bp = getpbuf(&nfs_pbuf_freecnt);
-
- kva = (vm_offset_t) bp->b_data;
- pmap_qenter(kva, pages, npages);
- PCPU_INC(cnt.v_vnodeout);
- PCPU_ADD(cnt.v_vnodepgsout, count);
-
- iov.iov_base = (caddr_t) kva;
- iov.iov_len = count;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = offset;
- uio.uio_resid = count;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = UIO_WRITE;
- uio.uio_td = td;
-
- if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
-
- error = (nmp->nm_rpcops->nr_writerpc)(vp, &uio, cred, &iomode, &must_commit);
- crfree(cred);
-
- pmap_qremove(kva, npages);
- relpbuf(bp, &nfs_pbuf_freecnt);
-
- if (!error) {
- vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid);
- if (must_commit) {
- nfs_clearcommit(vp->v_mount);
- }
- }
- return rtvals[0];
-}
-
-/*
- * For nfs, cache consistency can only be maintained approximately.
- * Although RFC1094 does not specify the criteria, the following is
- * believed to be compatible with the reference port.
- * For nfs:
- * If the file's modify time on the server has changed since the
- * last read rpc or you have written to the file,
- * you may have lost data cache consistency with the
- * server, so flush all of the file's data out of the cache.
- * Then force a getattr rpc to ensure that you have up to date
- * attributes.
- * NB: This implies that cache data can be read when up to
- * NFS_ATTRTIMEO seconds out of date. If you find that you need current
- * attributes this could be forced by setting n_attrstamp to 0 before
- * the VOP_GETATTR() call.
- */
-static inline int
-nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
-{
- int error = 0;
- struct vattr vattr;
- struct nfsnode *np = VTONFS(vp);
- int old_lock;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
-
- /*
- * Grab the exclusive lock before checking whether the cache is
- * consistent.
- * XXX - We can make this cheaper later (by acquiring cheaper locks).
- * But for now, this suffices.
- */
- old_lock = nfs_upgrade_vnlock(vp);
- if (vp->v_iflag & VI_DOOMED) {
- nfs_downgrade_vnlock(vp, old_lock);
- return (EBADF);
- }
-
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NMODIFIED) {
- mtx_unlock(&np->n_mtx);
- if (vp->v_type != VREG) {
- if (vp->v_type != VDIR)
- panic("nfs: bioread, not dir");
- (nmp->nm_rpcops->nr_invaldir)(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
- if (error)
- goto out;
- }
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- error = VOP_GETATTR(vp, &vattr, cred);
- if (error)
- goto out;
- mtx_lock(&np->n_mtx);
- np->n_mtime = vattr.va_mtime;
- mtx_unlock(&np->n_mtx);
- } else {
- mtx_unlock(&np->n_mtx);
- error = VOP_GETATTR(vp, &vattr, cred);
- if (error)
- return (error);
- mtx_lock(&np->n_mtx);
- if ((np->n_flag & NSIZECHANGED)
- || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
- mtx_unlock(&np->n_mtx);
- if (vp->v_type == VDIR)
- (nmp->nm_rpcops->nr_invaldir)(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
- if (error)
- goto out;
- mtx_lock(&np->n_mtx);
- np->n_mtime = vattr.va_mtime;
- np->n_flag &= ~NSIZECHANGED;
- }
- mtx_unlock(&np->n_mtx);
- }
-out:
- nfs_downgrade_vnlock(vp, old_lock);
- return error;
-}
-
-/*
- * Vnode op for read using bio
- */
-int
-nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
-{
- struct nfsnode *np = VTONFS(vp);
- int biosize, i;
- struct buf *bp, *rabp;
- struct thread *td;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, rabn;
- off_t end;
- int bcount;
- int seqcount;
- int nra, error = 0, n = 0, on = 0;
-
- KASSERT(uio->uio_rw == UIO_READ, ("nfs_read mode"));
- if (uio->uio_resid == 0)
- return (0);
- if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */
- return (EINVAL);
- td = uio->uio_td;
-
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
- mtx_unlock(&nmp->nm_mtx);
- (void)nfs_fsinfo(nmp, vp, cred, td);
- } else
- mtx_unlock(&nmp->nm_mtx);
-
- end = uio->uio_offset + uio->uio_resid;
- if (vp->v_type != VDIR &&
- (end > nmp->nm_maxfilesize || end < uio->uio_offset))
- return (EFBIG);
-
- if (nfs_directio_enable && (ioflag & IO_DIRECT) && (vp->v_type == VREG))
- /* No caching/ no readaheads. Just read data into the user buffer */
- return nfs_readrpc(vp, uio, cred);
-
- biosize = vp->v_bufobj.bo_bsize;
- seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE);
-
- error = nfs_bioread_check_cons(vp, td, cred);
- if (error)
- return error;
-
- do {
- u_quad_t nsize;
-
- mtx_lock(&np->n_mtx);
- nsize = np->n_size;
- mtx_unlock(&np->n_mtx);
-
- switch (vp->v_type) {
- case VREG:
- nfsstats.biocache_reads++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset - (lbn * biosize);
-
- /*
- * Start the read ahead(s), as required.
- */
- if (nmp->nm_readahead > 0) {
- for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
- (off_t)(lbn + 1 + nra) * biosize < nsize; nra++) {
- rabn = lbn + 1 + nra;
- if (incore(&vp->v_bufobj, rabn) == NULL) {
- rabp = nfs_getcacheblk(vp, rabn, biosize, td);
- if (!rabp) {
- error = nfs_sigintr(nmp, td);
- return (error ? error : EINTR);
- }
- if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
- rabp->b_flags |= B_ASYNC;
- rabp->b_iocmd = BIO_READ;
- vfs_busy_pages(rabp, 0);
- if (nfs_asyncio(nmp, rabp, cred, td)) {
- rabp->b_flags |= B_INVAL;
- rabp->b_ioflags |= BIO_ERROR;
- vfs_unbusy_pages(rabp);
- brelse(rabp);
- break;
- }
- } else {
- brelse(rabp);
- }
- }
- }
- }
-
- /* Note that bcount is *not* DEV_BSIZE aligned. */
- bcount = biosize;
- if ((off_t)lbn * biosize >= nsize) {
- bcount = 0;
- } else if ((off_t)(lbn + 1) * biosize > nsize) {
- bcount = nsize - (off_t)lbn * biosize;
- }
- bp = nfs_getcacheblk(vp, lbn, bcount, td);
-
- if (!bp) {
- error = nfs_sigintr(nmp, td);
- return (error ? error : EINTR);
- }
-
- /*
- * If B_CACHE is not set, we must issue the read. If this
- * fails, we return an error.
- */
-
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_iocmd = BIO_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(vp, bp, cred, td);
- if (error) {
- brelse(bp);
- return (error);
- }
- }
-
- /*
- * on is the offset into the current bp. Figure out how many
- * bytes we can copy out of the bp. Note that bcount is
- * NOT DEV_BSIZE aligned.
- *
- * Then figure out how many bytes we can copy into the uio.
- */
-
- n = 0;
- if (on < bcount)
- n = MIN((unsigned)(bcount - on), uio->uio_resid);
- break;
- case VLNK:
- nfsstats.biocache_readlinks++;
- bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, td);
- if (!bp) {
- error = nfs_sigintr(nmp, td);
- return (error ? error : EINTR);
- }
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_iocmd = BIO_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(vp, bp, cred, td);
- if (error) {
- bp->b_ioflags |= BIO_ERROR;
- brelse(bp);
- return (error);
- }
- }
- n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
- on = 0;
- break;
- case VDIR:
- nfsstats.biocache_readdirs++;
- if (np->n_direofoffset
- && uio->uio_offset >= np->n_direofoffset) {
- return (0);
- }
- lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
- on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
- bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, td);
- if (!bp) {
- error = nfs_sigintr(nmp, td);
- return (error ? error : EINTR);
- }
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_iocmd = BIO_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(vp, bp, cred, td);
- if (error) {
- brelse(bp);
- }
- while (error == NFSERR_BAD_COOKIE) {
- (nmp->nm_rpcops->nr_invaldir)(vp);
- error = nfs_vinvalbuf(vp, 0, td, 1);
- /*
- * Yuck! The directory has been modified on the
- * server. The only way to get the block is by
- * reading from the beginning to get all the
- * offset cookies.
- *
- * Leave the last bp intact unless there is an error.
- * Loop back up to the while if the error is another
- * NFSERR_BAD_COOKIE (double yuch!).
- */
- for (i = 0; i <= lbn && !error; i++) {
- if (np->n_direofoffset
- && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
- return (0);
- bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, td);
- if (!bp) {
- error = nfs_sigintr(nmp, td);
- return (error ? error : EINTR);
- }
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_iocmd = BIO_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(vp, bp, cred, td);
- /*
- * no error + B_INVAL == directory EOF,
- * use the block.
- */
- if (error == 0 && (bp->b_flags & B_INVAL))
- break;
- }
- /*
- * An error will throw away the block and the
- * for loop will break out. If no error and this
- * is not the block we want, we throw away the
- * block and go for the next one via the for loop.
- */
- if (error || i < lbn)
- brelse(bp);
- }
- }
- /*
- * The above while is repeated if we hit another cookie
- * error. If we hit an error and it wasn't a cookie error,
- * we give up.
- */
- if (error)
- return (error);
- }
-
- /*
- * If not eof and read aheads are enabled, start one.
- * (You need the current block first, so that you have the
- * directory offset cookie of the next block.)
- */
- if (nmp->nm_readahead > 0 &&
- (bp->b_flags & B_INVAL) == 0 &&
- (np->n_direofoffset == 0 ||
- (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
- incore(&vp->v_bufobj, lbn + 1) == NULL) {
- rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, td);
- if (rabp) {
- if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
- rabp->b_flags |= B_ASYNC;
- rabp->b_iocmd = BIO_READ;
- vfs_busy_pages(rabp, 0);
- if (nfs_asyncio(nmp, rabp, cred, td)) {
- rabp->b_flags |= B_INVAL;
- rabp->b_ioflags |= BIO_ERROR;
- vfs_unbusy_pages(rabp);
- brelse(rabp);
- }
- } else {
- brelse(rabp);
- }
- }
- }
- /*
- * Unlike VREG files, whose buffer size ( bp->b_bcount ) is
- * chopped for the EOF condition, we cannot tell how large
- * NFS directories are going to be until we hit EOF. So
- * an NFS directory buffer is *not* chopped to its EOF. Now,
- * it just so happens that b_resid will effectively chop it
- * to EOF. *BUT* this information is lost if the buffer goes
- * away and is reconstituted into a B_CACHE state ( due to
- * being VMIO ) later. So we keep track of the directory eof
- * in np->n_direofoffset and chop it off as an extra step
- * right here.
- */
- n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
- if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
- n = np->n_direofoffset - uio->uio_offset;
- break;
- default:
- nfs_printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
- bp = NULL;
- break;
- };
-
- if (n > 0) {
- error = uiomove(bp->b_data + on, (int)n, uio);
- }
- if (vp->v_type == VLNK)
- n = 0;
- if (bp != NULL)
- brelse(bp);
- } while (error == 0 && uio->uio_resid > 0 && n > 0);
- return (error);
-}
-
-/*
- * The NFS write path cannot handle iovecs with len > 1. So we need to
- * break up iovecs accordingly (restricting them to wsize).
- * For the SYNC case, we can do this with 1 copy (user buffer -> mbuf).
- * For the ASYNC case, 2 copies are needed. The first a copy from the
- * user buffer to a staging buffer and then a second copy from the staging
- * buffer to mbufs. This can be optimized by copying from the user buffer
- * directly into mbufs and passing the chain down, but that requires a
- * fair amount of re-working of the relevant codepaths (and can be done
- * later).
- */
-static int
-nfs_directio_write(vp, uiop, cred, ioflag)
- struct vnode *vp;
- struct uio *uiop;
- struct ucred *cred;
- int ioflag;
-{
- int error;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- struct thread *td = uiop->uio_td;
- int size;
- int wsize;
-
- mtx_lock(&nmp->nm_mtx);
- wsize = nmp->nm_wsize;
- mtx_unlock(&nmp->nm_mtx);
- if (ioflag & IO_SYNC) {
- int iomode, must_commit;
- struct uio uio;
- struct iovec iov;
-do_sync:
- while (uiop->uio_resid > 0) {
- size = MIN(uiop->uio_resid, wsize);
- size = MIN(uiop->uio_iov->iov_len, size);
- iov.iov_base = uiop->uio_iov->iov_base;
- iov.iov_len = size;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = uiop->uio_offset;
- uio.uio_resid = size;
- uio.uio_segflg = UIO_USERSPACE;
- uio.uio_rw = UIO_WRITE;
- uio.uio_td = td;
- iomode = NFSV3WRITE_FILESYNC;
- error = (nmp->nm_rpcops->nr_writerpc)(vp, &uio, cred,
- &iomode, &must_commit);
- KASSERT((must_commit == 0),
- ("nfs_directio_write: Did not commit write"));
- if (error)
- return (error);
- uiop->uio_offset += size;
- uiop->uio_resid -= size;
- if (uiop->uio_iov->iov_len <= size) {
- uiop->uio_iovcnt--;
- uiop->uio_iov++;
- } else {
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + size;
- uiop->uio_iov->iov_len -= size;
- }
- }
- } else {
- struct uio *t_uio;
- struct iovec *t_iov;
- struct buf *bp;
-
- /*
- * Break up the write into blocksize chunks and hand these
- * over to nfsiod's for write back.
- * Unfortunately, this incurs a copy of the data, since
- * the user could modify the buffer before the write is
- * initiated.
- *
- * The obvious optimization here is that one of the 2 copies
- * in the async write path can be eliminated by copying the
- * data here directly into mbufs and passing the mbuf chain
- * down. But that will require a fair amount of re-working
- * of the code and can be done if there's enough interest
- * in NFS directio access.
- */
- while (uiop->uio_resid > 0) {
- size = MIN(uiop->uio_resid, wsize);
- size = MIN(uiop->uio_iov->iov_len, size);
- bp = getpbuf(&nfs_pbuf_freecnt);
- t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
- t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
- t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
- t_iov->iov_len = size;
- t_uio->uio_iov = t_iov;
- t_uio->uio_iovcnt = 1;
- t_uio->uio_offset = uiop->uio_offset;
- t_uio->uio_resid = size;
- t_uio->uio_segflg = UIO_SYSSPACE;
- t_uio->uio_rw = UIO_WRITE;
- t_uio->uio_td = td;
- KASSERT(uiop->uio_segflg == UIO_USERSPACE ||
- uiop->uio_segflg == UIO_SYSSPACE,
- ("nfs_directio_write: Bad uio_segflg"));
- if (uiop->uio_segflg == UIO_USERSPACE) {
- error = copyin(uiop->uio_iov->iov_base,
- t_iov->iov_base, size);
- if (error != 0)
- goto err_free;
- } else
- /*
- * UIO_SYSSPACE may never happen, but handle
- * it just in case it does.
- */
- bcopy(uiop->uio_iov->iov_base, t_iov->iov_base,
- size);
- bp->b_flags |= B_DIRECT;
- bp->b_iocmd = BIO_WRITE;
- if (cred != NOCRED) {
- crhold(cred);
- bp->b_wcred = cred;
- } else
- bp->b_wcred = NOCRED;
- bp->b_caller1 = (void *)t_uio;
- bp->b_vp = vp;
- error = nfs_asyncio(nmp, bp, NOCRED, td);
-err_free:
- if (error) {
- free(t_iov->iov_base, M_NFSDIRECTIO);
- free(t_iov, M_NFSDIRECTIO);
- free(t_uio, M_NFSDIRECTIO);
- bp->b_vp = NULL;
- relpbuf(bp, &nfs_pbuf_freecnt);
- if (error == EINTR)
- return (error);
- goto do_sync;
- }
- uiop->uio_offset += size;
- uiop->uio_resid -= size;
- if (uiop->uio_iov->iov_len <= size) {
- uiop->uio_iovcnt--;
- uiop->uio_iov++;
- } else {
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + size;
- uiop->uio_iov->iov_len -= size;
- }
- }
- }
- return (0);
-}
-
-/*
- * Vnode op for write using bio
- */
-int
-nfs_write(struct vop_write_args *ap)
-{
- int biosize;
- struct uio *uio = ap->a_uio;
- struct thread *td = uio->uio_td;
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct ucred *cred = ap->a_cred;
- int ioflag = ap->a_ioflag;
- struct buf *bp;
- struct vattr vattr;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn;
- off_t end;
- int bcount;
- int n, on, error = 0;
-
- KASSERT(uio->uio_rw == UIO_WRITE, ("nfs_write mode"));
- KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
- ("nfs_write proc"));
- if (vp->v_type != VREG)
- return (EIO);
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NWRITEERR) {
- np->n_flag &= ~NWRITEERR;
- mtx_unlock(&np->n_mtx);
- return (np->n_error);
- } else
- mtx_unlock(&np->n_mtx);
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
- mtx_unlock(&nmp->nm_mtx);
- (void)nfs_fsinfo(nmp, vp, cred, td);
- } else
- mtx_unlock(&nmp->nm_mtx);
-
- /*
- * Synchronously flush pending buffers if we are in synchronous
- * mode or if we are appending.
- */
- if (ioflag & (IO_APPEND | IO_SYNC)) {
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NMODIFIED) {
- mtx_unlock(&np->n_mtx);
-#ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */
- /*
- * Require non-blocking, synchronous writes to
- * dirty files to inform the program it needs
- * to fsync(2) explicitly.
- */
- if (ioflag & IO_NDELAY)
- return (EAGAIN);
-#endif
-flush_and_restart:
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
- if (error)
- return (error);
- } else
- mtx_unlock(&np->n_mtx);
- }
-
- /*
- * If IO_APPEND then load uio_offset. We restart here if we cannot
- * get the append lock.
- */
- if (ioflag & IO_APPEND) {
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- error = VOP_GETATTR(vp, &vattr, cred);
- if (error)
- return (error);
- mtx_lock(&np->n_mtx);
- uio->uio_offset = np->n_size;
- mtx_unlock(&np->n_mtx);
- }
-
- if (uio->uio_offset < 0)
- return (EINVAL);
- end = uio->uio_offset + uio->uio_resid;
- if (end > nmp->nm_maxfilesize || end < uio->uio_offset)
- return (EFBIG);
- if (uio->uio_resid == 0)
- return (0);
-
- if (nfs_directio_enable && (ioflag & IO_DIRECT) && vp->v_type == VREG)
- return nfs_directio_write(vp, uio, cred, ioflag);
-
- /*
- * Maybe this should be above the vnode op call, but so long as
- * file servers have no limits, I don't think it matters
- */
- if (vn_rlimit_fsize(vp, uio, td))
- return (EFBIG);
-
- biosize = vp->v_bufobj.bo_bsize;
- /*
- * Find all of this file's B_NEEDCOMMIT buffers. If our writes
- * would exceed the local maximum per-file write commit size when
- * combined with those, we must decide whether to flush,
- * go synchronous, or return error. We don't bother checking
- * IO_UNIT -- we just make all writes atomic anyway, as there's
- * no point optimizing for something that really won't ever happen.
- */
- if (!(ioflag & IO_SYNC)) {
- int nflag;
-
- mtx_lock(&np->n_mtx);
- nflag = np->n_flag;
- mtx_unlock(&np->n_mtx);
- int needrestart = 0;
- if (nmp->nm_wcommitsize < uio->uio_resid) {
- /*
- * If this request could not possibly be completed
- * without exceeding the maximum outstanding write
- * commit size, see if we can convert it into a
- * synchronous write operation.
- */
- if (ioflag & IO_NDELAY)
- return (EAGAIN);
- ioflag |= IO_SYNC;
- if (nflag & NMODIFIED)
- needrestart = 1;
- } else if (nflag & NMODIFIED) {
- int wouldcommit = 0;
- BO_LOCK(&vp->v_bufobj);
- if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
- TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
- b_bobufs) {
- if (bp->b_flags & B_NEEDCOMMIT)
- wouldcommit += bp->b_bcount;
- }
- }
- BO_UNLOCK(&vp->v_bufobj);
- /*
- * Since we're not operating synchronously and
- * bypassing the buffer cache, we are in a commit
- * and holding all of these buffers whether
- * transmitted or not. If not limited, this
- * will lead to the buffer cache deadlocking,
- * as no one else can flush our uncommitted buffers.
- */
- wouldcommit += uio->uio_resid;
- /*
- * If we would initially exceed the maximum
- * outstanding write commit size, flush and restart.
- */
- if (wouldcommit > nmp->nm_wcommitsize)
- needrestart = 1;
- }
- if (needrestart)
- goto flush_and_restart;
- }
-
- do {
- nfsstats.biocache_writes++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset - (lbn * biosize);
- n = MIN((unsigned)(biosize - on), uio->uio_resid);
-again:
- /*
- * Handle direct append and file extension cases, calculate
- * unaligned buffer size.
- */
- mtx_lock(&np->n_mtx);
- if (uio->uio_offset == np->n_size && n) {
- mtx_unlock(&np->n_mtx);
- /*
- * Get the buffer (in its pre-append state to maintain
- * B_CACHE if it was previously set). Resize the
- * nfsnode after we have locked the buffer to prevent
- * readers from reading garbage.
- */
- bcount = on;
- bp = nfs_getcacheblk(vp, lbn, bcount, td);
-
- if (bp != NULL) {
- long save;
-
- mtx_lock(&np->n_mtx);
- np->n_size = uio->uio_offset + n;
- np->n_flag |= NMODIFIED;
- vnode_pager_setsize(vp, np->n_size);
- mtx_unlock(&np->n_mtx);
-
- save = bp->b_flags & B_CACHE;
- bcount += n;
- allocbuf(bp, bcount);
- bp->b_flags |= save;
- }
- } else {
- /*
- * Obtain the locked cache block first, and then
- * adjust the file's size as appropriate.
- */
- bcount = on + n;
- if ((off_t)lbn * biosize + bcount < np->n_size) {
- if ((off_t)(lbn + 1) * biosize < np->n_size)
- bcount = biosize;
- else
- bcount = np->n_size - (off_t)lbn * biosize;
- }
- mtx_unlock(&np->n_mtx);
- bp = nfs_getcacheblk(vp, lbn, bcount, td);
- mtx_lock(&np->n_mtx);
- if (uio->uio_offset + n > np->n_size) {
- np->n_size = uio->uio_offset + n;
- np->n_flag |= NMODIFIED;
- vnode_pager_setsize(vp, np->n_size);
- }
- mtx_unlock(&np->n_mtx);
- }
-
- if (!bp) {
- error = nfs_sigintr(nmp, td);
- if (!error)
- error = EINTR;
- break;
- }
-
- /*
- * Issue a READ if B_CACHE is not set. In special-append
- * mode, B_CACHE is based on the buffer prior to the write
- * op and is typically set, avoiding the read. If a read
- * is required in special append mode, the server will
- * probably send us a short-read since we extended the file
- * on our end, resulting in b_resid == 0 and, thusly,
- * B_CACHE getting set.
- *
- * We can also avoid issuing the read if the write covers
- * the entire buffer. We have to make sure the buffer state
- * is reasonable in this case since we will not be initiating
- * I/O. See the comments in kern/vfs_bio.c's getblk() for
- * more information.
- *
- * B_CACHE may also be set due to the buffer being cached
- * normally.
- */
-
- if (on == 0 && n == bcount) {
- bp->b_flags |= B_CACHE;
- bp->b_flags &= ~B_INVAL;
- bp->b_ioflags &= ~BIO_ERROR;
- }
-
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_iocmd = BIO_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(vp, bp, cred, td);
- if (error) {
- brelse(bp);
- break;
- }
- }
- if (bp->b_wcred == NOCRED)
- bp->b_wcred = crhold(cred);
- mtx_lock(&np->n_mtx);
- np->n_flag |= NMODIFIED;
- mtx_unlock(&np->n_mtx);
-
- /*
- * If dirtyend exceeds file size, chop it down. This should
- * not normally occur but there is an append race where it
- * might occur XXX, so we log it.
- *
- * If the chopping creates a reverse-indexed or degenerate
- * situation with dirtyoff/end, we 0 both of them.
- */
-
- if (bp->b_dirtyend > bcount) {
- nfs_printf("NFS append race @%lx:%d\n",
- (long)bp->b_blkno * DEV_BSIZE,
- bp->b_dirtyend - bcount);
- bp->b_dirtyend = bcount;
- }
-
- if (bp->b_dirtyoff >= bp->b_dirtyend)
- bp->b_dirtyoff = bp->b_dirtyend = 0;
-
- /*
- * If the new write will leave a contiguous dirty
- * area, just update the b_dirtyoff and b_dirtyend,
- * otherwise force a write rpc of the old dirty area.
- *
- * While it is possible to merge discontiguous writes due to
- * our having a B_CACHE buffer ( and thus valid read data
- * for the hole), we don't because it could lead to
- * significant cache coherency problems with multiple clients,
- * especially if locking is implemented later on.
- *
- * as an optimization we could theoretically maintain
- * a linked list of discontinuous areas, but we would still
- * have to commit them separately so there isn't much
- * advantage to it except perhaps a bit of asynchronization.
- */
-
- if (bp->b_dirtyend > 0 &&
- (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
- if (bwrite(bp) == EINTR) {
- error = EINTR;
- break;
- }
- goto again;
- }
-
- error = uiomove((char *)bp->b_data + on, n, uio);
-
- /*
- * Since this block is being modified, it must be written
- * again and not just committed. Since write clustering does
- * not work for the stage 1 data write, only the stage 2
- * commit rpc, we have to clear B_CLUSTEROK as well.
- */
- bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
-
- if (error) {
- bp->b_ioflags |= BIO_ERROR;
- brelse(bp);
- break;
- }
-
- /*
- * Only update dirtyoff/dirtyend if not a degenerate
- * condition.
- */
- if (n) {
- if (bp->b_dirtyend > 0) {
- bp->b_dirtyoff = min(on, bp->b_dirtyoff);
- bp->b_dirtyend = max((on + n), bp->b_dirtyend);
- } else {
- bp->b_dirtyoff = on;
- bp->b_dirtyend = on + n;
- }
- vfs_bio_set_valid(bp, on, n);
- }
-
- /*
- * If IO_SYNC do bwrite().
- *
- * IO_INVAL appears to be unused. The idea appears to be
- * to turn off caching in this case. Very odd. XXX
- */
- if ((ioflag & IO_SYNC)) {
- if (ioflag & IO_INVAL)
- bp->b_flags |= B_NOCACHE;
- error = bwrite(bp);
- if (error)
- break;
- } else if ((n + on) == biosize) {
- bp->b_flags |= B_ASYNC;
- (void) (nmp->nm_rpcops->nr_writebp)(bp, 0, NULL);
- } else {
- bdwrite(bp);
- }
- } while (uio->uio_resid > 0 && n > 0);
-
- return (error);
-}
-
-/*
- * Get an nfs cache block.
- *
- * Allocate a new one if the block isn't currently in the cache
- * and return the block marked busy. If the calling process is
- * interrupted by a signal for an interruptible mount point, return
- * NULL.
- *
- * The caller must carefully deal with the possible B_INVAL state of
- * the buffer. nfs_doio() clears B_INVAL (and nfs_asyncio() clears it
- * indirectly), so synchronous reads can be issued without worrying about
- * the B_INVAL state. We have to be a little more careful when dealing
- * with writes (see comments in nfs_write()) when extending a file past
- * its EOF.
- */
-static struct buf *
-nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td)
-{
- struct buf *bp;
- struct mount *mp;
- struct nfsmount *nmp;
-
- mp = vp->v_mount;
- nmp = VFSTONFS(mp);
-
- if (nmp->nm_flag & NFSMNT_INT) {
- sigset_t oldset;
-
- nfs_set_sigmask(td, &oldset);
- bp = getblk(vp, bn, size, PCATCH, 0, 0);
- nfs_restore_sigmask(td, &oldset);
- while (bp == NULL) {
- if (nfs_sigintr(nmp, td))
- return (NULL);
- bp = getblk(vp, bn, size, 0, 2 * hz, 0);
- }
- } else {
- bp = getblk(vp, bn, size, 0, 0, 0);
- }
-
- if (vp->v_type == VREG)
- bp->b_blkno = bn * (vp->v_bufobj.bo_bsize / DEV_BSIZE);
- return (bp);
-}
-
-/*
- * Flush and invalidate all dirty buffers. If another process is already
- * doing the flush, just wait for completion.
- */
-int
-nfs_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
-{
- struct nfsnode *np = VTONFS(vp);
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- int error = 0, slpflag, slptimeo;
- int old_lock = 0;
-
- ASSERT_VOP_LOCKED(vp, "nfs_vinvalbuf");
-
- if ((nmp->nm_flag & NFSMNT_INT) == 0)
- intrflg = 0;
- if (intrflg) {
- slpflag = PCATCH;
- slptimeo = 2 * hz;
- } else {
- slpflag = 0;
- slptimeo = 0;
- }
-
- old_lock = nfs_upgrade_vnlock(vp);
- if (vp->v_iflag & VI_DOOMED) {
- /*
- * Since vgonel() uses the generic vinvalbuf() to flush
- * dirty buffers and it does not call this function, it
- * is safe to just return OK when VI_DOOMED is set.
- */
- nfs_downgrade_vnlock(vp, old_lock);
- return (0);
- }
-
- /*
- * Now, flush as required.
- */
- if ((flags & V_SAVE) && (vp->v_bufobj.bo_object != NULL)) {
- VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
- vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
- VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
- /*
- * If the page clean was interrupted, fail the invalidation.
- * Not doing so, we run the risk of losing dirty pages in the
- * vinvalbuf() call below.
- */
- if (intrflg && (error = nfs_sigintr(nmp, td)))
- goto out;
- }
-
- error = vinvalbuf(vp, flags, slpflag, 0);
- while (error) {
- if (intrflg && (error = nfs_sigintr(nmp, td)))
- goto out;
- error = vinvalbuf(vp, flags, 0, slptimeo);
- }
- mtx_lock(&np->n_mtx);
- if (np->n_directio_asyncwr == 0)
- np->n_flag &= ~NMODIFIED;
- mtx_unlock(&np->n_mtx);
-out:
- nfs_downgrade_vnlock(vp, old_lock);
- return error;
-}
-
-/*
- * Initiate asynchronous I/O. Return an error if no nfsiods are available.
- * This is mainly to avoid queueing async I/O requests when the nfsiods
- * are all hung on a dead server.
- *
- * Note: nfs_asyncio() does not clear (BIO_ERROR|B_INVAL) but when the bp
- * is eventually dequeued by the async daemon, nfs_doio() *will*.
- */
-int
-nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thread *td)
-{
- int iod;
- int gotiod;
- int slpflag = 0;
- int slptimeo = 0;
- int error, error2;
-
- /*
- * Commits are usually short and sweet so lets save some cpu and
- * leave the async daemons for more important rpc's (such as reads
- * and writes).
- *
- * Readdirplus RPCs do vget()s to acquire the vnodes for entries
- * in the directory in order to update attributes. This can deadlock
- * with another thread that is waiting for async I/O to be done by
- * an nfsiod thread while holding a lock on one of these vnodes.
- * To avoid this deadlock, don't allow the async nfsiod threads to
- * perform Readdirplus RPCs.
- */
- mtx_lock(&nfs_iod_mtx);
- if ((bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
- (nmp->nm_bufqiods > nfs_numasync / 2)) ||
- (bp->b_vp->v_type == VDIR && (nmp->nm_flag & NFSMNT_RDIRPLUS))) {
- mtx_unlock(&nfs_iod_mtx);
- return(EIO);
- }
-again:
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- gotiod = FALSE;
-
- /*
- * Find a free iod to process this request.
- */
- for (iod = 0; iod < nfs_numasync; iod++)
- if (nfs_iodwant[iod] == NFSIOD_AVAILABLE) {
- gotiod = TRUE;
- break;
- }
-
- /*
- * Try to create one if none are free.
- */
- if (!gotiod)
- nfs_nfsiodnew();
- else {
- /*
- * Found one, so wake it up and tell it which
- * mount to process.
- */
- NFS_DPF(ASYNCIO, ("nfs_asyncio: waking iod %d for mount %p\n",
- iod, nmp));
- nfs_iodwant[iod] = NFSIOD_NOT_AVAILABLE;
- nfs_iodmount[iod] = nmp;
- nmp->nm_bufqiods++;
- wakeup(&nfs_iodwant[iod]);
- }
-
- /*
- * If none are free, we may already have an iod working on this mount
- * point. If so, it will process our request.
- */
- if (!gotiod) {
- if (nmp->nm_bufqiods > 0) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: %d iods are already processing mount %p\n",
- nmp->nm_bufqiods, nmp));
- gotiod = TRUE;
- }
- }
-
- /*
- * If we have an iod which can process the request, then queue
- * the buffer.
- */
- if (gotiod) {
- /*
- * Ensure that the queue never grows too large. We still want
- * to asynchronize so we block rather than return EIO.
- */
- while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
- nmp->nm_bufqwant = TRUE;
- error = nfs_msleep(td, &nmp->nm_bufq, &nfs_iod_mtx,
- slpflag | PRIBIO,
- "nfsaio", slptimeo);
- if (error) {
- error2 = nfs_sigintr(nmp, td);
- if (error2) {
- mtx_unlock(&nfs_iod_mtx);
- return (error2);
- }
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- /*
- * We might have lost our iod while sleeping,
- * so check and loop if necessary.
- */
- goto again;
- }
-
- /* We might have lost our nfsiod */
- if (nmp->nm_bufqiods == 0) {
- NFS_DPF(ASYNCIO,
-("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
- goto again;
- }
-
- if (bp->b_iocmd == BIO_READ) {
- if (bp->b_rcred == NOCRED && cred != NOCRED)
- bp->b_rcred = crhold(cred);
- } else {
- if (bp->b_wcred == NOCRED && cred != NOCRED)
- bp->b_wcred = crhold(cred);
- }
-
- if (bp->b_flags & B_REMFREE)
- bremfreef(bp);
- BUF_KERNPROC(bp);
- TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
- nmp->nm_bufqlen++;
- if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
- mtx_lock(&(VTONFS(bp->b_vp))->n_mtx);
- VTONFS(bp->b_vp)->n_flag |= NMODIFIED;
- VTONFS(bp->b_vp)->n_directio_asyncwr++;
- mtx_unlock(&(VTONFS(bp->b_vp))->n_mtx);
- }
- mtx_unlock(&nfs_iod_mtx);
- return (0);
- }
-
- mtx_unlock(&nfs_iod_mtx);
-
- /*
- * All the iods are busy on other mounts, so return EIO to
- * force the caller to process the i/o synchronously.
- */
- NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
- return (EIO);
-}
-
-void
-nfs_doio_directwrite(struct buf *bp)
-{
- int iomode, must_commit;
- struct uio *uiop = (struct uio *)bp->b_caller1;
- char *iov_base = uiop->uio_iov->iov_base;
- struct nfsmount *nmp = VFSTONFS(bp->b_vp->v_mount);
-
- iomode = NFSV3WRITE_FILESYNC;
- uiop->uio_td = NULL; /* NULL since we're in nfsiod */
- (nmp->nm_rpcops->nr_writerpc)(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit);
- KASSERT((must_commit == 0), ("nfs_doio_directwrite: Did not commit write"));
- free(iov_base, M_NFSDIRECTIO);
- free(uiop->uio_iov, M_NFSDIRECTIO);
- free(uiop, M_NFSDIRECTIO);
- if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
- struct nfsnode *np = VTONFS(bp->b_vp);
- mtx_lock(&np->n_mtx);
- np->n_directio_asyncwr--;
- if (np->n_directio_asyncwr == 0) {
- VTONFS(bp->b_vp)->n_flag &= ~NMODIFIED;
- if ((np->n_flag & NFSYNCWAIT)) {
- np->n_flag &= ~NFSYNCWAIT;
- wakeup((caddr_t)&np->n_directio_asyncwr);
- }
- }
- mtx_unlock(&np->n_mtx);
- }
- bp->b_vp = NULL;
- relpbuf(bp, &nfs_pbuf_freecnt);
-}
-
-/*
- * Do an I/O operation to/from a cache block. This may be called
- * synchronously or from an nfsiod.
- */
-int
-nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
-{
- struct uio *uiop;
- struct nfsnode *np;
- struct nfsmount *nmp;
- int error = 0, iomode, must_commit = 0;
- struct uio uio;
- struct iovec io;
- struct proc *p = td ? td->td_proc : NULL;
- uint8_t iocmd;
-
- np = VTONFS(vp);
- nmp = VFSTONFS(vp->v_mount);
- uiop = &uio;
- uiop->uio_iov = &io;
- uiop->uio_iovcnt = 1;
- uiop->uio_segflg = UIO_SYSSPACE;
- uiop->uio_td = td;
-
- /*
- * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We
- * do this here so we do not have to do it in all the code that
- * calls us.
- */
- bp->b_flags &= ~B_INVAL;
- bp->b_ioflags &= ~BIO_ERROR;
-
- KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
- iocmd = bp->b_iocmd;
- if (iocmd == BIO_READ) {
- io.iov_len = uiop->uio_resid = bp->b_bcount;
- io.iov_base = bp->b_data;
- uiop->uio_rw = UIO_READ;
-
- switch (vp->v_type) {
- case VREG:
- uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
- nfsstats.read_bios++;
- error = (nmp->nm_rpcops->nr_readrpc)(vp, uiop, cr);
-
- if (!error) {
- if (uiop->uio_resid) {
- /*
- * If we had a short read with no error, we must have
- * hit a file hole. We should zero-fill the remainder.
- * This can also occur if the server hits the file EOF.
- *
- * Holes used to be able to occur due to pending
- * writes, but that is not possible any longer.
- */
- int nread = bp->b_bcount - uiop->uio_resid;
- int left = uiop->uio_resid;
-
- if (left > 0)
- bzero((char *)bp->b_data + nread, left);
- uiop->uio_resid = 0;
- }
- }
- /* ASSERT_VOP_LOCKED(vp, "nfs_doio"); */
- if (p && (vp->v_vflag & VV_TEXT)) {
- mtx_lock(&np->n_mtx);
- if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime)) {
- mtx_unlock(&np->n_mtx);
- PROC_LOCK(p);
- killproc(p, "text file modification");
- PROC_UNLOCK(p);
- } else
- mtx_unlock(&np->n_mtx);
- }
- break;
- case VLNK:
- uiop->uio_offset = (off_t)0;
- nfsstats.readlink_bios++;
- error = (nmp->nm_rpcops->nr_readlinkrpc)(vp, uiop, cr);
- break;
- case VDIR:
- nfsstats.readdir_bios++;
- uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
- if ((nmp->nm_flag & NFSMNT_RDIRPLUS) != 0) {
- error = nfs_readdirplusrpc(vp, uiop, cr);
- if (error == NFSERR_NOTSUPP)
- nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
- }
- if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
- error = nfs_readdirrpc(vp, uiop, cr);
- /*
- * end-of-directory sets B_INVAL but does not generate an
- * error.
- */
- if (error == 0 && uiop->uio_resid == bp->b_bcount)
- bp->b_flags |= B_INVAL;
- break;
- default:
- nfs_printf("nfs_doio: type %x unexpected\n", vp->v_type);
- break;
- };
- if (error) {
- bp->b_ioflags |= BIO_ERROR;
- bp->b_error = error;
- }
- } else {
- /*
- * If we only need to commit, try to commit
- */
- if (bp->b_flags & B_NEEDCOMMIT) {
- int retv;
- off_t off;
-
- off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
- retv = (nmp->nm_rpcops->nr_commit)(
- vp, off, bp->b_dirtyend-bp->b_dirtyoff,
- bp->b_wcred, td);
- if (retv == 0) {
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
- bp->b_resid = 0;
- bufdone(bp);
- return (0);
- }
- if (retv == NFSERR_STALEWRITEVERF) {
- nfs_clearcommit(vp->v_mount);
- }
- }
-
- /*
- * Setup for actual write
- */
- mtx_lock(&np->n_mtx);
- if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
- bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
- mtx_unlock(&np->n_mtx);
-
- if (bp->b_dirtyend > bp->b_dirtyoff) {
- io.iov_len = uiop->uio_resid = bp->b_dirtyend
- - bp->b_dirtyoff;
- uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
- + bp->b_dirtyoff;
- io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
- uiop->uio_rw = UIO_WRITE;
- nfsstats.write_bios++;
-
- if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
-
- error = (nmp->nm_rpcops->nr_writerpc)(vp, uiop, cr, &iomode, &must_commit);
-
- /*
- * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
- * to cluster the buffers needing commit. This will allow
- * the system to submit a single commit rpc for the whole
- * cluster. We can do this even if the buffer is not 100%
- * dirty (relative to the NFS blocksize), so we optimize the
- * append-to-file-case.
- *
- * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
- * cleared because write clustering only works for commit
- * rpc's, not for the data portion of the write).
- */
-
- if (!error && iomode == NFSV3WRITE_UNSTABLE) {
- bp->b_flags |= B_NEEDCOMMIT;
- if (bp->b_dirtyoff == 0
- && bp->b_dirtyend == bp->b_bcount)
- bp->b_flags |= B_CLUSTEROK;
- } else {
- bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
- }
-
- /*
- * For an interrupted write, the buffer is still valid
- * and the write hasn't been pushed to the server yet,
- * so we can't set BIO_ERROR and report the interruption
- * by setting B_EINTR. For the B_ASYNC case, B_EINTR
- * is not relevant, so the rpc attempt is essentially
- * a noop. For the case of a V3 write rpc not being
- * committed to stable storage, the block is still
- * dirty and requires either a commit rpc or another
- * write rpc with iomode == NFSV3WRITE_FILESYNC before
- * the block is reused. This is indicated by setting
- * the B_DELWRI and B_NEEDCOMMIT flags.
- *
- * If the buffer is marked B_PAGING, it does not reside on
- * the vp's paging queues so we cannot call bdirty(). The
- * bp in this case is not an NFS cache block so we should
- * be safe. XXX
- *
- * The logic below breaks up errors into recoverable and
- * unrecoverable. For the former, we clear B_INVAL|B_NOCACHE
- * and keep the buffer around for potential write retries.
- * For the latter (eg ESTALE), we toss the buffer away (B_INVAL)
- * and save the error in the nfsnode. This is less than ideal
- * but necessary. Keeping such buffers around could potentially
- * cause buffer exhaustion eventually (they can never be written
- * out, so they will constantly be re-dirtied). It also causes
- * all sorts of vfs panics. For non-recoverable write errors,
- * also invalidate the attrcache, so we'll be forced to go over
- * the wire for this object, returning an error to user on next
- * call (most of the time).
- */
- if (error == EINTR || error == EIO || error == ETIMEDOUT
- || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
- int s;
-
- s = splbio();
- bp->b_flags &= ~(B_INVAL|B_NOCACHE);
- if ((bp->b_flags & B_PAGING) == 0) {
- bdirty(bp);
- bp->b_flags &= ~B_DONE;
- }
- if (error && (bp->b_flags & B_ASYNC) == 0)
- bp->b_flags |= B_EINTR;
- splx(s);
- } else {
- if (error) {
- bp->b_ioflags |= BIO_ERROR;
- bp->b_flags |= B_INVAL;
- bp->b_error = np->n_error = error;
- mtx_lock(&np->n_mtx);
- np->n_flag |= NWRITEERR;
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- mtx_unlock(&np->n_mtx);
- }
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- }
- } else {
- bp->b_resid = 0;
- bufdone(bp);
- return (0);
- }
- }
- bp->b_resid = uiop->uio_resid;
- if (must_commit)
- nfs_clearcommit(vp->v_mount);
- bufdone(bp);
- return (error);
-}
-
-/*
- * Used to aid in handling ftruncate() operations on the NFS client side.
- * Truncation creates a number of special problems for NFS. We have to
- * throw away VM pages and buffer cache buffers that are beyond EOF, and
- * we have to properly handle VM pages or (potentially dirty) buffers
- * that straddle the truncation point.
- */
-
-int
-nfs_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad_t nsize)
-{
- struct nfsnode *np = VTONFS(vp);
- u_quad_t tsize;
- int biosize = vp->v_bufobj.bo_bsize;
- int error = 0;
-
- mtx_lock(&np->n_mtx);
- tsize = np->n_size;
- np->n_size = nsize;
- mtx_unlock(&np->n_mtx);
-
- if (nsize < tsize) {
- struct buf *bp;
- daddr_t lbn;
- int bufsize;
-
- /*
- * vtruncbuf() doesn't get the buffer overlapping the
- * truncation point. We may have a B_DELWRI and/or B_CACHE
- * buffer that now needs to be truncated.
- */
- error = vtruncbuf(vp, cred, nsize, biosize);
- lbn = nsize / biosize;
- bufsize = nsize - (lbn * biosize);
- bp = nfs_getcacheblk(vp, lbn, bufsize, td);
- if (!bp)
- return EINTR;
- if (bp->b_dirtyoff > bp->b_bcount)
- bp->b_dirtyoff = bp->b_bcount;
- if (bp->b_dirtyend > bp->b_bcount)
- bp->b_dirtyend = bp->b_bcount;
- bp->b_flags |= B_RELBUF; /* don't leave garbage around */
- brelse(bp);
- } else {
- vnode_pager_setsize(vp, nsize);
- }
- return(error);
-}
-
diff --git a/sys/nfsclient/nfs_kdtrace.c b/sys/nfsclient/nfs_kdtrace.c
deleted file mode 100644
index 429dbc3baeb2..000000000000
--- a/sys/nfsclient/nfs_kdtrace.c
+++ /dev/null
@@ -1,542 +0,0 @@
-/*-
- * Copyright (c) 2009 Robert N. M. Watson
- * All rights reserved.
- *
- * This software was developed at the University of Cambridge Computer
- * Laboratory with support from a grant from Google, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-
-#include <sys/dtrace.h>
-#include <sys/dtrace_bsd.h>
-
-#include <nfs/nfsproto.h>
-
-/*
- * dtnfsclient is a DTrace provider that tracks the intent to perform RPCs
- * in the NFS client, as well as acess to and maintenance of the access and
- * attribute caches. This is not quite the same as RPCs, because NFS may
- * issue multiple RPC transactions in the event that authentication fails,
- * there's a jukebox error, or none at all if the access or attribute cache
- * hits. However, it cleanly represents the logical layer between RPC
- * transmission and vnode/vfs operations, providing access to state linking
- * the two.
- */
-
-static int dtnfsclient_unload(void);
-static void dtnfsclient_getargdesc(void *, dtrace_id_t, void *,
- dtrace_argdesc_t *);
-static void dtnfsclient_provide(void *, dtrace_probedesc_t *);
-static void dtnfsclient_destroy(void *, dtrace_id_t, void *);
-static void dtnfsclient_enable(void *, dtrace_id_t, void *);
-static void dtnfsclient_disable(void *, dtrace_id_t, void *);
-static void dtnfsclient_load(void *);
-
-static dtrace_pattr_t dtnfsclient_attr = {
-{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
-{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
-{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
-};
-
-/*
- * Description of NFSv3 and (optional) NFSv2 probes for a procedure.
- */
-struct dtnfsclient_rpc {
- char *nr_v3_name;
- char *nr_v2_name; /* Or NULL if none. */
-
- /*
- * IDs for the start and done cases, for both NFSv2 and NFSv3.
- */
- uint32_t nr_v2_id_start, nr_v2_id_done;
- uint32_t nr_v3_id_start, nr_v3_id_done;
-};
-
-/*
- * This table is indexed by NFSv3 procedure number, but also used for NFSv2
- * procedure names.
- */
-static struct dtnfsclient_rpc dtnfsclient_rpcs[NFS_NPROCS] = {
- { "null", "null" },
- { "getattr", "getattr" },
- { "setattr", "setattr" },
- { "lookup", "lookup" },
- { "access" },
- { "readlink", "readlink" },
- { "read", "read" },
- { "write", "write" },
- { "create", "create" },
- { "mkdir", "mkdir" },
- { "symlink", "symlink" },
- { "mknod" },
- { "remove", "remove" },
- { "rmdir", "rmdir" },
- { "rename", "rename" },
- { "link", "link" },
- { "readdir", "readdir" },
- { "readdirplus" },
- { "fsstat", "statfs" },
- { "fsinfo" },
- { "pathconf" },
- { "commit" },
- { "noop" },
-};
-
-/*
- * Module name strings.
- */
-static char *dtnfsclient_accesscache_str = "accesscache";
-static char *dtnfsclient_attrcache_str = "attrcache";
-static char *dtnfsclient_nfs2_str = "nfs2";
-static char *dtnfsclient_nfs3_str = "nfs3";
-
-/*
- * Function name strings.
- */
-static char *dtnfsclient_flush_str = "flush";
-static char *dtnfsclient_load_str = "load";
-static char *dtnfsclient_get_str = "get";
-
-/*
- * Name strings.
- */
-static char *dtnfsclient_done_str = "done";
-static char *dtnfsclient_hit_str = "hit";
-static char *dtnfsclient_miss_str = "miss";
-static char *dtnfsclient_start_str = "start";
-
-static dtrace_pops_t dtnfsclient_pops = {
- dtnfsclient_provide,
- NULL,
- dtnfsclient_enable,
- dtnfsclient_disable,
- NULL,
- NULL,
- dtnfsclient_getargdesc,
- NULL,
- NULL,
- dtnfsclient_destroy
-};
-
-static dtrace_provider_id_t dtnfsclient_id;
-
-/*
- * Most probes are generated from the above RPC table, but for access and
- * attribute caches, we have specific IDs we recognize and handle specially
- * in various spots.
- */
-extern uint32_t nfsclient_accesscache_flush_done_id;
-extern uint32_t nfsclient_accesscache_get_hit_id;
-extern uint32_t nfsclient_accesscache_get_miss_id;
-extern uint32_t nfsclient_accesscache_load_done_id;
-
-extern uint32_t nfsclient_attrcache_flush_done_id;
-extern uint32_t nfsclient_attrcache_get_hit_id;
-extern uint32_t nfsclient_attrcache_get_miss_id;
-extern uint32_t nfsclient_attrcache_load_done_id;
-
-/*
- * When tracing on a procedure is enabled, the DTrace ID for an RPC event is
- * stored in one of these two NFS client-allocated arrays; 0 indicates that
- * the event is not being traced so probes should not be called.
- *
- * For simplicity, we allocate both v2 and v3 arrays as NFS_NPROCS, and the
- * v2 array is simply sparse.
- */
-extern uint32_t nfsclient_nfs2_start_probes[NFS_NPROCS];
-extern uint32_t nfsclient_nfs2_done_probes[NFS_NPROCS];
-
-extern uint32_t nfsclient_nfs3_start_probes[NFS_NPROCS];
-extern uint32_t nfsclient_nfs3_done_probes[NFS_NPROCS];
-
-/*
- * Look up a DTrace probe ID to see if it's associated with a "done" event --
- * if so, we will return a fourth argument type of "int".
- */
-static int
-dtnfs23_isdoneprobe(dtrace_id_t id)
-{
- int i;
-
- for (i = 0; i < NFS_NPROCS; i++) {
- if (dtnfsclient_rpcs[i].nr_v3_id_done == id ||
- dtnfsclient_rpcs[i].nr_v2_id_done == id)
- return (1);
- }
- return (0);
-}
-
-static void
-dtnfsclient_getargdesc(void *arg, dtrace_id_t id, void *parg,
- dtrace_argdesc_t *desc)
-{
- const char *p = NULL;
-
- if (id == nfsclient_accesscache_flush_done_id ||
- id == nfsclient_attrcache_flush_done_id ||
- id == nfsclient_attrcache_get_miss_id) {
- switch (desc->dtargd_ndx) {
- case 0:
- p = "struct vnode *";
- break;
- default:
- desc->dtargd_ndx = DTRACE_ARGNONE;
- break;
- }
- } else if (id == nfsclient_accesscache_get_hit_id ||
- id == nfsclient_accesscache_get_miss_id) {
- switch (desc->dtargd_ndx) {
- case 0:
- p = "struct vnode *";
- break;
- case 1:
- p = "uid_t";
- break;
- case 2:
- p = "uint32_t";
- break;
- default:
- desc->dtargd_ndx = DTRACE_ARGNONE;
- break;
- }
- } else if (id == nfsclient_accesscache_load_done_id) {
- switch (desc->dtargd_ndx) {
- case 0:
- p = "struct vnode *";
- break;
- case 1:
- p = "uid_t";
- break;
- case 2:
- p = "uint32_t";
- break;
- case 3:
- p = "int";
- break;
- default:
- desc->dtargd_ndx = DTRACE_ARGNONE;
- break;
- }
- } else if (id == nfsclient_attrcache_get_hit_id) {
- switch (desc->dtargd_ndx) {
- case 0:
- p = "struct vnode *";
- break;
- case 1:
- p = "struct vattr *";
- break;
- default:
- desc->dtargd_ndx = DTRACE_ARGNONE;
- break;
- }
- } else if (id == nfsclient_attrcache_load_done_id) {
- switch (desc->dtargd_ndx) {
- case 0:
- p = "struct vnode *";
- break;
- case 1:
- p = "struct vattr *";
- break;
- case 2:
- p = "int";
- break;
- default:
- desc->dtargd_ndx = DTRACE_ARGNONE;
- break;
- }
- } else {
- switch (desc->dtargd_ndx) {
- case 0:
- p = "struct vnode *";
- break;
- case 1:
- p = "struct mbuf *";
- break;
- case 2:
- p = "struct ucred *";
- break;
- case 3:
- p = "int";
- break;
- case 4:
- if (dtnfs23_isdoneprobe(id)) {
- p = "int";
- break;
- }
- /* FALLTHROUGH */
- default:
- desc->dtargd_ndx = DTRACE_ARGNONE;
- break;
- }
- }
- if (p != NULL)
- strlcpy(desc->dtargd_native, p, sizeof(desc->dtargd_native));
-}
-
-static void
-dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc)
-{
- int i;
-
- if (desc != NULL)
- return;
-
- /*
- * Register access cache probes.
- */
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_flush_str, dtnfsclient_done_str) == 0) {
- nfsclient_accesscache_flush_done_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_get_str, dtnfsclient_hit_str) == 0) {
- nfsclient_accesscache_get_hit_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_get_str, dtnfsclient_miss_str) == 0) {
- nfsclient_accesscache_get_miss_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_load_str, dtnfsclient_done_str) == 0) {
- nfsclient_accesscache_load_done_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_accesscache_str,
- dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL);
- }
-
- /*
- * Register attribute cache probes.
- */
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_flush_str, dtnfsclient_done_str) == 0) {
- nfsclient_attrcache_flush_done_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_get_str, dtnfsclient_hit_str) == 0) {
- nfsclient_attrcache_get_hit_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_get_str, dtnfsclient_miss_str) == 0) {
- nfsclient_attrcache_get_miss_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_load_str, dtnfsclient_done_str) == 0) {
- nfsclient_attrcache_load_done_id = dtrace_probe_create(
- dtnfsclient_id, dtnfsclient_attrcache_str,
- dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL);
- }
-
- /*
- * Register NFSv2 RPC procedures; note sparseness check for each slot
- * in the NFSv3 procnum-indexed array.
- */
- for (i = 0; i < NFS_NPROCS; i++) {
- if (dtnfsclient_rpcs[i].nr_v2_name != NULL &&
- dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str,
- dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) ==
- 0) {
- dtnfsclient_rpcs[i].nr_v2_id_start =
- dtrace_probe_create(dtnfsclient_id,
- dtnfsclient_nfs2_str,
- dtnfsclient_rpcs[i].nr_v2_name,
- dtnfsclient_start_str, 0,
- &nfsclient_nfs2_start_probes[i]);
- }
- if (dtnfsclient_rpcs[i].nr_v2_name != NULL &&
- dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str,
- dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_done_str) ==
- 0) {
- dtnfsclient_rpcs[i].nr_v2_id_done =
- dtrace_probe_create(dtnfsclient_id,
- dtnfsclient_nfs2_str,
- dtnfsclient_rpcs[i].nr_v2_name,
- dtnfsclient_done_str, 0,
- &nfsclient_nfs2_done_probes[i]);
- }
- }
-
- /*
- * Register NFSv3 RPC procedures.
- */
- for (i = 0; i < NFS_NPROCS; i++) {
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str,
- dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) ==
- 0) {
- dtnfsclient_rpcs[i].nr_v3_id_start =
- dtrace_probe_create(dtnfsclient_id,
- dtnfsclient_nfs3_str,
- dtnfsclient_rpcs[i].nr_v3_name,
- dtnfsclient_start_str, 0,
- &nfsclient_nfs3_start_probes[i]);
- }
- if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str,
- dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_done_str) ==
- 0) {
- dtnfsclient_rpcs[i].nr_v3_id_done =
- dtrace_probe_create(dtnfsclient_id,
- dtnfsclient_nfs3_str,
- dtnfsclient_rpcs[i].nr_v3_name,
- dtnfsclient_done_str, 0,
- &nfsclient_nfs3_done_probes[i]);
- }
- }
-}
-
-static void
-dtnfsclient_destroy(void *arg, dtrace_id_t id, void *parg)
-{
-}
-
-static void
-dtnfsclient_enable(void *arg, dtrace_id_t id, void *parg)
-{
- uint32_t *p = parg;
- void *f = dtrace_probe;
-
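- /*
-  * Cache probes are enabled by hooking the matching function pointer;
-  * RPC probes are enabled by storing the probe ID in the
-  * per-procedure array passed in as parg.
-  */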
- if (id == nfsclient_accesscache_flush_done_id)
- dtrace_nfsclient_accesscache_flush_done_probe = f;
- else if (id == nfsclient_accesscache_get_hit_id)
- dtrace_nfsclient_accesscache_get_hit_probe = f;
- else if (id == nfsclient_accesscache_get_miss_id)
- dtrace_nfsclient_accesscache_get_miss_probe = f;
- else if (id == nfsclient_accesscache_load_done_id)
- dtrace_nfsclient_accesscache_load_done_probe = f;
- else if (id == nfsclient_attrcache_flush_done_id)
- dtrace_nfsclient_attrcache_flush_done_probe = f;
- else if (id == nfsclient_attrcache_get_hit_id)
- dtrace_nfsclient_attrcache_get_hit_probe = f;
- else if (id == nfsclient_attrcache_get_miss_id)
- dtrace_nfsclient_attrcache_get_miss_probe = f;
- else if (id == nfsclient_attrcache_load_done_id)
- dtrace_nfsclient_attrcache_load_done_probe = f;
- else
- *p = id;
-}
-
-static void
-dtnfsclient_disable(void *arg, dtrace_id_t id, void *parg)
-{
- uint32_t *p = parg;
-
- if (id == nfsclient_accesscache_flush_done_id)
- dtrace_nfsclient_accesscache_flush_done_probe = NULL;
- else if (id == nfsclient_accesscache_get_hit_id)
- dtrace_nfsclient_accesscache_get_hit_probe = NULL;
- else if (id == nfsclient_accesscache_get_miss_id)
- dtrace_nfsclient_accesscache_get_miss_probe = NULL;
- else if (id == nfsclient_accesscache_load_done_id)
- dtrace_nfsclient_accesscache_load_done_probe = NULL;
- else if (id == nfsclient_attrcache_flush_done_id)
- dtrace_nfsclient_attrcache_flush_done_probe = NULL;
- else if (id == nfsclient_attrcache_get_hit_id)
- dtrace_nfsclient_attrcache_get_hit_probe = NULL;
- else if (id == nfsclient_attrcache_get_miss_id)
- dtrace_nfsclient_attrcache_get_miss_probe = NULL;
- else if (id == nfsclient_attrcache_load_done_id)
- dtrace_nfsclient_attrcache_load_done_probe = NULL;
- else
- *p = 0;
-}
-
-static void
-dtnfsclient_load(void *dummy)
-{
-
- if (dtrace_register("nfsclient", &dtnfsclient_attr,
- DTRACE_PRIV_USER, NULL, &dtnfsclient_pops, NULL,
- &dtnfsclient_id) != 0)
- return;
-
- dtrace_nfsclient_nfs23_start_probe =
- (dtrace_nfsclient_nfs23_start_probe_func_t)dtrace_probe;
- dtrace_nfsclient_nfs23_done_probe =
- (dtrace_nfsclient_nfs23_done_probe_func_t)dtrace_probe;
-}
-
-
-static int
-dtnfsclient_unload()
-{
-
- dtrace_nfsclient_nfs23_start_probe = NULL;
- dtrace_nfsclient_nfs23_done_probe = NULL;
-
- return (dtrace_unregister(dtnfsclient_id));
-}
-
-static int
-dtnfsclient_modevent(module_t mod __unused, int type, void *data __unused)
-{
- int error = 0;
-
- switch (type) {
- case MOD_LOAD:
- break;
-
- case MOD_UNLOAD:
- break;
-
- case MOD_SHUTDOWN:
- break;
-
- default:
- error = EOPNOTSUPP;
- break;
- }
-
- return (error);
-}
-
-SYSINIT(dtnfsclient_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
- dtnfsclient_load, NULL);
-SYSUNINIT(dtnfsclient_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
- dtnfsclient_unload, NULL);
-
-DEV_MODULE(dtnfsclient, dtnfsclient_modevent, NULL);
-MODULE_VERSION(dtnfsclient, 1);
-MODULE_DEPEND(dtnfsclient, dtrace, 1, 1, 1);
-MODULE_DEPEND(dtnfsclient, opensolaris, 1, 1, 1);
-MODULE_DEPEND(dtnfsclient, oldnfs, 1, 1, 1);
diff --git a/sys/nfsclient/nfs_krpc.c b/sys/nfsclient/nfs_krpc.c
deleted file mode 100644
index f88e47f4f4e8..000000000000
--- a/sys/nfsclient/nfs_krpc.c
+++ /dev/null
@@ -1,887 +0,0 @@
-/*-
- * Copyright (c) 1989, 1991, 1993, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Socket operations for use by nfs
- */
-
-#include "opt_inet6.h"
-#include "opt_kgssapi.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/mount.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/signalvar.h>
-#include <sys/syscallsubr.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <sys/vnode.h>
-
-#include <rpc/rpc.h>
-
-#include <nfs/nfsproto.h>
-#include <nfsclient/nfs.h>
-#include <nfs/xdr_subs.h>
-#include <nfsclient/nfsm_subs.h>
-#include <nfsclient/nfsmount.h>
-#include <nfsclient/nfsnode.h>
-
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
-
-dtrace_nfsclient_nfs23_start_probe_func_t
- dtrace_nfsclient_nfs23_start_probe;
-
-dtrace_nfsclient_nfs23_done_probe_func_t
- dtrace_nfsclient_nfs23_done_probe;
-
-/*
- * Registered probes by RPC type.
- */
-uint32_t nfsclient_nfs2_start_probes[NFS_NPROCS];
-uint32_t nfsclient_nfs2_done_probes[NFS_NPROCS];
-
-uint32_t nfsclient_nfs3_start_probes[NFS_NPROCS];
-uint32_t nfsclient_nfs3_done_probes[NFS_NPROCS];
-#endif
-
-static int nfs_bufpackets = 4;
-static int nfs_reconnects;
-static int nfs3_jukebox_delay = 10;
-static int nfs_skip_wcc_data_onerr = 1;
-static int fake_wchan;
-
-SYSCTL_DECL(_vfs_oldnfs);
-
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
- "Buffer reservation size 2 < x < 64");
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
- "Number of times the nfs client has had to reconnect");
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW,
- &nfs3_jukebox_delay, 0,
- "Number of seconds to delay a retry after receiving EJUKEBOX");
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW,
- &nfs_skip_wcc_data_onerr, 0,
- "Disable weak cache consistency checking when server returns an error");
-
-static void nfs_down(struct nfsmount *, struct thread *, const char *,
- int, int);
-static void nfs_up(struct nfsmount *, struct thread *, const char *,
- int, int);
-static int nfs_msg(struct thread *, const char *, const char *, int);
-
-extern int nfsv2_procid[];
-
-struct nfs_cached_auth {
- int ca_refs; /* refcount, including 1 from the cache */
- uid_t ca_uid; /* uid that corresponds to this auth */
- AUTH *ca_auth; /* RPC auth handle */
-};
-
-/*
- * RTT estimator
- */
-
-static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = {
- NFS_DEFAULT_TIMER, /* NULL */
- NFS_GETATTR_TIMER, /* GETATTR */
- NFS_DEFAULT_TIMER, /* SETATTR */
- NFS_LOOKUP_TIMER, /* LOOKUP */
- NFS_GETATTR_TIMER, /* ACCESS */
- NFS_READ_TIMER, /* READLINK */
- NFS_READ_TIMER, /* READ */
- NFS_WRITE_TIMER, /* WRITE */
- NFS_DEFAULT_TIMER, /* CREATE */
- NFS_DEFAULT_TIMER, /* MKDIR */
- NFS_DEFAULT_TIMER, /* SYMLINK */
- NFS_DEFAULT_TIMER, /* MKNOD */
- NFS_DEFAULT_TIMER, /* REMOVE */
- NFS_DEFAULT_TIMER, /* RMDIR */
- NFS_DEFAULT_TIMER, /* RENAME */
- NFS_DEFAULT_TIMER, /* LINK */
- NFS_READ_TIMER, /* READDIR */
- NFS_READ_TIMER, /* READDIRPLUS */
- NFS_DEFAULT_TIMER, /* FSSTAT */
- NFS_DEFAULT_TIMER, /* FSINFO */
- NFS_DEFAULT_TIMER, /* PATHCONF */
- NFS_DEFAULT_TIMER, /* COMMIT */
- NFS_DEFAULT_TIMER, /* NOOP */
-};
-
-/*
- * Choose the correct RTT timer for this NFS procedure.
- */
-static inline enum nfs_rto_timer_t
-nfs_rto_timer(u_int32_t procnum)
-{
-
- return (nfs_proct[procnum]);
-}
-
-/*
- * Initialize the RTT estimator state for a new mount point.
- */
-static void
-nfs_init_rtt(struct nfsmount *nmp)
-{
- int i;
-
- for (i = 0; i < NFS_MAX_TIMER; i++) {
- nmp->nm_timers[i].rt_srtt = hz;
- nmp->nm_timers[i].rt_deviate = 0;
- nmp->nm_timers[i].rt_rtxcur = hz;
- }
-}
-
-/*
- * Initialize sockets and congestion for a new NFS connection.
- * We do not free the sockaddr on error.
- */
-int
-nfs_connect(struct nfsmount *nmp)
-{
- int rcvreserve, sndreserve;
- int pktscale;
- struct sockaddr *saddr;
- struct ucred *origcred;
- struct thread *td = curthread;
- CLIENT *client;
- struct netconfig *nconf;
- rpcvers_t vers;
- int one = 1, retries;
- struct timeval timo;
-
- /*
- * We need to establish the socket using the credentials of
- * the mountpoint. Some parts of this process (such as
- * sobind() and soconnect()) will use the current thread's
- * credential instead of the socket credential. To work
- * around this, temporarily change the current thread's
- * credential to that of the mountpoint.
- *
- * XXX: It would be better to explicitly pass the correct
- * credential to sobind() and soconnect().
- */
- origcred = td->td_ucred;
- td->td_ucred = nmp->nm_mountp->mnt_cred;
- saddr = nmp->nm_nam;
-
- vers = NFS_VER2;
- if (nmp->nm_flag & NFSMNT_NFSV3)
- vers = NFS_VER3;
- else if (nmp->nm_flag & NFSMNT_NFSV4)
- vers = NFS_VER4;
- if (saddr->sa_family == AF_INET)
- if (nmp->nm_sotype == SOCK_DGRAM)
- nconf = getnetconfigent("udp");
- else
- nconf = getnetconfigent("tcp");
- else
- if (nmp->nm_sotype == SOCK_DGRAM)
- nconf = getnetconfigent("udp6");
- else
- nconf = getnetconfigent("tcp6");
-
- /*
- * Get buffer reservation size from sysctl, but impose reasonable
- * limits.
- */
- pktscale = nfs_bufpackets;
- if (pktscale < 2)
- pktscale = 2;
- if (pktscale > 64)
- pktscale = 64;
- mtx_lock(&nmp->nm_mtx);
- if (nmp->nm_sotype == SOCK_DGRAM) {
- sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
- rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
- NFS_MAXPKTHDR) * pktscale;
- } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
- sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
- rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
- NFS_MAXPKTHDR) * pktscale;
- } else {
- if (nmp->nm_sotype != SOCK_STREAM)
- panic("nfscon sotype");
- sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
- sizeof (u_int32_t)) * pktscale;
- rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
- sizeof (u_int32_t)) * pktscale;
- }
- mtx_unlock(&nmp->nm_mtx);
-
- client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers,
- sndreserve, rcvreserve);
- CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
- if (nmp->nm_flag & NFSMNT_INT)
- CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
- if (nmp->nm_flag & NFSMNT_RESVPORT)
- CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
- if ((nmp->nm_flag & NFSMNT_SOFT) != 0) {
- if (nmp->nm_sotype == SOCK_DGRAM)
- /*
- * For UDP, the large timeout for a reconnect will
- * be set to "nm_retry * nm_timeo / 2", so we only
- * want to do 2 reconnect timeout retries.
- */
- retries = 2;
- else
- retries = nmp->nm_retry;
- } else
- retries = INT_MAX;
- CLNT_CONTROL(client, CLSET_RETRIES, &retries);
-
- /*
- * For UDP, there are 2 timeouts:
- * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
- * that does a retransmit of an RPC request using the same socket
- * and xid. This is what you normally want to do, since NFS
- * servers depend on "same xid" for their Duplicate Request Cache.
- * - timeout specified in CLNT_CALL_MBUF(), which specifies when
- * retransmits on the same socket should fail and a fresh socket
- * created. Each of these timeouts counts as one CLSET_RETRIES,
- * as set above.
- * Set the initial retransmit timeout for UDP. This timeout doesn't
- * exist for TCP and the following call just fails, which is ok.
- */
- timo.tv_sec = nmp->nm_timeo / NFS_HZ;
- timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
- CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
-
- mtx_lock(&nmp->nm_mtx);
- if (nmp->nm_client) {
- /*
- * Someone else already connected.
- */
- CLNT_RELEASE(client);
- } else
- nmp->nm_client = client;
-
- /*
- * Protocols that do not require connections may be optionally left
- * unconnected for servers that reply from a port other than NFS_PORT.
- */
- if (!(nmp->nm_flag & NFSMNT_NOCONN)) {
- mtx_unlock(&nmp->nm_mtx);
- CLNT_CONTROL(client, CLSET_CONNECT, &one);
- } else
- mtx_unlock(&nmp->nm_mtx);
-
- /* Restore current thread's credentials. */
- td->td_ucred = origcred;
-
- mtx_lock(&nmp->nm_mtx);
- /* Initialize other non-zero congestion variables. */
- nfs_init_rtt(nmp);
- mtx_unlock(&nmp->nm_mtx);
- return (0);
-}
-
-/*
- * NFS disconnect. Clean up and unlink.
- */
-void
-nfs_disconnect(struct nfsmount *nmp)
-{
- CLIENT *client;
-
- mtx_lock(&nmp->nm_mtx);
- if (nmp->nm_client) {
- client = nmp->nm_client;
- nmp->nm_client = NULL;
- mtx_unlock(&nmp->nm_mtx);
- rpc_gss_secpurge_call(client);
- CLNT_CLOSE(client);
- CLNT_RELEASE(client);
- } else
- mtx_unlock(&nmp->nm_mtx);
-}
-
-void
-nfs_safedisconnect(struct nfsmount *nmp)
-{
-
- nfs_disconnect(nmp);
-}
-
-static AUTH *
-nfs_getauth(struct nfsmount *nmp, struct ucred *cred)
-{
- rpc_gss_service_t svc;
- AUTH *auth;
-
- switch (nmp->nm_secflavor) {
- case RPCSEC_GSS_KRB5:
- case RPCSEC_GSS_KRB5I:
- case RPCSEC_GSS_KRB5P:
- if (!nmp->nm_mech_oid)
- if (!rpc_gss_mech_to_oid_call("kerberosv5",
- &nmp->nm_mech_oid))
- return (NULL);
- if (nmp->nm_secflavor == RPCSEC_GSS_KRB5)
- svc = rpc_gss_svc_none;
- else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I)
- svc = rpc_gss_svc_integrity;
- else
- svc = rpc_gss_svc_privacy;
- auth = rpc_gss_secfind_call(nmp->nm_client, cred,
- nmp->nm_principal, nmp->nm_mech_oid, svc);
- if (auth)
- return (auth);
- /* fallthrough */
- case AUTH_SYS:
- default:
- return (authunix_create(cred));
-
- }
-}
-
-/*
- * Callback from the RPC code to generate up/down notifications.
- */
-
-struct nfs_feedback_arg {
- struct nfsmount *nf_mount;
- int nf_lastmsg; /* last tprintf */
- int nf_tprintfmsg;
- struct thread *nf_td;
-};
-
-static void
-nfs_feedback(int type, int proc, void *arg)
-{
- struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
- struct nfsmount *nmp = nf->nf_mount;
- time_t now;
-
- switch (type) {
- case FEEDBACK_REXMIT2:
- case FEEDBACK_RECONNECT:
- now = time_uptime;
- if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
- nfs_down(nmp, nf->nf_td,
- "not responding", 0, NFSSTA_TIMEO);
- nf->nf_tprintfmsg = TRUE;
- nf->nf_lastmsg = now;
- }
- break;
-
- case FEEDBACK_OK:
- nfs_up(nf->nf_mount, nf->nf_td,
- "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
- break;
- }
-}
-
-/*
- * nfs_request - goes something like this
- * - fill in request struct
- * - links it into list
- * - calls nfs_send() for first transmit
- * - calls nfs_receive() to get reply
- * - break down rpc header and return with nfs reply pointed to
- * by mrep or error
- * nb: always frees up mreq mbuf list
- */
-int
-nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum,
- struct thread *td, struct ucred *cred, struct mbuf **mrp,
- struct mbuf **mdp, caddr_t *dposp)
-{
- struct mbuf *mrep;
- u_int32_t *tl;
- struct nfsmount *nmp;
- struct mbuf *md;
- time_t waituntil;
- caddr_t dpos;
- int error = 0, timeo;
- AUTH *auth = NULL;
- enum nfs_rto_timer_t timer;
- struct nfs_feedback_arg nf;
- struct rpc_callextra ext;
- enum clnt_stat stat;
- struct timeval timo;
-
- /* Reject requests while attempting a forced unmount. */
- if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
- m_freem(mreq);
- return (ESTALE);
- }
- nmp = VFSTONFS(vp->v_mount);
- bzero(&nf, sizeof(struct nfs_feedback_arg));
- nf.nf_mount = nmp;
- nf.nf_td = td;
- nf.nf_lastmsg = time_uptime -
- ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
-
- /*
- * XXX if not already connected call nfs_connect now. Longer
- * term, change nfs_mount to call nfs_connect unconditionally
- * and let clnt_reconnect_create handle reconnects.
- */
- if (!nmp->nm_client)
- nfs_connect(nmp);
-
- auth = nfs_getauth(nmp, cred);
- if (!auth) {
- m_freem(mreq);
- return (EACCES);
- }
- bzero(&ext, sizeof(ext));
- ext.rc_auth = auth;
-
- ext.rc_feedback = nfs_feedback;
- ext.rc_feedback_arg = &nf;
-
- /*
- * Use a conservative timeout for RPCs other than getattr,
- * lookup, read or write. The justification for doing "other"
- * this way is that these RPCs happen so infrequently that
- * the timer estimate would probably be stale. Also, since many of
- * these RPCs are non-idempotent, a conservative timeout is
- * desired.
- */
- timer = nfs_rto_timer(procnum);
- if (timer != NFS_DEFAULT_TIMER)
- ext.rc_timers = &nmp->nm_timers[timer - 1];
- else
- ext.rc_timers = NULL;
-
-#ifdef KDTRACE_HOOKS
- if (dtrace_nfsclient_nfs23_start_probe != NULL) {
- uint32_t probe_id;
- int probe_procnum;
-
- if (nmp->nm_flag & NFSMNT_NFSV3) {
- probe_id = nfsclient_nfs3_start_probes[procnum];
- probe_procnum = procnum;
- } else {
- probe_id = nfsclient_nfs2_start_probes[procnum];
- probe_procnum = nfsv2_procid[procnum];
- }
- if (probe_id != 0)
- (dtrace_nfsclient_nfs23_start_probe)(probe_id, vp,
- mreq, cred, probe_procnum);
- }
-#endif
-
- nfsstats.rpcrequests++;
-tryagain:
- /*
- * This timeout specifies when a new socket should be created,
- * along with new xid values. For UDP, this should be done
- * infrequently, since retransmits of RPC requests should normally
- * use the same xid.
- */
- if (nmp->nm_sotype == SOCK_DGRAM) {
- if ((nmp->nm_flag & NFSMNT_SOFT) != 0) {
- /*
- * CLSET_RETRIES is set to 2, so this should be half
- * of the total timeout required.
- */
- timeo = nmp->nm_retry * nmp->nm_timeo / 2;
- if (timeo < 1)
- timeo = 1;
- timo.tv_sec = timeo / NFS_HZ;
- timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ;
- } else {
- /* For UDP hard mounts, use a large value. */
- timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
- timo.tv_usec = 0;
- }
- } else {
- timo.tv_sec = nmp->nm_timeo / NFS_HZ;
- timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
- }
- mrep = NULL;
- stat = CLNT_CALL_MBUF(nmp->nm_client, &ext,
- (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum],
- mreq, &mrep, timo);
-
- /*
- * If there was a successful reply and a tprintf msg,
- * tprintf a response.
- */
- if (stat == RPC_SUCCESS)
- error = 0;
- else if (stat == RPC_TIMEDOUT) {
- nfsstats.rpctimeouts++;
- error = ETIMEDOUT;
- } else if (stat == RPC_VERSMISMATCH) {
- nfsstats.rpcinvalid++;
- error = EOPNOTSUPP;
- } else if (stat == RPC_PROGVERSMISMATCH) {
- nfsstats.rpcinvalid++;
- error = EPROTONOSUPPORT;
- } else if (stat == RPC_INTR) {
- error = EINTR;
- } else {
- nfsstats.rpcinvalid++;
- error = EACCES;
- }
- if (error)
- goto nfsmout;
-
- KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
-
- /*
- * Search for any mbufs that are not a multiple of 4 bytes long
- * or with m_data not longword aligned.
- * These could cause pointer alignment problems, so copy them to
- * well aligned mbufs.
- */
- error = nfs_realign(&mrep, M_NOWAIT);
- if (error == ENOMEM) {
- m_freem(mrep);
- AUTH_DESTROY(auth);
- nfsstats.rpcinvalid++;
- return (error);
- }
-
- md = mrep;
- dpos = mtod(mrep, caddr_t);
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
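- /* The first XDR word of the reply body is the NFS status code. */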
- if (*tl != 0) {
- error = fxdr_unsigned(int, *tl);
- if ((nmp->nm_flag & NFSMNT_NFSV3) &&
- error == NFSERR_TRYLATER) {
- m_freem(mrep);
- error = 0;
- waituntil = time_second + nfs3_jukebox_delay;
- while (time_second < waituntil)
- (void)tsleep(&fake_wchan, PSOCK, "nqnfstry",
- hz);
- goto tryagain;
- }
- /*
- * Make sure NFSERR_RETERR isn't bogusly set by a server
- * such as amd. (No actual NFS error has bit 31 set.)
- */
- error &= ~NFSERR_RETERR;
-
- /*
- * If the File Handle was stale, invalidate the lookup
- * cache, just in case.
- */
- if (error == ESTALE)
- nfs_purgecache(vp);
- /*
- * Skip wcc data on non-ENOENT NFS errors for now.
- * NetApp filers return corrupt postop attrs in the
- * wcc data for NFS err EROFS. Not sure if they could
- * return corrupt postop attrs for other errors.
- * Blocking ENOENT post-op attributes breaks negative
- * name caching, so always allow it through.
- */
- if ((nmp->nm_flag & NFSMNT_NFSV3) &&
- (!nfs_skip_wcc_data_onerr || error == ENOENT)) {
- *mrp = mrep;
- *mdp = md;
- *dposp = dpos;
- error |= NFSERR_RETERR;
- } else
- m_freem(mrep);
- goto nfsmout;
- }
-
-#ifdef KDTRACE_HOOKS
- if (dtrace_nfsclient_nfs23_done_probe != NULL) {
- uint32_t probe_id;
- int probe_procnum;
-
- if (nmp->nm_flag & NFSMNT_NFSV3) {
- probe_id = nfsclient_nfs3_done_probes[procnum];
- probe_procnum = procnum;
- } else {
- probe_id = nfsclient_nfs2_done_probes[procnum];
- probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
- procnum : nfsv2_procid[procnum];
- }
- if (probe_id != 0)
- (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp,
- mreq, cred, probe_procnum, 0);
- }
-#endif
- m_freem(mreq);
- *mrp = mrep;
- *mdp = md;
- *dposp = dpos;
- AUTH_DESTROY(auth);
- return (0);
-
-nfsmout:
-#ifdef KDTRACE_HOOKS
- if (dtrace_nfsclient_nfs23_done_probe != NULL) {
- uint32_t probe_id;
- int probe_procnum;
-
- if (nmp->nm_flag & NFSMNT_NFSV3) {
- probe_id = nfsclient_nfs3_done_probes[procnum];
- probe_procnum = procnum;
- } else {
- probe_id = nfsclient_nfs2_done_probes[procnum];
- probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
- procnum : nfsv2_procid[procnum];
- }
- if (probe_id != 0)
- (dtrace_nfsclient_nfs23_done_probe)(probe_id, vp,
- mreq, cred, probe_procnum, error);
- }
-#endif
- m_freem(mreq);
- if (auth)
- AUTH_DESTROY(auth);
- return (error);
-}
-
-/*
- * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
- * wait for all requests to complete. This is used by forced unmounts
- * to terminate any outstanding RPCs.
- */
-int
-nfs_nmcancelreqs(struct nfsmount *nmp)
-{
-
- if (nmp->nm_client)
- CLNT_CLOSE(nmp->nm_client);
- return (0);
-}
-
-/*
- * Any signal that can interrupt an NFS operation in an intr mount
- * should be added to this set. SIGSTOP and SIGKILL cannot be masked.
- */
-int nfs_sig_set[] = {
- SIGINT,
- SIGTERM,
- SIGHUP,
- SIGKILL,
- SIGQUIT
-};
-
-/*
- * Check to see if one of the signals in our subset is pending on
- * the process (in an intr mount).
- */
-static int
-nfs_sig_pending(sigset_t set)
-{
- int i;
-
- for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++)
- if (SIGISMEMBER(set, nfs_sig_set[i]))
- return (1);
- return (0);
-}
-
-/*
- * The set/restore sigmask functions are used to (temporarily) overwrite
- * the thread td_sigmask during an RPC call (for example). These are also
- * used in other places in the NFS client that might tsleep().
- */
-void
-nfs_set_sigmask(struct thread *td, sigset_t *oldset)
-{
- sigset_t newset;
- int i;
- struct proc *p;
-
- SIGFILLSET(newset);
- if (td == NULL)
- td = curthread; /* XXX */
- p = td->td_proc;
- /* Remove the NFS set of signals from newset. */
- PROC_LOCK(p);
- mtx_lock(&p->p_sigacts->ps_mtx);
- for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) {
- /*
- * But make sure we leave the ones already masked
- * by the process, i.e. remove the signal from the
- * temporary signalmask only if it wasn't already
- * in p_sigmask.
- */
- if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) &&
- !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i]))
- SIGDELSET(newset, nfs_sig_set[i]);
- }
- mtx_unlock(&p->p_sigacts->ps_mtx);
- kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
- SIGPROCMASK_PROC_LOCKED);
- PROC_UNLOCK(p);
-}
-
-void
-nfs_restore_sigmask(struct thread *td, sigset_t *set)
-{
- if (td == NULL)
- td = curthread; /* XXX */
- kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
-}
-
-/*
- * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
- * old one after msleep() returns.
- */
-int
-nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority,
- char *wmesg, int timo)
-{
- sigset_t oldset;
- int error;
- struct proc *p;
-
- if ((priority & PCATCH) == 0)
- return msleep(ident, mtx, priority, wmesg, timo);
- if (td == NULL)
- td = curthread; /* XXX */
- nfs_set_sigmask(td, &oldset);
- error = msleep(ident, mtx, priority, wmesg, timo);
- nfs_restore_sigmask(td, &oldset);
- p = td->td_proc;
- return (error);
-}
-
-/*
- * Test for a termination condition pending on the process.
- * This is used for NFSMNT_INT mounts.
- */
-int
-nfs_sigintr(struct nfsmount *nmp, struct thread *td)
-{
- struct proc *p;
- sigset_t tmpset;
-
- /* Terminate all requests while attempting a forced unmount. */
- if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
- return (EIO);
- if (!(nmp->nm_flag & NFSMNT_INT))
- return (0);
- if (td == NULL)
- return (0);
- p = td->td_proc;
- PROC_LOCK(p);
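- /* Compute the set of pending signals that are neither blocked nor ignored. */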
- tmpset = p->p_siglist;
- SIGSETOR(tmpset, td->td_siglist);
- SIGSETNAND(tmpset, td->td_sigmask);
- mtx_lock(&p->p_sigacts->ps_mtx);
- SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
- mtx_unlock(&p->p_sigacts->ps_mtx);
- if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
- && nfs_sig_pending(tmpset)) {
- PROC_UNLOCK(p);
- return (EINTR);
- }
- PROC_UNLOCK(p);
- return (0);
-}
-
-static int
-nfs_msg(struct thread *td, const char *server, const char *msg, int error)
-{
- struct proc *p;
-
- p = td ? td->td_proc : NULL;
- if (error)
- tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server,
- msg, error);
- else
- tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
- return (0);
-}
-
-static void
-nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
- int error, int flags)
-{
- if (nmp == NULL)
- return;
- mtx_lock(&nmp->nm_mtx);
- if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
- nmp->nm_state |= NFSSTA_TIMEO;
- mtx_unlock(&nmp->nm_mtx);
- vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
- VQ_NOTRESP, 0);
- } else
- mtx_unlock(&nmp->nm_mtx);
- mtx_lock(&nmp->nm_mtx);
- if ((flags & NFSSTA_LOCKTIMEO) &&
- !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
- nmp->nm_state |= NFSSTA_LOCKTIMEO;
- mtx_unlock(&nmp->nm_mtx);
- vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
- VQ_NOTRESPLOCK, 0);
- } else
- mtx_unlock(&nmp->nm_mtx);
- nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
-}
-
-static void
-nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
- int flags, int tprintfmsg)
-{
- if (nmp == NULL)
- return;
- if (tprintfmsg)
- nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
-
- mtx_lock(&nmp->nm_mtx);
- if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
- nmp->nm_state &= ~NFSSTA_TIMEO;
- mtx_unlock(&nmp->nm_mtx);
- vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
- VQ_NOTRESP, 1);
- } else
- mtx_unlock(&nmp->nm_mtx);
-
- mtx_lock(&nmp->nm_mtx);
- if ((flags & NFSSTA_LOCKTIMEO) &&
- (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
- nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
- mtx_unlock(&nmp->nm_mtx);
- vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
- VQ_NOTRESPLOCK, 1);
- } else
- mtx_unlock(&nmp->nm_mtx);
-}
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
deleted file mode 100644
index 2fb2c88151ac..000000000000
--- a/sys/nfsclient/nfs_nfsiod.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*-
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sysproto.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/file.h>
-#include <sys/filedesc.h>
-#include <sys/vnode.h>
-#include <sys/malloc.h>
-#include <sys/mount.h>
-#include <sys/proc.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/domain.h>
-#include <sys/protosw.h>
-#include <sys/namei.h>
-#include <sys/unistd.h>
-#include <sys/kthread.h>
-#include <sys/fcntl.h>
-#include <sys/lockf.h>
-#include <sys/mutex.h>
-#include <sys/taskqueue.h>
-
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-
-#include <nfs/xdr_subs.h>
-#include <nfs/nfsproto.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsm_subs.h>
-#include <nfsclient/nfsmount.h>
-#include <nfsclient/nfsnode.h>
-#include <nfs/nfs_lock.h>
-
-static MALLOC_DEFINE(M_NFSSVC, "nfsclient_srvsock", "Nfs server structure");
-
-static void nfssvc_iod(void *);
-
-static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
-
-SYSCTL_DECL(_vfs_oldnfs);
-
-/* Maximum number of seconds an nfsiod kthread will sleep before exiting */
-static unsigned int nfs_iodmaxidle = 120;
-SYSCTL_UINT(_vfs_oldnfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0,
- "Max number of seconds an nfsiod kthread will sleep before exiting");
-
-/* Maximum number of nfsiod kthreads */
-unsigned int nfs_iodmax = 20;
-
-/* Minimum number of nfsiod kthreads to keep as spares */
-static unsigned int nfs_iodmin = 0;
-
-static int nfs_nfsiodnew_sync(void);
-
-static int
-sysctl_iodmin(SYSCTL_HANDLER_ARGS)
-{
- int error, i;
- int newmin;
-
- newmin = nfs_iodmin;
- error = sysctl_handle_int(oidp, &newmin, 0, req);
- if (error || (req->newptr == NULL))
- return (error);
- mtx_lock(&nfs_iod_mtx);
- if (newmin > nfs_iodmax) {
- error = EINVAL;
- goto out;
- }
- nfs_iodmin = newmin;
- if (nfs_numasync >= nfs_iodmin)
- goto out;
- /*
- * If the current number of nfsiod is lower
- * than the new minimum, create some more.
- */
- for (i = nfs_iodmin - nfs_numasync; i > 0; i--)
- nfs_nfsiodnew_sync();
-out:
- mtx_unlock(&nfs_iod_mtx);
- return (0);
-}
-SYSCTL_PROC(_vfs_oldnfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0,
- sizeof (nfs_iodmin), sysctl_iodmin, "IU",
- "Min number of nfsiod kthreads to keep as spares");
-
-
-static int
-sysctl_iodmax(SYSCTL_HANDLER_ARGS)
-{
- int error, i;
- int iod, newmax;
-
- newmax = nfs_iodmax;
- error = sysctl_handle_int(oidp, &newmax, 0, req);
- if (error || (req->newptr == NULL))
- return (error);
- if (newmax > NFS_MAXASYNCDAEMON)
- return (EINVAL);
- mtx_lock(&nfs_iod_mtx);
- nfs_iodmax = newmax;
- if (nfs_numasync <= nfs_iodmax)
- goto out;
- /*
- * If there are some asleep nfsiods that should
- * exit, wakeup() them so that they check nfs_iodmax
- * and exit. Those who are active will exit as
- * soon as they finish I/O.
- */
- iod = nfs_numasync - 1;
- for (i = 0; i < nfs_numasync - nfs_iodmax; i++) {
- if (nfs_iodwant[iod] == NFSIOD_AVAILABLE)
- wakeup(&nfs_iodwant[iod]);
- iod--;
- }
-out:
- mtx_unlock(&nfs_iod_mtx);
- return (0);
-}
-SYSCTL_PROC(_vfs_oldnfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0,
- sizeof (nfs_iodmax), sysctl_iodmax, "IU",
- "Max number of nfsiod kthreads");
-
-static int
-nfs_nfsiodnew_sync(void)
-{
- int error, i;
-
- mtx_assert(&nfs_iod_mtx, MA_OWNED);
- for (i = 0; i < nfs_iodmax; i++) {
- if (nfs_asyncdaemon[i] == 0) {
- nfs_asyncdaemon[i] = 1;
- break;
- }
- }
- if (i == nfs_iodmax)
- return (0);
- mtx_unlock(&nfs_iod_mtx);
- error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL,
- RFHIGHPID, 0, "nfsiod %d", i);
- mtx_lock(&nfs_iod_mtx);
- if (error == 0) {
- nfs_numasync++;
- nfs_iodwant[i] = NFSIOD_AVAILABLE;
- } else
- nfs_asyncdaemon[i] = 0;
- return (error);
-}
-
-void
-nfs_nfsiodnew_tq(__unused void *arg, int pending)
-{
-
- mtx_lock(&nfs_iod_mtx);
- while (pending > 0) {
- pending--;
- nfs_nfsiodnew_sync();
- }
- mtx_unlock(&nfs_iod_mtx);
-}
-
-void
-nfs_nfsiodnew(void)
-{
-
- mtx_assert(&nfs_iod_mtx, MA_OWNED);
- taskqueue_enqueue(taskqueue_thread, &nfs_nfsiodnew_task);
-}
-
-static void
-nfsiod_setup(void *dummy)
-{
- int error;
-
- TUNABLE_INT_FETCH("vfs.oldnfs.iodmin", &nfs_iodmin);
- mtx_lock(&nfs_iod_mtx);
- /* Silently limit the starting number of nfsiods */
- if (nfs_iodmin > NFS_MAXASYNCDAEMON)
- nfs_iodmin = NFS_MAXASYNCDAEMON;
-
- while (nfs_numasync < nfs_iodmin) {
- error = nfs_nfsiodnew_sync();
- if (error == -1)
- panic("nfsiod_setup: nfs_nfsiodnew failed");
- }
- mtx_unlock(&nfs_iod_mtx);
-}
-SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
-
-static int nfs_defect = 0;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0,
- "Allow nfsiods to migrate serving different mounts");
-
-/*
- * Asynchronous I/O daemons for client nfs.
- * They do read-ahead and write-behind operations on the block I/O cache.
- * Returns if we hit the timeout defined by the iodmaxidle sysctl.
- */
-static void
-nfssvc_iod(void *instance)
-{
- struct buf *bp;
- struct nfsmount *nmp;
- int myiod, timo;
- int error = 0;
-
- mtx_lock(&nfs_iod_mtx);
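- /* Recover this daemon's index from the slot pointer passed to kproc_create(). */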
- myiod = (int *)instance - nfs_asyncdaemon;
- /*
- * Main loop
- */
- for (;;) {
- while (((nmp = nfs_iodmount[myiod]) == NULL)
- || !TAILQ_FIRST(&nmp->nm_bufq)) {
- if (myiod >= nfs_iodmax)
- goto finish;
- if (nmp)
- nmp->nm_bufqiods--;
- if (nfs_iodwant[myiod] == NFSIOD_NOT_AVAILABLE)
- nfs_iodwant[myiod] = NFSIOD_AVAILABLE;
- nfs_iodmount[myiod] = NULL;
- /*
- * Always keep at least nfs_iodmin kthreads.
- */
- timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
- error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, PWAIT | PCATCH,
- "-", timo);
- if (error) {
- nmp = nfs_iodmount[myiod];
- /*
- * Rechecking the nm_bufq closes a rare race where the
- * nfsiod is woken up at the exact time the idle timeout
- * fires.
- */
- if (nmp && TAILQ_FIRST(&nmp->nm_bufq))
- error = 0;
- break;
- }
- }
- if (error)
- break;
- while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
- int giant_locked = 0;
-
- /* Take one off the front of the list */
- TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
- nmp->nm_bufqlen--;
- if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
- nmp->nm_bufqwant = 0;
- wakeup(&nmp->nm_bufq);
- }
- mtx_unlock(&nfs_iod_mtx);
- if (NFS_ISV4(bp->b_vp)) {
- giant_locked = 1;
- mtx_lock(&Giant);
- }
- if (bp->b_flags & B_DIRECT) {
- KASSERT((bp->b_iocmd == BIO_WRITE), ("nfssvc_iod: BIO_WRITE not set"));
- (void)nfs_doio_directwrite(bp);
- } else {
- if (bp->b_iocmd == BIO_READ)
- (void) nfs_doio(bp->b_vp, bp, bp->b_rcred, NULL);
- else
- (void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL);
- }
- if (giant_locked)
- mtx_unlock(&Giant);
- mtx_lock(&nfs_iod_mtx);
- /*
- * Make sure the nmp hasn't been dismounted as soon as
- * nfs_doio() completes for the last buffer.
- */
- nmp = nfs_iodmount[myiod];
- if (nmp == NULL)
- break;
-
- /*
- * If there is more than one iod on this mount, then defect
- * so that the iods can be shared out fairly between the mounts.
- */
- if (nfs_defect && nmp->nm_bufqiods > 1) {
- NFS_DPF(ASYNCIO,
- ("nfssvc_iod: iod %d defecting from mount %p\n",
- myiod, nmp));
- nfs_iodmount[myiod] = NULL;
- nmp->nm_bufqiods--;
- break;
- }
- }
- }
-finish:
- nfs_asyncdaemon[myiod] = 0;
- if (nmp)
- nmp->nm_bufqiods--;
- nfs_iodwant[myiod] = NFSIOD_NOT_AVAILABLE;
- nfs_iodmount[myiod] = NULL;
- /* Someone may be waiting for the last nfsiod to terminate. */
- if (--nfs_numasync == 0)
- wakeup(&nfs_numasync);
- mtx_unlock(&nfs_iod_mtx);
- if ((error == 0) || (error == EWOULDBLOCK))
- kproc_exit(0);
- /* Abnormal termination */
- kproc_exit(1);
-}
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
deleted file mode 100644
index cee4343156b3..000000000000
--- a/sys/nfsclient/nfs_node.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*-
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_node.c 8.6 (Berkeley) 5/22/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/fcntl.h>
-#include <sys/fnv_hash.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/mount.h>
-#include <sys/namei.h>
-#include <sys/proc.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-#include <sys/taskqueue.h>
-#include <sys/vnode.h>
-
-#include <vm/uma.h>
-
-#include <nfs/nfsproto.h>
-#include <nfs/nfs_lock.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsnode.h>
-#include <nfsclient/nfsmount.h>
-
-static uma_zone_t nfsnode_zone;
-
-static void nfs_freesillyrename(void *arg, __unused int pending);
-
-#define TRUE 1
-#define FALSE 0
-
-void
-nfs_nhinit(void)
-{
-
- nfsnode_zone = uma_zcreate("NFSNODE", sizeof(struct nfsnode), NULL,
- NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-}
-
-void
-nfs_nhuninit(void)
-{
- uma_zdestroy(nfsnode_zone);
-}
-
-struct nfs_vncmp {
- int fhsize;
- void *fh;
-};
-
-static int
-nfs_vncmpf(struct vnode *vp, void *arg)
-{
- struct nfs_vncmp *a;
- struct nfsnode *np;
-
- a = arg;
- np = VTONFS(vp);
- return (bcmp(a->fh, np->n_fhp, a->fhsize));
-}
-
-/*
- * Look up a vnode/nfsnode by file handle.
- * Callers must check for mount points!!
- * In all cases, a pointer to an
- * nfsnode structure is returned.
- */
-int
-nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp, int flags)
-{
- struct thread *td = curthread; /* XXX */
- struct nfsnode *np;
- struct vnode *vp;
- struct vnode *nvp;
- int error;
- u_int hash;
- struct nfsmount *nmp;
- struct nfs_vncmp ncmp;
-
- nmp = VFSTONFS(mntp);
- *npp = NULL;
-
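- /* Hash the file handle for the vfs_hash lookup below. */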
- hash = fnv_32_buf(fhp->fh_bytes, fhsize, FNV1_32_INIT);
- ncmp.fhsize = fhsize;
- ncmp.fh = fhp;
-
- error = vfs_hash_get(mntp, hash, flags,
- td, &nvp, nfs_vncmpf, &ncmp);
- if (error)
- return (error);
- if (nvp != NULL) {
- *npp = VTONFS(nvp);
- return (0);
- }
- np = uma_zalloc(nfsnode_zone, M_WAITOK | M_ZERO);
-
- error = getnewvnode("nfs", mntp, &nfs_vnodeops, &nvp);
- if (error) {
- uma_zfree(nfsnode_zone, np);
- return (error);
- }
- vp = nvp;
- vp->v_bufobj.bo_ops = &buf_ops_nfs;
- vp->v_data = np;
- np->n_vnode = vp;
- /*
- * Initialize the mutex even if the vnode is going to be a loser.
- * This simplifies the logic in reclaim, which can then unconditionally
- * destroy the mutex (in the case of the loser, or if hash_insert happened
- * to return an error no special casing is needed).
- */
- mtx_init(&np->n_mtx, "NFSnode lock", NULL, MTX_DEF);
- /*
- * NFS supports recursive and shared locking.
- */
- lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
- VN_LOCK_AREC(vp);
- VN_LOCK_ASHARE(vp);
- if (fhsize > NFS_SMALLFH) {
- np->n_fhp = malloc(fhsize, M_NFSBIGFH, M_WAITOK);
- } else
- np->n_fhp = &np->n_fh;
- bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
- np->n_fhsize = fhsize;
- error = insmntque(vp, mntp);
- if (error != 0) {
- *npp = NULL;
- if (np->n_fhsize > NFS_SMALLFH) {
- free((caddr_t)np->n_fhp, M_NFSBIGFH);
- }
- mtx_destroy(&np->n_mtx);
- uma_zfree(nfsnode_zone, np);
- return (error);
- }
- error = vfs_hash_insert(vp, hash, flags,
- td, &nvp, nfs_vncmpf, &ncmp);
- if (error)
- return (error);
- if (nvp != NULL) {
- *npp = VTONFS(nvp);
- /* vfs_hash_insert() vput()'s the losing vnode */
- return (0);
- }
- *npp = np;
-
- return (0);
-}
-
-/*
- * Do the vrele(sp->s_dvp) as a separate task in order to avoid a
- * deadlock because of a LOR when vrele() locks the directory vnode.
- */
-static void
-nfs_freesillyrename(void *arg, __unused int pending)
-{
- struct sillyrename *sp;
-
- sp = arg;
- vrele(sp->s_dvp);
- free(sp, M_NFSREQ);
-}
-
-int
-nfs_inactive(struct vop_inactive_args *ap)
-{
- struct nfsnode *np;
- struct sillyrename *sp;
- struct thread *td = curthread; /* XXX */
-
- np = VTONFS(ap->a_vp);
- mtx_lock(&np->n_mtx);
- if (ap->a_vp->v_type != VDIR) {
- sp = np->n_sillyrename;
- np->n_sillyrename = NULL;
- } else
- sp = NULL;
- if (sp) {
- mtx_unlock(&np->n_mtx);
- (void)nfs_vinvalbuf(ap->a_vp, 0, td, 1);
- /*
- * Remove the silly file that was rename'd earlier
- */
- (sp->s_removeit)(sp);
- crfree(sp->s_cred);
- TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp);
- taskqueue_enqueue(taskqueue_thread, &sp->s_task);
- mtx_lock(&np->n_mtx);
- }
- np->n_flag &= NMODIFIED;
- mtx_unlock(&np->n_mtx);
- return (0);
-}
-
-/*
- * Reclaim an nfsnode so that it can be used for other purposes.
- */
-int
-nfs_reclaim(struct vop_reclaim_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct nfsdmap *dp, *dp2;
-
- /*
- * If the NLM is running, give it a chance to abort pending
- * locks.
- */
- if (nfs_reclaim_p)
- nfs_reclaim_p(ap);
-
- /*
- * Destroy the vm object and flush associated pages.
- */
- vnode_destroy_vobject(vp);
-
- vfs_hash_remove(vp);
-
- /*
- * Free up any directory cookie structures and
- * large file handle structures that might be associated with
- * this nfs node.
- */
- if (vp->v_type == VDIR) {
- dp = LIST_FIRST(&np->n_cookies);
- while (dp) {
- dp2 = dp;
- dp = LIST_NEXT(dp, ndm_list);
- free((caddr_t)dp2, M_NFSDIROFF);
- }
- }
- if (np->n_writecred != NULL)
- crfree(np->n_writecred);
- if (np->n_fhsize > NFS_SMALLFH) {
- free((caddr_t)np->n_fhp, M_NFSBIGFH);
- }
- mtx_destroy(&np->n_mtx);
- uma_zfree(nfsnode_zone, vp->v_data);
- vp->v_data = NULL;
- return (0);
-}
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
deleted file mode 100644
index d4fd5eb02386..000000000000
--- a/sys/nfsclient/nfs_subs.c
+++ /dev/null
@@ -1,1140 +0,0 @@
-/*-
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * These functions support the macros and help fiddle mbuf chains for
- * the nfs op functions. They do things like create the rpc header and
- * copy data between mbuf chains and uio lists.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/proc.h>
-#include <sys/mount.h>
-#include <sys/vnode.h>
-#include <sys/namei.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/malloc.h>
-#include <sys/rwlock.h>
-#include <sys/sysent.h>
-#include <sys/syscall.h>
-#include <sys/sysproto.h>
-#include <sys/taskqueue.h>
-
-#include <vm/vm.h>
-#include <vm/vm_object.h>
-#include <vm/vm_extern.h>
-#include <vm/uma.h>
-
-#include <nfs/nfsproto.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsnode.h>
-#include <nfs/nfs_kdtrace.h>
-#include <nfs/xdr_subs.h>
-#include <nfsclient/nfsm_subs.h>
-#include <nfsclient/nfsmount.h>
-
-#include <netinet/in.h>
-
-/*
- * Note that stdarg.h and the ANSI style va_start macro are used for both
- * ANSI and traditional C compilers.
- */
-#include <machine/stdarg.h>
-
-#ifdef KDTRACE_HOOKS
-dtrace_nfsclient_attrcache_flush_probe_func_t
- dtrace_nfsclient_attrcache_flush_done_probe;
-uint32_t nfsclient_attrcache_flush_done_id;
-
-dtrace_nfsclient_attrcache_get_hit_probe_func_t
- dtrace_nfsclient_attrcache_get_hit_probe;
-uint32_t nfsclient_attrcache_get_hit_id;
-
-dtrace_nfsclient_attrcache_get_miss_probe_func_t
- dtrace_nfsclient_attrcache_get_miss_probe;
-uint32_t nfsclient_attrcache_get_miss_id;
-
-dtrace_nfsclient_attrcache_load_probe_func_t
- dtrace_nfsclient_attrcache_load_done_probe;
-uint32_t nfsclient_attrcache_load_done_id;
-#endif /* KDTRACE_HOOKS */
-
-/*
- * Data items converted to xdr at startup, since they are constant
- * This is kinda hokey, but may save a little time doing byte swaps
- */
-u_int32_t nfs_xdrneg1;
-u_int32_t nfs_true, nfs_false;
-
-/* And other global data */
-static u_int32_t nfs_xid = 0;
-static enum vtype nv2tov_type[8]= {
- VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON
-};
-
-int nfs_ticks;
-int nfs_pbuf_freecnt = -1; /* start out unlimited */
-
-struct nfs_bufq nfs_bufq;
-static struct mtx nfs_xid_mtx;
-struct task nfs_nfsiodnew_task;
-
-/*
- * and the reverse mapping from generic to Version 2 procedure numbers
- */
-int nfsv2_procid[NFS_NPROCS] = {
- NFSV2PROC_NULL,
- NFSV2PROC_GETATTR,
- NFSV2PROC_SETATTR,
- NFSV2PROC_LOOKUP,
- NFSV2PROC_NOOP,
- NFSV2PROC_READLINK,
- NFSV2PROC_READ,
- NFSV2PROC_WRITE,
- NFSV2PROC_CREATE,
- NFSV2PROC_MKDIR,
- NFSV2PROC_SYMLINK,
- NFSV2PROC_CREATE,
- NFSV2PROC_REMOVE,
- NFSV2PROC_RMDIR,
- NFSV2PROC_RENAME,
- NFSV2PROC_LINK,
- NFSV2PROC_READDIR,
- NFSV2PROC_NOOP,
- NFSV2PROC_STATFS,
- NFSV2PROC_NOOP,
- NFSV2PROC_NOOP,
- NFSV2PROC_NOOP,
- NFSV2PROC_NOOP,
-};
-
-LIST_HEAD(nfsnodehashhead, nfsnode);
-
-u_int32_t
-nfs_xid_gen(void)
-{
- uint32_t xid;
-
- mtx_lock(&nfs_xid_mtx);
-
- /* Get a pretty random xid to start with */
- if (!nfs_xid)
- nfs_xid = random();
- /*
- * Skip zero xid if it should ever happen.
- */
- if (++nfs_xid == 0)
- nfs_xid++;
- xid = nfs_xid;
- mtx_unlock(&nfs_xid_mtx);
- return xid;
-}
-
-/*
- * copies a uio scatter/gather list to an mbuf chain.
- * NOTE: can only handle iovcnt == 1
- */
-int
-nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos)
-{
- char *uiocp;
- struct mbuf *mp, *mp2;
- int xfer, left, mlen;
- int uiosiz, clflg, rem;
- char *cp;
-
- KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1"));
-
- if (siz > MLEN) /* or should it >= MCLBYTES ?? */
- clflg = 1;
- else
- clflg = 0;
- rem = nfsm_rndup(siz)-siz;
- mp = mp2 = *mq;
- while (siz > 0) {
- left = uiop->uio_iov->iov_len;
- uiocp = uiop->uio_iov->iov_base;
- if (left > siz)
- left = siz;
- uiosiz = left;
- while (left > 0) {
- mlen = M_TRAILINGSPACE(mp);
- if (mlen == 0) {
- if (clflg)
- mp = m_getcl(M_WAITOK, MT_DATA, 0);
- else
- mp = m_get(M_WAITOK, MT_DATA);
- mp2->m_next = mp;
- mp2 = mp;
- mlen = M_TRAILINGSPACE(mp);
- }
- xfer = (left > mlen) ? mlen : left;
-#ifdef notdef
- /* Not Yet.. */
- if (uiop->uio_iov->iov_op != NULL)
- (*(uiop->uio_iov->iov_op))
- (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
- else
-#endif
- if (uiop->uio_segflg == UIO_SYSSPACE)
- bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
- else
- copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
- mp->m_len += xfer;
- left -= xfer;
- uiocp += xfer;
- uiop->uio_offset += xfer;
- uiop->uio_resid -= xfer;
- }
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + uiosiz;
- uiop->uio_iov->iov_len -= uiosiz;
- siz -= uiosiz;
- }
- if (rem > 0) {
- if (rem > M_TRAILINGSPACE(mp)) {
- mp = m_get(M_WAITOK, MT_DATA);
- mp2->m_next = mp;
- }
- cp = mtod(mp, caddr_t)+mp->m_len;
- for (left = 0; left < rem; left++)
- *cp++ = '\0';
- mp->m_len += rem;
- *bpos = cp;
- } else
- *bpos = mtod(mp, caddr_t)+mp->m_len;
- *mq = mp;
- return (0);
-}
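The padding step at the end of nfsm_uiotombuf() is the usual XDR rule: opaque data is rounded up to the next 4-byte boundary and the pad bytes are zeroed. A minimal standalone sketch of that rounding, assuming the conventional (x + 3) & ~3 definition behind nfsm_rndup() (the helper name and buffer here are illustrative, not from the source):

#include <stdio.h>
#include <string.h>

/* Round a byte count up to the next multiple of 4, as XDR requires. */
static size_t
xdr_rndup(size_t siz)
{
	return ((siz + 3) & ~(size_t)3);
}

int
main(void)
{
	const char *name = "hello";		/* 5 payload bytes */
	size_t siz = strlen(name);
	size_t padded = xdr_rndup(siz);		/* 8 bytes on the wire */
	char buf[64];

	memcpy(buf, name, siz);
	memset(buf + siz, 0, padded - siz);	/* zero the pad bytes */
	printf("payload %zu bytes, %zu on the wire\n", siz, padded);
	return (0);
}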
-
-/*
- * Copy a string into mbufs for the hard cases...
- */
-int
-nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz)
-{
- struct mbuf *m1 = NULL, *m2;
- long left, xfer, len, tlen;
- u_int32_t *tl;
- int putsize;
-
- putsize = 1;
- m2 = *mb;
- left = M_TRAILINGSPACE(m2);
- if (left > 0) {
- tl = ((u_int32_t *)(*bpos));
- *tl++ = txdr_unsigned(siz);
- putsize = 0;
- left -= NFSX_UNSIGNED;
- m2->m_len += NFSX_UNSIGNED;
- if (left > 0) {
- bcopy(cp, (caddr_t) tl, left);
- siz -= left;
- cp += left;
- m2->m_len += left;
- left = 0;
- }
- }
- /* Loop around adding mbufs */
- while (siz > 0) {
- if (siz > MLEN) {
- m1 = m_getcl(M_WAITOK, MT_DATA, 0);
- m1->m_len = MCLBYTES;
- } else {
- m1 = m_get(M_WAITOK, MT_DATA);
- m1->m_len = MLEN;
- }
- m2->m_next = m1;
- m2 = m1;
- tl = mtod(m1, u_int32_t *);
- tlen = 0;
- if (putsize) {
- *tl++ = txdr_unsigned(siz);
- m1->m_len -= NFSX_UNSIGNED;
- tlen = NFSX_UNSIGNED;
- putsize = 0;
- }
- if (siz < m1->m_len) {
- len = nfsm_rndup(siz);
- xfer = siz;
- if (xfer < len)
- *(tl+(xfer>>2)) = 0;
- } else {
- xfer = len = m1->m_len;
- }
- bcopy(cp, (caddr_t) tl, xfer);
- m1->m_len = len+tlen;
- siz -= xfer;
- cp += xfer;
- }
- *mb = m1;
- *bpos = mtod(m1, caddr_t)+m1->m_len;
- return (0);
-}
-
-/*
- * Called once to initialize data structures...
- */
-int
-nfs_init(struct vfsconf *vfsp)
-{
- int i;
-
- nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- nfs_true = txdr_unsigned(TRUE);
- nfs_false = txdr_unsigned(FALSE);
- nfs_xdrneg1 = txdr_unsigned(-1);
- nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
- if (nfs_ticks < 1)
- nfs_ticks = 1;
- /* Ensure async daemons disabled */
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
- nfs_iodwant[i] = NFSIOD_NOT_AVAILABLE;
- nfs_iodmount[i] = NULL;
- }
- nfs_nhinit(); /* Init the nfsnode table */
-
- /*
- * Initialize reply list and start timer
- */
- mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF);
- mtx_init(&nfs_xid_mtx, "NFS xid lock", NULL, MTX_DEF);
- TASK_INIT(&nfs_nfsiodnew_task, 0, nfs_nfsiodnew_tq, NULL);
-
- nfs_pbuf_freecnt = nswbuf / 2 + 1;
-
- return (0);
-}
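The nfs_ticks assignment in nfs_init() converts NFS_TICKINTVL (an interval in milliseconds) into scheduler ticks, rounding to the nearest tick and flooring at one. A standalone sketch of that conversion; the function name and the sample hz values are illustrative:

#include <stdio.h>

/*
 * Convert an interval in milliseconds to scheduler ticks, rounding to
 * the nearest tick and never returning less than one tick.
 */
static int
ms_to_ticks(int hz, int interval_ms)
{
	int ticks = (hz * interval_ms + 500) / 1000;

	return (ticks < 1 ? 1 : ticks);
}

int
main(void)
{
	printf("%d\n", ms_to_ticks(1000, 5));	/* 5 ticks at hz = 1000 */
	printf("%d\n", ms_to_ticks(100, 5));	/* half a tick rounds to 1 */
	return (0);
}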
-
-int
-nfs_uninit(struct vfsconf *vfsp)
-{
- int i;
-
- /*
- * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup
- * any sleeping nfsiods so they check nfs_iodmax and exit.
- * Drain nfsiodnew task before we wait for them to finish.
- */
- mtx_lock(&nfs_iod_mtx);
- nfs_iodmax = 0;
- mtx_unlock(&nfs_iod_mtx);
- taskqueue_drain(taskqueue_thread, &nfs_nfsiodnew_task);
- mtx_lock(&nfs_iod_mtx);
- for (i = 0; i < nfs_numasync; i++)
- if (nfs_iodwant[i] == NFSIOD_AVAILABLE)
- wakeup(&nfs_iodwant[i]);
- /* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */
- while (nfs_numasync)
- msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
- mtx_unlock(&nfs_iod_mtx);
- nfs_nhuninit();
- uma_zdestroy(nfsmount_zone);
- return (0);
-}
-
-void
-nfs_dircookie_lock(struct nfsnode *np)
-{
- mtx_lock(&np->n_mtx);
- while (np->n_flag & NDIRCOOKIELK)
- (void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0);
- np->n_flag |= NDIRCOOKIELK;
- mtx_unlock(&np->n_mtx);
-}
-
-void
-nfs_dircookie_unlock(struct nfsnode *np)
-{
- mtx_lock(&np->n_mtx);
- np->n_flag &= ~NDIRCOOKIELK;
- wakeup(&np->n_flag);
- mtx_unlock(&np->n_mtx);
-}
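nfs_dircookie_lock()/nfs_dircookie_unlock() build a simple sleep lock out of one flag bit: take the node mutex, sleep while NDIRCOOKIELK is set, then set it; the unlock side clears the bit and wakes any waiters. A hedged userspace analogue of the same pattern, using a pthread mutex and condition variable in place of msleep()/wakeup() (all names here are illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* One flag bit used as a sleep lock, protected by a mutex. */
struct cookie_lock {
	pthread_mutex_t mtx;
	pthread_cond_t cv;
	bool locked;
};

static void
cookie_lock(struct cookie_lock *cl)
{
	pthread_mutex_lock(&cl->mtx);
	while (cl->locked)
		pthread_cond_wait(&cl->cv, &cl->mtx);
	cl->locked = true;
	pthread_mutex_unlock(&cl->mtx);
}

static void
cookie_unlock(struct cookie_lock *cl)
{
	pthread_mutex_lock(&cl->mtx);
	cl->locked = false;
	pthread_cond_broadcast(&cl->cv);
	pthread_mutex_unlock(&cl->mtx);
}

int
main(void)
{
	struct cookie_lock cl = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
	};

	cookie_lock(&cl);	/* serialize access to the cookie list */
	cookie_unlock(&cl);
	printf("done\n");
	return (0);
}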
-
-int
-nfs_upgrade_vnlock(struct vnode *vp)
-{
- int old_lock;
-
- ASSERT_VOP_LOCKED(vp, "nfs_upgrade_vnlock");
- old_lock = VOP_ISLOCKED(vp);
- if (old_lock != LK_EXCLUSIVE) {
- KASSERT(old_lock == LK_SHARED,
- ("nfs_upgrade_vnlock: wrong old_lock %d", old_lock));
- /* Upgrade to exclusive lock, this might block */
- vn_lock(vp, LK_UPGRADE | LK_RETRY);
- }
- return (old_lock);
-}
-
-void
-nfs_downgrade_vnlock(struct vnode *vp, int old_lock)
-{
- if (old_lock != LK_EXCLUSIVE) {
- KASSERT(old_lock == LK_SHARED, ("wrong old_lock %d", old_lock));
- /* Downgrade from exclusive lock. */
- vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
- }
-}
-
-void
-nfs_printf(const char *fmt, ...)
-{
- va_list ap;
-
- mtx_lock(&Giant);
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
- mtx_unlock(&Giant);
-}
-
-/*
- * Attribute cache routines.
- * nfs_loadattrcache() - loads or updates the cache contents from attributes
- * that are on the mbuf list
- * nfs_getattrcache() - returns valid attributes if found in cache, returns
- * error otherwise
- */
-
-/*
- * Load the attribute cache (that lives in the nfsnode entry) with
- * the values on the mbuf list and, iff vap is not NULL, copy the
- * attributes to *vaper.
- */
-int
-nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
- struct vattr *vaper, int dontshrink)
-{
- struct vnode *vp = *vpp;
- struct vattr *vap;
- struct nfs_fattr *fp;
- struct nfsnode *np = NULL;
- int32_t t1;
- caddr_t cp2;
- int rdev;
- struct mbuf *md;
- enum vtype vtyp;
- u_short vmode;
- struct timespec mtime, mtime_save;
- int v3 = NFS_ISV3(vp);
- int error = 0;
- u_quad_t nsize;
- int setnsize;
-
- md = *mdp;
- t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
- cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_WAITOK);
- if (cp2 == NULL) {
- error = EBADRPC;
- goto out;
- }
- fp = (struct nfs_fattr *)cp2;
- if (v3) {
- vtyp = nfsv3tov_type(fp->fa_type);
- vmode = fxdr_unsigned(u_short, fp->fa_mode);
- rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
- fxdr_unsigned(int, fp->fa3_rdev.specdata2));
- fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
- } else {
- vtyp = nfsv2tov_type(fp->fa_type);
- vmode = fxdr_unsigned(u_short, fp->fa_mode);
- /*
- * XXX
- *
- * The duplicate information returned in fa_type and fa_mode
- * is an ambiguity in the NFS version 2 protocol.
- *
- * VREG should be taken literally as a regular file. If a
-		 * server intends to return some type information differently
- * in the upper bits of the mode field (e.g. for sockets, or
- * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
- * leave the examination of the mode bits even in the VREG
- * case to avoid breakage for bogus servers, but we make sure
- * that there are actually type bits set in the upper part of
- * fa_mode (and failing that, trust the va_type field).
- *
- * NFSv3 cleared the issue, and requires fa_mode to not
-		 * contain any type information (while also introducing sockets
- * and FIFOs for fa_type).
- */
- if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
- vtyp = IFTOVT(vmode);
- rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
- fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
-
- /*
- * Really ugly NFSv2 kludge.
- */
- if (vtyp == VCHR && rdev == 0xffffffff)
- vtyp = VFIFO;
- }
-
- /*
- * If v_type == VNON it is a new node, so fill in the v_type,
- * n_mtime fields. Check to see if it represents a special
- * device, and if so, check for a possible alias. Once the
- * correct vnode has been obtained, fill in the rest of the
- * information.
- */
- np = VTONFS(vp);
- mtx_lock(&np->n_mtx);
- if (vp->v_type != vtyp) {
- vp->v_type = vtyp;
- if (vp->v_type == VFIFO)
- vp->v_op = &nfs_fifoops;
- np->n_mtime = mtime;
- }
- vap = &np->n_vattr;
- vap->va_type = vtyp;
- vap->va_mode = (vmode & 07777);
- vap->va_rdev = rdev;
- mtime_save = vap->va_mtime;
- vap->va_mtime = mtime;
- vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
- if (v3) {
- vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
- vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
- vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
- vap->va_size = fxdr_hyper(&fp->fa3_size);
- vap->va_blocksize = NFS_FABLKSIZE;
- vap->va_bytes = fxdr_hyper(&fp->fa3_used);
- vap->va_fileid = fxdr_unsigned(int32_t,
- fp->fa3_fileid.nfsuquad[1]);
- fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
- fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
- vap->va_flags = 0;
- vap->va_filerev = 0;
- } else {
- vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
- vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
- vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
- vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
- vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
- vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
- * NFS_FABLKSIZE;
- vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
- fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
- vap->va_flags = 0;
- vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
- fp->fa2_ctime.nfsv2_sec);
- vap->va_ctime.tv_nsec = 0;
- vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec);
- vap->va_filerev = 0;
- }
- np->n_attrstamp = time_second;
- setnsize = 0;
- nsize = 0;
- if (vap->va_size != np->n_size) {
- if (vap->va_type == VREG) {
- if (dontshrink && vap->va_size < np->n_size) {
- /*
- * We've been told not to shrink the file;
- * zero np->n_attrstamp to indicate that
- * the attributes are stale.
- */
- vap->va_size = np->n_size;
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- vnode_pager_setsize(vp, np->n_size);
- } else if (np->n_flag & NMODIFIED) {
- /*
- * We've modified the file: Use the larger
- * of our size, and the server's size.
- */
- if (vap->va_size < np->n_size) {
- vap->va_size = np->n_size;
- } else {
- np->n_size = vap->va_size;
- np->n_flag |= NSIZECHANGED;
- }
- vnode_pager_setsize(vp, np->n_size);
- } else if (vap->va_size < np->n_size) {
- /*
- * When shrinking the size, the call to
- * vnode_pager_setsize() cannot be done
- * with the mutex held, so delay it until
- * after the mtx_unlock call.
- */
- nsize = np->n_size = vap->va_size;
- np->n_flag |= NSIZECHANGED;
- setnsize = 1;
- } else {
- np->n_size = vap->va_size;
- np->n_flag |= NSIZECHANGED;
- vnode_pager_setsize(vp, np->n_size);
- }
- } else {
- np->n_size = vap->va_size;
- }
- }
- /*
- * The following checks are added to prevent a race between (say)
- * a READDIR+ and a WRITE.
- * READDIR+, WRITE requests sent out.
- * READDIR+ resp, WRITE resp received on client.
- * However, the WRITE resp was handled before the READDIR+ resp
- * causing the post op attrs from the write to be loaded first
- * and the attrs from the READDIR+ to be loaded later. If this
- * happens, we have stale attrs loaded into the attrcache.
-	 * We detect this by checking for the mtime moving back and
-	 * invalidate the attrcache when that happens.
- */
- if (timespeccmp(&mtime_save, &vap->va_mtime, >)) {
-		/* The mtime went backwards; invalidate the attrcache. */
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- }
- if (vaper != NULL) {
- bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
- if (np->n_flag & NCHG) {
- if (np->n_flag & NACC)
- vaper->va_atime = np->n_atim;
- if (np->n_flag & NUPD)
- vaper->va_mtime = np->n_mtim;
- }
- }
-
-#ifdef KDTRACE_HOOKS
- if (np->n_attrstamp != 0)
- KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, &np->n_vattr, 0);
-#endif
- mtx_unlock(&np->n_mtx);
- if (setnsize)
- vnode_pager_setsize(vp, nsize);
-out:
-#ifdef KDTRACE_HOOKS
- if (error)
- KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, NULL, error);
-#endif
- return (error);
-}
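The size handling in nfs_loadattrcache() boils down to a small policy: if the caller asked not to shrink and the server reports a smaller size, keep the local size (and invalidate the cached attributes); if the file was modified locally, keep the larger of the two sizes; otherwise adopt the server's size. A hypothetical pure-function distillation of that policy, with illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Given the locally cached size and the size the server just reported,
 * decide which value the client keeps.
 */
static uint64_t
reconcile_size(uint64_t local, uint64_t server, bool locally_modified,
    bool dontshrink)
{
	if (dontshrink && server < local)
		return (local);		/* caller also invalidates the attrs */
	if (locally_modified)
		return (server > local ? server : local);
	return (server);
}

int
main(void)
{
	/* Locally modified file: never trust a smaller server size. */
	printf("%ju\n", (uintmax_t)reconcile_size(8192, 4096, true, false));
	/* Clean file: simply adopt the server's size. */
	printf("%ju\n", (uintmax_t)reconcile_size(8192, 4096, false, false));
	return (0);
}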
-
-#ifdef NFS_ACDEBUG
-#include <sys/sysctl.h>
-SYSCTL_DECL(_vfs_oldnfs);
-static int nfs_acdebug;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0,
- "Toggle acdebug (attribute cache debug) flag");
-#endif
-
-/*
- * Check the time stamp
- * If the cache is valid, copy contents to *vap and return 0
- * otherwise return an error
- */
-int
-nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
-{
- struct nfsnode *np;
- struct vattr *vap;
- struct nfsmount *nmp;
- int timeo;
-
- np = VTONFS(vp);
- vap = &np->n_vattr;
- nmp = VFSTONFS(vp->v_mount);
-#ifdef NFS_ACDEBUG
- mtx_lock(&Giant); /* nfs_printf() */
-#endif
- mtx_lock(&np->n_mtx);
- /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
- timeo = (time_second - np->n_mtime.tv_sec) / 10;
-
-#ifdef NFS_ACDEBUG
- if (nfs_acdebug>1)
- nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
-#endif
-
- if (vap->va_type == VDIR) {
- if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin)
- timeo = nmp->nm_acdirmin;
- else if (timeo > nmp->nm_acdirmax)
- timeo = nmp->nm_acdirmax;
- } else {
- if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin)
- timeo = nmp->nm_acregmin;
- else if (timeo > nmp->nm_acregmax)
- timeo = nmp->nm_acregmax;
- }
-
-#ifdef NFS_ACDEBUG
- if (nfs_acdebug > 2)
- nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
- nmp->nm_acregmin, nmp->nm_acregmax,
- nmp->nm_acdirmin, nmp->nm_acdirmax);
-
- if (nfs_acdebug)
- nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
- (time_second - np->n_attrstamp), timeo);
-#endif
-
- if ((time_second - np->n_attrstamp) >= timeo) {
- nfsstats.attrcache_misses++;
- mtx_unlock(&np->n_mtx);
-#ifdef NFS_ACDEBUG
- mtx_unlock(&Giant); /* nfs_printf() */
-#endif
- KDTRACE_NFS_ATTRCACHE_GET_MISS(vp);
- return (ENOENT);
- }
- nfsstats.attrcache_hits++;
- if (vap->va_size != np->n_size) {
- if (vap->va_type == VREG) {
- if (np->n_flag & NMODIFIED) {
- if (vap->va_size < np->n_size)
- vap->va_size = np->n_size;
- else
- np->n_size = vap->va_size;
- } else {
- np->n_size = vap->va_size;
- }
- vnode_pager_setsize(vp, np->n_size);
- } else {
- np->n_size = vap->va_size;
- }
- }
- bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
- if (np->n_flag & NCHG) {
- if (np->n_flag & NACC)
- vaper->va_atime = np->n_atim;
- if (np->n_flag & NUPD)
- vaper->va_mtime = np->n_mtim;
- }
- mtx_unlock(&np->n_mtx);
-#ifdef NFS_ACDEBUG
- mtx_unlock(&Giant); /* nfs_printf() */
-#endif
- KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap);
- return (0);
-}
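The timeout computed in nfs_getattrcache() is adaptive: one tenth of the time since the file last changed, clamped to the mount's acregmin/acregmax (or acdirmin/acdirmax for directories) and pinned to the minimum while the file is locally modified. A small sketch of that clamp; the function name and the sample values are illustrative:

#include <stdio.h>
#include <time.h>

/* Adaptive attribute-cache timeout, clamped to [acmin, acmax]. */
static long
attrcache_timeout(time_t now, time_t mtime, int modified, long acmin,
    long acmax)
{
	long timeo = (long)(now - mtime) / 10;

	if (modified || timeo < acmin)
		timeo = acmin;
	else if (timeo > acmax)
		timeo = acmax;
	return (timeo);
}

int
main(void)
{
	time_t now = time(NULL);

	/* Untouched for ten minutes: cache attributes for 60 seconds. */
	printf("%ld\n", attrcache_timeout(now, now - 600, 0, 3, 60));
	/* Changed five seconds ago: fall back to the 3 second minimum. */
	printf("%ld\n", attrcache_timeout(now, now - 5, 0, 3, 60));
	return (0);
}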
-
-/*
- * Purge all cached information about an NFS vnode including name
- * cache entries, the attribute cache, and the access cache. This is
- * called when an NFS request for a node fails with a stale
- * filehandle.
- */
-void
-nfs_purgecache(struct vnode *vp)
-{
- struct nfsnode *np;
- int i;
-
- np = VTONFS(vp);
- cache_purge(vp);
- mtx_lock(&np->n_mtx);
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
- np->n_accesscache[i].stamp = 0;
- KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
- mtx_unlock(&np->n_mtx);
-}
-
-static nfsuint64 nfs_nullcookie = { { 0, 0 } };
-/*
- * This function finds the directory cookie that corresponds to the
- * logical byte offset given.
- */
-nfsuint64 *
-nfs_getcookie(struct nfsnode *np, off_t off, int add)
-{
- struct nfsdmap *dp, *dp2;
- int pos;
- nfsuint64 *retval = NULL;
-
- pos = (uoff_t)off / NFS_DIRBLKSIZ;
- if (pos == 0 || off < 0) {
- KASSERT(!add, ("nfs getcookie add at <= 0"));
- return (&nfs_nullcookie);
- }
- pos--;
- dp = LIST_FIRST(&np->n_cookies);
- if (!dp) {
- if (add) {
- dp = malloc(sizeof (struct nfsdmap),
- M_NFSDIROFF, M_WAITOK);
- dp->ndm_eocookie = 0;
- LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
- } else
- goto out;
- }
- while (pos >= NFSNUMCOOKIES) {
- pos -= NFSNUMCOOKIES;
- if (LIST_NEXT(dp, ndm_list)) {
- if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
- pos >= dp->ndm_eocookie)
- goto out;
- dp = LIST_NEXT(dp, ndm_list);
- } else if (add) {
- dp2 = malloc(sizeof (struct nfsdmap),
- M_NFSDIROFF, M_WAITOK);
- dp2->ndm_eocookie = 0;
- LIST_INSERT_AFTER(dp, dp2, ndm_list);
- dp = dp2;
- } else
- goto out;
- }
- if (pos >= dp->ndm_eocookie) {
- if (add)
- dp->ndm_eocookie = pos + 1;
- else
- goto out;
- }
- retval = &dp->ndm_cookies[pos];
-out:
- return (retval);
-}
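nfs_getcookie() maps a logical directory offset to a cookie slot: offset / NFS_DIRBLKSIZ gives the cookie index, index 0 is the implicit null cookie, and the remaining indices are spread over list elements holding NFSNUMCOOKIES cookies each. A sketch of just the index arithmetic, with assumed values for the two constants and offsets of at least one block:

#include <stdio.h>

#define DIRBLKSIZ	4096	/* assumed directory block size */
#define NUMCOOKIES	31	/* assumed cookies per list element */

/*
 * Which list element and which slot within it hold the cookie for a
 * given logical directory offset (offset 0 uses the null cookie).
 */
static void
cookie_slot(long off, int *elem, int *slot)
{
	int pos = (int)(off / DIRBLKSIZ) - 1;

	*elem = pos / NUMCOOKIES;
	*slot = pos % NUMCOOKIES;
}

int
main(void)
{
	int elem, slot;

	cookie_slot(40L * DIRBLKSIZ, &elem, &slot);
	printf("element %d, slot %d\n", elem, slot);	/* element 1, slot 8 */
	return (0);
}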
-
-/*
- * Invalidate cached directory information, except for the actual directory
- * blocks (which are invalidated separately).
- * Done mainly to avoid the use of stale offset cookies.
- */
-void
-nfs_invaldir(struct vnode *vp)
-{
- struct nfsnode *np = VTONFS(vp);
-
- KASSERT(vp->v_type == VDIR, ("nfs: invaldir not dir"));
- nfs_dircookie_lock(np);
- np->n_direofoffset = 0;
- np->n_cookieverf.nfsuquad[0] = 0;
- np->n_cookieverf.nfsuquad[1] = 0;
- if (LIST_FIRST(&np->n_cookies))
- LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0;
- nfs_dircookie_unlock(np);
-}
-
-/*
- * The write verifier has changed (probably due to a server reboot), so all
- * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
- * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
- * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
- * mount point.
- *
- * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
- * writes are not clusterable.
- */
-void
-nfs_clearcommit(struct mount *mp)
-{
- struct vnode *vp, *nvp;
- struct buf *bp, *nbp;
- struct bufobj *bo;
-
- MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
- bo = &vp->v_bufobj;
- vholdl(vp);
- VI_UNLOCK(vp);
- BO_LOCK(bo);
- TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
- if (!BUF_ISLOCKED(bp) &&
- (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
- == (B_DELWRI | B_NEEDCOMMIT))
- bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
- }
- BO_UNLOCK(bo);
- vdrop(vp);
- }
-}
-
-/*
- * Helper functions for former macros. Some of these should be
- * moved to their callers.
- */
-
-int
-nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f,
- struct mbuf **md, caddr_t *dpos)
-{
- struct nfsnode *ttnp;
- struct vnode *ttvp;
- nfsfh_t *ttfhp;
- u_int32_t *tl;
- int ttfhsize;
- int t1;
-
- if (v3) {
- tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
- if (tl == NULL)
- return EBADRPC;
- *f = fxdr_unsigned(int, *tl);
- } else
- *f = 1;
- if (*f) {
- t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos);
- if (t1 != 0)
- return t1;
- t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE);
- if (t1 != 0)
- return t1;
- *v = NFSTOV(ttnp);
- }
- if (v3) {
- tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
- if (tl == NULL)
- return EBADRPC;
- if (*f)
- *f = fxdr_unsigned(int, *tl);
- else if (fxdr_unsigned(int, *tl))
- nfsm_adv_xx(NFSX_V3FATTR, md, dpos);
- }
- if (*f) {
- ttvp = *v;
- t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 0);
- if (t1)
- return t1;
- *v = ttvp;
- }
- return 0;
-}
-
-int
-nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos)
-{
- u_int32_t *tl;
-
- if (v3) {
- tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
- if (tl == NULL)
- return EBADRPC;
- *s = fxdr_unsigned(int, *tl);
- if (*s <= 0 || *s > NFSX_V3FHMAX)
- return EBADRPC;
- } else
- *s = NFSX_V2FH;
- *f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos);
- if (*f == NULL)
- return EBADRPC;
- else
- return 0;
-}
-
-
-int
-nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
- caddr_t *dpos)
-{
- int t1;
-
- struct vnode *ttvp = *v;
- t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 0);
- if (t1 != 0)
- return t1;
- *v = ttvp;
- return 0;
-}
-
-int
-nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va,
- struct mbuf **md, caddr_t *dpos)
-{
- u_int32_t *tl;
- int t1;
-
- struct vnode *ttvp = *v;
- tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
- if (tl == NULL)
- return EBADRPC;
- *f = fxdr_unsigned(int, *tl);
- if (*f != 0) {
- t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 1);
- if (t1 != 0) {
- *f = 0;
- return t1;
- }
- *v = ttvp;
- }
- return 0;
-}
-
-int
-nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos)
-{
- u_int32_t *tl;
- int ttattrf, ttretf = 0;
- int t1;
-
- tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
- if (tl == NULL)
- return EBADRPC;
- if (*tl == nfs_true) {
- tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos);
- if (tl == NULL)
- return EBADRPC;
- mtx_lock(&(VTONFS(*v))->n_mtx);
- if (*f)
- ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) &&
- VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3)));
- mtx_unlock(&(VTONFS(*v))->n_mtx);
- }
- t1 = nfsm_postop_attr_xx(v, &ttattrf, NULL, md, dpos);
- if (t1)
- return t1;
- if (*f)
- *f = ttretf;
- else
- *f = ttattrf;
- return 0;
-}
-
-int
-nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos)
-{
- u_int32_t *tl;
- int t1;
-
- if (s > m)
- return ENAMETOOLONG;
- t1 = nfsm_rndup(s) + NFSX_UNSIGNED;
- if (t1 <= M_TRAILINGSPACE(*mb)) {
- tl = nfsm_build_xx(t1, mb, bpos);
- *tl++ = txdr_unsigned(s);
- *(tl + ((t1 >> 2) - 2)) = 0;
- bcopy(a, tl, s);
- } else {
- t1 = nfsm_strtmbuf(mb, bpos, a, s);
- if (t1 != 0)
- return t1;
- }
- return 0;
-}
-
-int
-nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos)
-{
- u_int32_t *tl;
- int t1;
- caddr_t cp;
-
- if (v3) {
- t1 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED;
- if (t1 < M_TRAILINGSPACE(*mb)) {
- tl = nfsm_build_xx(t1, mb, bpos);
- *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize);
- *(tl + ((t1 >> 2) - 2)) = 0;
- bcopy(VTONFS(v)->n_fhp, tl, VTONFS(v)->n_fhsize);
- } else {
- t1 = nfsm_strtmbuf(mb, bpos,
- (const char *)VTONFS(v)->n_fhp,
- VTONFS(v)->n_fhsize);
- if (t1 != 0)
- return t1;
- }
- } else {
- cp = nfsm_build_xx(NFSX_V2FH, mb, bpos);
- bcopy(VTONFS(v)->n_fhp, cp, NFSX_V2FH);
- }
- return 0;
-}
-
-void
-nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb,
- caddr_t *bpos)
-{
- u_int32_t *tl;
-
- if (va->va_mode != (mode_t)VNOVAL) {
- tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
- *tl++ = nfs_true;
- *tl = txdr_unsigned(va->va_mode);
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = nfs_false;
- }
- if (full && va->va_uid != (uid_t)VNOVAL) {
- tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
- *tl++ = nfs_true;
- *tl = txdr_unsigned(va->va_uid);
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = nfs_false;
- }
- if (full && va->va_gid != (gid_t)VNOVAL) {
- tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
- *tl++ = nfs_true;
- *tl = txdr_unsigned(va->va_gid);
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = nfs_false;
- }
- if (full && va->va_size != VNOVAL) {
- tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
- *tl++ = nfs_true;
- txdr_hyper(va->va_size, tl);
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = nfs_false;
- }
- if (va->va_atime.tv_sec != VNOVAL) {
- if ((va->va_vaflags & VA_UTIMES_NULL) == 0) {
- tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
- *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
- txdr_nfsv3time(&va->va_atime, tl);
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
- }
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
- }
- if (va->va_mtime.tv_sec != VNOVAL) {
- if ((va->va_vaflags & VA_UTIMES_NULL) == 0) {
- tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
- *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
- txdr_nfsv3time(&va->va_mtime, tl);
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
- }
- } else {
- tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
- *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
- }
-}
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
deleted file mode 100644
index 0c002d2e78b1..000000000000
--- a/sys/nfsclient/nfs_vfsops.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*-
- * Copyright (c) 1989, 1993, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-
-#include "opt_bootp.h"
-#include "opt_nfsroot.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/jail.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/mount.h>
-#include <sys/proc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/sockio.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <sys/vnode.h>
-#include <sys/signalvar.h>
-
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/uma.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-
-#include <rpc/rpc.h>
-
-#include <nfs/nfsproto.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsnode.h>
-#include <nfsclient/nfsmount.h>
-#include <nfs/xdr_subs.h>
-#include <nfsclient/nfsm_subs.h>
-#include <nfs/nfsdiskless.h>
-
-FEATURE(nfsclient, "NFS client");
-
-MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
-MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
-MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
-MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
-MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
-
-uma_zone_t nfsmount_zone;
-
-struct nfsstats nfsstats;
-
-SYSCTL_NODE(_vfs, OID_AUTO, oldnfs, CTLFLAG_RW, 0, "Old NFS filesystem");
-SYSCTL_STRUCT(_vfs_oldnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
- &nfsstats, nfsstats, "S,nfsstats");
-static int nfs_ip_paranoia = 1;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
- &nfs_ip_paranoia, 0,
- "Disallow accepting replies from IPs which differ from those sent");
-#ifdef NFS_DEBUG
-int nfs_debug;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
- "Toggle debug flag");
-#endif
-static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
-SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_INITIAL_DELAY,
- downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
- "Delay before printing \"nfs server not responding\" messages");
-/* how long between console messages "nfs server foo not responding" */
-static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
-SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_DELAY,
- downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
- "Delay between printing \"nfs server not responding\" messages");
-
-static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
- struct nfs_args *argp, const char *hostname);
-static int mountnfs(struct nfs_args *, struct mount *,
- struct sockaddr *, char *, struct vnode **,
- struct ucred *cred, int, int);
-static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
- struct sockaddr_storage *, int *, off_t *,
- struct timeval *);
-static vfs_mount_t nfs_mount;
-static vfs_cmount_t nfs_cmount;
-static vfs_unmount_t nfs_unmount;
-static vfs_root_t nfs_root;
-static vfs_statfs_t nfs_statfs;
-static vfs_sync_t nfs_sync;
-static vfs_sysctl_t nfs_sysctl;
-
-static int fake_wchan;
-
-/*
- * nfs vfs operations.
- */
-static struct vfsops nfs_vfsops = {
- .vfs_init = nfs_init,
- .vfs_mount = nfs_mount,
- .vfs_cmount = nfs_cmount,
- .vfs_root = nfs_root,
- .vfs_statfs = nfs_statfs,
- .vfs_sync = nfs_sync,
- .vfs_uninit = nfs_uninit,
- .vfs_unmount = nfs_unmount,
- .vfs_sysctl = nfs_sysctl,
-};
-VFS_SET(nfs_vfsops, oldnfs, VFCF_NETWORK | VFCF_SBDRY);
-
-/* So that loader and kldload(2) can find us, wherever we are.. */
-MODULE_VERSION(oldnfs, 1);
-MODULE_DEPEND(oldnfs, krpc, 1, 1, 1);
-#ifdef KGSSAPI
-MODULE_DEPEND(oldnfs, kgssapi, 1, 1, 1);
-#endif
-MODULE_DEPEND(oldnfs, nfs_common, 1, 1, 1);
-MODULE_DEPEND(oldnfs, nfslock, 1, 1, 1);
-
-static struct nfs_rpcops nfs_rpcops = {
- nfs_readrpc,
- nfs_writerpc,
- nfs_writebp,
- nfs_readlinkrpc,
- nfs_invaldir,
- nfs_commit,
-};
-
-/*
- * This structure is now defined in sys/nfs/nfs_diskless.c so that it
- * can be shared by both NFS clients. It is declared here so that it
- * will be defined for kernels built without NFS_ROOT, although it
- * isn't used in that case.
- */
-#ifndef NFS_ROOT
-struct nfs_diskless nfs_diskless = { { { 0 } } };
-struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
-int nfs_diskless_valid = 0;
-#endif
-
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
- &nfs_diskless_valid, 0,
- "Has the diskless struct been filled correctly");
-
-SYSCTL_STRING(_vfs_oldnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
- nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
-
-SYSCTL_OPAQUE(_vfs_oldnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
- &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
- "%Ssockaddr_in", "Diskless root nfs address");
-
-
-void nfsargs_ntoh(struct nfs_args *);
-static int nfs_mountdiskless(char *,
- struct sockaddr_in *, struct nfs_args *,
- struct thread *, struct vnode **, struct mount *);
-static void nfs_convert_diskless(void);
-static void nfs_convert_oargs(struct nfs_args *args,
- struct onfs_args *oargs);
-
-int
-nfs_iosize(struct nfsmount *nmp)
-{
- int iosize;
-
- /*
- * Calculate the size used for io buffers. Use the larger
- * of the two sizes to minimise nfs requests but make sure
- * that it is at least one VM page to avoid wasting buffer
- * space.
- */
- iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
- iosize = imax(iosize, PAGE_SIZE);
- return (iosize);
-}
-
-static void
-nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
-{
-
- args->version = NFS_ARGSVERSION;
- args->addr = oargs->addr;
- args->addrlen = oargs->addrlen;
- args->sotype = oargs->sotype;
- args->proto = oargs->proto;
- args->fh = oargs->fh;
- args->fhsize = oargs->fhsize;
- args->flags = oargs->flags;
- args->wsize = oargs->wsize;
- args->rsize = oargs->rsize;
- args->readdirsize = oargs->readdirsize;
- args->timeo = oargs->timeo;
- args->retrans = oargs->retrans;
- args->maxgrouplist = oargs->maxgrouplist;
- args->readahead = oargs->readahead;
- args->deadthresh = oargs->deadthresh;
- args->hostname = oargs->hostname;
-}
-
-static void
-nfs_convert_diskless(void)
-{
-
- bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
- sizeof(struct ifaliasreq));
- bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
- sizeof(struct sockaddr_in));
- nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
- if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
- nfsv3_diskless.root_fhsize = NFSX_V3FH;
- bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
- } else {
- nfsv3_diskless.root_fhsize = NFSX_V2FH;
- bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
- }
- bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
- sizeof(struct sockaddr_in));
- bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
- nfsv3_diskless.root_time = nfs_diskless.root_time;
- bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
- MAXHOSTNAMELEN);
- nfs_diskless_valid = 3;
-}
-
-/*
- * nfs statfs call
- */
-static int
-nfs_statfs(struct mount *mp, struct statfs *sbp)
-{
- struct vnode *vp;
- struct thread *td;
- struct nfs_statfs *sfp;
- caddr_t bpos, dpos;
- struct nfsmount *nmp = VFSTONFS(mp);
- int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
- struct mbuf *mreq, *mrep, *md, *mb;
- struct nfsnode *np;
- u_quad_t tquad;
-
- td = curthread;
-#ifndef nolint
- sfp = NULL;
-#endif
- error = vfs_busy(mp, MBF_NOWAIT);
- if (error)
- return (error);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
- if (error) {
- vfs_unbusy(mp);
- return (error);
- }
- vp = NFSTOV(np);
- mtx_lock(&nmp->nm_mtx);
- if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
- mtx_unlock(&nmp->nm_mtx);
- (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
- } else
- mtx_unlock(&nmp->nm_mtx);
- nfsstats.rpccnt[NFSPROC_FSSTAT]++;
- mreq = m_get2(NFSX_FH(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
- if (v3)
- nfsm_postop_attr(vp, retattr);
- if (error) {
- if (mrep != NULL)
- m_freem(mrep);
- goto nfsmout;
- }
- sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
- mtx_lock(&nmp->nm_mtx);
- sbp->f_iosize = nfs_iosize(nmp);
- mtx_unlock(&nmp->nm_mtx);
- if (v3) {
- sbp->f_bsize = NFS_FABLKSIZE;
- tquad = fxdr_hyper(&sfp->sf_tbytes);
- sbp->f_blocks = tquad / NFS_FABLKSIZE;
- tquad = fxdr_hyper(&sfp->sf_fbytes);
- sbp->f_bfree = tquad / NFS_FABLKSIZE;
- tquad = fxdr_hyper(&sfp->sf_abytes);
- sbp->f_bavail = tquad / NFS_FABLKSIZE;
- sbp->f_files = (fxdr_unsigned(int32_t,
- sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
- sbp->f_ffree = (fxdr_unsigned(int32_t,
- sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
- } else {
- sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
- sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
- sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
- sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
- sbp->f_files = 0;
- sbp->f_ffree = 0;
- }
- m_freem(mrep);
-nfsmout:
- vput(vp);
- vfs_unbusy(mp);
- return (error);
-}
-
-/*
- * nfs version 3 fsinfo rpc call
- */
-int
-nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
- struct thread *td)
-{
- struct nfsv3_fsinfo *fsp;
- u_int32_t pref, max;
- caddr_t bpos, dpos;
- int error = 0, retattr;
- struct mbuf *mreq, *mrep, *md, *mb;
- u_int64_t maxfsize;
-
- nfsstats.rpccnt[NFSPROC_FSINFO]++;
- mreq = m_get2(NFSX_FH(1), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, 1);
- nfsm_request(vp, NFSPROC_FSINFO, td, cred);
- nfsm_postop_attr(vp, retattr);
- if (!error) {
- fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
- pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
- mtx_lock(&nmp->nm_mtx);
- if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
- nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
- ~(NFS_FABLKSIZE - 1);
- max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
- if (max < nmp->nm_wsize && max > 0) {
- nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
- if (nmp->nm_wsize == 0)
- nmp->nm_wsize = max;
- }
- pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
- if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
- nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
- ~(NFS_FABLKSIZE - 1);
- max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
- if (max < nmp->nm_rsize && max > 0) {
- nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
- if (nmp->nm_rsize == 0)
- nmp->nm_rsize = max;
- }
- pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
- if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
- nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
- ~(NFS_DIRBLKSIZ - 1);
- if (max < nmp->nm_readdirsize && max > 0) {
- nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
- if (nmp->nm_readdirsize == 0)
- nmp->nm_readdirsize = max;
- }
- maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
- if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
- nmp->nm_maxfilesize = maxfsize;
- nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
- nmp->nm_state |= NFSSTA_GOTFSINFO;
- mtx_unlock(&nmp->nm_mtx);
- }
- m_freem(mrep);
-nfsmout:
- return (error);
-}
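nfs_fsinfo() rounds the server's preferred transfer size up to a multiple of NFS_FABLKSIZE and clips the hard maximum down to one, relying on the block size being a power of two. The same two roundings in isolation; the 512-byte block size is assumed here for illustration:

#include <stdio.h>

#define FABLKSIZE	512	/* power-of-two block size assumed */

/* Round a transfer size up to the next multiple of the block size. */
static unsigned int
roundup_blk(unsigned int sz)
{
	return ((sz + FABLKSIZE - 1) & ~(FABLKSIZE - 1));
}

/* Round a transfer size down to a multiple of the block size. */
static unsigned int
rounddown_blk(unsigned int sz)
{
	return (sz & ~(FABLKSIZE - 1));
}

int
main(void)
{
	/* A 70000 byte preference becomes 70144 up, 69632 down. */
	printf("%u %u\n", roundup_blk(70000), rounddown_blk(70000));
	return (0);
}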
-
-/*
- * Mount a remote root fs via nfs. This depends on the info in the
- * nfs_diskless structure that has been filled in properly by some primary
- * bootstrap.
- * It goes something like this:
- * - do enough of "ifconfig" by calling ifioctl() so that the system
- * can talk to the server
- * - If nfs_diskless.mygateway is filled in, use that address as
- * a default gateway.
- * - build the rootfs mount point and call mountnfs() to do the rest.
- *
- * It is assumed to be safe to read, modify, and write the nfsv3_diskless
- * structure, as well as other global NFS client variables here, as
- * nfs_mountroot() will be called once in the boot before any other NFS
- * client activity occurs.
- */
-int
-nfs_mountroot(struct mount *mp)
-{
- struct thread *td = curthread;
- struct nfsv3_diskless *nd = &nfsv3_diskless;
- struct socket *so;
- struct vnode *vp;
- struct ifreq ir;
- int error;
- u_long l;
- char buf[128];
- char *cp;
-
-
-#if defined(BOOTP_NFSROOT) && defined(BOOTP)
- bootpc_init(); /* use bootp to get nfs_diskless filled in */
-#elif defined(NFS_ROOT)
- nfs_setup_diskless();
-#endif
-
- if (nfs_diskless_valid == 0) {
- return (-1);
- }
- if (nfs_diskless_valid == 1)
- nfs_convert_diskless();
-
- /*
- * XXX splnet, so networks will receive...
- */
- splnet();
-
- /*
- * Do enough of ifconfig(8) so that the critical net interface can
- * talk to the server.
- */
- error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
- td->td_ucred, td);
- if (error)
- panic("nfs_mountroot: socreate(%04x): %d",
- nd->myif.ifra_addr.sa_family, error);
-
-#if 0 /* XXX Bad idea */
- /*
- * We might not have been told the right interface, so we pass
- * over the first ten interfaces of the same kind, until we get
- * one of them configured.
- */
-
- for (i = strlen(nd->myif.ifra_name) - 1;
- nd->myif.ifra_name[i] >= '0' &&
- nd->myif.ifra_name[i] <= '9';
- nd->myif.ifra_name[i] ++) {
- error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
- if(!error)
- break;
- }
-#endif
-
- error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
- if (error)
- panic("nfs_mountroot: SIOCAIFADDR: %d", error);
-
- if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
- ir.ifr_mtu = strtol(cp, NULL, 10);
- bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
- freeenv(cp);
- error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
- if (error)
- printf("nfs_mountroot: SIOCSIFMTU: %d", error);
- }
- soclose(so);
-
- /*
- * If the gateway field is filled in, set it as the default route.
- * Note that pxeboot will set a default route of 0 if the route
- * is not set by the DHCP server. Check also for a value of 0
- * to avoid panicking inappropriately in that situation.
- */
- if (nd->mygateway.sin_len != 0 &&
- nd->mygateway.sin_addr.s_addr != 0) {
- struct sockaddr_in mask, sin;
-
- bzero((caddr_t)&mask, sizeof(mask));
- sin = mask;
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(sin);
- /* XXX MRT use table 0 for this sort of thing */
- CURVNET_SET(TD_TO_VNET(td));
- error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
- (struct sockaddr *)&nd->mygateway,
- (struct sockaddr *)&mask,
- RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
- CURVNET_RESTORE();
- if (error)
- panic("nfs_mountroot: RTM_ADD: %d", error);
- }
-
- /*
- * Create the rootfs mount point.
- */
- nd->root_args.fh = nd->root_fh;
- nd->root_args.fhsize = nd->root_fhsize;
- l = ntohl(nd->root_saddr.sin_addr.s_addr);
- snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
- (l >> 24) & 0xff, (l >> 16) & 0xff,
- (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
- printf("NFS ROOT: %s\n", buf);
- nd->root_args.hostname = buf;
- if ((error = nfs_mountdiskless(buf,
- &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
- return (error);
- }
-
- /*
- * This is not really an nfs issue, but it is much easier to
- * set hostname here and then let the "/etc/rc.xxx" files
- * mount the right /var based upon its preset value.
- */
- mtx_lock(&prison0.pr_mtx);
- strlcpy(prison0.pr_hostname, nd->my_hostnam,
- sizeof (prison0.pr_hostname));
- mtx_unlock(&prison0.pr_mtx);
- inittodr(ntohl(nd->root_time));
- return (0);
-}
-
-/*
- * Internal version of mount system call for diskless setup.
- */
-static int
-nfs_mountdiskless(char *path,
- struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
- struct vnode **vpp, struct mount *mp)
-{
- struct sockaddr *nam;
- int error;
-
- nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
- if ((error = mountnfs(args, mp, nam, path, vpp, td->td_ucred,
- NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
- printf("nfs_mountroot: mount %s on /: %d\n", path, error);
- return (error);
- }
- return (0);
-}
-
-static int
-nfs_sec_name_to_num(char *sec)
-{
- if (!strcmp(sec, "krb5"))
- return (RPCSEC_GSS_KRB5);
- if (!strcmp(sec, "krb5i"))
- return (RPCSEC_GSS_KRB5I);
- if (!strcmp(sec, "krb5p"))
- return (RPCSEC_GSS_KRB5P);
- if (!strcmp(sec, "sys"))
- return (AUTH_SYS);
- /*
- * Userland should validate the string but we will try and
- * cope with unexpected values.
- */
- return (AUTH_SYS);
-}
-
-static void
-nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
- const char *hostname)
-{
- int s;
- int adjsock;
- int maxio;
- char *p;
- char *secname;
- char *principal;
-
- s = splnet();
-
- /*
- * Set read-only flag if requested; otherwise, clear it if this is
- * an update. If this is not an update, then either the read-only
- * flag is already clear, or this is a root mount and it was set
- * intentionally at some previous point.
- */
- if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
- MNT_ILOCK(mp);
- mp->mnt_flag |= MNT_RDONLY;
- MNT_IUNLOCK(mp);
- } else if (mp->mnt_flag & MNT_UPDATE) {
- MNT_ILOCK(mp);
- mp->mnt_flag &= ~MNT_RDONLY;
- MNT_IUNLOCK(mp);
- }
-
- /*
- * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
- * no sense in that context. Also, set up appropriate retransmit
- * and soft timeout behavior.
- */
- if (argp->sotype == SOCK_STREAM) {
- nmp->nm_flag &= ~NFSMNT_NOCONN;
- nmp->nm_flag |= NFSMNT_DUMBTIMR;
- nmp->nm_timeo = NFS_MAXTIMEO;
- nmp->nm_retry = NFS_RETRANS_TCP;
- }
-
- /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
- if ((argp->flags & NFSMNT_NFSV3) == 0)
- nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
-
- /* Re-bind if rsrvd port requested and wasn't on one */
- adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
- && (argp->flags & NFSMNT_RESVPORT);
- /* Also re-bind if we're switching to/from a connected UDP socket */
- adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
- (argp->flags & NFSMNT_NOCONN));
-
- /* Update flags atomically. Don't change the lock bits. */
- nmp->nm_flag = argp->flags | nmp->nm_flag;
- splx(s);
-
- if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
- nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
- if (nmp->nm_timeo < NFS_MINTIMEO)
- nmp->nm_timeo = NFS_MINTIMEO;
- else if (nmp->nm_timeo > NFS_MAXTIMEO)
- nmp->nm_timeo = NFS_MAXTIMEO;
- }
-
- if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
- nmp->nm_retry = argp->retrans;
- if (nmp->nm_retry > NFS_MAXREXMIT)
- nmp->nm_retry = NFS_MAXREXMIT;
- }
-
- if (argp->flags & NFSMNT_NFSV3) {
- if (argp->sotype == SOCK_DGRAM)
- maxio = NFS_MAXDGRAMDATA;
- else
- maxio = NFS_MAXDATA;
- } else
- maxio = NFS_V2MAXDATA;
-
- if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
- nmp->nm_wsize = argp->wsize;
- /* Round down to multiple of blocksize */
- nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
- if (nmp->nm_wsize <= 0)
- nmp->nm_wsize = NFS_FABLKSIZE;
- }
- if (nmp->nm_wsize > maxio)
- nmp->nm_wsize = maxio;
- if (nmp->nm_wsize > MAXBSIZE)
- nmp->nm_wsize = MAXBSIZE;
-
- if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
- nmp->nm_rsize = argp->rsize;
- /* Round down to multiple of blocksize */
- nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
- if (nmp->nm_rsize <= 0)
- nmp->nm_rsize = NFS_FABLKSIZE;
- }
- if (nmp->nm_rsize > maxio)
- nmp->nm_rsize = maxio;
- if (nmp->nm_rsize > MAXBSIZE)
- nmp->nm_rsize = MAXBSIZE;
-
- if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
- nmp->nm_readdirsize = argp->readdirsize;
- }
- if (nmp->nm_readdirsize > maxio)
- nmp->nm_readdirsize = maxio;
- if (nmp->nm_readdirsize > nmp->nm_rsize)
- nmp->nm_readdirsize = nmp->nm_rsize;
-
- if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
- nmp->nm_acregmin = argp->acregmin;
- else
- nmp->nm_acregmin = NFS_MINATTRTIMO;
- if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
- nmp->nm_acregmax = argp->acregmax;
- else
- nmp->nm_acregmax = NFS_MAXATTRTIMO;
- if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
- nmp->nm_acdirmin = argp->acdirmin;
- else
- nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
- if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
- nmp->nm_acdirmax = argp->acdirmax;
- else
- nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
- if (nmp->nm_acdirmin > nmp->nm_acdirmax)
- nmp->nm_acdirmin = nmp->nm_acdirmax;
- if (nmp->nm_acregmin > nmp->nm_acregmax)
- nmp->nm_acregmin = nmp->nm_acregmax;
-
- if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
- if (argp->maxgrouplist <= NFS_MAXGRPS)
- nmp->nm_numgrps = argp->maxgrouplist;
- else
- nmp->nm_numgrps = NFS_MAXGRPS;
- }
- if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
- if (argp->readahead <= NFS_MAXRAHEAD)
- nmp->nm_readahead = argp->readahead;
- else
- nmp->nm_readahead = NFS_MAXRAHEAD;
- }
- if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
- if (argp->wcommitsize < nmp->nm_wsize)
- nmp->nm_wcommitsize = nmp->nm_wsize;
- else
- nmp->nm_wcommitsize = argp->wcommitsize;
- }
- if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
- if (argp->deadthresh <= NFS_MAXDEADTHRESH)
- nmp->nm_deadthresh = argp->deadthresh;
- else
- nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
- }
-
- adjsock |= ((nmp->nm_sotype != argp->sotype) ||
- (nmp->nm_soproto != argp->proto));
- nmp->nm_sotype = argp->sotype;
- nmp->nm_soproto = argp->proto;
-
- if (nmp->nm_client && adjsock) {
- nfs_safedisconnect(nmp);
- if (nmp->nm_sotype == SOCK_DGRAM)
- while (nfs_connect(nmp)) {
- printf("nfs_args: retrying connect\n");
- (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
- }
- }
-
- if (hostname) {
- strlcpy(nmp->nm_hostname, hostname,
- sizeof(nmp->nm_hostname));
- p = strchr(nmp->nm_hostname, ':');
- if (p)
- *p = '\0';
- }
-
- if (vfs_getopt(mp->mnt_optnew, "sec",
- (void **) &secname, NULL) == 0) {
- nmp->nm_secflavor = nfs_sec_name_to_num(secname);
- } else {
- nmp->nm_secflavor = AUTH_SYS;
- }
-
- if (vfs_getopt(mp->mnt_optnew, "principal",
- (void **) &principal, NULL) == 0) {
- strlcpy(nmp->nm_principal, principal,
- sizeof(nmp->nm_principal));
- } else {
- snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
- "nfs@%s", nmp->nm_hostname);
- }
-}
-
-static const char *nfs_opts[] = { "from", "nfs_args",
- "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
- "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
- "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
- "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
- "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
- "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
- "sec", "maxgroups", "principal", "negnametimeo", "nocto", "wcommitsize",
- "nametimeo",
- NULL };
-
-/*
- * VFS Operations.
- *
- * mount system call
- * It seems a bit dumb to copyinstr() the host and path here and then
- * bcopy() them in mountnfs(), but I wanted to detect errors before
- * doing the sockargs() call because sockargs() allocates an mbuf and
- * an error after that means that I have to release the mbuf.
- */
-/* ARGSUSED */
-static int
-nfs_mount(struct mount *mp)
-{
- struct nfs_args args = {
- .version = NFS_ARGSVERSION,
- .addr = NULL,
- .addrlen = sizeof (struct sockaddr_in),
- .sotype = SOCK_STREAM,
- .proto = 0,
- .fh = NULL,
- .fhsize = 0,
- .flags = NFSMNT_RESVPORT,
- .wsize = NFS_WSIZE,
- .rsize = NFS_RSIZE,
- .readdirsize = NFS_READDIRSIZE,
- .timeo = 10,
- .retrans = NFS_RETRANS,
- .maxgrouplist = NFS_MAXGRPS,
- .readahead = NFS_DEFRAHEAD,
- .wcommitsize = 0, /* was: NQ_DEFLEASE */
- .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
- .hostname = NULL,
- /* args version 4 */
- .acregmin = NFS_MINATTRTIMO,
- .acregmax = NFS_MAXATTRTIMO,
- .acdirmin = NFS_MINDIRATTRTIMO,
- .acdirmax = NFS_MAXDIRATTRTIMO,
- };
- int error, ret, has_nfs_args_opt;
- int has_addr_opt, has_fh_opt, has_hostname_opt;
- struct sockaddr *nam;
- struct vnode *vp;
- char hst[MNAMELEN];
- size_t len;
- u_char nfh[NFSX_V3FHMAX];
- char *opt;
- int nametimeo = NFS_DEFAULT_NAMETIMEO;
- int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
-
- has_nfs_args_opt = 0;
- has_addr_opt = 0;
- has_fh_opt = 0;
- has_hostname_opt = 0;
-
- if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
- error = EINVAL;
- goto out;
- }
-
- if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
- error = nfs_mountroot(mp);
- goto out;
- }
-
- /*
- * The old mount_nfs program passed the struct nfs_args
- * from userspace to kernel. The new mount_nfs program
- * passes string options via nmount() from userspace to kernel
- * and we populate the struct nfs_args in the kernel.
- */
- if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
- error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
- sizeof args);
- if (error)
- goto out;
-
- if (args.version != NFS_ARGSVERSION) {
- error = EPROGMISMATCH;
- goto out;
- }
- has_nfs_args_opt = 1;
- }
-
- if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
- args.flags |= NFSMNT_DUMBTIMR;
- if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
- args.flags |= NFSMNT_NOCONN;
- if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
- args.flags |= NFSMNT_NOCONN;
- if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
- args.flags |= NFSMNT_NOLOCKD;
- if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
- args.flags &= ~NFSMNT_NOLOCKD;
- if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
- args.flags |= NFSMNT_INT;
- if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
- args.flags |= NFSMNT_RDIRPLUS;
- if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
- args.flags |= NFSMNT_RESVPORT;
- if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
- args.flags &= ~NFSMNT_RESVPORT;
- if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
- args.flags |= NFSMNT_SOFT;
- if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
- args.flags &= ~NFSMNT_SOFT;
- if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
- args.sotype = SOCK_DGRAM;
- if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
- args.sotype = SOCK_DGRAM;
- if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
- args.sotype = SOCK_STREAM;
- if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
- args.flags |= NFSMNT_NFSV3;
- if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
- args.flags |= NFSMNT_NOCTO;
- if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
- if (opt == NULL) {
- vfs_mount_error(mp, "illegal readdirsize");
- error = EINVAL;
- goto out;
- }
- ret = sscanf(opt, "%d", &args.readdirsize);
- if (ret != 1 || args.readdirsize <= 0) {
- vfs_mount_error(mp, "illegal readdirsize: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_READDIRSIZE;
- }
- if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
- if (opt == NULL) {
- vfs_mount_error(mp, "illegal readahead");
- error = EINVAL;
- goto out;
- }
- ret = sscanf(opt, "%d", &args.readahead);
- if (ret != 1 || args.readahead <= 0) {
- vfs_mount_error(mp, "illegal readahead: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_READAHEAD;
- }
- if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
- if (opt == NULL) {
- vfs_mount_error(mp, "illegal wsize");
- error = EINVAL;
- goto out;
- }
- ret = sscanf(opt, "%d", &args.wsize);
- if (ret != 1 || args.wsize <= 0) {
- vfs_mount_error(mp, "illegal wsize: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_WSIZE;
- }
- if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
- if (opt == NULL) {
- vfs_mount_error(mp, "illegal rsize");
- error = EINVAL;
- goto out;
- }
- ret = sscanf(opt, "%d", &args.rsize);
- if (ret != 1 || args.rsize <= 0) {
-			vfs_mount_error(mp, "illegal rsize: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_RSIZE;
- }
- if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
- if (opt == NULL) {
- vfs_mount_error(mp, "illegal retrans");
- error = EINVAL;
- goto out;
- }
- ret = sscanf(opt, "%d", &args.retrans);
- if (ret != 1 || args.retrans <= 0) {
- vfs_mount_error(mp, "illegal retrans: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_RETRANS;
- }
- if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.acregmin);
- if (ret != 1 || args.acregmin < 0) {
- vfs_mount_error(mp, "illegal acregmin: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_ACREGMIN;
- }
- if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.acregmax);
- if (ret != 1 || args.acregmax < 0) {
- vfs_mount_error(mp, "illegal acregmax: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_ACREGMAX;
- }
- if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.acdirmin);
- if (ret != 1 || args.acdirmin < 0) {
- vfs_mount_error(mp, "illegal acdirmin: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_ACDIRMIN;
- }
- if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.acdirmax);
- if (ret != 1 || args.acdirmax < 0) {
- vfs_mount_error(mp, "illegal acdirmax: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_ACDIRMAX;
- }
- if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.wcommitsize);
- if (ret != 1 || args.wcommitsize < 0) {
- vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_WCOMMITSIZE;
- }
- if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.deadthresh);
- if (ret != 1 || args.deadthresh <= 0) {
- vfs_mount_error(mp, "illegal deadthresh: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_DEADTHRESH;
- }
- if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.timeo);
- if (ret != 1 || args.timeo <= 0) {
- vfs_mount_error(mp, "illegal timeout: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_TIMEO;
- }
- if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &args.maxgrouplist);
- if (ret != 1 || args.maxgrouplist <= 0) {
- vfs_mount_error(mp, "illegal maxgroups: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- args.flags |= NFSMNT_MAXGRPS;
- }
- if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
- ret = sscanf(opt, "%d", &nametimeo);
- if (ret != 1 || nametimeo < 0) {
- vfs_mount_error(mp, "illegal nametimeo: %s", opt);
- error = EINVAL;
- goto out;
- }
- }
- if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
- == 0) {
- ret = sscanf(opt, "%d", &negnametimeo);
- if (ret != 1 || negnametimeo < 0) {
- vfs_mount_error(mp, "illegal negnametimeo: %s",
- opt);
- error = EINVAL;
- goto out;
- }
- }
- if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
- &args.addrlen) == 0) {
- has_addr_opt = 1;
- if (args.addrlen > SOCK_MAXADDRLEN) {
- error = ENAMETOOLONG;
- goto out;
- }
- nam = malloc(args.addrlen, M_SONAME,
- M_WAITOK);
- bcopy(args.addr, nam, args.addrlen);
- nam->sa_len = args.addrlen;
- }
- if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
- &args.fhsize) == 0) {
- has_fh_opt = 1;
- }
- if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
- NULL) == 0) {
- has_hostname_opt = 1;
- }
- if (args.hostname == NULL) {
- vfs_mount_error(mp, "Invalid hostname");
- error = EINVAL;
- goto out;
- }
- if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
- vfs_mount_error(mp, "Bad file handle");
- error = EINVAL;
- goto out;
- }
-
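/*
 * Editor's illustrative sketch; not part of the removed source.  The option
 * blocks above all follow one pattern: fetch the string with vfs_getopt(),
 * parse it with sscanf(), and reject missing, non-numeric, or non-positive
 * values.  parse_positive_int() and main() below are hypothetical names and
 * the code is plain userland C, shown only to make the validation rule
 * explicit.
 */
#include <stdio.h>

static int
parse_positive_int(const char *opt, int *valp)
{
	int val;

	/* Mirror the kernel checks: option present, numeric, and > 0. */
	if (opt == NULL || sscanf(opt, "%d", &val) != 1 || val <= 0)
		return (-1);
	*valp = val;
	return (0);
}

int
main(void)
{
	int wsize;

	if (parse_positive_int("32768", &wsize) == 0)
		printf("wsize = %d\n", wsize);
	if (parse_positive_int("bogus", &wsize) != 0)
		printf("rejected illegal wsize\n");
	return (0);
}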
- if (mp->mnt_flag & MNT_UPDATE) {
- struct nfsmount *nmp = VFSTONFS(mp);
-
- if (nmp == NULL) {
- error = EIO;
- goto out;
- }
-
- /*
- * If a change from TCP->UDP is done and there are thread(s)
-		 * that have I/O RPC(s) in progress with a transfer size
- * greater than NFS_MAXDGRAMDATA, those thread(s) will be
- * hung, retrying the RPC(s) forever. Usually these threads
- * will be seen doing an uninterruptible sleep on wait channel
- * "newnfsreq" (truncated to "newnfsre" by procstat).
- */
- if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
- tprintf(curthread->td_proc, LOG_WARNING,
- "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
-
- /*
- * When doing an update, we can't change from or to
- * v3, switch lockd strategies or change cookie translation
- */
- args.flags = (args.flags &
- ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
- (nmp->nm_flag &
- (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
- nfs_decode_args(mp, nmp, &args, NULL);
- goto out;
- }
-
- /*
- * Make the nfs_ip_paranoia sysctl serve as the default connection
- * or no-connection mode for those protocols that support
- * no-connection mode (the flag will be cleared later for protocols
- * that do not support no-connection mode). This will allow a client
-	 * to receive replies from a different IP than the request was
- * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
- * not 0.
- */
- if (nfs_ip_paranoia == 0)
- args.flags |= NFSMNT_NOCONN;
-
- if (has_nfs_args_opt) {
- /*
- * In the 'nfs_args' case, the pointers in the args
- * structure are in userland - we copy them in here.
- */
- if (!has_fh_opt) {
- error = copyin((caddr_t)args.fh, (caddr_t)nfh,
- args.fhsize);
- if (error) {
- goto out;
- }
- args.fh = nfh;
- }
- if (!has_hostname_opt) {
- error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
- if (error) {
- goto out;
- }
- bzero(&hst[len], MNAMELEN - len);
- args.hostname = hst;
- }
- if (!has_addr_opt) {
- /* sockargs() call must be after above copyin() calls */
- error = getsockaddr(&nam, (caddr_t)args.addr,
- args.addrlen);
- if (error) {
- goto out;
- }
- }
- } else if (has_addr_opt == 0) {
- vfs_mount_error(mp, "No server address");
- error = EINVAL;
- goto out;
- }
- error = mountnfs(&args, mp, nam, args.hostname, &vp,
- curthread->td_ucred, nametimeo, negnametimeo);
-out:
- if (!error) {
- MNT_ILOCK(mp);
- mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
- MNT_IUNLOCK(mp);
- }
- return (error);
-}
-
-
-/*
- * VFS Operations.
- *
- * mount system call
- * It seems a bit dumb to copyinstr() the host and path here and then
- * bcopy() them in mountnfs(), but I wanted to detect errors before
- * doing the sockargs() call because sockargs() allocates an mbuf and
- * an error after that means that I have to release the mbuf.
- */
-/* ARGSUSED */
-static int
-nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
-{
- int error;
- struct nfs_args args;
-
- error = copyin(data, &args, sizeof (struct nfs_args));
- if (error)
- return error;
-
- ma = mount_arg(ma, "nfs_args", &args, sizeof args);
-
- error = kernel_mount(ma, flags);
- return (error);
-}
-
-/*
- * Common code for mount and mountroot
- */
-static int
-mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
- char *hst, struct vnode **vpp, struct ucred *cred, int nametimeo,
- int negnametimeo)
-{
- struct nfsmount *nmp;
- struct nfsnode *np;
- int error;
- struct vattr attrs;
-
- if (mp->mnt_flag & MNT_UPDATE) {
- nmp = VFSTONFS(mp);
- printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
- free(nam, M_SONAME);
- return (0);
- } else {
- nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
- bzero((caddr_t)nmp, sizeof (struct nfsmount));
- TAILQ_INIT(&nmp->nm_bufq);
- mp->mnt_data = nmp;
- nmp->nm_getinfo = nfs_getnlminfo;
- nmp->nm_vinvalbuf = nfs_vinvalbuf;
- }
- vfs_getnewfsid(mp);
- nmp->nm_mountp = mp;
- mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
-
- /*
- * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
- * high, depending on whether we end up with negative offsets in
- * the client or server somewhere. 2GB-1 may be safer.
- *
- * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
- * that we can handle until we find out otherwise.
- */
- if ((argp->flags & NFSMNT_NFSV3) == 0)
- nmp->nm_maxfilesize = 0xffffffffLL;
- else
- nmp->nm_maxfilesize = OFF_MAX;
-
- nmp->nm_timeo = NFS_TIMEO;
- nmp->nm_retry = NFS_RETRANS;
- if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
- nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
- } else {
- nmp->nm_wsize = NFS_WSIZE;
- nmp->nm_rsize = NFS_RSIZE;
- }
- nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
- nmp->nm_readdirsize = NFS_READDIRSIZE;
- nmp->nm_numgrps = NFS_MAXGRPS;
- nmp->nm_readahead = NFS_DEFRAHEAD;
- nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
- nmp->nm_nametimeo = nametimeo;
- nmp->nm_negnametimeo = negnametimeo;
- nmp->nm_tprintf_delay = nfs_tprintf_delay;
- if (nmp->nm_tprintf_delay < 0)
- nmp->nm_tprintf_delay = 0;
- nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
- if (nmp->nm_tprintf_initial_delay < 0)
- nmp->nm_tprintf_initial_delay = 0;
- nmp->nm_fhsize = argp->fhsize;
- bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
- bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
- nmp->nm_nam = nam;
- /* Set up the sockets and per-host congestion */
- nmp->nm_sotype = argp->sotype;
- nmp->nm_soproto = argp->proto;
- nmp->nm_rpcops = &nfs_rpcops;
-
- nfs_decode_args(mp, nmp, argp, hst);
-
- /*
- * For Connection based sockets (TCP,...) defer the connect until
- * the first request, in case the server is not responding.
- */
- if (nmp->nm_sotype == SOCK_DGRAM &&
- (error = nfs_connect(nmp)))
- goto bad;
-
- /*
- * This is silly, but it has to be set so that vinifod() works.
- * We do not want to do an nfs_statfs() here since we can get
- * stuck on a dead server and we are holding a lock on the mount
- * point.
- */
- mtx_lock(&nmp->nm_mtx);
- mp->mnt_stat.f_iosize = nfs_iosize(nmp);
- mtx_unlock(&nmp->nm_mtx);
- /*
- * A reference count is needed on the nfsnode representing the
- * remote root. If this object is not persistent, then backward
- * traversals of the mount point (i.e. "..") will not work if
- * the nfsnode gets flushed out of the cache. Ufs does not have
- * this problem, because one can identify root inodes by their
- * number == ROOTINO (2).
- */
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
- if (error)
- goto bad;
- *vpp = NFSTOV(np);
-
- /*
- * Get file attributes and transfer parameters for the
- * mountpoint. This has the side effect of filling in
- * (*vpp)->v_type with the correct value.
- */
- if (argp->flags & NFSMNT_NFSV3)
- nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
- else
- VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
-
- /*
- * Lose the lock but keep the ref.
- */
- VOP_UNLOCK(*vpp, 0);
-
- return (0);
-bad:
- nfs_disconnect(nmp);
- mtx_destroy(&nmp->nm_mtx);
- uma_zfree(nfsmount_zone, nmp);
- free(nam, M_SONAME);
- return (error);
-}
-
-/*
- * unmount system call
- */
-static int
-nfs_unmount(struct mount *mp, int mntflags)
-{
- struct nfsmount *nmp;
- int error, flags = 0, i;
-
- if (mntflags & MNT_FORCE)
- flags |= FORCECLOSE;
- nmp = VFSTONFS(mp);
- /*
- * Goes something like this..
- * - Call vflush() to clear out vnodes for this filesystem
- * - Close the socket
- * - Free up the data structures
- */
- /* In the forced case, cancel any outstanding requests. */
- if (flags & FORCECLOSE) {
- error = nfs_nmcancelreqs(nmp);
- if (error)
- goto out;
- }
- /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
- error = vflush(mp, 1, flags, curthread);
- if (error)
- goto out;
-
- /*
- * We are now committed to the unmount.
- */
- /* Make sure no nfsiods are assigned to this mount. */
- mtx_lock(&nfs_iod_mtx);
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
- if (nfs_iodmount[i] == nmp) {
- nfs_iodwant[i] = NFSIOD_AVAILABLE;
- nfs_iodmount[i] = NULL;
- }
- mtx_unlock(&nfs_iod_mtx);
- nfs_disconnect(nmp);
- free(nmp->nm_nam, M_SONAME);
-
- mtx_destroy(&nmp->nm_mtx);
- uma_zfree(nfsmount_zone, nmp);
-out:
- return (error);
-}
-
-/*
- * Return root of a filesystem
- */
-static int
-nfs_root(struct mount *mp, int flags, struct vnode **vpp)
-{
- struct vnode *vp;
- struct nfsmount *nmp;
- struct nfsnode *np;
- int error;
-
- nmp = VFSTONFS(mp);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
- if (error)
- return error;
- vp = NFSTOV(np);
- /*
- * Get transfer parameters and attributes for root vnode once.
- */
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
- (nmp->nm_flag & NFSMNT_NFSV3)) {
- mtx_unlock(&nmp->nm_mtx);
- nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
- } else
- mtx_unlock(&nmp->nm_mtx);
- if (vp->v_type == VNON)
- vp->v_type = VDIR;
- vp->v_vflag |= VV_ROOT;
- *vpp = vp;
- return (0);
-}
-
-/*
- * Flush out the buffer cache
- */
-/* ARGSUSED */
-static int
-nfs_sync(struct mount *mp, int waitfor)
-{
- struct vnode *vp, *mvp;
- struct thread *td;
- int error, allerror = 0;
-
- td = curthread;
-
- MNT_ILOCK(mp);
- /*
- * If a forced dismount is in progress, return from here so that
- * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
- * calling VFS_UNMOUNT().
- */
- if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
- MNT_IUNLOCK(mp);
- return (EBADF);
- }
- MNT_IUNLOCK(mp);
-
- /*
- * Force stale buffer cache information to be flushed.
- */
-loop:
- MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
- /* XXX Racy bv_cnt check. */
- if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
- waitfor == MNT_LAZY) {
- VI_UNLOCK(vp);
- continue;
- }
- if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
- MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
- goto loop;
- }
- error = VOP_FSYNC(vp, waitfor, td);
- if (error)
- allerror = error;
- VOP_UNLOCK(vp, 0);
- vrele(vp);
- }
- return (allerror);
-}
-
-static int
-nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
-{
- struct nfsmount *nmp = VFSTONFS(mp);
- struct vfsquery vq;
- int error;
-
- bzero(&vq, sizeof(vq));
- switch (op) {
-#if 0
- case VFS_CTL_NOLOCKS:
- val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
- if (req->oldptr != NULL) {
- error = SYSCTL_OUT(req, &val, sizeof(val));
- if (error)
- return (error);
- }
- if (req->newptr != NULL) {
- error = SYSCTL_IN(req, &val, sizeof(val));
- if (error)
- return (error);
- if (val)
- nmp->nm_flag |= NFSMNT_NOLOCKS;
- else
- nmp->nm_flag &= ~NFSMNT_NOLOCKS;
- }
- break;
-#endif
- case VFS_CTL_QUERY:
- mtx_lock(&nmp->nm_mtx);
- if (nmp->nm_state & NFSSTA_TIMEO)
- vq.vq_flags |= VQ_NOTRESP;
- mtx_unlock(&nmp->nm_mtx);
-#if 0
- if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
- (nmp->nm_state & NFSSTA_LOCKTIMEO))
- vq.vq_flags |= VQ_NOTRESPLOCK;
-#endif
- error = SYSCTL_OUT(req, &vq, sizeof(vq));
- break;
- case VFS_CTL_TIMEO:
- if (req->oldptr != NULL) {
- error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
- sizeof(nmp->nm_tprintf_initial_delay));
- if (error)
- return (error);
- }
- if (req->newptr != NULL) {
- error = vfs_suser(mp, req->td);
- if (error)
- return (error);
- error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
- sizeof(nmp->nm_tprintf_initial_delay));
- if (error)
- return (error);
- if (nmp->nm_tprintf_initial_delay < 0)
- nmp->nm_tprintf_initial_delay = 0;
- }
- break;
- default:
- return (ENOTSUP);
- }
- return (0);
-}
-
-/*
- * Extract the information needed by the nlm from the nfs vnode.
- */
-static void
-nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
- struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
- struct timeval *timeop)
-{
- struct nfsmount *nmp;
- struct nfsnode *np = VTONFS(vp);
-
- nmp = VFSTONFS(vp->v_mount);
- if (fhlenp != NULL)
- *fhlenp = (size_t)np->n_fhsize;
- if (fhp != NULL)
- bcopy(np->n_fhp, fhp, np->n_fhsize);
- if (sp != NULL)
- bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
- if (is_v3p != NULL)
- *is_v3p = NFS_ISV3(vp);
- if (sizep != NULL)
- *sizep = np->n_size;
- if (timeop != NULL) {
- timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
- timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
- }
-}
-
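/*
 * Editor's illustrative sketch; not part of the removed source.  The tail of
 * nfs_getnlminfo() converts a retransmit timeout counted in NFS_HZ ticks
 * into a struct timeval.  NFS_HZ_EXAMPLE and ticks_to_timeval() below are
 * hypothetical stand-ins; with an assumed tick rate of 10, a 25-tick timeout
 * becomes 2 seconds and 500000 microseconds.
 */
#include <stdio.h>
#include <sys/time.h>

#define	NFS_HZ_EXAMPLE	10	/* assumed tick rate, for illustration only */

static void
ticks_to_timeval(int ticks, struct timeval *tv)
{
	tv->tv_sec = ticks / NFS_HZ_EXAMPLE;
	tv->tv_usec = (ticks % NFS_HZ_EXAMPLE) * (1000000 / NFS_HZ_EXAMPLE);
}

int
main(void)
{
	struct timeval tv;

	ticks_to_timeval(25, &tv);
	printf("%ld sec %ld usec\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return (0);
}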
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
deleted file mode 100644
index 2516d7db94c2..000000000000
--- a/sys/nfsclient/nfs_vnops.c
+++ /dev/null
@@ -1,3544 +0,0 @@
-/*-
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * vnode op calls for Sun NFS version 2 and 3
- */
-
-#include "opt_inet.h"
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/resourcevar.h>
-#include <sys/proc.h>
-#include <sys/mount.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/jail.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/namei.h>
-#include <sys/socket.h>
-#include <sys/vnode.h>
-#include <sys/dirent.h>
-#include <sys/fcntl.h>
-#include <sys/lockf.h>
-#include <sys/rwlock.h>
-#include <sys/stat.h>
-#include <sys/sysctl.h>
-#include <sys/signalvar.h>
-
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_object.h>
-
-#include <nfs/nfsproto.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsnode.h>
-#include <nfsclient/nfsmount.h>
-#include <nfs/nfs_kdtrace.h>
-#include <nfs/nfs_lock.h>
-#include <nfs/xdr_subs.h>
-#include <nfsclient/nfsm_subs.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/vnet.h>
-#include <netinet/in.h>
-#include <netinet/in_var.h>
-
-#include <machine/stdarg.h>
-
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
-
-dtrace_nfsclient_accesscache_flush_probe_func_t
- dtrace_nfsclient_accesscache_flush_done_probe;
-uint32_t nfsclient_accesscache_flush_done_id;
-
-dtrace_nfsclient_accesscache_get_probe_func_t
- dtrace_nfsclient_accesscache_get_hit_probe,
- dtrace_nfsclient_accesscache_get_miss_probe;
-uint32_t nfsclient_accesscache_get_hit_id;
-uint32_t nfsclient_accesscache_get_miss_id;
-
-dtrace_nfsclient_accesscache_load_probe_func_t
- dtrace_nfsclient_accesscache_load_done_probe;
-uint32_t nfsclient_accesscache_load_done_id;
-#endif /* KDTRACE_HOOKS */
-
-/* Defs */
-#define TRUE 1
-#define FALSE 0
-
-/*
- * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
- * calls are not in getblk() and brelse() so that they would not be necessary
- * here.
- */
-#ifndef B_VMIO
-#define vfs_busy_pages(bp, f)
-#endif
-
-static vop_read_t nfsfifo_read;
-static vop_write_t nfsfifo_write;
-static vop_close_t nfsfifo_close;
-static int nfs_flush(struct vnode *, int, int);
-static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
-static vop_lookup_t nfs_lookup;
-static vop_create_t nfs_create;
-static vop_mknod_t nfs_mknod;
-static vop_open_t nfs_open;
-static vop_close_t nfs_close;
-static vop_access_t nfs_access;
-static vop_getattr_t nfs_getattr;
-static vop_setattr_t nfs_setattr;
-static vop_read_t nfs_read;
-static vop_fsync_t nfs_fsync;
-static vop_remove_t nfs_remove;
-static vop_link_t nfs_link;
-static vop_rename_t nfs_rename;
-static vop_mkdir_t nfs_mkdir;
-static vop_rmdir_t nfs_rmdir;
-static vop_symlink_t nfs_symlink;
-static vop_readdir_t nfs_readdir;
-static vop_strategy_t nfs_strategy;
-static int nfs_lookitup(struct vnode *, const char *, int,
- struct ucred *, struct thread *, struct nfsnode **);
-static int nfs_sillyrename(struct vnode *, struct vnode *,
- struct componentname *);
-static vop_access_t nfsspec_access;
-static vop_readlink_t nfs_readlink;
-static vop_print_t nfs_print;
-static vop_advlock_t nfs_advlock;
-static vop_advlockasync_t nfs_advlockasync;
-
-/*
- * Global vfs data structures for nfs
- */
-struct vop_vector nfs_vnodeops = {
- .vop_default = &default_vnodeops,
- .vop_access = nfs_access,
- .vop_advlock = nfs_advlock,
- .vop_advlockasync = nfs_advlockasync,
- .vop_close = nfs_close,
- .vop_create = nfs_create,
- .vop_fsync = nfs_fsync,
- .vop_getattr = nfs_getattr,
- .vop_getpages = nfs_getpages,
- .vop_putpages = nfs_putpages,
- .vop_inactive = nfs_inactive,
- .vop_link = nfs_link,
- .vop_lookup = nfs_lookup,
- .vop_mkdir = nfs_mkdir,
- .vop_mknod = nfs_mknod,
- .vop_open = nfs_open,
- .vop_print = nfs_print,
- .vop_read = nfs_read,
- .vop_readdir = nfs_readdir,
- .vop_readlink = nfs_readlink,
- .vop_reclaim = nfs_reclaim,
- .vop_remove = nfs_remove,
- .vop_rename = nfs_rename,
- .vop_rmdir = nfs_rmdir,
- .vop_setattr = nfs_setattr,
- .vop_strategy = nfs_strategy,
- .vop_symlink = nfs_symlink,
- .vop_write = nfs_write,
-};
-
-struct vop_vector nfs_fifoops = {
- .vop_default = &fifo_specops,
- .vop_access = nfsspec_access,
- .vop_close = nfsfifo_close,
- .vop_fsync = nfs_fsync,
- .vop_getattr = nfs_getattr,
- .vop_inactive = nfs_inactive,
- .vop_print = nfs_print,
- .vop_read = nfsfifo_read,
- .vop_reclaim = nfs_reclaim,
- .vop_setattr = nfs_setattr,
- .vop_write = nfsfifo_write,
-};
-
-static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
- struct componentname *cnp, struct vattr *vap);
-static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
- struct ucred *cred, struct thread *td);
-static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
- int fnamelen, struct vnode *tdvp,
- const char *tnameptr, int tnamelen,
- struct ucred *cred, struct thread *td);
-static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
- struct sillyrename *sp);
-
-/*
- * Global variables
- */
-struct mtx nfs_iod_mtx;
-enum nfsiod_state nfs_iodwant[NFS_MAXASYNCDAEMON];
-struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
-int nfs_numasync = 0;
-#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
-
-SYSCTL_DECL(_vfs_oldnfs);
-
-static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
- &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
-
-static int nfs_prime_access_cache = 0;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
- &nfs_prime_access_cache, 0,
- "Prime NFS ACCESS cache when fetching attributes");
-
-static int nfsv3_commit_on_close = 0;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
- &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
-
-static int nfs_clean_pages_on_close = 1;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
- &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
-
-int nfs_directio_enable = 0;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
- &nfs_directio_enable, 0, "Enable NFS directio");
-
-/*
- * This sysctl allows other processes to mmap a file that has been opened
- * O_DIRECT by a process. In general, having processes mmap the file while
- * Direct IO is in progress can lead to data inconsistencies. But we allow
- * it by default to prevent a denial of service: otherwise a malicious user
- * could open files O_DIRECT and thereby block other users from mmap'ing
- * them. "Protected" environments where stricter consistency guarantees are
- * required can disable this knob. The process that opened the file O_DIRECT
- * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
- * meaningful.
- */
-int nfs_directio_allow_mmap = 1;
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
- &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
-
-#if 0
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
- &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
-
-SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
- &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
-#endif
-
-#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
- | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
- | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
-
-/*
- * SMP Locking Note:
- * For each lock, the locks listed after its description are those that
- * may be acquired while that lock is held, in that order.
- * np->n_mtx : Protects the fields in the nfsnode.
- VM Object Lock
- VI_MTX (acquired indirectly)
- * nmp->nm_mtx : Protects the fields in the nfsmount.
- rep->r_mtx
- * nfs_iod_mtx : Global lock, protects shared nfsiod state.
- * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
- nmp->nm_mtx
- rep->r_mtx
- * rep->r_mtx : Protects the fields in an nfsreq.
- */
-
-static int
-nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
- struct ucred *cred, uint32_t *retmode)
-{
- const int v3 = 1;
- u_int32_t *tl;
- int error = 0, attrflag, i, lrupos;
-
- struct mbuf *mreq, *mrep, *md, *mb;
- caddr_t bpos, dpos;
- u_int32_t rmode;
- struct nfsnode *np = VTONFS(vp);
-
- nfsstats.rpccnt[NFSPROC_ACCESS]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED, M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
- *tl = txdr_unsigned(wmode);
- nfsm_request(vp, NFSPROC_ACCESS, td, cred);
- nfsm_postop_attr(vp, attrflag);
- if (!error) {
- lrupos = 0;
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- rmode = fxdr_unsigned(u_int32_t, *tl);
- mtx_lock(&np->n_mtx);
- for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
- if (np->n_accesscache[i].uid == cred->cr_uid) {
- np->n_accesscache[i].mode = rmode;
- np->n_accesscache[i].stamp = time_second;
- break;
- }
- if (i > 0 && np->n_accesscache[i].stamp <
- np->n_accesscache[lrupos].stamp)
- lrupos = i;
- }
- if (i == NFS_ACCESSCACHESIZE) {
- np->n_accesscache[lrupos].uid = cred->cr_uid;
- np->n_accesscache[lrupos].mode = rmode;
- np->n_accesscache[lrupos].stamp = time_second;
- }
- mtx_unlock(&np->n_mtx);
- if (retmode != NULL)
- *retmode = rmode;
- KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
- }
- m_freem(mrep);
-nfsmout:
-#ifdef KDTRACE_HOOKS
- if (error) {
- KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
- error);
- }
-#endif
- return (error);
-}
-
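/*
 * Editor's illustrative sketch; not part of the removed source.  The loop in
 * nfs3_access_otw() caches the returned access bits per uid in a small
 * fixed-size array, reusing an existing slot for the same uid and otherwise
 * evicting the slot with the oldest timestamp.  ACCESSCACHESIZE_EXAMPLE,
 * struct access_entry and access_cache_store() are hypothetical names used
 * only to restate that replacement policy in plain C.
 */
#include <sys/types.h>
#include <stdint.h>
#include <time.h>

#define	ACCESSCACHESIZE_EXAMPLE	8	/* assumed size of the per-node cache */

struct access_entry {
	uid_t		uid;	/* credential the bits were checked for */
	uint32_t	mode;	/* access bits granted by the server */
	time_t		stamp;	/* when the entry was stored */
};

static void
access_cache_store(struct access_entry *cache, uid_t uid, uint32_t mode)
{
	int i, lrupos = 0;

	for (i = 0; i < ACCESSCACHESIZE_EXAMPLE; i++) {
		if (cache[i].uid == uid)
			break;		/* reuse this credential's slot */
		if (i > 0 && cache[i].stamp < cache[lrupos].stamp)
			lrupos = i;	/* remember the oldest slot so far */
	}
	if (i == ACCESSCACHESIZE_EXAMPLE)
		i = lrupos;		/* no slot for this uid: evict oldest */
	cache[i].uid = uid;
	cache[i].mode = mode;
	cache[i].stamp = time(NULL);
}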
-/*
- * nfs access vnode op.
- * For nfs version 2, just return ok. File accesses may fail later.
- * For nfs version 3, use the access rpc to check accessibility. If file modes
- * are changed on the server, accesses might still fail later.
- */
-static int
-nfs_access(struct vop_access_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- int error = 0, i, gotahit;
- u_int32_t mode, rmode, wmode;
- int v3 = NFS_ISV3(vp);
- struct nfsnode *np = VTONFS(vp);
-
- /*
- * Disallow write attempts on filesystems mounted read-only;
- * unless the file is a socket, fifo, or a block or character
- * device resident on the filesystem.
- */
- if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
- switch (vp->v_type) {
- case VREG:
- case VDIR:
- case VLNK:
- return (EROFS);
- default:
- break;
- }
- }
- /*
- * For nfs v3, check to see if we have done this recently, and if
- * so return our cached result instead of making an ACCESS call.
- * If not, do an access rpc, otherwise you are stuck emulating
- * ufs_access() locally using the vattr. This may not be correct,
- * since the server may apply other access criteria such as
- * client uid-->server uid mapping that we do not know about.
- */
- if (v3) {
- if (ap->a_accmode & VREAD)
- mode = NFSV3ACCESS_READ;
- else
- mode = 0;
- if (vp->v_type != VDIR) {
- if (ap->a_accmode & VWRITE)
- mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
- if (ap->a_accmode & VEXEC)
- mode |= NFSV3ACCESS_EXECUTE;
- } else {
- if (ap->a_accmode & VWRITE)
- mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
- NFSV3ACCESS_DELETE);
- if (ap->a_accmode & VEXEC)
- mode |= NFSV3ACCESS_LOOKUP;
- }
- /* XXX safety belt, only make blanket request if caching */
- if (nfsaccess_cache_timeout > 0) {
- wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
- NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
- NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
- } else {
- wmode = mode;
- }
-
- /*
- * Does our cached result allow us to give a definite yes to
- * this request?
- */
- gotahit = 0;
- mtx_lock(&np->n_mtx);
- for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
- if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
- if (time_second < (np->n_accesscache[i].stamp +
- nfsaccess_cache_timeout) &&
- (np->n_accesscache[i].mode & mode) == mode) {
- nfsstats.accesscache_hits++;
- gotahit = 1;
- }
- break;
- }
- }
- mtx_unlock(&np->n_mtx);
-#ifdef KDTRACE_HOOKS
- if (gotahit)
- KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
- ap->a_cred->cr_uid, mode);
- else
- KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
- ap->a_cred->cr_uid, mode);
-#endif
- if (gotahit == 0) {
- /*
- * Either a no, or a don't know. Go to the wire.
- */
- nfsstats.accesscache_misses++;
- error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred,
- &rmode);
- if (!error) {
- if ((rmode & mode) != mode)
- error = EACCES;
- }
- }
- return (error);
- } else {
- if ((error = nfsspec_access(ap)) != 0) {
- return (error);
- }
- /*
- * Attempt to prevent a mapped root from accessing a file
- * which it shouldn't. We try to read a byte from the file
- * if the user is root and the file is not zero length.
- * After calling nfsspec_access, we should have the correct
- * file size cached.
- */
- mtx_lock(&np->n_mtx);
- if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
- && VTONFS(vp)->n_size > 0) {
- struct iovec aiov;
- struct uio auio;
- char buf[1];
-
- mtx_unlock(&np->n_mtx);
- aiov.iov_base = buf;
- aiov.iov_len = 1;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = 0;
- auio.uio_resid = 1;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = UIO_READ;
- auio.uio_td = ap->a_td;
-
- if (vp->v_type == VREG)
- error = nfs_readrpc(vp, &auio, ap->a_cred);
- else if (vp->v_type == VDIR) {
- char* bp;
- bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
- aiov.iov_base = bp;
- aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
- error = nfs_readdirrpc(vp, &auio, ap->a_cred);
- free(bp, M_TEMP);
- } else if (vp->v_type == VLNK)
- error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
- else
- error = EACCES;
- } else
- mtx_unlock(&np->n_mtx);
- return (error);
- }
-}
-
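/*
 * Editor's illustrative sketch; not part of the removed source.  For NFSv3,
 * nfs_access() translates the VOP access mode into ACCESS request bits:
 * VWRITE asks for MODIFY and EXTEND (plus DELETE on directories) and VEXEC
 * asks for EXECUTE on files but LOOKUP on directories.  The X_* constants
 * and accmode_to_nfsv3() below are hypothetical stand-ins for VREAD/VWRITE/
 * VEXEC and the NFSV3ACCESS_* definitions.
 */
#include <stdbool.h>
#include <stdint.h>

#define	X_VREAD		0x01
#define	X_VWRITE	0x02
#define	X_VEXEC		0x04

#define	X_ACC_READ	0x01
#define	X_ACC_LOOKUP	0x02
#define	X_ACC_MODIFY	0x04
#define	X_ACC_EXTEND	0x08
#define	X_ACC_DELETE	0x10
#define	X_ACC_EXECUTE	0x20

static uint32_t
accmode_to_nfsv3(int accmode, bool is_dir)
{
	uint32_t mode = 0;

	if (accmode & X_VREAD)
		mode |= X_ACC_READ;
	if (accmode & X_VWRITE)
		mode |= X_ACC_MODIFY | X_ACC_EXTEND |
		    (is_dir ? X_ACC_DELETE : 0);
	if (accmode & X_VEXEC)
		mode |= is_dir ? X_ACC_LOOKUP : X_ACC_EXECUTE;
	return (mode);
}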
-int nfs_otw_getattr_avoid = 0;
-
-/*
- * nfs open vnode op
- * Check to see if the type is ok
- * and that deletion is not in progress.
- * For paged in text files, you will need to flush the page cache
- * if consistency is lost.
- */
-/* ARGSUSED */
-static int
-nfs_open(struct vop_open_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct vattr vattr;
- int error;
- int fmode = ap->a_mode;
- struct ucred *cred;
-
- if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
- return (EOPNOTSUPP);
-
- /*
- * Get a valid lease. If cached data is stale, flush it.
- */
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NMODIFIED) {
- mtx_unlock(&np->n_mtx);
- error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
- if (error == EINTR || error == EIO)
- return (error);
- mtx_lock(&np->n_mtx);
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- if (vp->v_type == VDIR)
- np->n_direofoffset = 0;
- mtx_unlock(&np->n_mtx);
- error = VOP_GETATTR(vp, &vattr, ap->a_cred);
- if (error)
- return (error);
- mtx_lock(&np->n_mtx);
- np->n_mtime = vattr.va_mtime;
- } else {
- mtx_unlock(&np->n_mtx);
- error = VOP_GETATTR(vp, &vattr, ap->a_cred);
- if (error)
- return (error);
- mtx_lock(&np->n_mtx);
- if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
- if (vp->v_type == VDIR)
- np->n_direofoffset = 0;
- mtx_unlock(&np->n_mtx);
- error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
- if (error == EINTR || error == EIO) {
- return (error);
- }
- mtx_lock(&np->n_mtx);
- np->n_mtime = vattr.va_mtime;
- }
- }
- /*
- * If the object has >= 1 O_DIRECT active opens, we disable caching.
- */
- if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
- if (np->n_directio_opens == 0) {
- mtx_unlock(&np->n_mtx);
- error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
- if (error)
- return (error);
- mtx_lock(&np->n_mtx);
- np->n_flag |= NNONCACHE;
- }
- np->n_directio_opens++;
- }
-
- /*
- * If this is an open for writing, capture a reference to the
- * credentials, so they can be used by nfs_putpages(). Using
- * these write credentials is preferable to the credentials of
- * whatever thread happens to be doing the VOP_PUTPAGES() since
- * the write RPCs are less likely to fail with EACCES.
- */
- if ((fmode & FWRITE) != 0) {
- cred = np->n_writecred;
- np->n_writecred = crhold(ap->a_cred);
- } else
- cred = NULL;
- mtx_unlock(&np->n_mtx);
- if (cred != NULL)
- crfree(cred);
- vnode_create_vobject(vp, vattr.va_size, ap->a_td);
- return (0);
-}
-
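/*
 * Editor's illustrative sketch; not part of the removed source.  nfs_open()
 * gets close-to-open consistency by flushing locally modified data and by
 * invalidating cached data whenever the server's modification time no longer
 * matches the one recorded at the previous open.  struct cached_file and
 * must_invalidate_on_open() are hypothetical names for that decision.
 */
#include <stdbool.h>
#include <time.h>

struct cached_file {
	struct timespec	mtime;		/* mtime recorded at the last open */
	bool		modified;	/* locally dirty buffers exist */
};

static bool
must_invalidate_on_open(const struct cached_file *cf,
    const struct timespec *server_mtime)
{
	if (cf->modified)
		return (true);		/* push local changes out first */
	return (cf->mtime.tv_sec != server_mtime->tv_sec ||
	    cf->mtime.tv_nsec != server_mtime->tv_nsec);
}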
-/*
- * nfs close vnode op
- * What an NFS client should do upon close after writing is a debatable issue.
- * Most NFS clients push delayed writes to the server upon close, basically for
- * two reasons:
- * 1 - So that any write errors may be reported back to the client process
- * doing the close system call. By far the two most likely errors are
- * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
- * 2 - To put a worst case upper bound on cache inconsistency between
- * multiple clients for the file.
- * There is also a consistency problem for Version 2 of the protocol w.r.t.
- * not being able to tell if other clients are writing a file concurrently,
- * since there is no way of knowing if the changed modify time in the reply
- * is only due to the write for this client.
- * (NFS Version 3 provides weak cache consistency data in the reply that
- * should be sufficient to detect and handle this case.)
- *
- * The current code does the following:
- * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
- * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
- * or commit them (this satisfies 1 and 2 except for the
- * case where the server crashes after this close but
- * before the commit RPC, which is felt to be "good
- * enough"). Changing the last argument to nfs_flush() to
- * a 1 would force a commit operation, if it is felt a
- * commit is necessary now.
- */
-/* ARGSUSED */
-static int
-nfs_close(struct vop_close_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- int error = 0;
- int fmode = ap->a_fflag;
-
- if (vp->v_type == VREG) {
- /*
- * Examine and clean dirty pages, regardless of NMODIFIED.
- * This closes a major hole in close-to-open consistency.
- * We want to push out all dirty pages (and buffers) on
- * close, regardless of whether they were dirtied by
- * mmap'ed writes or via write().
- */
- if (nfs_clean_pages_on_close && vp->v_object) {
- VM_OBJECT_WLOCK(vp->v_object);
- vm_object_page_clean(vp->v_object, 0, 0, 0);
- VM_OBJECT_WUNLOCK(vp->v_object);
- }
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NMODIFIED) {
- mtx_unlock(&np->n_mtx);
- if (NFS_ISV3(vp)) {
- /*
- * Under NFSv3 we have dirty buffers to dispose of. We
- * must flush them to the NFS server. We have the option
- * of waiting all the way through the commit rpc or just
- * waiting for the initial write. The default is to only
- * wait through the initial write so the data is in the
- * server's cache, which is roughly similar to the state
- * a standard disk subsystem leaves the file in on close().
- *
- * We cannot clear the NMODIFIED bit in np->n_flag due to
- * potential races with other processes, and certainly
- * cannot clear it if we don't commit.
- */
- int cm = nfsv3_commit_on_close ? 1 : 0;
- error = nfs_flush(vp, MNT_WAIT, cm);
- /* np->n_flag &= ~NMODIFIED; */
- } else
- error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
- mtx_lock(&np->n_mtx);
- }
- if (np->n_flag & NWRITEERR) {
- np->n_flag &= ~NWRITEERR;
- error = np->n_error;
- }
- mtx_unlock(&np->n_mtx);
- }
- if (nfs_directio_enable)
- KASSERT((np->n_directio_asyncwr == 0),
- ("nfs_close: dirty unflushed (%d) directio buffers\n",
- np->n_directio_asyncwr));
- if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
- mtx_lock(&np->n_mtx);
- KASSERT((np->n_directio_opens > 0),
-		    ("nfs_close: unexpected value (0) of n_directio_opens\n"));
- np->n_directio_opens--;
- if (np->n_directio_opens == 0)
- np->n_flag &= ~NNONCACHE;
- mtx_unlock(&np->n_mtx);
- }
- return (error);
-}
-
-/*
- * nfs getattr call from vfs.
- */
-static int
-nfs_getattr(struct vop_getattr_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct thread *td = curthread;
- struct vattr *vap = ap->a_vap;
- struct vattr vattr;
- caddr_t bpos, dpos;
- int error = 0;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3 = NFS_ISV3(vp);
-
- /*
- * Update local times for special files.
- */
- mtx_lock(&np->n_mtx);
- if (np->n_flag & (NACC | NUPD))
- np->n_flag |= NCHG;
- mtx_unlock(&np->n_mtx);
- /*
- * First look in the cache.
- */
- if (nfs_getattrcache(vp, &vattr) == 0)
- goto nfsmout;
- if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
- nfsstats.accesscache_misses++;
- nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL);
- if (nfs_getattrcache(vp, &vattr) == 0)
- goto nfsmout;
- }
- nfsstats.rpccnt[NFSPROC_GETATTR]++;
- mreq = m_get2(NFSX_FH(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
- if (!error) {
- nfsm_loadattr(vp, &vattr);
- }
- m_freem(mrep);
-nfsmout:
- vap->va_type = vattr.va_type;
- vap->va_mode = vattr.va_mode;
- vap->va_nlink = vattr.va_nlink;
- vap->va_uid = vattr.va_uid;
- vap->va_gid = vattr.va_gid;
- vap->va_fsid = vattr.va_fsid;
- vap->va_fileid = vattr.va_fileid;
- vap->va_size = vattr.va_size;
- vap->va_blocksize = vattr.va_blocksize;
- vap->va_atime = vattr.va_atime;
- vap->va_mtime = vattr.va_mtime;
- vap->va_ctime = vattr.va_ctime;
- vap->va_gen = vattr.va_gen;
- vap->va_flags = vattr.va_flags;
- vap->va_rdev = vattr.va_rdev;
- vap->va_bytes = vattr.va_bytes;
- vap->va_filerev = vattr.va_filerev;
-
- return (error);
-}
-
-/*
- * nfs setattr call.
- */
-static int
-nfs_setattr(struct vop_setattr_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct vattr *vap = ap->a_vap;
- struct thread *td = curthread;
- int error = 0;
- u_quad_t tsize;
-
-#ifndef nolint
- tsize = (u_quad_t)0;
-#endif
-
- /*
- * Setting of flags is not supported.
- */
- if (vap->va_flags != VNOVAL)
- return (EOPNOTSUPP);
-
- /*
- * Disallow write attempts if the filesystem is mounted read-only.
- */
- if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
- vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
- vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
- (vp->v_mount->mnt_flag & MNT_RDONLY)) {
- error = EROFS;
- goto out;
- }
- if (vap->va_size != VNOVAL) {
- switch (vp->v_type) {
- case VDIR:
- return (EISDIR);
- case VCHR:
- case VBLK:
- case VSOCK:
- case VFIFO:
- if (vap->va_mtime.tv_sec == VNOVAL &&
- vap->va_atime.tv_sec == VNOVAL &&
- vap->va_mode == (mode_t)VNOVAL &&
- vap->va_uid == (uid_t)VNOVAL &&
- vap->va_gid == (gid_t)VNOVAL)
- return (0);
- vap->va_size = VNOVAL;
- break;
- default:
- /*
- * Disallow write attempts if the filesystem is
- * mounted read-only.
- */
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
- return (EROFS);
- /*
- * We run vnode_pager_setsize() early (why?),
- * we must set np->n_size now to avoid vinvalbuf
- * V_SAVE races that might setsize a lower
- * value.
- */
- mtx_lock(&np->n_mtx);
- tsize = np->n_size;
- mtx_unlock(&np->n_mtx);
- error = nfs_meta_setsize(vp, ap->a_cred, td,
- vap->va_size);
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NMODIFIED) {
- tsize = np->n_size;
- mtx_unlock(&np->n_mtx);
- if (vap->va_size == 0)
- error = nfs_vinvalbuf(vp, 0, td, 1);
- else
- error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
- if (error) {
- vnode_pager_setsize(vp, tsize);
- goto out;
- }
- } else
- mtx_unlock(&np->n_mtx);
- /*
- * np->n_size has already been set to vap->va_size
- * in nfs_meta_setsize(). We must set it again since
- * nfs_loadattrcache() could be called through
- * nfs_meta_setsize() and could modify np->n_size.
- */
- mtx_lock(&np->n_mtx);
- np->n_vattr.va_size = np->n_size = vap->va_size;
- mtx_unlock(&np->n_mtx);
-		}
- } else {
- mtx_lock(&np->n_mtx);
- if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
- (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
- mtx_unlock(&np->n_mtx);
- if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
- (error == EINTR || error == EIO))
- return error;
- } else
- mtx_unlock(&np->n_mtx);
- }
- error = nfs_setattrrpc(vp, vap, ap->a_cred);
- if (error && vap->va_size != VNOVAL) {
- mtx_lock(&np->n_mtx);
- np->n_size = np->n_vattr.va_size = tsize;
- vnode_pager_setsize(vp, tsize);
- mtx_unlock(&np->n_mtx);
- }
-out:
- return (error);
-}
-
-/*
- * Do an nfs setattr rpc.
- */
-static int
-nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
-{
- struct nfsv2_sattr *sp;
- struct nfsnode *np = VTONFS(vp);
- caddr_t bpos, dpos;
- u_int32_t *tl;
- int error = 0, i, wccflag = NFSV3_WCCRATTR;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3 = NFS_ISV3(vp);
-
- nfsstats.rpccnt[NFSPROC_SETATTR]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_SATTR(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- if (v3) {
- nfsm_v3attrbuild(vap, TRUE);
- tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
- *tl = nfs_false;
- } else {
- sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
- if (vap->va_mode == (mode_t)VNOVAL)
- sp->sa_mode = nfs_xdrneg1;
- else
- sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
- if (vap->va_uid == (uid_t)VNOVAL)
- sp->sa_uid = nfs_xdrneg1;
- else
- sp->sa_uid = txdr_unsigned(vap->va_uid);
- if (vap->va_gid == (gid_t)VNOVAL)
- sp->sa_gid = nfs_xdrneg1;
- else
- sp->sa_gid = txdr_unsigned(vap->va_gid);
- sp->sa_size = txdr_unsigned(vap->va_size);
- txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
- txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
- }
- nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
- if (v3) {
- mtx_lock(&np->n_mtx);
- for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
- np->n_accesscache[i].stamp = 0;
- mtx_unlock(&np->n_mtx);
- KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
- nfsm_wcc_data(vp, wccflag);
- } else
- nfsm_loadattr(vp, NULL);
- m_freem(mrep);
-nfsmout:
- return (error);
-}
-
-/*
- * nfs lookup call, one step at a time...
- * First look in cache
- * If not found, unlock the directory nfsnode and do the rpc
- */
-static int
-nfs_lookup(struct vop_lookup_args *ap)
-{
- struct componentname *cnp = ap->a_cnp;
- struct vnode *dvp = ap->a_dvp;
- struct vnode **vpp = ap->a_vpp;
- struct mount *mp = dvp->v_mount;
- struct vattr dvattr, vattr;
- struct timespec nctime;
- int flags = cnp->cn_flags;
- struct vnode *newvp;
- struct nfsmount *nmp;
- caddr_t bpos, dpos;
- struct mbuf *mreq, *mrep, *md, *mb;
- long len;
- nfsfh_t *fhp;
- struct nfsnode *np, *newnp;
- int error = 0, attrflag, dattrflag, fhsize, ltype, ncticks;
- int v3 = NFS_ISV3(dvp);
- struct thread *td = cnp->cn_thread;
-
- *vpp = NULLVP;
- if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
- return (EROFS);
- if (dvp->v_type != VDIR)
- return (ENOTDIR);
- nmp = VFSTONFS(mp);
- np = VTONFS(dvp);
- if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
- *vpp = NULLVP;
- return (error);
- }
- error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
- if (error > 0 && error != ENOENT)
- return (error);
- if (error == -1) {
- /*
- * Lookups of "." are special and always return the
- * current directory. cache_lookup() already handles
- * associated locking bookkeeping, etc.
- */
- if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
- /* XXX: Is this really correct? */
- if (cnp->cn_nameiop != LOOKUP &&
- (flags & ISLASTCN))
- cnp->cn_flags |= SAVENAME;
- return (0);
- }
-
- /*
- * We only accept a positive hit in the cache if the
- * change time of the file matches our cached copy.
- * Otherwise, we discard the cache entry and fallback
- * to doing a lookup RPC. We also only trust cache
- * entries for less than nm_nametimeo seconds.
- *
- * To better handle stale file handles and attributes,
- * clear the attribute cache of this node if it is a
- * leaf component, part of an open() call, and not
- * locally modified before fetching the attributes.
- * This should allow stale file handles to be detected
- * here where we can fall back to a LOOKUP RPC to
- * recover rather than having nfs_open() detect the
- * stale file handle and failing open(2) with ESTALE.
- */
- newvp = *vpp;
- newnp = VTONFS(newvp);
- if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
- (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
- !(newnp->n_flag & NMODIFIED)) {
- mtx_lock(&newnp->n_mtx);
- newnp->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
- mtx_unlock(&newnp->n_mtx);
- }
- if ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
- VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
- timespeccmp(&vattr.va_ctime, &nctime, ==)) {
- nfsstats.lookupcache_hits++;
- if (cnp->cn_nameiop != LOOKUP &&
- (flags & ISLASTCN))
- cnp->cn_flags |= SAVENAME;
- return (0);
- }
- cache_purge(newvp);
- if (dvp != newvp)
- vput(newvp);
- else
- vrele(newvp);
- *vpp = NULLVP;
- } else if (error == ENOENT) {
- if (dvp->v_iflag & VI_DOOMED)
- return (ENOENT);
- /*
- * We only accept a negative hit in the cache if the
- * modification time of the parent directory matches
- * the cached copy in the name cache entry.
- * Otherwise, we discard all of the negative cache
- * entries for this directory. We also only trust
- * negative cache entries for up to nm_negnametimeo
- * seconds.
- */
- if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
- VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
- timespeccmp(&vattr.va_mtime, &nctime, ==)) {
- nfsstats.lookupcache_hits++;
- return (ENOENT);
- }
- cache_purge_negative(dvp);
- }
-
- attrflag = dattrflag = 0;
- error = 0;
- newvp = NULLVP;
- nfsstats.lookupcache_misses++;
- nfsstats.rpccnt[NFSPROC_LOOKUP]++;
- len = cnp->cn_namelen;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len), M_WAITOK,
- MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
- nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
- if (error) {
- if (v3) {
- nfsm_postop_attr_va(dvp, dattrflag, &vattr);
- m_freem(mrep);
- }
- goto nfsmout;
- }
- nfsm_getfh(fhp, fhsize, v3);
-
- /*
- * Handle RENAME case...
- */
- if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
- if (NFS_CMPFH(np, fhp, fhsize)) {
- m_freem(mrep);
- return (EISDIR);
- }
- error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
- if (error) {
- m_freem(mrep);
- return (error);
- }
- newvp = NFSTOV(np);
- if (v3) {
- nfsm_postop_attr(newvp, attrflag);
- nfsm_postop_attr(dvp, attrflag);
- } else
- nfsm_loadattr(newvp, NULL);
- *vpp = newvp;
- m_freem(mrep);
- cnp->cn_flags |= SAVENAME;
- return (0);
- }
-
- if (flags & ISDOTDOT) {
- ltype = VOP_ISLOCKED(dvp);
- error = vfs_busy(mp, MBF_NOWAIT);
- if (error != 0) {
- vfs_ref(mp);
- VOP_UNLOCK(dvp, 0);
- error = vfs_busy(mp, 0);
- vn_lock(dvp, ltype | LK_RETRY);
- vfs_rel(mp);
- if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
- vfs_unbusy(mp);
- error = ENOENT;
- }
- if (error != 0) {
- m_freem(mrep);
- return (error);
- }
- }
- VOP_UNLOCK(dvp, 0);
- error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
- if (error == 0)
- newvp = NFSTOV(np);
- vfs_unbusy(mp);
- if (newvp != dvp)
- vn_lock(dvp, ltype | LK_RETRY);
- if (dvp->v_iflag & VI_DOOMED) {
- if (error == 0) {
- if (newvp == dvp)
- vrele(newvp);
- else
- vput(newvp);
- }
- error = ENOENT;
- }
- if (error) {
- m_freem(mrep);
- return (error);
- }
- } else if (NFS_CMPFH(np, fhp, fhsize)) {
- VREF(dvp);
- newvp = dvp;
- } else {
- error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
- if (error) {
- m_freem(mrep);
- return (error);
- }
- newvp = NFSTOV(np);
-
- /*
- * Flush the attribute cache when opening a leaf node
- * to ensure that fresh attributes are fetched in
- * nfs_open() if we are unable to fetch attributes
- * from the LOOKUP reply.
- */
- if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
- !(np->n_flag & NMODIFIED)) {
- mtx_lock(&np->n_mtx);
- np->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
- mtx_unlock(&np->n_mtx);
- }
- }
- if (v3) {
- nfsm_postop_attr_va(newvp, attrflag, &vattr);
- nfsm_postop_attr_va(dvp, dattrflag, &dvattr);
- } else {
- nfsm_loadattr(newvp, &vattr);
- attrflag = 1;
- }
- if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
- cnp->cn_flags |= SAVENAME;
- if ((cnp->cn_flags & MAKEENTRY) &&
- (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
- attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
- cache_enter_time(dvp, newvp, cnp, &vattr.va_ctime,
- newvp->v_type != VDIR ? NULL : &dvattr.va_ctime);
- *vpp = newvp;
- m_freem(mrep);
-nfsmout:
- if (error) {
- if (newvp != NULLVP) {
- vput(newvp);
- *vpp = NULLVP;
- }
-
- if (error != ENOENT)
- goto done;
-
- /* The requested file was not found. */
- if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
- (flags & ISLASTCN)) {
- /*
- * XXX: UFS does a full VOP_ACCESS(dvp,
- * VWRITE) here instead of just checking
- * MNT_RDONLY.
- */
- if (mp->mnt_flag & MNT_RDONLY)
- return (EROFS);
- cnp->cn_flags |= SAVENAME;
- return (EJUSTRETURN);
- }
-
- if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
- /*
- * Cache the modification time of the parent
- * directory from the post-op attributes in
- * the name cache entry. The negative cache
- * entry will be ignored once the directory
- * has changed. Don't bother adding the entry
- * if the directory has already changed.
- */
- mtx_lock(&np->n_mtx);
- if (timespeccmp(&np->n_vattr.va_mtime,
- &vattr.va_mtime, ==)) {
- mtx_unlock(&np->n_mtx);
- cache_enter_time(dvp, NULL, cnp,
- &vattr.va_mtime, NULL);
- } else
- mtx_unlock(&np->n_mtx);
- }
- return (ENOENT);
- }
-done:
- return (error);
-}
-
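/*
 * Editor's illustrative sketch; not part of the removed source.  nfs_lookup()
 * only trusts a positive name-cache hit while the entry is younger than
 * nm_nametimeo seconds and the file's change time still matches the value
 * recorded with the entry (negative entries use nm_negnametimeo and the
 * directory's modification time instead).  positive_entry_trusted() below is
 * a hypothetical restatement with the kernel's "ticks"/"hz" passed as plain
 * parameters.
 */
#include <stdbool.h>
#include <time.h>

static bool
positive_entry_trusted(unsigned int now_ticks, unsigned int entry_ticks,
    int hz, int nametimeo, const struct timespec *cached_ctime,
    const struct timespec *current_ctime)
{
	if ((unsigned int)(now_ticks - entry_ticks) >=
	    (unsigned int)(nametimeo * hz))
		return (false);		/* entry older than the timeout */
	return (cached_ctime->tv_sec == current_ctime->tv_sec &&
	    cached_ctime->tv_nsec == current_ctime->tv_nsec);
}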
-/*
- * nfs read call.
- * Just call nfs_bioread() to do the work.
- */
-static int
-nfs_read(struct vop_read_args *ap)
-{
- struct vnode *vp = ap->a_vp;
-
- switch (vp->v_type) {
- case VREG:
- return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
- case VDIR:
- return (EISDIR);
- default:
- return (EOPNOTSUPP);
- }
-}
-
-/*
- * nfs readlink call
- */
-static int
-nfs_readlink(struct vop_readlink_args *ap)
-{
- struct vnode *vp = ap->a_vp;
-
- if (vp->v_type != VLNK)
- return (EINVAL);
- return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
-}
-
-/*
- * Do a readlink rpc.
- * Called by nfs_doio() from below the buffer cache.
- */
-int
-nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
-{
- caddr_t bpos, dpos;
- int error = 0, len, attrflag;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3 = NFS_ISV3(vp);
-
- nfsstats.rpccnt[NFSPROC_READLINK]++;
- mreq = m_get2(NFSX_FH(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
- if (v3)
- nfsm_postop_attr(vp, attrflag);
- if (!error) {
- nfsm_strsiz(len, NFS_MAXPATHLEN);
- if (len == NFS_MAXPATHLEN) {
- struct nfsnode *np = VTONFS(vp);
- mtx_lock(&np->n_mtx);
- if (np->n_size && np->n_size < NFS_MAXPATHLEN)
- len = np->n_size;
- mtx_unlock(&np->n_mtx);
- }
- nfsm_mtouio(uiop, len);
- }
- m_freem(mrep);
-nfsmout:
- return (error);
-}
-
-/*
- * nfs read rpc call
- * Ditto above
- */
-int
-nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
-{
- u_int32_t *tl;
- caddr_t bpos, dpos;
- struct mbuf *mreq, *mrep, *md, *mb;
- struct nfsmount *nmp;
- off_t end;
- int error = 0, len, retlen, tsiz, eof, attrflag;
- int v3 = NFS_ISV3(vp);
- int rsize;
-
-#ifndef nolint
- eof = 0;
-#endif
- nmp = VFSTONFS(vp->v_mount);
- tsiz = uiop->uio_resid;
- mtx_lock(&nmp->nm_mtx);
- end = uiop->uio_offset + tsiz;
- if (end > nmp->nm_maxfilesize || end < uiop->uio_offset) {
- mtx_unlock(&nmp->nm_mtx);
- return (EFBIG);
- }
- rsize = nmp->nm_rsize;
- mtx_unlock(&nmp->nm_mtx);
- while (tsiz > 0) {
- nfsstats.rpccnt[NFSPROC_READ]++;
- len = (tsiz > rsize) ? rsize : tsiz;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED * 3, M_WAITOK,
- MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
- if (v3) {
- txdr_hyper(uiop->uio_offset, tl);
- *(tl + 2) = txdr_unsigned(len);
- } else {
- *tl++ = txdr_unsigned(uiop->uio_offset);
- *tl++ = txdr_unsigned(len);
- *tl = 0;
- }
- nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
- if (v3) {
- nfsm_postop_attr(vp, attrflag);
- if (error) {
- m_freem(mrep);
- goto nfsmout;
- }
- tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
- eof = fxdr_unsigned(int, *(tl + 1));
- } else {
- nfsm_loadattr(vp, NULL);
- }
- nfsm_strsiz(retlen, rsize);
- nfsm_mtouio(uiop, retlen);
- m_freem(mrep);
- tsiz -= retlen;
- if (v3) {
- if (eof || retlen == 0) {
- tsiz = 0;
- }
- } else if (retlen < len) {
- tsiz = 0;
- }
- }
-nfsmout:
- return (error);
-}
-
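/*
 * Editor's illustrative sketch; not part of the removed source.  The loop in
 * nfs_readrpc() issues READ RPCs of at most nm_rsize bytes until the request
 * is satisfied, the server reports EOF, or (for v2) a short reply is seen.
 * read_in_chunks() is a hypothetical userland analogue of that chunking loop
 * using an ordinary file descriptor in place of the RPC.
 */
#include <sys/types.h>
#include <unistd.h>

static ssize_t
read_in_chunks(int fd, char *buf, size_t resid, size_t rsize)
{
	size_t done = 0;
	ssize_t ret;

	while (resid > 0) {
		size_t len = resid > rsize ? rsize : resid;

		ret = read(fd, buf + done, len);	/* one "READ RPC" */
		if (ret < 0)
			return (-1);
		done += (size_t)ret;
		resid -= (size_t)ret;
		if ((size_t)ret < len)
			break;			/* EOF or short transfer */
	}
	return ((ssize_t)done);
}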
-/*
- * nfs write call
- */
-int
-nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
- int *iomode, int *must_commit)
-{
- u_int32_t *tl;
- int32_t backup;
- caddr_t bpos, dpos;
- struct mbuf *mreq, *mrep, *md, *mb;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- off_t end;
- int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
- int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
- int wsize;
-
- KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
- *must_commit = 0;
- tsiz = uiop->uio_resid;
- mtx_lock(&nmp->nm_mtx);
- end = uiop->uio_offset + tsiz;
- if (end > nmp->nm_maxfilesize || end < uiop->uio_offset) {
- mtx_unlock(&nmp->nm_mtx);
- return (EFBIG);
- }
- wsize = nmp->nm_wsize;
- mtx_unlock(&nmp->nm_mtx);
- while (tsiz > 0) {
- nfsstats.rpccnt[NFSPROC_WRITE]++;
- len = (tsiz > wsize) ? wsize : tsiz;
- mreq = m_get2(NFSX_FH(v3) + 5 * NFSX_UNSIGNED, M_WAITOK,
- MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- if (v3) {
- tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
- txdr_hyper(uiop->uio_offset, tl);
- tl += 2;
- *tl++ = txdr_unsigned(len);
- *tl++ = txdr_unsigned(*iomode);
- *tl = txdr_unsigned(len);
- } else {
- u_int32_t x;
-
- tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
- /* Set both "begin" and "current" to non-garbage. */
- x = txdr_unsigned((u_int32_t)uiop->uio_offset);
- *tl++ = x; /* "begin offset" */
- *tl++ = x; /* "current offset" */
- x = txdr_unsigned(len);
- *tl++ = x; /* total to this offset */
- *tl = x; /* size of this write */
- }
- nfsm_uiotom(uiop, len);
- nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
- if (v3) {
- wccflag = NFSV3_WCCCHK;
- nfsm_wcc_data(vp, wccflag);
- if (!error) {
- tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
- + NFSX_V3WRITEVERF);
- rlen = fxdr_unsigned(int, *tl++);
- if (rlen == 0) {
- error = NFSERR_IO;
- m_freem(mrep);
- break;
- } else if (rlen < len) {
- backup = len - rlen;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base -
- backup;
- uiop->uio_iov->iov_len += backup;
- uiop->uio_offset -= backup;
- uiop->uio_resid += backup;
- len = rlen;
- }
- commit = fxdr_unsigned(int, *tl++);
-
- /*
-				 * Return the lowest commitment level
- * obtained by any of the RPCs.
- */
- if (committed == NFSV3WRITE_FILESYNC)
- committed = commit;
- else if (committed == NFSV3WRITE_DATASYNC &&
- commit == NFSV3WRITE_UNSTABLE)
- committed = commit;
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
- bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
- NFSX_V3WRITEVERF);
- nmp->nm_state |= NFSSTA_HASWRITEVERF;
- } else if (bcmp((caddr_t)tl,
- (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
- *must_commit = 1;
- bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
- NFSX_V3WRITEVERF);
- }
- mtx_unlock(&nmp->nm_mtx);
- }
- } else {
- nfsm_loadattr(vp, NULL);
- }
- if (wccflag) {
- mtx_lock(&(VTONFS(vp))->n_mtx);
- VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
- mtx_unlock(&(VTONFS(vp))->n_mtx);
- }
- m_freem(mrep);
- if (error)
- break;
- tsiz -= len;
- }
-nfsmout:
- if (DOINGASYNC(vp))
- committed = NFSV3WRITE_FILESYNC;
- *iomode = committed;
- if (error)
- uiop->uio_resid = tsiz;
- return (error);
-}
-
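/*
 * Editor's illustrative sketch; not part of the removed source.  When a
 * write is split across several WRITE RPCs, nfs_writerpc() reports back the
 * weakest stability level promised by any reply (FILESYNC is strongest,
 * then DATASYNC, then UNSTABLE).  The enum and combine_commit_level() below
 * are hypothetical stand-ins for the NFSV3WRITE_* constants and that
 * "keep the lowest" rule.
 */
enum write_commit {
	WRITE_UNSTABLE = 0,
	WRITE_DATASYNC = 1,
	WRITE_FILESYNC = 2
};

static enum write_commit
combine_commit_level(enum write_commit committed, enum write_commit reply)
{
	if (committed == WRITE_FILESYNC)
		return (reply);			/* first reply sets the level */
	if (committed == WRITE_DATASYNC && reply == WRITE_UNSTABLE)
		return (WRITE_UNSTABLE);	/* can only get weaker */
	return (committed);
}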
-/*
- * nfs mknod rpc
- * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
- * mode set to specify the file type and the size field for rdev.
- */
-static int
-nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
- struct vattr *vap)
-{
- struct nfsv2_sattr *sp;
- u_int32_t *tl;
- struct vnode *newvp = NULL;
- struct nfsnode *np = NULL;
- struct vattr vattr;
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
- struct mbuf *mreq, *mrep, *md, *mb;
- u_int32_t rdev;
- int v3 = NFS_ISV3(dvp);
-
- if (vap->va_type == VCHR || vap->va_type == VBLK)
- rdev = txdr_unsigned(vap->va_rdev);
- else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
- rdev = nfs_xdrneg1;
- else {
- return (EOPNOTSUPP);
- }
- if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
- return (error);
- nfsstats.rpccnt[NFSPROC_MKNOD]++;
- mreq = m_get2(NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
- nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
- if (v3) {
- tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
- *tl++ = vtonfsv3_type(vap->va_type);
- nfsm_v3attrbuild(vap, FALSE);
- if (vap->va_type == VCHR || vap->va_type == VBLK) {
- tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
- *tl++ = txdr_unsigned(major(vap->va_rdev));
- *tl = txdr_unsigned(minor(vap->va_rdev));
- }
- } else {
- sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
- sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
- sp->sa_uid = nfs_xdrneg1;
- sp->sa_gid = nfs_xdrneg1;
- sp->sa_size = rdev;
- txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
- txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
- }
- nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
- if (!error) {
- nfsm_mtofh(dvp, newvp, v3, gotvp);
- if (!gotvp) {
- if (newvp) {
- vput(newvp);
- newvp = NULL;
- }
- error = nfs_lookitup(dvp, cnp->cn_nameptr,
- cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
- if (!error)
- newvp = NFSTOV(np);
- }
- }
- if (v3)
- nfsm_wcc_data(dvp, wccflag);
- m_freem(mrep);
-nfsmout:
- if (error) {
- if (newvp)
- vput(newvp);
- } else {
- *vpp = newvp;
- }
- mtx_lock(&(VTONFS(dvp))->n_mtx);
- VTONFS(dvp)->n_flag |= NMODIFIED;
- if (!wccflag) {
- VTONFS(dvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
- }
- mtx_unlock(&(VTONFS(dvp))->n_mtx);
- return (error);
-}
-
-/*
- * nfs mknod vop
- * just call nfs_mknodrpc() to do the work.
- */
-/* ARGSUSED */
-static int
-nfs_mknod(struct vop_mknod_args *ap)
-{
- return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
-}
-
-static u_long create_verf;
-/*
- * nfs file create call
- */
-static int
-nfs_create(struct vop_create_args *ap)
-{
- struct vnode *dvp = ap->a_dvp;
- struct vattr *vap = ap->a_vap;
- struct componentname *cnp = ap->a_cnp;
- struct nfsv2_sattr *sp;
- u_int32_t *tl;
- struct nfsnode *np = NULL;
- struct vnode *newvp = NULL;
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
- struct mbuf *mreq, *mrep, *md, *mb;
- struct vattr vattr;
- int v3 = NFS_ISV3(dvp);
-
- /*
- * Oops, not for me..
- */
- if (vap->va_type == VSOCK) {
- error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap);
- return (error);
- }
-
- if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) {
- return (error);
- }
- if (vap->va_vaflags & VA_EXCLUSIVE)
- fmode |= O_EXCL;
-again:
- nfsstats.rpccnt[NFSPROC_CREATE]++;
- mreq = m_get2(NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
- nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
- if (v3) {
- tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
- if (fmode & O_EXCL) {
- *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
- tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
-#ifdef INET
- CURVNET_SET(CRED_TO_VNET(cnp->cn_cred));
- IN_IFADDR_RLOCK();
- if (!TAILQ_EMPTY(&V_in_ifaddrhead))
- *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr;
- else
-#endif
- *tl++ = create_verf;
-#ifdef INET
- IN_IFADDR_RUNLOCK();
- CURVNET_RESTORE();
-#endif
- *tl = ++create_verf;
- } else {
- *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
- nfsm_v3attrbuild(vap, FALSE);
- }
- } else {
- sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
- sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
- sp->sa_uid = nfs_xdrneg1;
- sp->sa_gid = nfs_xdrneg1;
- sp->sa_size = 0;
- txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
- txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
- }
- nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
- if (!error) {
- nfsm_mtofh(dvp, newvp, v3, gotvp);
- if (!gotvp) {
- if (newvp) {
- vput(newvp);
- newvp = NULL;
- }
- error = nfs_lookitup(dvp, cnp->cn_nameptr,
- cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
- if (!error)
- newvp = NFSTOV(np);
- }
- }
- if (v3)
- nfsm_wcc_data(dvp, wccflag);
- m_freem(mrep);
-nfsmout:
- if (error) {
- if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
- fmode &= ~O_EXCL;
- goto again;
- }
- if (newvp)
- vput(newvp);
- } else if (v3 && (fmode & O_EXCL)) {
- /*
- * We are normally called with only a partially initialized
-		 * VAP. Since the NFSv3 spec says that a server may use the
-		 * file attributes to store the verifier, the spec requires
-		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
-		 * in atime, but we can't really assume that all servers will,
-		 * so we ensure that our SETATTR sets both atime and mtime.
- */
- if (vap->va_mtime.tv_sec == VNOVAL)
- vfs_timestamp(&vap->va_mtime);
- if (vap->va_atime.tv_sec == VNOVAL)
- vap->va_atime = vap->va_mtime;
- error = nfs_setattrrpc(newvp, vap, cnp->cn_cred);
- if (error)
- vput(newvp);
- }
- if (!error) {
- *ap->a_vpp = newvp;
- }
- mtx_lock(&(VTONFS(dvp))->n_mtx);
- VTONFS(dvp)->n_flag |= NMODIFIED;
- if (!wccflag) {
- VTONFS(dvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
- }
- mtx_unlock(&(VTONFS(dvp))->n_mtx);
- return (error);
-}
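
For an exclusive create, the request above carries an 8-byte verifier instead of attributes: one word intended to be unique to this client (the first configured IPv4 address when INET is in the kernel, otherwise the counter) and one that changes on every call. The server remembers the verifier with the file, so a retransmitted CREATE with the same verifier is treated as a success rather than returning EEXIST. A rough userland sketch; build_create_verf and create_counter are illustrative names:

#include <stdint.h>
#include <string.h>

static uint32_t create_counter;

static void
build_create_verf(uint32_t client_addr, unsigned char verf[8])
{
	uint32_t seq = ++create_counter;

	/* First half: something unique per client. */
	memcpy(verf, &client_addr, sizeof(client_addr));
	/* Second half: something unique per CREATE attempt. */
	memcpy(verf + 4, &seq, sizeof(seq));
}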
-
-/*
- * nfs file remove call
- * To try and make nfs semantics closer to ufs semantics, a file that has
- * other processes using the vnode is renamed instead of removed and then
- * removed later on the last close.
- * - If v_usecount > 1
- * If a rename is not already in the works
- * call nfs_sillyrename() to set it up
- * else
- * do the remove rpc
- */
-static int
-nfs_remove(struct vop_remove_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *dvp = ap->a_dvp;
- struct componentname *cnp = ap->a_cnp;
- struct nfsnode *np = VTONFS(vp);
- int error = 0;
- struct vattr vattr;
-
- KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
- KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
- if (vp->v_type == VDIR)
- error = EPERM;
- else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
- !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) {
- /*
- * Purge the name cache so that the chance of a lookup for
- * the name succeeding while the remove is in progress is
- * minimized. Without node locking it can still happen, such
- * that an I/O op returns ESTALE, but since you get this if
- * another host removes the file..
- */
- cache_purge(vp);
- /*
- * throw away biocache buffers, mainly to avoid
- * unnecessary delayed writes later.
- */
- error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
- /* Do the rpc */
- if (error != EINTR && error != EIO)
- error = nfs_removerpc(dvp, cnp->cn_nameptr,
- cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
- /*
- * Kludge City: If the first reply to the remove rpc is lost..
- * the reply to the retransmitted request will be ENOENT
-		 * since the file was in fact removed.
-		 * Therefore, we cheat and return success.
- */
- if (error == ENOENT)
- error = 0;
- } else if (!np->n_sillyrename)
- error = nfs_sillyrename(dvp, vp, cnp);
- mtx_lock(&np->n_mtx);
- np->n_attrstamp = 0;
- mtx_unlock(&np->n_mtx);
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- return (error);
-}
-
-/*
- * nfs file remove rpc called from nfs_inactive
- */
-int
-nfs_removeit(struct sillyrename *sp)
-{
- /*
- * Make sure that the directory vnode is still valid.
- * XXX we should lock sp->s_dvp here.
- */
- if (sp->s_dvp->v_type == VBAD)
- return (0);
- return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
- NULL));
-}
-
-/*
- * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
- */
-static int
-nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
- struct ucred *cred, struct thread *td)
-{
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3 = NFS_ISV3(dvp);
-
- nfsstats.rpccnt[NFSPROC_REMOVE]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen),
- M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
- nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
- if (v3)
- nfsm_wcc_data(dvp, wccflag);
- m_freem(mrep);
-nfsmout:
- mtx_lock(&(VTONFS(dvp))->n_mtx);
- VTONFS(dvp)->n_flag |= NMODIFIED;
- if (!wccflag) {
- VTONFS(dvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
- }
- mtx_unlock(&(VTONFS(dvp))->n_mtx);
- return (error);
-}
-
-/*
- * nfs file rename call
- */
-static int
-nfs_rename(struct vop_rename_args *ap)
-{
- struct vnode *fvp = ap->a_fvp;
- struct vnode *tvp = ap->a_tvp;
- struct vnode *fdvp = ap->a_fdvp;
- struct vnode *tdvp = ap->a_tdvp;
- struct componentname *tcnp = ap->a_tcnp;
- struct componentname *fcnp = ap->a_fcnp;
- int error;
-
- KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
- (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
- /* Check for cross-device rename */
- if ((fvp->v_mount != tdvp->v_mount) ||
- (tvp && (fvp->v_mount != tvp->v_mount))) {
- error = EXDEV;
- goto out;
- }
-
- if (fvp == tvp) {
- nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
- error = 0;
- goto out;
- }
- if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
- goto out;
-
- /*
- * We have to flush B_DELWRI data prior to renaming
- * the file. If we don't, the delayed-write buffers
- * can be flushed out later after the file has gone stale
- * under NFSV3. NFSV2 does not have this problem because
-	 * (as far as I can tell) it flushes dirty buffers more
- * often.
- *
- * Skip the rename operation if the fsync fails, this can happen
- * due to the server's volume being full, when we pushed out data
- * that was written back to our cache earlier. Not checking for
- * this condition can result in potential (silent) data loss.
- */
- error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
- VOP_UNLOCK(fvp, 0);
- if (!error && tvp)
- error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
- if (error)
- goto out;
-
- /*
- * If the tvp exists and is in use, sillyrename it before doing the
- * rename of the new file over it.
- * XXX Can't sillyrename a directory.
- */
- if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
- tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
- vput(tvp);
- tvp = NULL;
- }
-
- error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
- tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
- tcnp->cn_thread);
-
- if (fvp->v_type == VDIR) {
- if (tvp != NULL && tvp->v_type == VDIR)
- cache_purge(tdvp);
- cache_purge(fdvp);
- }
-
-out:
- if (tdvp == tvp)
- vrele(tdvp);
- else
- vput(tdvp);
- if (tvp)
- vput(tvp);
- vrele(fdvp);
- vrele(fvp);
- /*
- * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
- */
- if (error == ENOENT)
- error = 0;
- return (error);
-}
-
-/*
- * nfs file rename rpc called from nfs_remove() above
- */
-static int
-nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
- struct sillyrename *sp)
-{
-
- return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
- sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
-}
-
-/*
- * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
- */
-static int
-nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
- struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
- struct thread *td)
-{
- caddr_t bpos, dpos;
- int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3 = NFS_ISV3(fdvp);
-
- nfsstats.rpccnt[NFSPROC_RENAME]++;
- mreq = m_get2((NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
- nfsm_rndup(tnamelen), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(fdvp, v3);
- nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
- nfsm_fhtom(tdvp, v3);
- nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
- nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
- if (v3) {
- nfsm_wcc_data(fdvp, fwccflag);
- nfsm_wcc_data(tdvp, twccflag);
- }
- m_freem(mrep);
-nfsmout:
- mtx_lock(&(VTONFS(fdvp))->n_mtx);
- VTONFS(fdvp)->n_flag |= NMODIFIED;
- mtx_unlock(&(VTONFS(fdvp))->n_mtx);
- mtx_lock(&(VTONFS(tdvp))->n_mtx);
- VTONFS(tdvp)->n_flag |= NMODIFIED;
- mtx_unlock(&(VTONFS(tdvp))->n_mtx);
- if (!fwccflag) {
- VTONFS(fdvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
- }
- if (!twccflag) {
- VTONFS(tdvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
- }
- return (error);
-}
-
-/*
- * nfs hard link create call
- */
-static int
-nfs_link(struct vop_link_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *tdvp = ap->a_tdvp;
- struct componentname *cnp = ap->a_cnp;
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3;
-
- if (vp->v_mount != tdvp->v_mount) {
- return (EXDEV);
- }
-
- /*
- * Push all writes to the server, so that the attribute cache
- * doesn't get "out of sync" with the server.
- * XXX There should be a better way!
- */
- VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
-
- v3 = NFS_ISV3(vp);
- nfsstats.rpccnt[NFSPROC_LINK]++;
- mreq = m_get2(NFSX_FH(v3)*2 + NFSX_UNSIGNED +
- nfsm_rndup(cnp->cn_namelen), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- nfsm_fhtom(tdvp, v3);
- nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
- nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
- if (v3) {
- nfsm_postop_attr(vp, attrflag);
- nfsm_wcc_data(tdvp, wccflag);
- }
- m_freem(mrep);
-nfsmout:
- mtx_lock(&(VTONFS(tdvp))->n_mtx);
- VTONFS(tdvp)->n_flag |= NMODIFIED;
- mtx_unlock(&(VTONFS(tdvp))->n_mtx);
- if (!attrflag) {
- VTONFS(vp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
- }
- if (!wccflag) {
- VTONFS(tdvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
- }
- return (error);
-}
-
-/*
- * nfs symbolic link create call
- */
-static int
-nfs_symlink(struct vop_symlink_args *ap)
-{
- struct vnode *dvp = ap->a_dvp;
- struct vattr *vap = ap->a_vap;
- struct componentname *cnp = ap->a_cnp;
- struct nfsv2_sattr *sp;
- caddr_t bpos, dpos;
- int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
- struct mbuf *mreq, *mrep, *md, *mb;
- struct vnode *newvp = NULL;
- int v3 = NFS_ISV3(dvp);
-
- nfsstats.rpccnt[NFSPROC_SYMLINK]++;
- slen = strlen(ap->a_target);
- mreq = m_get2(NFSX_FH(v3) + 2*NFSX_UNSIGNED +
- nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3),
- M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
- if (v3) {
- nfsm_v3attrbuild(vap, FALSE);
- }
- nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
- if (!v3) {
- sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
- sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
- sp->sa_uid = nfs_xdrneg1;
- sp->sa_gid = nfs_xdrneg1;
- sp->sa_size = nfs_xdrneg1;
- txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
- txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
- }
-
- /*
- * Issue the NFS request and get the rpc response.
- *
- * Only NFSv3 responses returning an error of 0 actually return
- * a file handle that can be converted into newvp without having
- * to do an extra lookup rpc.
- */
- nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
- if (v3) {
- if (error == 0)
- nfsm_mtofh(dvp, newvp, v3, gotvp);
- nfsm_wcc_data(dvp, wccflag);
- }
-
- /*
- * out code jumps -> here, mrep is also freed.
- */
-
- m_freem(mrep);
-nfsmout:
-
- /*
- * If we do not have an error and we could not extract the newvp from
- * the response due to the request being NFSv2, we have to do a
- * lookup in order to obtain a newvp to return.
- */
- if (error == 0 && newvp == NULL) {
- struct nfsnode *np = NULL;
-
- error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
- cnp->cn_cred, cnp->cn_thread, &np);
- if (!error)
- newvp = NFSTOV(np);
- }
- if (error) {
- if (newvp)
- vput(newvp);
- } else {
- *ap->a_vpp = newvp;
- }
- mtx_lock(&(VTONFS(dvp))->n_mtx);
- VTONFS(dvp)->n_flag |= NMODIFIED;
- mtx_unlock(&(VTONFS(dvp))->n_mtx);
- if (!wccflag) {
- VTONFS(dvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
- }
- return (error);
-}
-
-/*
- * nfs make dir call
- */
-static int
-nfs_mkdir(struct vop_mkdir_args *ap)
-{
- struct vnode *dvp = ap->a_dvp;
- struct vattr *vap = ap->a_vap;
- struct componentname *cnp = ap->a_cnp;
- struct nfsv2_sattr *sp;
- int len;
- struct nfsnode *np = NULL;
- struct vnode *newvp = NULL;
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR;
- int gotvp = 0;
- struct mbuf *mreq, *mrep, *md, *mb;
- struct vattr vattr;
- int v3 = NFS_ISV3(dvp);
-
- if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
- return (error);
- len = cnp->cn_namelen;
- nfsstats.rpccnt[NFSPROC_MKDIR]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) +
- NFSX_SATTR(v3), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
- if (v3) {
- nfsm_v3attrbuild(vap, FALSE);
- } else {
- sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
- sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
- sp->sa_uid = nfs_xdrneg1;
- sp->sa_gid = nfs_xdrneg1;
- sp->sa_size = nfs_xdrneg1;
- txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
- txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
- }
- nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
- if (!error)
- nfsm_mtofh(dvp, newvp, v3, gotvp);
- if (v3)
- nfsm_wcc_data(dvp, wccflag);
- m_freem(mrep);
-nfsmout:
- mtx_lock(&(VTONFS(dvp))->n_mtx);
- VTONFS(dvp)->n_flag |= NMODIFIED;
- mtx_unlock(&(VTONFS(dvp))->n_mtx);
- if (!wccflag) {
- VTONFS(dvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
- }
- if (error == 0 && newvp == NULL) {
- error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
- cnp->cn_thread, &np);
- if (!error) {
- newvp = NFSTOV(np);
- if (newvp->v_type != VDIR)
- error = EEXIST;
- }
- }
- if (error) {
- if (newvp)
- vput(newvp);
- } else
- *ap->a_vpp = newvp;
- return (error);
-}
-
-/*
- * nfs remove directory call
- */
-static int
-nfs_rmdir(struct vop_rmdir_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct vnode *dvp = ap->a_dvp;
- struct componentname *cnp = ap->a_cnp;
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR;
- struct mbuf *mreq, *mrep, *md, *mb;
- int v3 = NFS_ISV3(dvp);
-
- if (dvp == vp)
- return (EINVAL);
- nfsstats.rpccnt[NFSPROC_RMDIR]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED +
- nfsm_rndup(cnp->cn_namelen), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
- nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
- if (v3)
- nfsm_wcc_data(dvp, wccflag);
- m_freem(mrep);
-nfsmout:
- mtx_lock(&(VTONFS(dvp))->n_mtx);
- VTONFS(dvp)->n_flag |= NMODIFIED;
- mtx_unlock(&(VTONFS(dvp))->n_mtx);
- if (!wccflag) {
- VTONFS(dvp)->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
- }
- cache_purge(dvp);
- cache_purge(vp);
- /*
- * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
- */
- if (error == ENOENT)
- error = 0;
- return (error);
-}
-
-/*
- * nfs readdir call
- */
-static int
-nfs_readdir(struct vop_readdir_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct uio *uio = ap->a_uio;
- int tresid, error = 0;
- struct vattr vattr;
-
- if (vp->v_type != VDIR)
- return(EPERM);
-
- /*
- * First, check for hit on the EOF offset cache
- */
- if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
- (np->n_flag & NMODIFIED) == 0) {
- if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
- mtx_lock(&np->n_mtx);
- if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
- mtx_unlock(&np->n_mtx);
- nfsstats.direofcache_hits++;
- goto out;
- } else
- mtx_unlock(&np->n_mtx);
- }
- }
-
- /*
- * Call nfs_bioread() to do the real work.
- */
- tresid = uio->uio_resid;
- error = nfs_bioread(vp, uio, 0, ap->a_cred);
-
- if (!error && uio->uio_resid == tresid) {
- nfsstats.direofcache_misses++;
- }
-out:
- return (error);
-}
-
-/*
- * Readdir rpc call.
- * Called from below the buffer cache by nfs_doio().
- */
-int
-nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
-{
- int len, left;
- struct dirent *dp = NULL;
- u_int32_t *tl;
- caddr_t cp;
- nfsuint64 *cookiep;
- caddr_t bpos, dpos;
- struct mbuf *mreq, *mrep, *md, *mb;
- nfsuint64 cookie;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- struct nfsnode *dnp = VTONFS(vp);
- u_quad_t fileno;
- int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
- int attrflag;
- int v3 = NFS_ISV3(vp);
-
- KASSERT(uiop->uio_iovcnt == 1 &&
- (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
- (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
- ("nfs readdirrpc bad uio"));
-
- /*
- * If there is no cookie, assume directory was stale.
- */
- nfs_dircookie_lock(dnp);
- cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
- if (cookiep) {
- cookie = *cookiep;
- nfs_dircookie_unlock(dnp);
- } else {
- nfs_dircookie_unlock(dnp);
- return (NFSERR_BAD_COOKIE);
- }
-
- /*
- * Loop around doing readdir rpc's of size nm_readdirsize
- * truncated to a multiple of DIRBLKSIZ.
- * The stopping criteria is EOF or buffer full.
- */
- while (more_dirs && bigenough) {
- nfsstats.rpccnt[NFSPROC_READDIR]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_READDIR(v3), M_WAITOK,
- MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, v3);
- if (v3) {
- tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
- *tl++ = cookie.nfsuquad[0];
- *tl++ = cookie.nfsuquad[1];
- mtx_lock(&dnp->n_mtx);
- *tl++ = dnp->n_cookieverf.nfsuquad[0];
- *tl++ = dnp->n_cookieverf.nfsuquad[1];
- mtx_unlock(&dnp->n_mtx);
- } else {
- tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
- *tl++ = cookie.nfsuquad[0];
- }
- *tl = txdr_unsigned(nmp->nm_readdirsize);
- nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
- if (v3) {
- nfsm_postop_attr(vp, attrflag);
- if (!error) {
- tl = nfsm_dissect(u_int32_t *,
- 2 * NFSX_UNSIGNED);
- mtx_lock(&dnp->n_mtx);
- dnp->n_cookieverf.nfsuquad[0] = *tl++;
- dnp->n_cookieverf.nfsuquad[1] = *tl;
- mtx_unlock(&dnp->n_mtx);
- } else {
- m_freem(mrep);
- goto nfsmout;
- }
- }
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- more_dirs = fxdr_unsigned(int, *tl);
-
- /* loop thru the dir entries, doctoring them to 4bsd form */
- while (more_dirs && bigenough) {
- if (v3) {
- tl = nfsm_dissect(u_int32_t *,
- 3 * NFSX_UNSIGNED);
- fileno = fxdr_hyper(tl);
- len = fxdr_unsigned(int, *(tl + 2));
- } else {
- tl = nfsm_dissect(u_int32_t *,
- 2 * NFSX_UNSIGNED);
- fileno = fxdr_unsigned(u_quad_t, *tl++);
- len = fxdr_unsigned(int, *tl);
- }
- if (len <= 0 || len > NFS_MAXNAMLEN) {
- error = EBADRPC;
- m_freem(mrep);
- goto nfsmout;
- }
- tlen = nfsm_rndup(len);
- if (tlen == len)
- tlen += 4; /* To ensure null termination */
- left = DIRBLKSIZ - blksiz;
- if ((tlen + DIRHDSIZ) > left) {
- dp->d_reclen += left;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + left;
- uiop->uio_iov->iov_len -= left;
- uiop->uio_offset += left;
- uiop->uio_resid -= left;
- blksiz = 0;
- }
- if ((tlen + DIRHDSIZ) > uiop->uio_resid)
- bigenough = 0;
- if (bigenough) {
- dp = (struct dirent *)uiop->uio_iov->iov_base;
- dp->d_fileno = (int)fileno;
- dp->d_namlen = len;
- dp->d_reclen = tlen + DIRHDSIZ;
- dp->d_type = DT_UNKNOWN;
- blksiz += dp->d_reclen;
- if (blksiz == DIRBLKSIZ)
- blksiz = 0;
- uiop->uio_offset += DIRHDSIZ;
- uiop->uio_resid -= DIRHDSIZ;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
- uiop->uio_iov->iov_len -= DIRHDSIZ;
- nfsm_mtouio(uiop, len);
- cp = uiop->uio_iov->iov_base;
- tlen -= len;
- *cp = '\0'; /* null terminate */
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + tlen;
- uiop->uio_iov->iov_len -= tlen;
- uiop->uio_offset += tlen;
- uiop->uio_resid -= tlen;
- } else
- nfsm_adv(nfsm_rndup(len));
- if (v3) {
- tl = nfsm_dissect(u_int32_t *,
- 3 * NFSX_UNSIGNED);
- } else {
- tl = nfsm_dissect(u_int32_t *,
- 2 * NFSX_UNSIGNED);
- }
- if (bigenough) {
- cookie.nfsuquad[0] = *tl++;
- if (v3)
- cookie.nfsuquad[1] = *tl++;
- } else if (v3)
- tl += 2;
- else
- tl++;
- more_dirs = fxdr_unsigned(int, *tl);
- }
- /*
- * If at end of rpc data, get the eof boolean
- */
- if (!more_dirs) {
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- more_dirs = (fxdr_unsigned(int, *tl) == 0);
- }
- m_freem(mrep);
- }
- /*
- * Fill last record, iff any, out to a multiple of DIRBLKSIZ
- * by increasing d_reclen for the last record.
- */
- if (blksiz > 0) {
- left = DIRBLKSIZ - blksiz;
- dp->d_reclen += left;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + left;
- uiop->uio_iov->iov_len -= left;
- uiop->uio_offset += left;
- uiop->uio_resid -= left;
- }
-
- /*
- * We are now either at the end of the directory or have filled the
- * block.
- */
- if (bigenough)
- dnp->n_direofoffset = uiop->uio_offset;
- else {
- if (uiop->uio_resid > 0)
- nfs_printf("EEK! readdirrpc resid > 0\n");
- nfs_dircookie_lock(dnp);
- cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
- *cookiep = cookie;
- nfs_dircookie_unlock(dnp);
- }
-nfsmout:
- return (error);
-}
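
The loop above repacks each NFS directory entry into a struct dirent record and keeps every DIRBLKSIZ-sized chunk of the output buffer self-contained, growing the last record of a chunk to absorb any leftover space. A small sketch of the record-length arithmetic; DIRHDSIZ here is only a stand-in value for the dirent header size and rndup() mirrors nfsm_rndup():

#define DIRBLKSIZ	512	/* directory buffers are filled in these units */
#define DIRHDSIZ	8	/* stand-in for the fixed dirent header size */

/* Round a name length up to the XDR four-byte boundary. */
static int
rndup(int len)
{
	return ((len + 3) & ~3);
}

/*
 * Bytes consumed by one record: the fixed header plus the padded name,
 * plus four extra bytes when the padded length equals the name length so
 * there is always room for a terminating '\0'.  When the result does not
 * fit in the current DIRBLKSIZ chunk, the previous record's d_reclen is
 * grown by the leftover space instead.
 */
static int
reclen_for(int namelen)
{
	int tlen = rndup(namelen);

	if (tlen == namelen)
		tlen += 4;
	return (DIRHDSIZ + tlen);
}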
-
-/*
- * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
- */
-int
-nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
-{
- int len, left;
- struct dirent *dp;
- u_int32_t *tl;
- caddr_t cp;
- struct vnode *newvp;
- nfsuint64 *cookiep;
- caddr_t bpos, dpos, dpossav1, dpossav2;
- struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
- struct nameidata nami, *ndp = &nami;
- struct componentname *cnp = &ndp->ni_cnd;
- nfsuint64 cookie;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- struct nfsnode *dnp = VTONFS(vp), *np;
- struct vattr vattr, dvattr;
- nfsfh_t *fhp;
- u_quad_t fileno;
- int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
- int attrflag, dattrflag, fhsize;
-
-#ifndef nolint
- dp = NULL;
-#endif
- KASSERT(uiop->uio_iovcnt == 1 &&
- (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
- (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
- ("nfs readdirplusrpc bad uio"));
- ndp->ni_dvp = vp;
- newvp = NULLVP;
-
- /*
- * If there is no cookie, assume directory was stale.
- */
- nfs_dircookie_lock(dnp);
- cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
- if (cookiep) {
- cookie = *cookiep;
- nfs_dircookie_unlock(dnp);
- } else {
- nfs_dircookie_unlock(dnp);
- return (NFSERR_BAD_COOKIE);
- }
- /*
- * Loop around doing readdir rpc's of size nm_readdirsize
- * truncated to a multiple of DIRBLKSIZ.
- * The stopping criteria is EOF or buffer full.
- */
- while (more_dirs && bigenough) {
- nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
- mreq = m_get2(NFSX_FH(1) + 6 * NFSX_UNSIGNED, M_WAITOK,
- MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, 1);
- tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
- *tl++ = cookie.nfsuquad[0];
- *tl++ = cookie.nfsuquad[1];
- mtx_lock(&dnp->n_mtx);
- *tl++ = dnp->n_cookieverf.nfsuquad[0];
- *tl++ = dnp->n_cookieverf.nfsuquad[1];
- mtx_unlock(&dnp->n_mtx);
- *tl++ = txdr_unsigned(nmp->nm_readdirsize);
- *tl = txdr_unsigned(nmp->nm_rsize);
- nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
- nfsm_postop_attr_va(vp, dattrflag, &dvattr);
- if (error) {
- m_freem(mrep);
- goto nfsmout;
- }
- tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
- mtx_lock(&dnp->n_mtx);
- dnp->n_cookieverf.nfsuquad[0] = *tl++;
- dnp->n_cookieverf.nfsuquad[1] = *tl++;
- mtx_unlock(&dnp->n_mtx);
- more_dirs = fxdr_unsigned(int, *tl);
-
- /* loop thru the dir entries, doctoring them to 4bsd form */
- while (more_dirs && bigenough) {
- tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
- fileno = fxdr_hyper(tl);
- len = fxdr_unsigned(int, *(tl + 2));
- if (len <= 0 || len > NFS_MAXNAMLEN) {
- error = EBADRPC;
- m_freem(mrep);
- goto nfsmout;
- }
- tlen = nfsm_rndup(len);
- if (tlen == len)
- tlen += 4; /* To ensure null termination*/
- left = DIRBLKSIZ - blksiz;
- if ((tlen + DIRHDSIZ) > left) {
- dp->d_reclen += left;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + left;
- uiop->uio_iov->iov_len -= left;
- uiop->uio_offset += left;
- uiop->uio_resid -= left;
- blksiz = 0;
- }
- if ((tlen + DIRHDSIZ) > uiop->uio_resid)
- bigenough = 0;
- if (bigenough) {
- dp = (struct dirent *)uiop->uio_iov->iov_base;
- dp->d_fileno = (int)fileno;
- dp->d_namlen = len;
- dp->d_reclen = tlen + DIRHDSIZ;
- dp->d_type = DT_UNKNOWN;
- blksiz += dp->d_reclen;
- if (blksiz == DIRBLKSIZ)
- blksiz = 0;
- uiop->uio_offset += DIRHDSIZ;
- uiop->uio_resid -= DIRHDSIZ;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
- uiop->uio_iov->iov_len -= DIRHDSIZ;
- cnp->cn_nameptr = uiop->uio_iov->iov_base;
- cnp->cn_namelen = len;
- nfsm_mtouio(uiop, len);
- cp = uiop->uio_iov->iov_base;
- tlen -= len;
- *cp = '\0';
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + tlen;
- uiop->uio_iov->iov_len -= tlen;
- uiop->uio_offset += tlen;
- uiop->uio_resid -= tlen;
- } else
- nfsm_adv(nfsm_rndup(len));
- tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
- if (bigenough) {
- cookie.nfsuquad[0] = *tl++;
- cookie.nfsuquad[1] = *tl++;
- } else
- tl += 2;
-
- /*
- * Since the attributes are before the file handle
- * (sigh), we must skip over the attributes and then
- * come back and get them.
- */
- attrflag = fxdr_unsigned(int, *tl);
- if (attrflag) {
- dpossav1 = dpos;
- mdsav1 = md;
- nfsm_adv(NFSX_V3FATTR);
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- doit = fxdr_unsigned(int, *tl);
- /*
- * Skip loading the attrs for "..". There's a
- * race between loading the attrs here and
- * lookups that look for the directory currently
- * being read (in the parent). We try to acquire
- * the exclusive lock on ".." here, owning the
- * lock on the directory being read. Lookup will
- * hold the lock on ".." and try to acquire the
- * lock on the directory being read.
- *
- * There are other ways of fixing this, one would
- * be to do a trylock on the ".." vnode and skip
- * loading the attrs on ".." if it happens to be
- * locked by another process. But skipping the
- * attrload on ".." seems the easiest option.
- */
- if (strcmp(dp->d_name, "..") == 0) {
- doit = 0;
- /*
- * We've already skipped over the attrs,
- * skip over the filehandle. And store d_type
- * as VDIR.
- */
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- i = fxdr_unsigned(int, *tl);
- nfsm_adv(nfsm_rndup(i));
- dp->d_type = IFTODT(VTTOIF(VDIR));
- }
- if (doit) {
- nfsm_getfh(fhp, fhsize, 1);
- if (NFS_CMPFH(dnp, fhp, fhsize)) {
- VREF(vp);
- newvp = vp;
- np = dnp;
- } else {
- error = nfs_nget(vp->v_mount, fhp,
- fhsize, &np, LK_EXCLUSIVE);
- if (error)
- doit = 0;
- else
- newvp = NFSTOV(np);
- }
- }
- if (doit && bigenough) {
- dpossav2 = dpos;
- dpos = dpossav1;
- mdsav2 = md;
- md = mdsav1;
- nfsm_loadattr(newvp, &vattr);
- dpos = dpossav2;
- md = mdsav2;
- dp->d_type = IFTODT(VTTOIF(vattr.va_type));
- ndp->ni_vp = newvp;
- if (newvp->v_type != VDIR || dattrflag != 0)
- cache_enter_time(ndp->ni_dvp, ndp->ni_vp,
- cnp, &vattr.va_ctime,
- newvp->v_type != VDIR ? NULL :
- &dvattr.va_ctime);
- }
- } else {
- /* Just skip over the file handle */
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- i = fxdr_unsigned(int, *tl);
- if (i) {
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- fhsize = fxdr_unsigned(int, *tl);
- nfsm_adv(nfsm_rndup(fhsize));
- }
- }
- if (newvp != NULLVP) {
- if (newvp == vp)
- vrele(newvp);
- else
- vput(newvp);
- newvp = NULLVP;
- }
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- more_dirs = fxdr_unsigned(int, *tl);
- }
- /*
- * If at end of rpc data, get the eof boolean
- */
- if (!more_dirs) {
- tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
- more_dirs = (fxdr_unsigned(int, *tl) == 0);
- }
- m_freem(mrep);
- }
- /*
- * Fill last record, iff any, out to a multiple of DIRBLKSIZ
- * by increasing d_reclen for the last record.
- */
- if (blksiz > 0) {
- left = DIRBLKSIZ - blksiz;
- dp->d_reclen += left;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + left;
- uiop->uio_iov->iov_len -= left;
- uiop->uio_offset += left;
- uiop->uio_resid -= left;
- }
-
- /*
- * We are now either at the end of the directory or have filled the
- * block.
- */
- if (bigenough)
- dnp->n_direofoffset = uiop->uio_offset;
- else {
- if (uiop->uio_resid > 0)
- nfs_printf("EEK! readdirplusrpc resid > 0\n");
- nfs_dircookie_lock(dnp);
- cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
- *cookiep = cookie;
- nfs_dircookie_unlock(dnp);
- }
-nfsmout:
- if (newvp != NULLVP) {
- if (newvp == vp)
- vrele(newvp);
- else
- vput(newvp);
- newvp = NULLVP;
- }
- return (error);
-}
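
READDIRPLUS places each entry's attributes before its file handle, while the client needs the handle (to find or create the vnode) before the attributes can be loaded into it; that is what the dpossav1/mdsav1 dance above is for. Sketched over a flat buffer (the kernel walks an mbuf chain, which is why both the mbuf pointer and the data pointer are saved); struct cursor and parse_entry are illustrative only:

struct cursor {
	const unsigned char *pos;	/* current decode position */
};

static void
parse_entry(struct cursor *c, int attrlen, int fhlen)
{
	const unsigned char *attrs, *resume;

	attrs = c->pos;		/* remember where the attributes start */
	c->pos += attrlen;	/* skip them until the vnode is known */
	/* ... decode the file handle at c->pos, look up the vnode ... */
	c->pos += fhlen;
	resume = c->pos;	/* end of this entry's file handle */

	c->pos = attrs;		/* go back and load the attributes */
	/* ... nfsm_loadattr() equivalent ... */
	c->pos = resume;	/* continue with the next field */
}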
-
-/*
- * Silly rename. To make the stateless NFS filesystem look a little more
- * like "ufs", a remove of an active vnode is translated into a rename to a
- * funny looking filename that is removed by nfs_inactive on the nfsnode.
- * There is the potential for another process on a different client to
- * create the same funny name between the time nfs_lookitup() fails and
- * nfs_rename() completes, but...
- */
-static int
-nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
-{
- struct sillyrename *sp;
- struct nfsnode *np;
- int error;
- short pid;
- unsigned int lticks;
-
- cache_purge(dvp);
- np = VTONFS(vp);
- KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
- sp = malloc(sizeof (struct sillyrename),
- M_NFSREQ, M_WAITOK);
- sp->s_cred = crhold(cnp->cn_cred);
- sp->s_dvp = dvp;
- sp->s_removeit = nfs_removeit;
- VREF(dvp);
-
- /*
- * Fudge together a funny name.
-	 * Changing the format of the funny name to accommodate more
- * sillynames per directory.
- * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
- * CPU ticks since boot.
- */
- pid = cnp->cn_thread->td_proc->p_pid;
- lticks = (unsigned int)ticks;
- for ( ; ; ) {
- sp->s_namlen = sprintf(sp->s_name,
- ".nfs.%08x.%04x4.4", lticks,
- pid);
- if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
- cnp->cn_thread, NULL))
- break;
- lticks++;
- }
- error = nfs_renameit(dvp, cnp, sp);
- if (error)
- goto bad;
- error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
- cnp->cn_thread, &np);
- np->n_sillyrename = sp;
- return (0);
-bad:
- vrele(sp->s_dvp);
- crfree(sp->s_cred);
- free((caddr_t)sp, M_NFSREQ);
- return (error);
-}
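
The name generation above keeps probing with nfs_lookitup() until it finds an unused candidate, bumping the tick value on a collision. A compact userland rendition of that loop; name_exists() stands in for the lookup probe, and the format string here is simplified relative to the one used above:

#include <stdio.h>

/* Assumed probe: returns nonzero when 'name' already exists in the directory. */
extern int name_exists(const char *name);

static int
pick_silly_name(char *buf, size_t buflen, unsigned int lticks, unsigned short pid)
{
	int namlen;

	for (;;) {
		namlen = snprintf(buf, buflen, ".nfs.%08x.%04x", lticks, pid);
		if (!name_exists(buf))
			return (namlen);	/* unused: this is the funny name */
		lticks++;			/* collision: try the next tick value */
	}
}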
-
-/*
- * Look up a file name and optionally either update the file handle or
- * allocate an nfsnode, depending on the value of npp.
- * npp == NULL --> just do the lookup
- * *npp == NULL --> allocate a new nfsnode and make sure attributes are
- * handled too
- * *npp != NULL --> update the file handle in the vnode
- */
-static int
-nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
- struct thread *td, struct nfsnode **npp)
-{
- struct vnode *newvp = NULL;
- struct nfsnode *np, *dnp = VTONFS(dvp);
- caddr_t bpos, dpos;
- int error = 0, fhlen, attrflag;
- struct mbuf *mreq, *mrep, *md, *mb;
- nfsfh_t *nfhp;
- int v3 = NFS_ISV3(dvp);
-
- nfsstats.rpccnt[NFSPROC_LOOKUP]++;
- mreq = m_get2(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len),
- M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(dvp, v3);
- nfsm_strtom(name, len, NFS_MAXNAMLEN);
- nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
- if (npp && !error) {
- nfsm_getfh(nfhp, fhlen, v3);
- if (*npp) {
- np = *npp;
- if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
- free((caddr_t)np->n_fhp, M_NFSBIGFH);
- np->n_fhp = &np->n_fh;
- } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
- np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
- bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
- np->n_fhsize = fhlen;
- newvp = NFSTOV(np);
- } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
- VREF(dvp);
- newvp = dvp;
- } else {
- error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
- if (error) {
- m_freem(mrep);
- return (error);
- }
- newvp = NFSTOV(np);
- }
- if (v3) {
- nfsm_postop_attr(newvp, attrflag);
- if (!attrflag && *npp == NULL) {
- m_freem(mrep);
- if (newvp == dvp)
- vrele(newvp);
- else
- vput(newvp);
- return (ENOENT);
- }
- } else
- nfsm_loadattr(newvp, NULL);
- }
- m_freem(mrep);
-nfsmout:
- if (npp && *npp == NULL) {
- if (error) {
- if (newvp) {
- if (newvp == dvp)
- vrele(newvp);
- else
- vput(newvp);
- }
- } else
- *npp = np;
- }
- return (error);
-}
-
-/*
- * Nfs Version 3 commit rpc
- */
-int
-nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
- struct thread *td)
-{
- u_int32_t *tl;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- caddr_t bpos, dpos;
- int error = 0, wccflag = NFSV3_WCCRATTR;
- struct mbuf *mreq, *mrep, *md, *mb;
-
- mtx_lock(&nmp->nm_mtx);
- if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
- mtx_unlock(&nmp->nm_mtx);
- return (0);
- }
- mtx_unlock(&nmp->nm_mtx);
- nfsstats.rpccnt[NFSPROC_COMMIT]++;
- mreq = m_get2(NFSX_FH(1), M_WAITOK, MT_DATA, 0);
- mb = mreq;
- bpos = mtod(mb, caddr_t);
- nfsm_fhtom(vp, 1);
- tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
- txdr_hyper(offset, tl);
- tl += 2;
- *tl = txdr_unsigned(cnt);
- nfsm_request(vp, NFSPROC_COMMIT, td, cred);
- nfsm_wcc_data(vp, wccflag);
- if (!error) {
- tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
- if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
- NFSX_V3WRITEVERF)) {
- bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
- NFSX_V3WRITEVERF);
- error = NFSERR_STALEWRITEVERF;
- }
- }
- m_freem(mrep);
-nfsmout:
- return (error);
-}
-
-/*
- * Strategy routine.
- * For async requests when nfsiod(s) are running, queue the request by
- * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
- * request.
- */
-static int
-nfs_strategy(struct vop_strategy_args *ap)
-{
- struct buf *bp = ap->a_bp;
- struct ucred *cr;
-
- KASSERT(!(bp->b_flags & B_DONE),
- ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
- BUF_ASSERT_HELD(bp);
-
- if (bp->b_iocmd == BIO_READ)
- cr = bp->b_rcred;
- else
- cr = bp->b_wcred;
-
- /*
-	 * If the op is asynchronous and an i/o daemon is waiting,
-	 * queue the request, wake it up and wait for completion;
-	 * otherwise just do it ourselves.
- */
- if ((bp->b_flags & B_ASYNC) == 0 ||
- nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
- (void)nfs_doio(ap->a_vp, bp, cr, curthread);
- return (0);
-}
-
-/*
- * fsync vnode op. Just call nfs_flush() with commit == 1.
- */
-/* ARGSUSED */
-static int
-nfs_fsync(struct vop_fsync_args *ap)
-{
-
- return (nfs_flush(ap->a_vp, ap->a_waitfor, 1));
-}
-
-/*
- * Flush all the blocks associated with a vnode.
- * Walk through the buffer pool and push any dirty pages
- * associated with the vnode.
- */
-static int
-nfs_flush(struct vnode *vp, int waitfor, int commit)
-{
- struct nfsnode *np = VTONFS(vp);
- struct buf *bp;
- int i;
- struct buf *nbp;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
- int passone = 1;
- u_quad_t off, endoff, toff;
- struct ucred* wcred = NULL;
- struct buf **bvec = NULL;
- struct bufobj *bo;
- struct thread *td = curthread;
-#ifndef NFS_COMMITBVECSIZ
-#define NFS_COMMITBVECSIZ 20
-#endif
- struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
- int bvecsize = 0, bveccount;
-
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- if (!commit)
- passone = 0;
- bo = &vp->v_bufobj;
- /*
- * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
- * server, but has not been committed to stable storage on the server
- * yet. On the first pass, the byte range is worked out and the commit
- * rpc is done. On the second pass, nfs_writebp() is called to do the
- * job.
- */
-again:
- off = (u_quad_t)-1;
- endoff = 0;
- bvecpos = 0;
- if (NFS_ISV3(vp) && commit) {
- if (bvec != NULL && bvec != bvec_on_stack)
- free(bvec, M_TEMP);
- /*
- * Count up how many buffers waiting for a commit.
- */
- bveccount = 0;
- BO_LOCK(bo);
- TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
- if (!BUF_ISLOCKED(bp) &&
- (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
- == (B_DELWRI | B_NEEDCOMMIT))
- bveccount++;
- }
- /*
- * Allocate space to remember the list of bufs to commit. It is
- * important to use M_NOWAIT here to avoid a race with nfs_write.
- * If we can't get memory (for whatever reason), we will end up
- * committing the buffers one-by-one in the loop below.
- */
- if (bveccount > NFS_COMMITBVECSIZ) {
- /*
- * Release the vnode interlock to avoid a lock
- * order reversal.
- */
- BO_UNLOCK(bo);
- bvec = (struct buf **)
- malloc(bveccount * sizeof(struct buf *),
- M_TEMP, M_NOWAIT);
- BO_LOCK(bo);
- if (bvec == NULL) {
- bvec = bvec_on_stack;
- bvecsize = NFS_COMMITBVECSIZ;
- } else
- bvecsize = bveccount;
- } else {
- bvec = bvec_on_stack;
- bvecsize = NFS_COMMITBVECSIZ;
- }
- TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
- if (bvecpos >= bvecsize)
- break;
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
- nbp = TAILQ_NEXT(bp, b_bobufs);
- continue;
- }
- if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
- (B_DELWRI | B_NEEDCOMMIT)) {
- BUF_UNLOCK(bp);
- nbp = TAILQ_NEXT(bp, b_bobufs);
- continue;
- }
- BO_UNLOCK(bo);
- bremfree(bp);
- /*
- * Work out if all buffers are using the same cred
- * so we can deal with them all with one commit.
- *
- * NOTE: we are not clearing B_DONE here, so we have
- * to do it later on in this routine if we intend to
- * initiate I/O on the bp.
- *
- * Note: to avoid loopback deadlocks, we do not
- * assign b_runningbufspace.
- */
- if (wcred == NULL)
- wcred = bp->b_wcred;
- else if (wcred != bp->b_wcred)
- wcred = NOCRED;
- vfs_busy_pages(bp, 1);
-
- BO_LOCK(bo);
- /*
- * bp is protected by being locked, but nbp is not
- * and vfs_busy_pages() may sleep. We have to
- * recalculate nbp.
- */
- nbp = TAILQ_NEXT(bp, b_bobufs);
-
- /*
- * A list of these buffers is kept so that the
- * second loop knows which buffers have actually
- * been committed. This is necessary, since there
- * may be a race between the commit rpc and new
- * uncommitted writes on the file.
- */
- bvec[bvecpos++] = bp;
- toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
- bp->b_dirtyoff;
- if (toff < off)
- off = toff;
- toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
- if (toff > endoff)
- endoff = toff;
- }
- BO_UNLOCK(bo);
- }
- if (bvecpos > 0) {
- /*
- * Commit data on the server, as required.
- * If all bufs are using the same wcred, then use that with
- * one call for all of them, otherwise commit each one
- * separately.
- */
- if (wcred != NOCRED)
- retv = nfs_commit(vp, off, (int)(endoff - off),
- wcred, td);
- else {
- retv = 0;
- for (i = 0; i < bvecpos; i++) {
- off_t off, size;
- bp = bvec[i];
- off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
- bp->b_dirtyoff;
- size = (u_quad_t)(bp->b_dirtyend
- - bp->b_dirtyoff);
- retv = nfs_commit(vp, off, (int)size,
- bp->b_wcred, td);
- if (retv) break;
- }
- }
-
- if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(vp->v_mount);
-
- /*
- * Now, either mark the blocks I/O done or mark the
- * blocks dirty, depending on whether the commit
- * succeeded.
- */
- for (i = 0; i < bvecpos; i++) {
- bp = bvec[i];
- bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
- if (retv) {
- /*
- * Error, leave B_DELWRI intact
- */
- vfs_unbusy_pages(bp);
- brelse(bp);
- } else {
- /*
- * Success, remove B_DELWRI ( bundirty() ).
- *
- * b_dirtyoff/b_dirtyend seem to be NFS
- * specific. We should probably move that
- * into bundirty(). XXX
- */
- bufobj_wref(bo);
- bp->b_flags |= B_ASYNC;
- bundirty(bp);
- bp->b_flags &= ~B_DONE;
- bp->b_ioflags &= ~BIO_ERROR;
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bufdone(bp);
- }
- }
- }
-
- /*
- * Start/do any write(s) that are required.
- */
-loop:
- BO_LOCK(bo);
- TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
- if (waitfor != MNT_WAIT || passone)
- continue;
-
- error = BUF_TIMELOCK(bp,
- LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
- BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
- if (error == 0) {
- BUF_UNLOCK(bp);
- goto loop;
- }
- if (error == ENOLCK) {
- error = 0;
- goto loop;
- }
- if (nfs_sigintr(nmp, td)) {
- error = EINTR;
- goto done;
- }
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- goto loop;
- }
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfs_fsync: not dirty");
- if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
- BUF_UNLOCK(bp);
- continue;
- }
- BO_UNLOCK(bo);
- bremfree(bp);
-		bp->b_flags |= B_ASYNC;
- bwrite(bp);
- if (nfs_sigintr(nmp, td)) {
- error = EINTR;
- goto done;
- }
- goto loop;
- }
- if (passone) {
- passone = 0;
- BO_UNLOCK(bo);
- goto again;
- }
- if (waitfor == MNT_WAIT) {
- while (bo->bo_numoutput) {
- error = bufobj_wwait(bo, slpflag, slptimeo);
- if (error) {
- BO_UNLOCK(bo);
- error = nfs_sigintr(nmp, td);
- if (error)
- goto done;
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- BO_LOCK(bo);
- }
- }
- if (bo->bo_dirty.bv_cnt != 0 && commit) {
- BO_UNLOCK(bo);
- goto loop;
- }
- /*
- * Wait for all the async IO requests to drain
- */
- BO_UNLOCK(bo);
- mtx_lock(&np->n_mtx);
- while (np->n_directio_asyncwr > 0) {
- np->n_flag |= NFSYNCWAIT;
- error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
- &np->n_mtx, slpflag | (PRIBIO + 1),
- "nfsfsync", 0);
- if (error) {
- if (nfs_sigintr(nmp, td)) {
- mtx_unlock(&np->n_mtx);
- error = EINTR;
- goto done;
- }
- }
- }
- mtx_unlock(&np->n_mtx);
- } else
- BO_UNLOCK(bo);
- mtx_lock(&np->n_mtx);
- if (np->n_flag & NWRITEERR) {
- error = np->n_error;
- np->n_flag &= ~NWRITEERR;
- }
- if (commit && bo->bo_dirty.bv_cnt == 0 &&
- bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
- np->n_flag &= ~NMODIFIED;
- mtx_unlock(&np->n_mtx);
-done:
- if (bvec != NULL && bvec != bvec_on_stack)
- free(bvec, M_TEMP);
- return (error);
-}
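
When every B_NEEDCOMMIT buffer shares one write credential, nfs_flush() issues a single COMMIT covering the byte range spanned by all of them, built up while bvec is being filled above. A sketch of just that range computation; struct dirty_buf is an illustrative stand-in for the few struct buf fields involved:

#include <sys/types.h>

#define DEV_BSIZE	512

struct dirty_buf {
	daddr_t	blkno;		/* logical block number of the buffer */
	int	dirtyoff;	/* first dirty byte within the buffer */
	int	dirtyend;	/* one past the last dirty byte */
};

/* Compute the smallest [*offp, *endp) byte range covering every buffer. */
static void
commit_range(const struct dirty_buf *bufs, int n, u_quad_t *offp, u_quad_t *endp)
{
	u_quad_t off = (u_quad_t)-1, end = 0, toff;
	int i;

	for (i = 0; i < n; i++) {
		toff = (u_quad_t)bufs[i].blkno * DEV_BSIZE + bufs[i].dirtyoff;
		if (toff < off)
			off = toff;
		toff += (u_quad_t)(bufs[i].dirtyend - bufs[i].dirtyoff);
		if (toff > end)
			end = toff;
	}
	*offp = off;
	*endp = end;
}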
-
-/*
- * NFS advisory byte-level locks.
- */
-static int
-nfs_advlock(struct vop_advlock_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- u_quad_t size;
- int error;
-
- error = vn_lock(vp, LK_SHARED);
- if (error)
- return (error);
- if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
- size = VTONFS(vp)->n_size;
- VOP_UNLOCK(vp, 0);
- error = lf_advlock(ap, &(vp->v_lockf), size);
- } else {
- if (nfs_advlock_p)
- error = nfs_advlock_p(ap);
- else
- error = ENOLCK;
- }
-
- return (error);
-}
-
-/*
- * NFS advisory byte-level locks.
- */
-static int
-nfs_advlockasync(struct vop_advlockasync_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- u_quad_t size;
- int error;
-
- error = vn_lock(vp, LK_SHARED);
- if (error)
- return (error);
- if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
- size = VTONFS(vp)->n_size;
- VOP_UNLOCK(vp, 0);
- error = lf_advlockasync(ap, &(vp->v_lockf), size);
- } else {
- VOP_UNLOCK(vp, 0);
- error = EOPNOTSUPP;
- }
- return (error);
-}
-
-/*
- * Print out the contents of an nfsnode.
- */
-static int
-nfs_print(struct vop_print_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
-
- nfs_printf("\tfileid %ld fsid 0x%x",
- np->n_vattr.va_fileid, np->n_vattr.va_fsid);
- if (vp->v_type == VFIFO)
- fifo_printinfo(vp);
- printf("\n");
- return (0);
-}
-
-/*
- * This is the "real" nfs::bwrite(struct buf*).
- * We set B_CACHE if this is a VMIO buffer.
- */
-int
-nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
-{
- int s;
- int oldflags = bp->b_flags;
-#if 0
- int retv = 1;
- off_t off;
-#endif
-
- BUF_ASSERT_HELD(bp);
-
- if (bp->b_flags & B_INVAL) {
- brelse(bp);
- return(0);
- }
-
- bp->b_flags |= B_CACHE;
-
- /*
- * Undirty the bp. We will redirty it later if the I/O fails.
- */
-
- s = splbio();
- bundirty(bp);
- bp->b_flags &= ~B_DONE;
- bp->b_ioflags &= ~BIO_ERROR;
- bp->b_iocmd = BIO_WRITE;
-
- bufobj_wref(bp->b_bufobj);
- curthread->td_ru.ru_oublock++;
- splx(s);
-
- /*
- * Note: to avoid loopback deadlocks, we do not
- * assign b_runningbufspace.
- */
- vfs_busy_pages(bp, 1);
-
- BUF_KERNPROC(bp);
- bp->b_iooffset = dbtob(bp->b_blkno);
- bstrategy(bp);
-
- if( (oldflags & B_ASYNC) == 0) {
- int rtval = bufwait(bp);
-
- if (oldflags & B_DELWRI) {
- s = splbio();
- reassignbuf(bp);
- splx(s);
- }
- brelse(bp);
- return (rtval);
- }
-
- return (0);
-}
-
-/*
- * nfs special file access vnode op.
- * Essentially just get vattr and then imitate iaccess() since the device is
- * local to the client.
- */
-static int
-nfsspec_access(struct vop_access_args *ap)
-{
- struct vattr *vap;
- struct ucred *cred = ap->a_cred;
- struct vnode *vp = ap->a_vp;
- accmode_t accmode = ap->a_accmode;
- struct vattr vattr;
- int error;
-
- /*
- * Disallow write attempts on filesystems mounted read-only;
- * unless the file is a socket, fifo, or a block or character
- * device resident on the filesystem.
- */
- if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
- switch (vp->v_type) {
- case VREG:
- case VDIR:
- case VLNK:
- return (EROFS);
- default:
- break;
- }
- }
- vap = &vattr;
- error = VOP_GETATTR(vp, vap, cred);
- if (error)
- goto out;
- error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
- accmode, cred, NULL);
-out:
- return error;
-}
-
-/*
- * Read wrapper for fifos.
- */
-static int
-nfsfifo_read(struct vop_read_args *ap)
-{
- struct nfsnode *np = VTONFS(ap->a_vp);
- int error;
-
- /*
- * Set access flag.
- */
- mtx_lock(&np->n_mtx);
- np->n_flag |= NACC;
- vfs_timestamp(&np->n_atim);
- mtx_unlock(&np->n_mtx);
- error = fifo_specops.vop_read(ap);
- return error;
-}
-
-/*
- * Write wrapper for fifos.
- */
-static int
-nfsfifo_write(struct vop_write_args *ap)
-{
- struct nfsnode *np = VTONFS(ap->a_vp);
-
- /*
- * Set update flag.
- */
- mtx_lock(&np->n_mtx);
- np->n_flag |= NUPD;
- vfs_timestamp(&np->n_mtim);
- mtx_unlock(&np->n_mtx);
- return(fifo_specops.vop_write(ap));
-}
-
-/*
- * Close wrapper for fifos.
- *
- * Update the times on the nfsnode then do fifo close.
- */
-static int
-nfsfifo_close(struct vop_close_args *ap)
-{
- struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- struct vattr vattr;
- struct timespec ts;
-
- mtx_lock(&np->n_mtx);
- if (np->n_flag & (NACC | NUPD)) {
- vfs_timestamp(&ts);
- if (np->n_flag & NACC)
- np->n_atim = ts;
- if (np->n_flag & NUPD)
- np->n_mtim = ts;
- np->n_flag |= NCHG;
- if (vrefcnt(vp) == 1 &&
- (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
- VATTR_NULL(&vattr);
- if (np->n_flag & NACC)
- vattr.va_atime = np->n_atim;
- if (np->n_flag & NUPD)
- vattr.va_mtime = np->n_mtim;
- mtx_unlock(&np->n_mtx);
- (void)VOP_SETATTR(vp, &vattr, ap->a_cred);
- goto out;
- }
- }
- mtx_unlock(&np->n_mtx);
-out:
- return (fifo_specops.vop_close(ap));
-}
-
-/*
- * Just call nfs_writebp() with the force argument set to 1.
- *
- * NOTE: B_DONE may or may not be set in a_bp on call.
- */
-static int
-nfs_bwrite(struct buf *bp)
-{
-
- return (nfs_writebp(bp, 1, curthread));
-}
-
-struct buf_ops buf_ops_nfs = {
- .bop_name = "buf_ops_nfs",
- .bop_write = nfs_bwrite,
- .bop_strategy = bufstrategy,
- .bop_sync = bufsync,
- .bop_bdflush = bufbdflush,
-};