aboutsummaryrefslogtreecommitdiff
path: root/sys/fs/fuse/fuse_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/fs/fuse/fuse_internal.c')
-rw-r--r--sys/fs/fuse/fuse_internal.c855
1 files changed, 690 insertions, 165 deletions
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index f85fb6c63f72..ba9b77c44dc1 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -33,6 +33,11 @@
* Copyright (C) 2005 Csaba Henk.
* All rights reserved.
*
+ * Copyright (c) 2019 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by BFF Storage Systems, LLC under
+ * sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -59,8 +64,9 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/module.h>
#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/module.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/conf.h>
@@ -89,35 +95,78 @@ __FBSDID("$FreeBSD$");
#include "fuse.h"
#include "fuse_file.h"
#include "fuse_internal.h"
+#include "fuse_io.h"
#include "fuse_ipc.h"
#include "fuse_node.h"
#include "fuse_file.h"
-#include "fuse_param.h"
-SDT_PROVIDER_DECLARE(fuse);
+SDT_PROVIDER_DECLARE(fusefs);
/*
* Fuse trace probe:
* arg0: verbosity. Higher numbers give more verbose messages
* arg1: Textual message
*/
-SDT_PROBE_DEFINE2(fuse, , internal, trace, "int", "char*");
+SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
#ifdef ZERO_PAD_INCOMPLETE_BUFS
static int isbzero(void *buf, size_t len);
#endif
-/* access */
+counter_u64_t fuse_lookup_cache_hits;
+counter_u64_t fuse_lookup_cache_misses;
+
+SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
+ &fuse_lookup_cache_hits, "number of positive cache hits in lookup");
+
+SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
+ &fuse_lookup_cache_misses, "number of cache misses in lookup");
int
+fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
+ struct vnode **vpp)
+{
+ struct bintime now;
+ struct thread *td = curthread;
+ uint64_t nodeid = ino;
+ int error;
+
+ *vpp = NULL;
+
+ error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
+ fuse_vnode_cmp, &nodeid);
+ if (error)
+ return error;
+ /*
+ * Check the entry cache timeout. We have to do this within fusefs
+ * instead of by using cache_enter_time/cache_lookup because those
+ * routines are only intended to work with pathnames, not inodes
+ */
+ if (*vpp != NULL) {
+ getbinuptime(&now);
+ if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){
+ counter_u64_add(fuse_lookup_cache_hits, 1);
+ return 0;
+ } else {
+ /* Entry cache timeout */
+ counter_u64_add(fuse_lookup_cache_misses, 1);
+ cache_purge(*vpp);
+ vput(*vpp);
+ *vpp = NULL;
+ }
+ }
+ return 0;
+}
+
+/* Synchronously send a FUSE_ACCESS operation */
+int
fuse_internal_access(struct vnode *vp,
- mode_t mode,
- struct fuse_access_param *facp,
+ accmode_t mode,
struct thread *td,
struct ucred *cred)
{
int err = 0;
- uint32_t mask = 0;
+ uint32_t mask = F_OK;
int dataflags;
int vtype;
struct mount *mp;
@@ -125,77 +174,57 @@ fuse_internal_access(struct vnode *vp,
struct fuse_access_in *fai;
struct fuse_data *data;
- /* NOT YET DONE */
- /*
- * If this vnop gives you trouble, just return 0 here for a lazy
- * kludge.
- */
- /* return 0;*/
-
mp = vnode_mount(vp);
vtype = vnode_vtype(vp);
data = fuse_get_mpdata(mp);
dataflags = data->dataflags;
- if ((mode & VWRITE) && vfs_isrdonly(mp)) {
- return EACCES;
- }
- /* Unless explicitly permitted, deny everyone except the fs owner. */
- if (vnode_isvroot(vp) && !(facp->facc_flags & FACCESS_NOCHECKSPY)) {
- if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
- int denied = fuse_match_cred(data->daemoncred,
- cred);
+ if (mode == 0)
+ return 0;
- if (denied) {
- return EPERM;
- }
+ if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
+ switch (vp->v_type) {
+ case VDIR:
+ /* FALLTHROUGH */
+ case VLNK:
+ /* FALLTHROUGH */
+ case VREG:
+ return EROFS;
+ default:
+ break;
}
- facp->facc_flags |= FACCESS_NOCHECKSPY;
}
- if (!(facp->facc_flags & FACCESS_DO_ACCESS)) {
- return 0;
- }
- if (((vtype == VREG) && (mode & VEXEC))) {
-#ifdef NEED_MOUNT_ARGUMENT_FOR_THIS
- /* Let the kernel handle this through open / close heuristics.*/
- return ENOTSUP;
-#else
- /* Let the kernel handle this. */
- return 0;
-#endif
- }
- if (!fsess_isimpl(mp, FUSE_ACCESS)) {
- /* Let the kernel handle this. */
- return 0;
+
+ /* Unless explicitly permitted, deny everyone except the fs owner. */
+ if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
+ if (fuse_match_cred(data->daemoncred, cred))
+ return EPERM;
}
+
if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
- /* Let the kernel handle this. */
- return 0;
- }
- if ((mode & VADMIN) != 0) {
- err = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
- if (err) {
- return err;
- }
+ struct vattr va;
+
+ fuse_internal_getattr(vp, &va, cred, td);
+ return vaccess(vp->v_type, va.va_mode, va.va_uid,
+ va.va_gid, mode, cred, NULL);
}
- if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) {
+
+ if (!fsess_isimpl(mp, FUSE_ACCESS))
+ return 0;
+
+ if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0)
mask |= W_OK;
- }
- if ((mode & VREAD) != 0) {
+ if ((mode & VREAD) != 0)
mask |= R_OK;
- }
- if ((mode & VEXEC) != 0) {
+ if ((mode & VEXEC) != 0)
mask |= X_OK;
- }
- bzero(&fdi, sizeof(fdi));
fdisp_init(&fdi, sizeof(*fai));
fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
fai = fdi.indata;
- fai->mask = F_OK;
- fai->mask |= mask;
+ fai->mask = mask;
err = fdisp_wait_answ(&fdi);
fdisp_destroy(&fdi);
@@ -208,9 +237,9 @@ fuse_internal_access(struct vnode *vp,
}
/*
- * Cache FUSE attributes from feo, in attr cache associated with vnode 'vp'.
- * Optionally, if argument 'vap' is not NULL, store a copy of the converted
- * attributes there as well.
+ * Cache FUSE attributes from attr, in attribute cache associated with vnode
+ * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the
+ * converted attributes there as well.
*
* If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
* return the result to the caller).
@@ -221,49 +250,57 @@ fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
{
struct mount *mp;
struct fuse_vnode_data *fvdat;
+ struct fuse_data *data;
struct vattr *vp_cache_at;
mp = vnode_mount(vp);
fvdat = VTOFUD(vp);
+ data = fuse_get_mpdata(mp);
- /* Honor explicit do-not-cache requests from user filesystems. */
- if (attr_valid == 0 && attr_valid_nsec == 0)
- fvdat->valid_attr_cache = false;
- else
- fvdat->valid_attr_cache = true;
-
- vp_cache_at = VTOVA(vp);
+ ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
- if (vap == NULL && vp_cache_at == NULL)
- return;
+ fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
+ &fvdat->attr_cache_timeout);
- if (vap == NULL)
- vap = vp_cache_at;
+ /* Fix our buffers if the filesize changed without us knowing */
+ if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
+ (void)fuse_vnode_setsize(vp, attr->size);
+ fvdat->cached_attrs.va_size = attr->size;
+ }
- vattr_null(vap);
+ if (attr_valid > 0 || attr_valid_nsec > 0)
+ vp_cache_at = &(fvdat->cached_attrs);
+ else if (vap != NULL)
+ vp_cache_at = vap;
+ else
+ return;
- vap->va_fsid = mp->mnt_stat.f_fsid.val[0];
- vap->va_fileid = attr->ino;
- vap->va_mode = attr->mode & ~S_IFMT;
- vap->va_nlink = attr->nlink;
- vap->va_uid = attr->uid;
- vap->va_gid = attr->gid;
- vap->va_rdev = attr->rdev;
- vap->va_size = attr->size;
+ vattr_null(vp_cache_at);
+ vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
+ vp_cache_at->va_fileid = attr->ino;
+ vp_cache_at->va_mode = attr->mode & ~S_IFMT;
+ vp_cache_at->va_nlink = attr->nlink;
+ vp_cache_at->va_uid = attr->uid;
+ vp_cache_at->va_gid = attr->gid;
+ vp_cache_at->va_rdev = attr->rdev;
+ vp_cache_at->va_size = attr->size;
/* XXX on i386, seconds are truncated to 32 bits */
- vap->va_atime.tv_sec = attr->atime;
- vap->va_atime.tv_nsec = attr->atimensec;
- vap->va_mtime.tv_sec = attr->mtime;
- vap->va_mtime.tv_nsec = attr->mtimensec;
- vap->va_ctime.tv_sec = attr->ctime;
- vap->va_ctime.tv_nsec = attr->ctimensec;
- vap->va_blocksize = PAGE_SIZE;
- vap->va_type = IFTOVT(attr->mode);
- vap->va_bytes = attr->blocks * S_BLKSIZE;
- vap->va_flags = 0;
-
- if (vap != vp_cache_at && vp_cache_at != NULL)
- memcpy(vp_cache_at, vap, sizeof(*vap));
+ vp_cache_at->va_atime.tv_sec = attr->atime;
+ vp_cache_at->va_atime.tv_nsec = attr->atimensec;
+ vp_cache_at->va_mtime.tv_sec = attr->mtime;
+ vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
+ vp_cache_at->va_ctime.tv_sec = attr->ctime;
+ vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
+ if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
+ vp_cache_at->va_blocksize = attr->blksize;
+ else
+ vp_cache_at->va_blocksize = PAGE_SIZE;
+ vp_cache_at->va_type = IFTOVT(attr->mode);
+ vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
+ vp_cache_at->va_flags = 0;
+
+ if (vap != vp_cache_at && vap != NULL)
+ memcpy(vap, vp_cache_at, sizeof(*vap));
}
@@ -281,30 +318,178 @@ fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
int
fuse_internal_fsync(struct vnode *vp,
struct thread *td,
- struct ucred *cred,
- struct fuse_filehandle *fufh)
+ int waitfor,
+ bool datasync)
{
- int op = FUSE_FSYNC;
- struct fuse_fsync_in *ffsi;
+ struct fuse_fsync_in *ffsi = NULL;
struct fuse_dispatcher fdi;
+ struct fuse_filehandle *fufh;
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ struct mount *mp = vnode_mount(vp);
+ int op = FUSE_FSYNC;
+ int err = 0;
- if (vnode_isdir(vp)) {
- op = FUSE_FSYNCDIR;
+ if (!fsess_isimpl(vnode_mount(vp),
+ (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
+ return 0;
}
+ if (vnode_isdir(vp))
+ op = FUSE_FSYNCDIR;
+
+ if (!fsess_isimpl(mp, op))
+ return 0;
+
fdisp_init(&fdi, sizeof(*ffsi));
- fdisp_make_vp(&fdi, op, vp, td, cred);
- ffsi = fdi.indata;
- ffsi->fh = fufh->fh_id;
+ /*
+ * fsync every open file handle for this file, because we can't be sure
+ * which file handle the caller is really referring to.
+ */
+ LIST_FOREACH(fufh, &fvdat->handles, next) {
+ if (ffsi == NULL)
+ fdisp_make_vp(&fdi, op, vp, td, NULL);
+ else
+ fdisp_refresh_vp(&fdi, op, vp, td, NULL);
+ ffsi = fdi.indata;
+ ffsi->fh = fufh->fh_id;
+ ffsi->fsync_flags = 0;
+
+ if (datasync)
+ ffsi->fsync_flags = 1;
+
+ if (waitfor == MNT_WAIT) {
+ err = fdisp_wait_answ(&fdi);
+ } else {
+ fuse_insert_callback(fdi.tick,
+ fuse_internal_fsync_callback);
+ fuse_insert_message(fdi.tick, false);
+ }
+ if (err == ENOSYS) {
+ /* ENOSYS means "success, and don't call again" */
+ fsess_set_notimpl(mp, op);
+ err = 0;
+ break;
+ }
+ }
+ fdisp_destroy(&fdi);
- ffsi->fsync_flags = 1; /* datasync */
+ return err;
+}
- fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback);
- fuse_insert_message(fdi.tick);
+/* Asynchronous invalidation */
+SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
+ "struct vnode*", "struct vnode*");
+int
+fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
+{
+ struct fuse_notify_inval_entry_out fnieo;
+ struct componentname cn;
+ struct vnode *dvp, *vp;
+ char name[PATH_MAX];
+ int err;
- fdisp_destroy(&fdi);
+ if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
+ return (err);
- return 0;
+ if (fnieo.namelen > sizeof(name))
+ return (EINVAL);
+
+ if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
+ return (err);
+ name[fnieo.namelen] = '\0';
+ /* fusefs does not cache "." or ".." entries */
+ if (strncmp(name, ".", sizeof(".")) == 0 ||
+ strncmp(name, "..", sizeof("..")) == 0)
+ return (0);
+
+ if (fnieo.parent == FUSE_ROOT_ID)
+ err = VFS_ROOT(mp, LK_SHARED, &dvp);
+ else
+ err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
+ LK_SHARED, &dvp);
+ /*
+ * If dvp is not in the cache, then it must've been reclaimed. And
+ * since fuse_vnop_reclaim does a cache_purge, name's entry must've
+ * been invalidated already. So we can safely return if dvp == NULL
+ */
+ if (err != 0 || dvp == NULL)
+ return (err);
+ /*
+ * XXX we can't check dvp's generation because the FUSE invalidate
+ * entry message doesn't include it. Worst case is that we invalidate
+ * an entry that didn't need to be invalidated.
+ */
+
+ cn.cn_nameiop = LOOKUP;
+ cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */
+ cn.cn_thread = curthread;
+ cn.cn_cred = curthread->td_ucred;
+ cn.cn_lkflags = LK_SHARED;
+ cn.cn_pnbuf = NULL;
+ cn.cn_nameptr = name;
+ cn.cn_namelen = fnieo.namelen;
+ err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
+ MPASS(err == 0);
+ fuse_vnode_clear_attr_cache(dvp);
+ vput(dvp);
+ return (0);
+}
+
+int
+fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
+{
+ struct fuse_notify_inval_inode_out fniio;
+ struct vnode *vp;
+ int err;
+
+ if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
+ return (err);
+
+ if (fniio.ino == FUSE_ROOT_ID)
+ err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
+ else
+ err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
+ &vp);
+ if (err != 0 || vp == NULL)
+ return (err);
+ /*
+ * XXX we can't check vp's generation because the FUSE invalidate
+ * entry message doesn't include it. Worst case is that we invalidate
+ * an inode that didn't need to be invalidated.
+ */
+
+ /*
+ * Flush and invalidate buffers if off >= 0. Technically we only need
+ * to flush and invalidate the range of offsets [off, off + len), but
+ * for simplicity's sake we do everything.
+ */
+ if (fniio.off >= 0)
+ fuse_io_invalbuf(vp, curthread);
+ fuse_vnode_clear_attr_cache(vp);
+ vput(vp);
+ return (0);
+}
+
+/* mknod */
+int
+fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
+ struct componentname *cnp, struct vattr *vap)
+{
+ struct fuse_data *data;
+ struct fuse_mknod_in fmni;
+ size_t insize;
+
+ data = fuse_get_mpdata(dvp->v_mount);
+ fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
+ fmni.rdev = vap->va_rdev;
+ if (fuse_libabi_geq(data, 7, 12)) {
+ insize = sizeof(fmni);
+ fmni.umask = curthread->td_proc->p_fd->fd_cmask;
+ } else {
+ insize = FUSE_COMPAT_MKNOD_IN_SIZE;
+ }
+ return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
+ insize, vap->va_type));
}
/* readdir */
@@ -312,16 +497,19 @@ fuse_internal_fsync(struct vnode *vp,
int
fuse_internal_readdir(struct vnode *vp,
struct uio *uio,
+ off_t startoff,
struct fuse_filehandle *fufh,
- struct fuse_iov *cookediov)
+ struct fuse_iov *cookediov,
+ int *ncookies,
+ u_long *cookies)
{
int err = 0;
struct fuse_dispatcher fdi;
- struct fuse_read_in *fri;
+ struct fuse_read_in *fri = NULL;
+ int fnd_start;
- if (uio_resid(uio) == 0) {
+ if (uio_resid(uio) == 0)
return 0;
- }
fdisp_init(&fdi, 0);
/*
@@ -329,51 +517,70 @@ fuse_internal_readdir(struct vnode *vp,
* I/O).
*/
+ /*
+ * fnd_start is set non-zero once the offset in the directory gets
+ * to the startoff. This is done because directories must be read
+ * from the beginning (offset == 0) when fuse_vnop_readdir() needs
+ * to do an open of the directory.
+ * If it is not set non-zero here, it will be set non-zero in
+ * fuse_internal_readdir_processdata() when uio_offset == startoff.
+ */
+ fnd_start = 0;
+ if (uio->uio_offset == startoff)
+ fnd_start = 1;
while (uio_resid(uio) > 0) {
-
fdi.iosize = sizeof(*fri);
- fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
+ if (fri == NULL)
+ fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
+ else
+ fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
fri = fdi.indata;
fri->fh = fufh->fh_id;
fri->offset = uio_offset(uio);
- fri->size = min(uio_resid(uio), FUSE_DEFAULT_IOSIZE);
- /* mp->max_read */
+ fri->size = MIN(uio->uio_resid,
+ fuse_get_mpdata(vp->v_mount)->max_read);
- if ((err = fdisp_wait_answ(&fdi))) {
+ if ((err = fdisp_wait_answ(&fdi)))
break;
- }
- if ((err = fuse_internal_readdir_processdata(uio, fri->size, fdi.answ,
- fdi.iosize, cookediov))) {
+ if ((err = fuse_internal_readdir_processdata(uio, startoff,
+ &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
+ ncookies, &cookies)))
break;
- }
}
fdisp_destroy(&fdi);
return ((err == -1) ? 0 : err);
}
+/*
+ * Return -1 to indicate that this readdir is finished, 0 if it copied
+ * all the directory data read in and it may be possible to read more
+ * and greater than 0 for a failure.
+ */
int
fuse_internal_readdir_processdata(struct uio *uio,
+ off_t startoff,
+ int *fnd_start,
size_t reqsize,
void *buf,
size_t bufsize,
- void *param)
+ struct fuse_iov *cookediov,
+ int *ncookies,
+ u_long **cookiesp)
{
int err = 0;
- int cou = 0;
int bytesavail;
size_t freclen;
struct dirent *de;
struct fuse_dirent *fudge;
- struct fuse_iov *cookediov = param;
+ u_long *cookies;
- if (bufsize < FUSE_NAME_OFFSET) {
+ cookies = *cookiesp;
+ if (bufsize < FUSE_NAME_OFFSET)
return -1;
- }
for (;;) {
-
if (bufsize < FUSE_NAME_OFFSET) {
err = -1;
break;
@@ -381,10 +588,12 @@ fuse_internal_readdir_processdata(struct uio *uio,
fudge = (struct fuse_dirent *)buf;
freclen = FUSE_DIRENT_SIZE(fudge);
- cou++;
-
if (bufsize < freclen) {
- err = ((cou == 1) ? -1 : 0);
+ /*
+ * This indicates a partial directory entry at the
+ * end of the directory data.
+ */
+ err = -1;
break;
}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
@@ -402,30 +611,47 @@ fuse_internal_readdir_processdata(struct uio *uio,
&fudge->namelen);
if (bytesavail > uio_resid(uio)) {
+ /* Out of space for the dir so we are done. */
err = -1;
break;
}
- fiov_refresh(cookediov);
- fiov_adjust(cookediov, bytesavail);
-
- de = (struct dirent *)cookediov->base;
- de->d_fileno = fudge->ino;
- de->d_reclen = bytesavail;
- de->d_type = fudge->type;
- de->d_namlen = fudge->namelen;
- memcpy((char *)cookediov->base + sizeof(struct dirent) -
- MAXNAMLEN - 1,
- (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
- dirent_terminate(de);
-
- err = uiomove(cookediov->base, cookediov->len, uio);
- if (err) {
- break;
- }
+ /*
+ * Don't start to copy the directory entries out until
+ * the requested offset in the directory is found.
+ */
+ if (*fnd_start != 0) {
+ fiov_adjust(cookediov, bytesavail);
+ bzero(cookediov->base, bytesavail);
+
+ de = (struct dirent *)cookediov->base;
+ de->d_fileno = fudge->ino;
+ de->d_reclen = bytesavail;
+ de->d_type = fudge->type;
+ de->d_namlen = fudge->namelen;
+ memcpy((char *)cookediov->base + sizeof(struct dirent) -
+ MAXNAMLEN - 1,
+ (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
+ dirent_terminate(de);
+
+ err = uiomove(cookediov->base, cookediov->len, uio);
+ if (err)
+ break;
+ if (cookies != NULL) {
+ if (*ncookies == 0) {
+ err = -1;
+ break;
+ }
+ *cookies = fudge->off;
+ cookies++;
+ (*ncookies)--;
+ }
+ } else if (startoff == fudge->off)
+ *fnd_start = 1;
buf = (char *)buf + freclen;
bufsize -= freclen;
uio_setoffset(uio, fudge->off);
}
+ *cookiesp = cookies;
return err;
}
@@ -439,11 +665,8 @@ fuse_internal_remove(struct vnode *dvp,
enum fuse_opcode op)
{
struct fuse_dispatcher fdi;
- struct fuse_vnode_data *fvdat;
- int err;
-
- err = 0;
- fvdat = VTOFUD(vp);
+ nlink_t nlink;
+ int err = 0;
fdisp_init(&fdi, cnp->cn_namelen + 1);
fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
@@ -453,6 +676,35 @@ fuse_internal_remove(struct vnode *dvp,
err = fdisp_wait_answ(&fdi);
fdisp_destroy(&fdi);
+
+ if (err)
+ return (err);
+
+ /*
+ * Access the cached nlink even if the attr cache has expired. If
+ * it's inaccurate, the worst that will happen is:
+ * 1) We'll recycle the vnode even though the file has another link we
+ * don't know about, costing a bit of cpu time, or
+ * 2) We won't recycle the vnode even though all of its links are gone.
+ * It will linger around until vnlru reclaims it, costing a bit of
+ * temporary memory.
+ */
+ nlink = VTOFUD(vp)->cached_attrs.va_nlink--;
+
+ /*
+ * Purge the parent's attribute cache because the daemon
+ * should've updated its mtime and ctime.
+ */
+ fuse_vnode_clear_attr_cache(dvp);
+
+ /* NB: nlink could be zero if it was never cached */
+ if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
+ fuse_internal_vnode_disappear(vp);
+ } else {
+ cache_purge(vp);
+ fuse_vnode_update(vp, FN_CTIMECHANGE);
+ }
+
return err;
}
@@ -532,6 +784,13 @@ fuse_internal_newentry_core(struct vnode *dvp,
feo->nodeid, 1);
return err;
}
+
+ /*
+ * Purge the parent's attribute cache because the daemon should've
+ * updated its mtime and ctime
+ */
+ fuse_vnode_clear_attr_cache(dvp);
+
fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
feo->attr_valid_nsec, NULL);
@@ -593,10 +852,79 @@ fuse_internal_forget_send(struct mount *mp,
ffi = fdi.indata;
ffi->nlookup = nlookup;
- fuse_insert_message(fdi.tick);
+ fuse_insert_message(fdi.tick, false);
fdisp_destroy(&fdi);
}
+/* Fetch the vnode's attributes from the daemon*/
+int
+fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
+ struct ucred *cred, struct thread *td)
+{
+ struct fuse_dispatcher fdi;
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ struct fuse_getattr_in *fgai;
+ struct fuse_attr_out *fao;
+ off_t old_filesize = fvdat->cached_attrs.va_size;
+ struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
+ struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
+ enum vtype vtyp;
+ int err;
+
+ fdisp_init(&fdi, sizeof(*fgai));
+ fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
+ fgai = fdi.indata;
+ /*
+ * We could look up a file handle and set it in fgai->fh, but that
+ * involves extra runtime work and I'm unaware of any file systems that
+ * care.
+ */
+ fgai->getattr_flags = 0;
+ if ((err = fdisp_wait_answ(&fdi))) {
+ if (err == ENOENT)
+ fuse_internal_vnode_disappear(vp);
+ goto out;
+ }
+
+ fao = (struct fuse_attr_out *)fdi.answ;
+ vtyp = IFTOVT(fao->attr.mode);
+ if (fvdat->flag & FN_SIZECHANGE)
+ fao->attr.size = old_filesize;
+ if (fvdat->flag & FN_CTIMECHANGE) {
+ fao->attr.ctime = old_ctime.tv_sec;
+ fao->attr.ctimensec = old_ctime.tv_nsec;
+ }
+ if (fvdat->flag & FN_MTIMECHANGE) {
+ fao->attr.mtime = old_mtime.tv_sec;
+ fao->attr.mtimensec = old_mtime.tv_nsec;
+ }
+ fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
+ fao->attr_valid_nsec, vap);
+ if (vtyp != vnode_vtype(vp)) {
+ fuse_internal_vnode_disappear(vp);
+ err = ENOENT;
+ }
+
+out:
+ fdisp_destroy(&fdi);
+ return err;
+}
+
+/* Read a vnode's attributes from cache or fetch them from the fuse daemon */
+int
+fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
+ struct thread *td)
+{
+ struct vattr *attrs;
+
+ if ((attrs = VTOVA(vp)) != NULL) {
+ *vap = *attrs; /* struct copy */
+ return 0;
+ }
+
+ return fuse_internal_do_getattr(vp, vap, cred, td);
+}
+
void
fuse_internal_vnode_disappear(struct vnode *vp)
{
@@ -604,7 +932,6 @@ fuse_internal_vnode_disappear(struct vnode *vp)
ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
fvdat->flag |= FN_REVOKED;
- fvdat->valid_attr_cache = false;
cache_purge(vp);
}
@@ -625,27 +952,69 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
}
fiio = fticket_resp(tick)->base;
- /* XXX: Do we want to check anything further besides this? */
- if (fiio->major < 7) {
- SDT_PROBE2(fuse, , internal, trace, 1,
+ data->fuse_libabi_major = fiio->major;
+ data->fuse_libabi_minor = fiio->minor;
+ if (!fuse_libabi_geq(data, 7, 4)) {
+ /*
+ * With a little work we could support servers as old as 7.1.
+ * But there would be little payoff.
+ */
+ SDT_PROBE2(fusefs, , internal, trace, 1,
"userpace version too low");
err = EPROTONOSUPPORT;
goto out;
}
- data->fuse_libabi_major = fiio->major;
- data->fuse_libabi_minor = fiio->minor;
if (fuse_libabi_geq(data, 7, 5)) {
- if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) {
+ if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
+ fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
data->max_write = fiio->max_write;
+ if (fiio->flags & FUSE_ASYNC_READ)
+ data->dataflags |= FSESS_ASYNC_READ;
+ if (fiio->flags & FUSE_POSIX_LOCKS)
+ data->dataflags |= FSESS_POSIX_LOCKS;
+ if (fiio->flags & FUSE_EXPORT_SUPPORT)
+ data->dataflags |= FSESS_EXPORT_SUPPORT;
+ /*
+ * Don't bother to check FUSE_BIG_WRITES, because it's
+ * redundant with max_write
+ */
+ /*
+ * max_background and congestion_threshold are not
+ * implemented
+ */
} else {
err = EINVAL;
}
} else {
- /* Old fix values */
+ /* Old fixed values */
data->max_write = 4096;
}
+ if (fuse_libabi_geq(data, 7, 6))
+ data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf;
+
+ if (!fuse_libabi_geq(data, 7, 7))
+ fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
+
+ if (!fuse_libabi_geq(data, 7, 8)) {
+ fsess_set_notimpl(data->mp, FUSE_BMAP);
+ fsess_set_notimpl(data->mp, FUSE_DESTROY);
+ }
+
+ if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 &&
+ fiio->time_gran <= 1000000000)
+ data->time_gran = fiio->time_gran;
+ else
+ data->time_gran = 1;
+
+ if (!fuse_libabi_geq(data, 7, 23))
+ data->cache_mode = fuse_data_cache_mode;
+ else if (fiio->flags & FUSE_WRITEBACK_CACHE)
+ data->cache_mode = FUSE_CACHE_WB;
+ else
+ data->cache_mode = FUSE_CACHE_WT;
+
out:
if (err) {
fdata_set_dead(data);
@@ -669,12 +1038,154 @@ fuse_internal_send_init(struct fuse_data *data, struct thread *td)
fiii = fdi.indata;
fiii->major = FUSE_KERNEL_VERSION;
fiii->minor = FUSE_KERNEL_MINOR_VERSION;
- fiii->max_readahead = FUSE_DEFAULT_IOSIZE * 16;
- fiii->flags = 0;
+ /*
+ * fusefs currently reads ahead no more than one cache block at a time.
+ * See fuse_read_biobackend
+ */
+ fiii->max_readahead = maxbcachebuf;
+ /*
+ * Unsupported features:
+ * FUSE_FILE_OPS: No known FUSE server or client supports it
+ * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
+ * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
+ * when default ACLs are in use.
+ * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD
+ * doesn't have splice(2).
+ * FUSE_FLOCK_LOCKS: not yet implemented
+ * FUSE_HAS_IOCTL_DIR: not yet implemented
+ * FUSE_AUTO_INVAL_DATA: not yet implemented
+ * FUSE_DO_READDIRPLUS: not yet implemented
+ * FUSE_READDIRPLUS_AUTO: not yet implemented
+ * FUSE_ASYNC_DIO: not yet implemented
+ * FUSE_NO_OPEN_SUPPORT: not yet implemented
+ */
+ fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
+ | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE;
fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
- fuse_insert_message(fdi.tick);
+ fuse_insert_message(fdi.tick, false);
+ fdisp_destroy(&fdi);
+}
+
+/*
+ * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL,
+ * send the request with root credentials
+ */
+int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
+ struct thread *td, struct ucred *cred)
+{
+ struct fuse_vnode_data *fvdat;
+ struct fuse_dispatcher fdi;
+ struct fuse_setattr_in *fsai;
+ struct mount *mp;
+ pid_t pid = td->td_proc->p_pid;
+ struct fuse_data *data;
+ int dataflags;
+ int err = 0;
+ enum vtype vtyp;
+ int sizechanged = -1;
+ uint64_t newsize = 0;
+
+ mp = vnode_mount(vp);
+ fvdat = VTOFUD(vp);
+ data = fuse_get_mpdata(mp);
+ dataflags = data->dataflags;
+
+ fdisp_init(&fdi, sizeof(*fsai));
+ fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
+ if (!cred) {
+ fdi.finh->uid = 0;
+ fdi.finh->gid = 0;
+ }
+ fsai = fdi.indata;
+ fsai->valid = 0;
+
+ if (vap->va_uid != (uid_t)VNOVAL) {
+ fsai->uid = vap->va_uid;
+ fsai->valid |= FATTR_UID;
+ }
+ if (vap->va_gid != (gid_t)VNOVAL) {
+ fsai->gid = vap->va_gid;
+ fsai->valid |= FATTR_GID;
+ }
+ if (vap->va_size != VNOVAL) {
+ struct fuse_filehandle *fufh = NULL;
+
+ /* Truncate to a new value. */
+ fsai->size = vap->va_size;
+ sizechanged = 1;
+ newsize = vap->va_size;
+ fsai->valid |= FATTR_SIZE;
+
+ fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
+ if (fufh) {
+ fsai->fh = fufh->fh_id;
+ fsai->valid |= FATTR_FH;
+ }
+ VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
+ }
+ if (vap->va_atime.tv_sec != VNOVAL) {
+ fsai->atime = vap->va_atime.tv_sec;
+ fsai->atimensec = vap->va_atime.tv_nsec;
+ fsai->valid |= FATTR_ATIME;
+ if (vap->va_vaflags & VA_UTIMES_NULL)
+ fsai->valid |= FATTR_ATIME_NOW;
+ }
+ if (vap->va_mtime.tv_sec != VNOVAL) {
+ fsai->mtime = vap->va_mtime.tv_sec;
+ fsai->mtimensec = vap->va_mtime.tv_nsec;
+ fsai->valid |= FATTR_MTIME;
+ if (vap->va_vaflags & VA_UTIMES_NULL)
+ fsai->valid |= FATTR_MTIME_NOW;
+ } else if (fvdat->flag & FN_MTIMECHANGE) {
+ fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec;
+ fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
+ fsai->valid |= FATTR_MTIME;
+ }
+ if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) {
+ fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec;
+ fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
+ fsai->valid |= FATTR_CTIME;
+ }
+ if (vap->va_mode != (mode_t)VNOVAL) {
+ fsai->mode = vap->va_mode & ALLPERMS;
+ fsai->valid |= FATTR_MODE;
+ }
+ if (!fsai->valid) {
+ goto out;
+ }
+
+ if ((err = fdisp_wait_answ(&fdi)))
+ goto out;
+ vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
+
+ if (vnode_vtype(vp) != vtyp) {
+ if (vnode_vtype(vp) == VNON && vtyp != VNON) {
+ SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
+ "vnode_vtype is VNON and vtype isn't.");
+ } else {
+ /*
+ * STALE vnode, ditch
+ *
+ * The vnode has changed its type "behind our back".
+ * There's nothing really we can do, so let us just
+ * force an internal revocation and tell the caller to
+ * try again, if interested.
+ */
+ fuse_internal_vnode_disappear(vp);
+ err = EAGAIN;
+ }
+ }
+ if (err == 0) {
+ struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
+ fuse_vnode_undirty_cached_timestamps(vp);
+ fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
+ fao->attr_valid_nsec, NULL);
+ }
+
+out:
fdisp_destroy(&fdi);
+ return err;
}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
@@ -692,3 +1203,17 @@ isbzero(void *buf, size_t len)
}
#endif
+
+void
+fuse_internal_init(void)
+{
+ fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK);
+ fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK);
+}
+
+void
+fuse_internal_destroy(void)
+{
+ counter_u64_free(fuse_lookup_cache_hits);
+ counter_u64_free(fuse_lookup_cache_misses);
+}