aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/libc/sys/open.241
-rw-r--r--sys/kern/kern_descrip.c46
-rw-r--r--sys/kern/vfs_aio.c5
-rw-r--r--sys/kern/vfs_lookup.c6
-rw-r--r--sys/kern/vfs_syscalls.c61
-rw-r--r--sys/kern/vfs_vnops.c34
-rw-r--r--sys/sys/fcntl.h8
-rw-r--r--sys/sys/file.h1
-rw-r--r--sys/sys/filedesc.h2
9 files changed, 168 insertions, 36 deletions
diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
index e24c823d039a..f9c54bfc7581 100644
--- a/lib/libc/sys/open.2
+++ b/lib/libc/sys/open.2
@@ -28,7 +28,7 @@
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
.\" $FreeBSD$
.\"
-.Dd February 23, 2021
+.Dd March 18, 2021
.Dt OPEN 2
.Os
.Sh NAME
@@ -168,6 +168,7 @@ O_DIRECTORY error if file is not a directory
O_CLOEXEC set FD_CLOEXEC upon open
O_VERIFY verify the contents of the file
O_RESOLVE_BENEATH path resolution must not cross the fd directory
+O_PATH record only the target path in the opened descriptor
.Ed
.Pp
Opening a file with
@@ -316,6 +317,44 @@ The primary use for this descriptor will be as the lookup descriptor for the
.Fn *at
family of functions.
.Pp
+.Dv O_PATH
+returns a file descriptor that can be used as a directory file descriptor for
+.Xr openat 2
+and other system calls taking a file descriptor argument, like
+.Xr fstatat 2
+and others.
+Other functionality of the returned file descriptor is limited to
+descriptor-level operations.
+It can be used for
+.Bl -tag -width SCM_RIGHTS -offset indent -compact
+.It Xr fcntl 2
+but advisory locking is not allowed
+.It Xr dup 2
+.It Xr close 2
+.It Xr fstat 2
+.It Xr fexecve 2
+requires that
+.Dv O_EXEC
+was also specified at open time
+.It Dv SCM_RIGHTS
+can be passed over a
+.Xr unix 4
+socket using a
+.Dv SCM_RIGHTS
+message
+.El
+But operations like
+.Xr read 2 ,
+.Xr ftruncate 2 ,
+and any others that operate on the file rather than on the file descriptor (except
+.Xr fstat 2 ),
+are not allowed.
+See also the description of the
+.Dv AT_EMPTY_PATH
+flag for
+.Xr fstatat 2
+and related syscalls.
+.Pp
If successful,
.Fn open
returns a non-negative integer, termed a file descriptor.
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 7a43fbb2eb80..81af58fbddd1 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/selinfo.h>
+#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
@@ -546,6 +547,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp);
if (error != 0)
break;
+ if (fp->f_ops == &path_fileops) {
+ fdrop(fp, td);
+ error = EBADF;
+ break;
+ }
do {
tmp = flg = fp->f_flag;
tmp &= ~FCNTLFLAGS;
@@ -610,7 +616,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@@ -715,7 +721,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@@ -771,7 +777,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_unlocked(fdp, fd, &cap_no_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
fdrop(fp, td);
error = EBADF;
break;
@@ -3544,7 +3550,7 @@ sys_flock(struct thread *td, struct flock_args *uap)
error = fget(td, uap->fd, &cap_flock_rights, &fp);
if (error != 0)
return (error);
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
fdrop(fp, td);
return (EOPNOTSUPP);
}
@@ -4960,6 +4966,38 @@ struct fileops badfileops = {
.fo_fill_kinfo = badfo_fill_kinfo,
};
+static int
+path_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ return (POLLNVAL);
+}
+
+static int
+path_close(struct file *fp, struct thread *td)
+{
+ MPASS(fp->f_type == DTYPE_VNODE);
+ fp->f_ops = &badfileops;
+ vrele(fp->f_vnode);
+ return (0);
+}
+
+struct fileops path_fileops = {
+ .fo_read = badfo_readwrite,
+ .fo_write = badfo_readwrite,
+ .fo_truncate = badfo_truncate,
+ .fo_ioctl = badfo_ioctl,
+ .fo_poll = path_poll,
+ .fo_kqfilter = badfo_kqfilter,
+ .fo_stat = vn_statfile,
+ .fo_close = path_close,
+ .fo_chmod = badfo_chmod,
+ .fo_chown = badfo_chown,
+ .fo_sendfile = badfo_sendfile,
+ .fo_fill_kinfo = vn_fill_kinfo,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
int
invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 9b45a06c5f9f..640e82b6f0ff 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1619,6 +1619,11 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
goto err3;
}
+ if (fp != NULL && fp->f_ops == &path_fileops) {
+ error = EBADF;
+ goto err3;
+ }
+
job->fd_file = fp;
mtx_lock(&aio_job_mtx);
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index f4ec3cea9fff..f979676f4c7d 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -360,8 +360,10 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
if (cnp->cn_flags & AUDITVNODE2)
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
/*
- * Effectively inlined fgetvp_rights, because we need to
- * inspect the file as well as grabbing the vnode.
+ * Effectively inlined fgetvp_rights, because
+ * we need to inspect the file as well as
+ * grabbing the vnode. O_PATH files are not filtered
+ * out here, since dirfd lookup is their intended use.
*/
error = fget_cap(td, ndp->ni_dirfd, &rights,
&dfp, &ndp->ni_filecaps);
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 45f155ebff3d..5a1efcdec467 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -375,7 +375,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
int error;
AUDIT_ARG_FD(fd);
- error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
+ error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
@@ -891,7 +891,7 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap)
int error;
AUDIT_ARG_FD(uap->fd);
- error = getvnode(td, uap->fd, &cap_fchdir_rights,
+ error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
&fp);
if (error != 0)
return (error);
@@ -1023,9 +1023,10 @@ change_dir(struct vnode *vp, struct thread *td)
static __inline void
flags_to_rights(int flags, cap_rights_t *rightsp)
{
-
if (flags & O_EXEC) {
cap_rights_set_one(rightsp, CAP_FEXECVE);
+ if (flags & O_PATH)
+ return;
} else {
switch ((flags & O_ACCMODE)) {
case O_RDONLY:
@@ -1112,11 +1113,15 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
AUDIT_ARG_MODE(mode);
cap_rights_init_one(&rights, CAP_LOOKUP);
flags_to_rights(flags, &rights);
+
/*
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
- * may be specified.
+ * may be specified. On the other hand, for O_PATH any mode
+ * except O_EXEC is ignored.
*/
- if (flags & O_EXEC) {
+ if ((flags & O_PATH) != 0) {
+ flags &= ~(O_CREAT | O_ACCMODE);
+ } else if ((flags & O_EXEC) != 0) {
if (flags & O_ACCMODE)
return (EINVAL);
} else if ((flags & O_ACCMODE) == O_ACCMODE) {
@@ -1145,8 +1150,10 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
* wonderous happened deep below and we just pass it up
* pretending we know what we do.
*/
- if (error == ENXIO && fp->f_ops != &badfileops)
+ if (error == ENXIO && fp->f_ops != &badfileops) {
+ MPASS((flags & O_PATH) == 0);
goto success;
+ }
/*
* Handle special fdopen() case. bleh.
@@ -1176,14 +1183,16 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
* files that switched type in the cdevsw fdopen() method.
*/
fp->f_vnode = vp;
+
/*
* If the file wasn't claimed by devfs bind it to the normal
* vnode operations here.
*/
if (fp->f_ops == &badfileops) {
- KASSERT(vp->v_type != VFIFO,
+ KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
("Unexpected fifo fp %p vp %p", fp, vp));
- finit_vnode(fp, flags, NULL, &vnops);
+ finit_vnode(fp, flags, NULL, (flags & O_PATH) != 0 ?
+ &path_fileops : &vnops);
}
VOP_UNLOCK(vp);
@@ -1882,7 +1891,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
fp = NULL;
if (fd != FD_NONE) {
- error = getvnode(td, fd, &cap_no_rights, &fp);
+ error = getvnode_path(td, fd, &cap_no_rights, &fp);
if (error != 0)
return (error);
}
@@ -4255,12 +4264,13 @@ out:
}
/*
- * Convert a user file descriptor to a kernel file entry and check that, if it
- * is a capability, the correct rights are present. A reference on the file
- * entry is held upon returning.
+ * This variant of getvnode() allows O_PATH files. Caller should
+ * ensure that returned file and vnode are only used for compatible
+ * semantics.
*/
int
-getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp)
{
struct file *fp;
int error;
@@ -4285,11 +4295,36 @@ getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
fdrop(fp, td);
return (EINVAL);
}
+
*fpp = fp;
return (0);
}
/*
+ * Convert a user file descriptor to a kernel file entry and check
+ * that, if it is a capability, the correct rights are present.
+ * A reference on the file entry is held upon returning.
+ */
+int
+getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+{
+ int error;
+
+ error = getvnode_path(td, fd, rightsp, fpp);
+
+ /*
+ * Filter out O_PATH file descriptors here, because most getvnode()
+ * callers bypass the fo_ methods that enforce O_PATH restrictions.
+ */
+ if (error == 0 && (*fpp)->f_ops == &path_fileops) {
+ fdrop(*fpp, td);
+ error = EBADF;
+ }
+
+ return (error);
+}
+
+/*
* Get an (NFS) file handle.
*/
#ifndef _SYS_SYSPROTO_H_
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 98f37d26ea8c..6339295b0556 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -391,25 +391,30 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
return (EOPNOTSUPP);
if (vp->v_type != VDIR && fmode & O_DIRECTORY)
return (ENOTDIR);
+
accmode = 0;
- if (fmode & (FWRITE | O_TRUNC)) {
- if (vp->v_type == VDIR)
- return (EISDIR);
- accmode |= VWRITE;
+ if ((fmode & O_PATH) == 0) {
+ if ((fmode & (FWRITE | O_TRUNC)) != 0) {
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ accmode |= VWRITE;
+ }
+ if ((fmode & FREAD) != 0)
+ accmode |= VREAD;
+ if ((fmode & O_APPEND) && (fmode & FWRITE))
+ accmode |= VAPPEND;
+#ifdef MAC
+ if ((fmode & O_CREAT) != 0)
+ accmode |= VCREAT;
+#endif
}
- if (fmode & FREAD)
- accmode |= VREAD;
- if (fmode & FEXEC)
+ if ((fmode & FEXEC) != 0)
accmode |= VEXEC;
- if ((fmode & O_APPEND) && (fmode & FWRITE))
- accmode |= VAPPEND;
#ifdef MAC
- if (fmode & O_CREAT)
- accmode |= VCREAT;
- if (fmode & O_VERIFY)
+ if ((fmode & O_VERIFY) != 0)
accmode |= VVERIFY;
error = mac_vnode_check_open(cred, vp, accmode);
- if (error)
+ if (error != 0)
return (error);
accmode &= ~(VCREAT | VVERIFY);
@@ -419,6 +424,9 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
if (error != 0)
return (error);
}
+ if ((fmode & O_PATH) != 0)
+ return (0);
+
if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
vn_lock(vp, LK_UPGRADE | LK_RETRY);
error = VOP_OPEN(vp, fmode, cred, td, fp);
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 0fa4e7758c9d..c328abaa02af 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -135,7 +135,7 @@ typedef __pid_t pid_t;
#if __BSD_VISIBLE
#define O_VERIFY 0x00200000 /* open only after verification */
-/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */
+#define O_PATH 0x00400000 /* fd is only a path */
#define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk
out of cwd */
#endif
@@ -156,10 +156,12 @@ typedef __pid_t pid_t;
/* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
#define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1)
-#define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
+#define OFLAGS(fflags) \
+ (((fflags) & (O_EXEC | O_PATH)) != 0 ? (fflags) : (fflags) - 1)
/* bits to save after open */
-#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC)
+#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK| \
+ O_DIRECT|FEXEC|O_PATH)
/* bits settable by fcntl(F_SETFL, ...) */
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
diff --git a/sys/sys/file.h b/sys/sys/file.h
index c4fc70f517a4..9237ee5ceb9d 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -239,6 +239,7 @@ struct xfile {
extern struct fileops vnops;
extern struct fileops badfileops;
+extern struct fileops path_fileops;
extern struct fileops socketops;
extern int maxfiles; /* kernel limit on number of open files */
extern int maxfilesperproc; /* per process limit on number of open files */
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index 8c5aa258ed28..7f18d8a2286c 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -265,6 +265,8 @@ struct filedesc_to_leader *
struct filedesc *fdp, struct proc *leader);
int getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
+int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,