aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames McLaughlin <james_mclgh.net>2023-08-17 19:57:17 +0000
committerDmitry Chagin <dchagin@FreeBSD.org>2023-08-17 19:57:17 +0000
commitbb66c59753836cd8abb596fe316dcdb77ea66999 (patch)
treeb21541734e8543ae4764c7183c0ee0e59c256ff7
parent7307c4396347af670dc86e2e7a9dfadb55e1c458 (diff)
downloadsrc-bb66c59753836cd8abb596fe316dcdb77ea66999.tar.gz
src-bb66c59753836cd8abb596fe316dcdb77ea66999.zip
linux(4): Add sendfile fallback for non-socket fds
Before Linux 2.6.33, out_fd must refer to a socket. Since Linux 2.6.33 it can be any file. The patch was originally provided by James McLaughlin and adapted by me for copy_file_range. PR: 262535 Differential revision: https://reviews.freebsd.org/D34555 MFC after: 1 month
-rw-r--r--sys/compat/linux/linux_socket.c209
1 files changed, 177 insertions, 32 deletions
diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c
index 45b94cb2f994..f768392be546 100644
--- a/sys/compat/linux/linux_socket.c
+++ b/sys/compat/linux/linux_socket.c
@@ -36,10 +36,12 @@
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
+#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
+#include <sys/vnode.h>
#include <sys/un.h>
#include <sys/unistd.h>
@@ -2374,57 +2376,200 @@ out:
return (error);
}
+/*
+ * Based on sendfile_getsock from kern_sendfile.c
+ * Determines whether an fd is a stream socket that can be used
+ * with FreeBSD sendfile.
+ */
+static bool
+is_stream_socket(struct file *fp)
+{
+ struct socket *so;
+
+ /*
+ * The socket must be a stream socket and connected.
+ */
+ if (fp->f_type != DTYPE_SOCKET)
+ return (false);
+ so = fp->f_data;
+ if (so->so_type != SOCK_STREAM)
+ return (false);
+ /*
+ * SCTP one-to-one style sockets currently don't work with
+ * sendfile().
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_SCTP)
+ return (false);
+ return (!SOLISTENING(so));
+}
+
+static bool
+is_regular_file(struct file *fp)
+{
+
+ return (fp->f_type == DTYPE_VNODE && fp->f_vnode != NULL &&
+ fp->f_vnode->v_type == VREG);
+}
+
static int
-linux_sendfile_common(struct thread *td, l_int out, l_int in,
- off_t *offset, l_size_t count)
+sendfile_fallback(struct thread *td, struct file *fp, l_int out,
+ off_t *offset, l_size_t count, off_t *sbytes)
{
- off_t bytes_read;
- int error;
- l_loff_t current_offset;
- struct file *fp;
+ off_t current_offset, out_offset, to_send;
+ l_size_t bytes_sent, n_read;
+ struct file *ofp;
+ struct iovec aiov;
+ struct uio auio;
+ bool seekable;
+ size_t bufsz;
+ void *buf;
+ int flags, error;
- AUDIT_ARG_FD(in);
- error = fget_read(td, in, &cap_pread_rights, &fp);
+ if (offset == NULL) {
+ if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0)
+ return (error);
+ current_offset = td->td_uretoff.tdu_off;
+ } else {
+ if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
+ return (ESPIPE);
+ current_offset = *offset;
+ }
+ error = fget_write(td, out, &cap_pwrite_rights, &ofp);
if (error != 0)
return (error);
-
- if (offset != NULL) {
- current_offset = *offset;
- } else {
- error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
- fo_seek(fp, 0, SEEK_CUR, td) : ESPIPE;
- if (error != 0)
+ seekable = (ofp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0;
+ if (seekable) {
+ if ((error = fo_seek(ofp, 0, SEEK_CUR, td)) != 0)
goto drop;
+ out_offset = td->td_uretoff.tdu_off;
+ } else
+ out_offset = 0;
+
+ flags = FOF_OFFSET | FOF_NOUPDATE;
+ bufsz = min(count, MAXPHYS);
+ buf = malloc(bufsz, M_LINUX, M_WAITOK);
+ bytes_sent = 0;
+ while (bytes_sent < count) {
+ to_send = min(count - bytes_sent, bufsz);
+ aiov.iov_base = buf;
+ aiov.iov_len = bufsz;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_td = td;
+ auio.uio_rw = UIO_READ;
+ auio.uio_offset = current_offset;
+ auio.uio_resid = to_send;
+ error = fo_read(fp, &auio, fp->f_cred, flags, td);
+ if (error != 0)
+ break;
+ n_read = to_send - auio.uio_resid;
+ if (n_read == 0)
+ break;
+ aiov.iov_base = buf;
+ aiov.iov_len = bufsz;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_td = td;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_offset = (seekable) ? out_offset : 0;
+ auio.uio_resid = n_read;
+ error = fo_write(ofp, &auio, ofp->f_cred, flags, td);
+ if (error != 0)
+ break;
+ bytes_sent += n_read;
+ current_offset += n_read;
+ out_offset += n_read;
+ }
+ free(buf, M_LINUX);
+
+ if (error == 0) {
+ *sbytes = bytes_sent;
+ if (offset != NULL)
+ *offset = current_offset;
+ else
+ error = fo_seek(fp, current_offset, SEEK_SET, td);
+ }
+ if (error == 0 && seekable)
+ error = fo_seek(ofp, out_offset, SEEK_SET, td);
+
+drop:
+ fdrop(ofp, td);
+ return (error);
+}
+
+static int
+sendfile_sendfile(struct thread *td, struct file *fp, l_int out,
+ off_t *offset, l_size_t count, off_t *sbytes)
+{
+ off_t current_offset;
+ int error;
+
+ if (offset == NULL) {
+ if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
+ return (ESPIPE);
+ if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0)
+ return (error);
current_offset = td->td_uretoff.tdu_off;
+ } else
+ current_offset = *offset;
+ error = fo_sendfile(fp, out, NULL, NULL, current_offset, count,
+ sbytes, 0, td);
+ if (error == 0) {
+ current_offset += *sbytes;
+ if (offset != NULL)
+ *offset = current_offset;
+ else
+ error = fo_seek(fp, current_offset, SEEK_SET, td);
}
+ return (error);
+}
- bytes_read = 0;
+static int
+linux_sendfile_common(struct thread *td, l_int out, l_int in,
+ off_t *offset, l_size_t count)
+{
+ struct file *fp, *ofp;
+ off_t sbytes;
+ int error;
/* Linux cannot have 0 count. */
- if (count <= 0 || current_offset < 0) {
+ if (count <= 0 || (offset != NULL && *offset < 0))
+ return (EINVAL);
+
+ AUDIT_ARG_FD(in);
+ error = fget_read(td, in, &cap_pread_rights, &fp);
+ if (error != 0)
+ return (error);
+ if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) ||
+ (fp->f_type == DTYPE_VNODE &&
+ (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) {
error = EINVAL;
goto drop;
}
-
- error = fo_sendfile(fp, out, NULL, NULL, current_offset, count,
- &bytes_read, 0, td);
+ error = fget_unlocked(td, out, &cap_no_rights, &ofp);
if (error != 0)
goto drop;
- current_offset += bytes_read;
- if (offset != NULL) {
- *offset = current_offset;
+ if (is_regular_file(fp) && is_regular_file(ofp)) {
+ error = kern_copy_file_range(td, in, offset, out, NULL, count,
+ 0);
} else {
- error = fo_seek(fp, current_offset, SEEK_SET, td);
- if (error != 0)
- goto drop;
+ sbytes = 0;
+ if (is_stream_socket(ofp))
+ error = sendfile_sendfile(td, fp, out, offset, count,
+ &sbytes);
+ else
+ error = sendfile_fallback(td, fp, out, offset, count,
+ &sbytes);
+ if (error == 0)
+ td->td_retval[0] = sbytes;
}
+ fdrop(ofp, td);
- td->td_retval[0] = (ssize_t)bytes_read;
drop:
fdrop(fp, td);
- if (error == ENOTSOCK)
- error = EINVAL;
return (error);
}
@@ -2434,10 +2579,10 @@ linux_sendfile(struct thread *td, struct linux_sendfile_args *arg)
/*
* Differences between FreeBSD and Linux sendfile:
* - Linux doesn't send anything when count is 0 (FreeBSD uses 0 to
- * mean send the whole file.) In linux_sendfile given fds are still
- * checked for validity when the count is 0.
+ * mean send the whole file).
* - Linux can send to any fd whereas FreeBSD only supports sockets.
- * The same restriction follows for linux_sendfile.
+ * We therefore use FreeBSD sendfile where possible for performance,
+ * but fall back on a manual copy (sendfile_fallback).
* - Linux doesn't have an equivalent for FreeBSD's flags and sf_hdtr.
* - Linux takes an offset pointer and updates it to the read location.
* FreeBSD takes in an offset and a 'bytes read' parameter which is