path: root/sys/kern/kern_sendfile.c
Diffstat (limited to 'sys/kern/kern_sendfile.c')
-rw-r--r--	sys/kern/kern_sendfile.c	133
1 file changed, 127 insertions(+), 6 deletions(-)
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 61edffa893eb..a78c00384467 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
+#include <netinet/in.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
@@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$");
#define	EXT_FLAG_SYNC		EXT_FLAG_VENDOR1
#define	EXT_FLAG_NOCACHE	EXT_FLAG_VENDOR2
+#define	EXT_FLAG_CACHE_LAST	EXT_FLAG_VENDOR3

/*
 * Structure describing a single sendfile(2) I/O, which may consist of
@@ -201,6 +203,39 @@ sendfile_free_mext(struct mbuf *m)
	}
}

+static void
+sendfile_free_mext_pg(struct mbuf *m)
+{
+	struct mbuf_ext_pgs *ext_pgs;
+	vm_page_t pg;
+	int i;
+	bool nocache, cache_last;
+
+	KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS,
+	    ("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
+
+	nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
+	cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
+	ext_pgs = m->m_ext.ext_pgs;
+
+	for (i = 0; i < ext_pgs->npgs; i++) {
+		if (cache_last && i == ext_pgs->npgs - 1)
+			nocache = false;
+		pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+		sendfile_free_page(pg, nocache);
+	}
+
+	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
+		struct sendfile_sync *sfs = m->m_ext.ext_arg2;
+
+		mtx_lock(&sfs->mtx);
+		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
+		if (--sfs->count == 0)
+			cv_signal(&sfs->cv);
+		mtx_unlock(&sfs->mtx);
+	}
+}
+
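The destructor above is the signalling half of the SF_SYNC handshake; for
context, a condensed sketch of the waiting side (an assumption based on the
sendfile_sync fields used here, not a quote of the in-tree vn_sendfile()
code):

	/* Sleep until every mbuf tagged EXT_FLAG_SYNC has been freed. */
	mtx_lock(&sfs->mtx);
	while (sfs->count != 0)
		cv_wait(&sfs->cv, &sfs->mtx);
	mtx_unlock(&sfs->mtx);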
/*
 * Helper function to calculate how much data to put into page i of n.
 * Only first and last pages are special.
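The helper body itself falls outside the diff context. A minimal sketch of
what the comment describes (a hypothetical reconstruction, not the committed
body; xfsize() is the name used by the callers below, and omin() is assumed
to be the kernel's off_t minimum):

	static int
	xfsize(int i, int n, off_t off, off_t len)
	{

		/* First page may start at an offset within the page. */
		if (i == 0)
			return (omin(PAGE_SIZE - (off & PAGE_MASK), len));

		/* Last page may end short of a page boundary. */
		if (i == n - 1 && ((off + len) & PAGE_MASK) > 0)
			return ((off + len) & PAGE_MASK);

		/* Interior pages are always full. */
		return (PAGE_SIZE);
	}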
@@ -283,8 +318,6 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
	CURVNET_SET(so->so_vnet);
	if (sfio->error) {
-		struct mbuf *m;
-
		/*
		 * I/O operation failed. The state of data in the socket
		 * is now inconsistent, and all what we can do is to tear
@@ -299,9 +332,7 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
		so->so_proto->pr_usrreqs->pru_abort(so);
		so->so_error = EIO;

-		m = sfio->m;
-		for (int i = 0; i < sfio->npages; i++)
-			m = m_free(m);
+		mb_free_notready(sfio->m, sfio->npages);
	} else
		(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
		    sfio->npages);
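The loop it replaces freed exactly one mbuf per page, which no longer holds
once a single EXT_PGS mbuf can carry many pages. A sketch of the semantics
mb_free_notready() needs here (an assumption about its behavior, not the
committed implementation):

	void
	mb_free_notready(struct mbuf *m, int count)
	{
		int i;

		for (i = 0; i < count && m != NULL; i++) {
			if ((m->m_flags & M_EXT) != 0 &&
			    m->m_ext.ext_type == EXT_PGS) {
				/* One EXT_PGS mbuf may stand for many pages. */
				m->m_ext.ext_pgs->nrdy--;
				if (m->m_ext.ext_pgs->nrdy != 0)
					continue;
			}
			m = m_free(m);
		}
	}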
@@ -540,13 +571,15 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
	struct vnode *vp;
	struct vm_object *obj;
	struct socket *so;
+	struct mbuf_ext_pgs *ext_pgs;
	struct mbuf *m, *mh, *mhtail;
	struct sf_buf *sf;
	struct shmfd *shmfd;
	struct sendfile_sync *sfs;
	struct vattr va;
	off_t off, sbytes, rem, obj_size;
-	int error, softerr, bsize, hdrlen;
+	int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
+	bool use_ext_pgs;

	obj = NULL;
	so = NULL;
@@ -554,6 +587,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
	sfs = NULL;
	hdrlen = sbytes = 0;
	softerr = 0;
+	use_ext_pgs = false;

	error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
	if (error != 0)
@@ -714,6 +748,17 @@ retry_space:
		if (space > rem)
			space = rem;
+		else if (space > PAGE_SIZE) {
+			/*
+			 * Use page boundaries when possible for large
+			 * requests.
+			 */
+			if (off & PAGE_MASK)
+				space -= (PAGE_SIZE - (off & PAGE_MASK));
+			space = trunc_page(space);
+			if (off & PAGE_MASK)
+				space += (PAGE_SIZE - (off & PAGE_MASK));
+		}

		npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
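A worked example of the trimming above, assuming 4 KB pages: with off = 6 KB
(so off & PAGE_MASK = 2 KB) and space = 21 KB, the 2 KB head fragment is set
aside, the remaining 19 KB is truncated down to 16 KB, and the fragment is
added back, giving space = 18 KB. The transfer then ends exactly on a page
boundary at offset 24 KB, and npages = howmany(18 KB + 2 KB, 4 KB) = 5.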
@@ -751,6 +796,22 @@ retry_space:
 * dumped into socket buffer.
		 */
		pa = sfio->pa;
+
+		/*
+		 * Use unmapped mbufs if enabled for TCP.  Unmapped
+		 * mbufs are restricted to TCP as that is what has been
+		 * tested.  In particular, unmapped mbufs have not
+		 * been tested with UNIX-domain sockets.
+		 */
+		if (mb_use_ext_pgs &&
+		    so->so_proto->pr_protocol == IPPROTO_TCP) {
+			use_ext_pgs = true;
+			max_pgs = MBUF_PEXT_MAX_PGS;
+
+			/* Start at last index, to wrap on first use. */
+			ext_pgs_idx = max_pgs - 1;
+		}
+
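Seeding ext_pgs_idx at max_pgs - 1 folds the first allocation into the
normal wrap path: on the very first page the increment below reaches
max_pgs, so the initial EXT_PGS mbuf is allocated by the same code that
handles every later wrap. A hypothetical unrolled trace of that first pass:

	ext_pgs_idx++;			/* max_pgs - 1 becomes max_pgs */
	if (ext_pgs_idx == max_pgs) {	/* taken on the very first page */
		/* ... allocate the first EXT_PGS mbuf ... */
		ext_pgs_idx = 0;	/* begin filling at slot 0 */
	}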
		for (int i = 0; i < npages; i++) {
			struct mbuf *m0;
@@ -766,6 +827,66 @@ retry_space:
				break;
			}

+			if (use_ext_pgs) {
+				off_t xfs;
+
+				ext_pgs_idx++;
+				if (ext_pgs_idx == max_pgs) {
+					m0 = mb_alloc_ext_pgs(M_WAITOK, false,
+					    sendfile_free_mext_pg);
+
+					if (flags & SF_NOCACHE) {
+						m0->m_ext.ext_flags |=
+						    EXT_FLAG_NOCACHE;
+
+						/*
+						 * See comment below regarding
+						 * ignoring SF_NOCACHE for the
+						 * last page.
+						 */
+						if ((npages - i <= max_pgs) &&
+						    ((off + space) & PAGE_MASK) &&
+						    (rem > space || rhpages > 0))
+							m0->m_ext.ext_flags |=
+							    EXT_FLAG_CACHE_LAST;
+					}
+					if (sfs != NULL) {
+						m0->m_ext.ext_flags |=
+						    EXT_FLAG_SYNC;
+						m0->m_ext.ext_arg2 = sfs;
+						mtx_lock(&sfs->mtx);
+						sfs->count++;
+						mtx_unlock(&sfs->mtx);
+					}
+					ext_pgs = m0->m_ext.ext_pgs;
+					if (i == 0)
+						sfio->m = m0;
+					ext_pgs_idx = 0;
+
+					/* Append to mbuf chain. */
+					if (mtail != NULL)
+						mtail->m_next = m0;
+					else
+						m = m0;
+					mtail = m0;
+					ext_pgs->first_pg_off =
+					    vmoff(i, off) & PAGE_MASK;
+				}
+				if (nios) {
+					mtail->m_flags |= M_NOTREADY;
+					ext_pgs->nrdy++;
+				}
+
+				ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]);
+				ext_pgs->npgs++;
+				xfs = xfsize(i, npages, off, space);
+				ext_pgs->last_pg_len = xfs;
+				MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs);
+				mtail->m_len += xfs;
+				mtail->m_ext.ext_size += PAGE_SIZE;
+				continue;
+			}
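A worked example of the accounting above, assuming 4 KB pages and a chunk
that fits in one EXT_PGS mbuf: for off = 6 KB and space = 10 KB, npages = 3
and xfsize() yields 2 KB, 4 KB and 4 KB for i = 0..2, so the mbuf ends up
with npgs = 3, first_pg_off = 2 KB, last_pg_len = 4 KB and m_len = 10 KB.
Since last_pg_len is simply overwritten on each appended page, it is correct
for whichever page turns out to be the last.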
+
			/*
			 * Get a sendfile buf.  When allocating the
			 * first buffer for mbuf chain, we usually