author    Alan Somers <asomers@FreeBSD.org>  2021-01-02 23:34:20 +0000
committer Alan Somers <asomers@FreeBSD.org>  2021-01-03 02:57:58 +0000
commit    022ca2fc7fe08d51f33a1d23a9be49e6d132914e (patch)
tree      3b757d6a22fe9ab1d6a9b8c7e98ee9c85b382877 /sys/kern
parent    486580c44ce29c1e3b1d9b858a08d9df9428b699 (diff)
Add aio_writev and aio_readv
POSIX AIO is great, but it lacks vectored I/O functions. This commit fixes
that shortcoming by adding aio_writev and aio_readv. They aren't part of the
standard, but they're an obvious extension. They work just like their
synchronous equivalents pwritev and preadv. It isn't yet possible to use
vectored aiocbs with lio_listio, but that could be added in the future.

Reviewed by:	jhb, kib, bcr
Relnotes:	yes
Differential Revision:	https://reviews.freebsd.org/D27743
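For context, a userland caller would use the new system calls roughly as in
the sketch below. This example is not part of the commit: it assumes the
aio_iov/aio_iovcnt aiocb members that the copyin path added in vfs_aio.c
reads, and it abbreviates error handling; consult aio_writev(2) for the
authoritative interface.

	/*
	 * Illustrative sketch: queue a vectored asynchronous write and reap
	 * it with aio_waitcomplete(2).  Assumes the aio_iov/aio_iovcnt fields
	 * introduced with this change; not taken from the commit itself.
	 */
	#include <aio.h>
	#include <err.h>
	#include <fcntl.h>
	#include <signal.h>
	#include <string.h>
	#include <sys/uio.h>

	int
	main(void)
	{
		char hdr[] = "hello, ", body[] = "world\n";
		struct iovec iov[2] = {
			{ .iov_base = hdr,  .iov_len = strlen(hdr) },
			{ .iov_base = body, .iov_len = strlen(body) },
		};
		struct aiocb cb, *cbp;
		ssize_t done;
		int fd;

		fd = open("/tmp/aio_writev_demo", O_WRONLY | O_CREAT | O_TRUNC,
		    0644);
		if (fd == -1)
			err(1, "open");

		memset(&cb, 0, sizeof(cb));
		cb.aio_fildes = fd;
		cb.aio_offset = 0;
		cb.aio_iov = iov;	/* scatter/gather list, as with pwritev(2) */
		cb.aio_iovcnt = 2;
		cb.aio_sigevent.sigev_notify = SIGEV_NONE;

		if (aio_writev(&cb) == -1)
			err(1, "aio_writev");

		/* Block until the next AIO job for this process completes. */
		done = aio_waitcomplete(&cbp, NULL);
		if (done == -1)
			err(1, "aio_waitcomplete");
		return (0);
	}

The kernel-side plumbing that backs this interface is what the diff below
adds: new syscall entries, LIO_READV/LIO_WRITEV opcodes, and uio-based I/O
paths in sys_socket.c and vfs_aio.c.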
Diffstat (limited to 'sys/kern')
-rw-r--r--	sys/kern/capabilities.conf	  2
-rw-r--r--	sys/kern/sys_socket.c	 35
-rw-r--r--	sys/kern/syscalls.master	 12
-rw-r--r--	sys/kern/vfs_aio.c	497
4 files changed, 350 insertions, 196 deletions
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
index 3d552255d823..602ec7088fc6 100644
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -100,6 +100,8 @@ aio_return
aio_suspend
aio_waitcomplete
aio_write
+aio_writev
+aio_readv
##
## audit(2) is a global operation, submitting to the global trail, but it is
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 0fe200c119d2..18803b6a5ac0 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -600,9 +600,7 @@ soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job)
struct ucred *td_savedcred;
struct thread *td;
struct file *fp;
- struct uio uio;
- struct iovec iov;
- size_t cnt, done;
+ size_t cnt, done, job_total_nbytes;
long ru_before;
int error, flags;
@@ -614,16 +612,11 @@ retry:
td_savedcred = td->td_ucred;
td->td_ucred = job->cred;
+ job_total_nbytes = job->uiop->uio_resid + job->aio_done;
done = job->aio_done;
- cnt = job->uaiocb.aio_nbytes - done;
- iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done);
- iov.iov_len = cnt;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = 0;
- uio.uio_resid = cnt;
- uio.uio_segflg = UIO_USERSPACE;
- uio.uio_td = td;
+ cnt = job->uiop->uio_resid;
+ job->uiop->uio_offset = 0;
+ job->uiop->uio_td = td;
flags = MSG_NBIO;
/*
@@ -633,26 +626,26 @@ retry:
*/
if (sb == &so->so_rcv) {
- uio.uio_rw = UIO_READ;
ru_before = td->td_ru.ru_msgrcv;
#ifdef MAC
error = mac_socket_check_receive(fp->f_cred, so);
if (error == 0)
#endif
- error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
+ error = soreceive(so, NULL, job->uiop, NULL, NULL,
+ &flags);
if (td->td_ru.ru_msgrcv != ru_before)
job->msgrcv = 1;
} else {
if (!TAILQ_EMPTY(&sb->sb_aiojobq))
flags |= MSG_MORETOCOME;
- uio.uio_rw = UIO_WRITE;
ru_before = td->td_ru.ru_msgsnd;
#ifdef MAC
error = mac_socket_check_send(fp->f_cred, so);
if (error == 0)
#endif
- error = sosend(so, NULL, &uio, NULL, NULL, flags, td);
+ error = sosend(so, NULL, job->uiop, NULL, NULL, flags,
+ td);
if (td->td_ru.ru_msgsnd != ru_before)
job->msgsnd = 1;
if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
@@ -662,7 +655,7 @@ retry:
}
}
- done += cnt - uio.uio_resid;
+ done += cnt - job->uiop->uio_resid;
job->aio_done = done;
td->td_ucred = td_savedcred;
@@ -676,7 +669,7 @@ retry:
* been made, requeue this request at the head of the
* queue to try again when the socket is ready.
*/
- MPASS(done != job->uaiocb.aio_nbytes);
+ MPASS(done != job_total_nbytes);
SOCKBUF_LOCK(sb);
if (done == 0 || !(so->so_state & SS_NBIO)) {
empty_results++;
@@ -782,10 +775,10 @@ soo_aio_cancel(struct kaiocb *job)
so = job->fd_file->f_data;
opcode = job->uaiocb.aio_lio_opcode;
- if (opcode == LIO_READ)
+ if (opcode == LIO_READ || opcode == LIO_READV)
sb = &so->so_rcv;
else {
- MPASS(opcode == LIO_WRITE);
+ MPASS(opcode == LIO_WRITE || opcode == LIO_WRITEV);
sb = &so->so_snd;
}
@@ -817,9 +810,11 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
switch (job->uaiocb.aio_lio_opcode) {
case LIO_READ:
+ case LIO_READV:
sb = &so->so_rcv;
break;
case LIO_WRITE:
+ case LIO_WRITEV:
sb = &so->so_snd;
break;
default:
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index b7ea5e939635..aaa0a1277461 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -1477,7 +1477,17 @@
_In_opt_ struct sigevent *sig
);
}
-258-271 AUE_NULL UNIMPL nosys
+258 AUE_AIO_WRITEV STD {
+ int aio_writev(
+ _Inout_ struct aiocb *aiocbp
+ );
+ }
+259 AUE_AIO_READV STD {
+ int aio_readv(
+ _Inout_ struct aiocb *aiocbp
+ );
+ }
+260-271 AUE_NULL UNIMPL nosys
272 AUE_O_GETDENTS COMPAT11 {
int getdents(
int fd,
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 37e19557d807..d83c9d725e68 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -292,7 +292,7 @@ struct kaioinfo {
* Different ABIs provide their own operations.
*/
struct aiocb_ops {
- int (*aio_copyin)(struct aiocb *ujob, struct aiocb *kjob);
+ int (*aio_copyin)(struct aiocb *ujob, struct kaiocb *kjob, int ty);
long (*fetch_status)(struct aiocb *ujob);
long (*fetch_error)(struct aiocb *ujob);
int (*store_status)(struct aiocb *ujob, long status);
@@ -307,6 +307,7 @@ static struct mtx aio_job_mtx;
static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */
static struct unrhdr *aiod_unr;
+static void aio_biocleanup(struct bio *bp);
void aio_init_aioinfo(struct proc *p);
static int aio_onceonly(void);
static int aio_free_entry(struct kaiocb *job);
@@ -559,6 +560,8 @@ aio_free_entry(struct kaiocb *job)
if (job->fd_file)
fdrop(job->fd_file, curthread);
crfree(job->cred);
+ if (job->uiop != &job->uio)
+ free(job->uiop, M_IOV);
uma_zfree(aiocb_zone, job);
AIO_LOCK(ki);
@@ -754,36 +757,29 @@ aio_process_rw(struct kaiocb *job)
struct thread *td;
struct aiocb *cb;
struct file *fp;
- struct uio auio;
- struct iovec aiov;
ssize_t cnt;
long msgsnd_st, msgsnd_end;
long msgrcv_st, msgrcv_end;
long oublock_st, oublock_end;
long inblock_st, inblock_end;
- int error;
+ int error, opcode;
KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ ||
- job->uaiocb.aio_lio_opcode == LIO_WRITE,
+ job->uaiocb.aio_lio_opcode == LIO_READV ||
+ job->uaiocb.aio_lio_opcode == LIO_WRITE ||
+ job->uaiocb.aio_lio_opcode == LIO_WRITEV,
("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
aio_switch_vmspace(job);
td = curthread;
td_savedcred = td->td_ucred;
td->td_ucred = job->cred;
+ job->uiop->uio_td = td;
cb = &job->uaiocb;
fp = job->fd_file;
- aiov.iov_base = (void *)(uintptr_t)cb->aio_buf;
- aiov.iov_len = cb->aio_nbytes;
-
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = cb->aio_offset;
- auio.uio_resid = cb->aio_nbytes;
- cnt = cb->aio_nbytes;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_td = td;
+ opcode = job->uaiocb.aio_lio_opcode;
+ cnt = job->uiop->uio_resid;
msgrcv_st = td->td_ru.ru_msgrcv;
msgsnd_st = td->td_ru.ru_msgsnd;
@@ -794,17 +790,16 @@ aio_process_rw(struct kaiocb *job)
* aio_aqueue() acquires a reference to the file that is
* released in aio_free_entry().
*/
- if (cb->aio_lio_opcode == LIO_READ) {
- auio.uio_rw = UIO_READ;
- if (auio.uio_resid == 0)
+ if (opcode == LIO_READ || opcode == LIO_READV) {
+ if (job->uiop->uio_resid == 0)
error = 0;
else
- error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td);
+ error = fo_read(fp, job->uiop, fp->f_cred, FOF_OFFSET,
+ td);
} else {
if (fp->f_type == DTYPE_VNODE)
bwillwrite();
- auio.uio_rw = UIO_WRITE;
- error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
+ error = fo_write(fp, job->uiop, fp->f_cred, FOF_OFFSET, td);
}
msgrcv_end = td->td_ru.ru_msgrcv;
msgsnd_end = td->td_ru.ru_msgsnd;
@@ -816,17 +811,18 @@ aio_process_rw(struct kaiocb *job)
job->inblock = inblock_end - inblock_st;
job->outblock = oublock_end - oublock_st;
- if ((error) && (auio.uio_resid != cnt)) {
+ if (error != 0 && job->uiop->uio_resid != cnt) {
if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
error = 0;
- if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) {
+ if (error == EPIPE &&
+ (opcode == LIO_WRITE || opcode == LIO_WRITEV)) {
PROC_LOCK(job->userproc);
kern_psignal(job->userproc, SIGPIPE);
PROC_UNLOCK(job->userproc);
}
}
- cnt -= auio.uio_resid;
+ cnt -= job->uiop->uio_resid;
td->td_ucred = td_savedcred;
if (error)
aio_complete(job, -1, error);
@@ -1210,21 +1206,23 @@ aio_qbio(struct proc *p, struct kaiocb *job)
{
struct aiocb *cb;
struct file *fp;
- struct bio *bp;
struct buf *pbuf;
struct vnode *vp;
struct cdevsw *csw;
struct cdev *dev;
struct kaioinfo *ki;
- struct vm_page **pages;
- int error, npages, poff, ref;
+ struct bio **bios = NULL;
+ off_t offset;
+ int bio_cmd, error, i, iovcnt, opcode, poff, ref;
vm_prot_t prot;
+ bool use_unmapped;
cb = &job->uaiocb;
fp = job->fd_file;
+ opcode = cb->aio_lio_opcode;
- if (!(cb->aio_lio_opcode == LIO_WRITE ||
- cb->aio_lio_opcode == LIO_READ))
+ if (!(opcode == LIO_WRITE || opcode == LIO_WRITEV ||
+ opcode == LIO_READ || opcode == LIO_READV))
return (-1);
if (fp == NULL || fp->f_type != DTYPE_VNODE)
return (-1);
@@ -1234,8 +1232,21 @@ aio_qbio(struct proc *p, struct kaiocb *job)
return (-1);
if (vp->v_bufobj.bo_bsize == 0)
return (-1);
- if (cb->aio_nbytes % vp->v_bufobj.bo_bsize)
+
+ bio_cmd = opcode == LIO_WRITE || opcode == LIO_WRITEV ? BIO_WRITE :
+ BIO_READ;
+ iovcnt = job->uiop->uio_iovcnt;
+ if (iovcnt > max_buf_aio)
return (-1);
+ for (i = 0; i < iovcnt; i++) {
+ if (job->uiop->uio_iov[i].iov_len % vp->v_bufobj.bo_bsize != 0)
+ return (-1);
+ if (job->uiop->uio_iov[i].iov_len > maxphys) {
+ error = -1;
+ return (-1);
+ }
+ }
+ offset = cb->aio_offset;
ref = 0;
csw = devvn_refthread(vp, &dev, &ref);
@@ -1246,89 +1257,106 @@ aio_qbio(struct proc *p, struct kaiocb *job)
error = -1;
goto unref;
}
- if (cb->aio_nbytes > dev->si_iosize_max) {
+ if (job->uiop->uio_resid > dev->si_iosize_max) {
error = -1;
goto unref;
}
ki = p->p_aioinfo;
- poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
- if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
- if (cb->aio_nbytes > maxphys) {
- error = -1;
- goto unref;
- }
+ job->error = 0;
- pbuf = NULL;
- pages = malloc(sizeof(vm_page_t) * (atop(round_page(
- cb->aio_nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO);
- } else {
- if (cb->aio_nbytes > maxphys) {
- error = -1;
- goto unref;
- }
- if (ki->kaio_buffer_count >= max_buf_aio) {
+ use_unmapped = (dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed;
+ if (!use_unmapped) {
+ AIO_LOCK(ki);
+ if (ki->kaio_buffer_count + iovcnt > max_buf_aio) {
+ AIO_UNLOCK(ki);
error = EAGAIN;
goto unref;
}
-
- pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
- BUF_KERNPROC(pbuf);
- AIO_LOCK(ki);
- ki->kaio_buffer_count++;
+ ki->kaio_buffer_count += iovcnt;
AIO_UNLOCK(ki);
- pages = pbuf->b_pages;
- }
- bp = g_alloc_bio();
-
- bp->bio_length = cb->aio_nbytes;
- bp->bio_bcount = cb->aio_nbytes;
- bp->bio_done = aio_biowakeup;
- bp->bio_offset = cb->aio_offset;
- bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ;
- bp->bio_dev = dev;
- bp->bio_caller1 = job;
- bp->bio_caller2 = pbuf;
-
- prot = VM_PROT_READ;
- if (cb->aio_lio_opcode == LIO_READ)
- prot |= VM_PROT_WRITE; /* Less backwards than it looks */
- npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
- (vm_offset_t)cb->aio_buf, bp->bio_length, prot, pages,
- atop(maxphys) + 1);
- if (npages < 0) {
- error = EFAULT;
- goto doerror;
}
- if (pbuf != NULL) {
- pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages);
- bp->bio_data = pbuf->b_data + poff;
- atomic_add_int(&num_buf_aio, 1);
- pbuf->b_npages = npages;
- } else {
- bp->bio_ma = pages;
- bp->bio_ma_n = npages;
- bp->bio_ma_offset = poff;
- bp->bio_data = unmapped_buf;
- bp->bio_flags |= BIO_UNMAPPED;
- atomic_add_int(&num_unmapped_aio, 1);
+
+ bios = malloc(sizeof(struct bio *) * iovcnt, M_TEMP, M_WAITOK);
+ atomic_store_int(&job->nbio, iovcnt);
+ for (i = 0; i < iovcnt; i++) {
+ struct vm_page** pages;
+ struct bio *bp;
+ void *buf;
+ size_t nbytes;
+ int npages;
+
+ buf = job->uiop->uio_iov[i].iov_base;
+ nbytes = job->uiop->uio_iov[i].iov_len;
+
+ bios[i] = g_alloc_bio();
+ bp = bios[i];
+
+ poff = (vm_offset_t)buf & PAGE_MASK;
+ if (use_unmapped) {
+ pbuf = NULL;
+ pages = malloc(sizeof(vm_page_t) * (atop(round_page(
+ nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO);
+ } else {
+ pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
+ BUF_KERNPROC(pbuf);
+ pages = pbuf->b_pages;
+ }
+
+ bp->bio_length = nbytes;
+ bp->bio_bcount = nbytes;
+ bp->bio_done = aio_biowakeup;
+ bp->bio_offset = offset;
+ bp->bio_cmd = bio_cmd;
+ bp->bio_dev = dev;
+ bp->bio_caller1 = job;
+ bp->bio_caller2 = pbuf;
+
+ prot = VM_PROT_READ;
+ if (opcode == LIO_READ || opcode == LIO_READV)
+ prot |= VM_PROT_WRITE; /* Less backwards than it looks */
+ npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ (vm_offset_t)buf, bp->bio_length, prot, pages,
+ atop(maxphys) + 1);
+ if (npages < 0) {
+ if (pbuf != NULL)
+ uma_zfree(pbuf_zone, pbuf);
+ else
+ free(pages, M_TEMP);
+ error = EFAULT;
+ g_destroy_bio(bp);
+ i--;
+ goto destroy_bios;
+ }
+ if (pbuf != NULL) {
+ pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages);
+ bp->bio_data = pbuf->b_data + poff;
+ pbuf->b_npages = npages;
+ atomic_add_int(&num_buf_aio, 1);
+ } else {
+ bp->bio_ma = pages;
+ bp->bio_ma_n = npages;
+ bp->bio_ma_offset = poff;
+ bp->bio_data = unmapped_buf;
+ bp->bio_flags |= BIO_UNMAPPED;
+ atomic_add_int(&num_unmapped_aio, 1);
+ }
+
+ offset += nbytes;
}
/* Perform transfer. */
- csw->d_strategy(bp);
+ for (i = 0; i < iovcnt; i++)
+ csw->d_strategy(bios[i]);
+ free(bios, M_TEMP);
+
dev_relthread(dev, ref);
return (0);
-doerror:
- if (pbuf != NULL) {
- AIO_LOCK(ki);
- ki->kaio_buffer_count--;
- AIO_UNLOCK(ki);
- uma_zfree(pbuf_zone, pbuf);
- } else {
- free(pages, M_TEMP);
- }
- g_destroy_bio(bp);
+destroy_bios:
+ for (; i >= 0; i--)
+ aio_biocleanup(bios[i]);
+ free(bios, M_TEMP);
unref:
dev_relthread(dev, ref);
return (error);
@@ -1362,25 +1390,39 @@ convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig)
}
static int
-aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
+aiocb_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
+ int type __unused)
{
struct oaiocb *ojob;
+ struct aiocb *kcb = &kjob->uaiocb;
int error;
- bzero(kjob, sizeof(struct aiocb));
- error = copyin(ujob, kjob, sizeof(struct oaiocb));
+ bzero(kcb, sizeof(struct aiocb));
+ error = copyin(ujob, kcb, sizeof(struct oaiocb));
if (error)
return (error);
- ojob = (struct oaiocb *)kjob;
- return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent));
+ /* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */
+ ojob = (struct oaiocb *)kcb;
+ return (convert_old_sigevent(&ojob->aio_sigevent, &kcb->aio_sigevent));
}
#endif
static int
-aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob)
+aiocb_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
{
+ struct aiocb *kcb = &kjob->uaiocb;
+ int error;
+
+ error = copyin(ujob, kcb, sizeof(struct aiocb));
+ if (error)
+ return (error);
+ if (type == LIO_READV || type == LIO_WRITEV) {
+ /* malloc a uio and copy in the iovec */
+ error = copyinuio(__DEVOLATILE(struct iovec*, kcb->aio_iov),
+ kcb->aio_iovcnt, &kjob->uiop);
+ }
- return (copyin(ujob, kjob, sizeof(struct aiocb)));
+ return (error);
}
static long
@@ -1456,7 +1498,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
int type, struct aiocb_ops *ops)
{
struct proc *p = td->td_proc;
- struct file *fp;
+ struct file *fp = NULL;
struct kaiocb *job;
struct kaioinfo *ki;
struct kevent kev;
@@ -1477,39 +1519,35 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
if (num_queue_count >= max_queue_count ||
ki->kaio_count >= max_aio_queue_per_proc) {
- ops->store_error(ujob, EAGAIN);
- return (EAGAIN);
+ error = EAGAIN;
+ goto err1;
}
job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
knlist_init_mtx(&job->klist, AIO_MTX(ki));
- error = ops->aio_copyin(ujob, &job->uaiocb);
- if (error) {
- ops->store_error(ujob, error);
- uma_zfree(aiocb_zone, job);
- return (error);
- }
+ error = ops->aio_copyin(ujob, job, type);
+ if (error)
+ goto err2;
if (job->uaiocb.aio_nbytes > IOSIZE_MAX) {
- uma_zfree(aiocb_zone, job);
- return (EINVAL);
+ error = EINVAL;
+ goto err2;
}
if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT &&
job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL &&
job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID &&
job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) {
- ops->store_error(ujob, EINVAL);
- uma_zfree(aiocb_zone, job);
- return (EINVAL);
+ error = EINVAL;
+ goto err2;
}
if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) &&
!_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) {
- uma_zfree(aiocb_zone, job);
- return (EINVAL);
+ error = EINVAL;
+ goto err2;
}
ksiginfo_init(&job->ksi);
@@ -1533,16 +1571,17 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
fd = job->uaiocb.aio_fildes;
switch (opcode) {
case LIO_WRITE:
+ case LIO_WRITEV:
error = fget_write(td, fd, &cap_pwrite_rights, &fp);
break;
case LIO_READ:
+ case LIO_READV:
error = fget_read(td, fd, &cap_pread_rights, &fp);
break;
case LIO_SYNC:
error = fget(td, fd, &cap_fsync_rights, &fp);
break;
case LIO_MLOCK:
- fp = NULL;
break;
case LIO_NOP:
error = fget(td, fd, &cap_no_rights, &fp);
@@ -1550,22 +1589,20 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
default:
error = EINVAL;
}
- if (error) {
- uma_zfree(aiocb_zone, job);
- ops->store_error(ujob, error);
- return (error);
- }
+ if (error)
+ goto err3;
if (opcode == LIO_SYNC && fp->f_vnode == NULL) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
- if ((opcode == LIO_READ || opcode == LIO_WRITE) &&
+ if ((opcode == LIO_READ || opcode == LIO_READV ||
+ opcode == LIO_WRITE || opcode == LIO_WRITEV) &&
job->uaiocb.aio_offset < 0 &&
(fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
job->fd_file = fp;
@@ -1577,12 +1614,13 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
error = ops->store_kernelinfo(ujob, jid);
if (error) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid;
if (opcode == LIO_NOP) {
fdrop(fp, td);
+ MPASS(job->uiop == &job->uio || job->uiop == NULL);
uma_zfree(aiocb_zone, job);
return (0);
}
@@ -1592,7 +1630,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags;
if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue;
memset(&kev, 0, sizeof(kev));
@@ -1603,7 +1641,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr;
error = kqfd_register(kqfd, &kev, td, M_WAITOK);
if (error)
- goto aqueue_fail;
+ goto err3;
no_kqueue:
@@ -1614,6 +1652,39 @@ no_kqueue:
job->jobflags = KAIOCB_QUEUEING;
job->lio = lj;
+ switch (opcode) {
+ case LIO_READV:
+ case LIO_WRITEV:
+ /* Use the uio copied in by aio_copyin */
+ MPASS(job->uiop != &job->uio && job->uiop != NULL);
+ break;
+ case LIO_READ:
+ case LIO_WRITE:
+ /* Setup the inline uio */
+ job->iov[0].iov_base = (void *)(uintptr_t)job->uaiocb.aio_buf;
+ job->iov[0].iov_len = job->uaiocb.aio_nbytes;
+ job->uio.uio_iov = job->iov;
+ job->uio.uio_iovcnt = 1;
+ job->uio.uio_resid = job->uaiocb.aio_nbytes;
+ job->uio.uio_segflg = UIO_USERSPACE;
+ /* FALLTHROUGH */
+ default:
+ job->uiop = &job->uio;
+ break;
+ }
+ switch (opcode) {
+ case LIO_READ:
+ case LIO_READV:
+ job->uiop->uio_rw = UIO_READ;
+ break;
+ case LIO_WRITE:
+ case LIO_WRITEV:
+ job->uiop->uio_rw = UIO_WRITE;
+ break;
+ }
+ job->uiop->uio_offset = job->uaiocb.aio_offset;
+ job->uiop->uio_td = td;
+
if (opcode == LIO_MLOCK) {
aio_schedule(job, aio_process_mlock);
error = 0;
@@ -1622,7 +1693,7 @@ no_kqueue:
else
error = fo_aio_queue(fp, job);
if (error)
- goto aqueue_fail;
+ goto err3;
AIO_LOCK(ki);
job->jobflags &= ~KAIOCB_QUEUEING;
@@ -1643,11 +1714,15 @@ no_kqueue:
AIO_UNLOCK(ki);
return (0);
-aqueue_fail:
- knlist_delete(&job->klist, curthread, 0);
+err3:
if (fp)
fdrop(fp, td);
+ knlist_delete(&job->klist, curthread, 0);
+err2:
+ if (job->uiop != &job->uio)
+ free(job->uiop, M_IOV);
uma_zfree(aiocb_zone, job);
+err1:
ops->store_error(ujob, error);
return (error);
}
@@ -1723,7 +1798,9 @@ aio_queue_file(struct file *fp, struct kaiocb *job)
switch (job->uaiocb.aio_lio_opcode) {
case LIO_READ:
+ case LIO_READV:
case LIO_WRITE:
+ case LIO_WRITEV:
aio_schedule(job, aio_process_rw);
error = 0;
break;
@@ -2097,6 +2174,13 @@ sys_aio_read(struct thread *td, struct aio_read_args *uap)
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops));
}
+int
+sys_aio_readv(struct thread *td, struct aio_readv_args *uap)
+{
+
+ return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READV, &aiocb_ops));
+}
+
/* syscall - asynchronous write to a file (REALTIME) */
#ifdef COMPAT_FREEBSD6
int
@@ -2116,6 +2200,13 @@ sys_aio_write(struct thread *td, struct aio_write_args *uap)
}
int
+sys_aio_writev(struct thread *td, struct aio_writev_args *uap)
+{
+
+ return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITEV, &aiocb_ops));
+}
+
+int
sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap)
{
@@ -2337,13 +2428,11 @@ sys_lio_listio(struct thread *td, struct lio_listio_args *uap)
}
static void
-aio_biowakeup(struct bio *bp)
+aio_biocleanup(struct bio *bp)
{
struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
struct kaioinfo *ki;
- struct buf *pbuf = (struct buf*)bp->bio_caller2;
- size_t nbytes;
- int error, nblks;
+ struct buf *pbuf = (struct buf *)bp->bio_caller2;
/* Release mapping into kernel space. */
if (pbuf != NULL) {
@@ -2362,23 +2451,47 @@ aio_biowakeup(struct bio *bp)
free(bp->bio_ma, M_TEMP);
atomic_subtract_int(&num_unmapped_aio, 1);
}
+ g_destroy_bio(bp);
+}
- nbytes = job->uaiocb.aio_nbytes - bp->bio_resid;
- error = 0;
- if (bp->bio_flags & BIO_ERROR)
- error = bp->bio_error;
+static void
+aio_biowakeup(struct bio *bp)
+{
+ struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
+ size_t nbytes;
+ long bcount = bp->bio_bcount;
+ long resid = bp->bio_resid;
+ int error, opcode, nblks;
+ int bio_error = bp->bio_error;
+ uint16_t flags = bp->bio_flags;
+
+ opcode = job->uaiocb.aio_lio_opcode;
+
+ aio_biocleanup(bp);
+
+	nbytes = bcount - resid;
+ atomic_add_acq_long(&job->nbytes, nbytes);
nblks = btodb(nbytes);
- if (job->uaiocb.aio_lio_opcode == LIO_WRITE)
- job->outblock += nblks;
+ error = 0;
+ /*
+ * If multiple bios experienced an error, the job will reflect the
+ * error of whichever failed bio completed last.
+ */
+ if (flags & BIO_ERROR)
+ atomic_set_int(&job->error, bio_error);
+ if (opcode == LIO_WRITE || opcode == LIO_WRITEV)
+ atomic_add_int(&job->outblock, nblks);
else
- job->inblock += nblks;
+ atomic_add_int(&job->inblock, nblks);
+ atomic_subtract_int(&job->nbio, 1);
- if (error)
- aio_complete(job, -1, error);
- else
- aio_complete(job, nbytes, 0);
- g_destroy_bio(bp);
+ if (atomic_load_int(&job->nbio) == 0) {
+ if (atomic_load_int(&job->error))
+ aio_complete(job, -1, job->error);
+ else
+ aio_complete(job, atomic_load_long(&job->nbytes), 0);
+ }
}
/* syscall - wait for the next completion of an aio request */
@@ -2614,8 +2727,8 @@ typedef struct oaiocb32 {
typedef struct aiocb32 {
int32_t aio_fildes; /* File descriptor */
uint64_t aio_offset __packed; /* File offset for I/O */
- uint32_t aio_buf; /* I/O buffer in process space */
- uint32_t aio_nbytes; /* Number of bytes for I/O */
+ uint32_t aio_buf; /* I/O buffer in process space */
+ uint32_t aio_nbytes; /* Number of bytes for I/O */
int __spare__[2];
uint32_t __spare2__;
int aio_lio_opcode; /* LIO opcode */
@@ -2652,49 +2765,67 @@ convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig)
}
static int
-aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
+aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
+ int type __unused)
{
struct oaiocb32 job32;
+ struct aiocb *kcb = &kjob->uaiocb;
int error;
- bzero(kjob, sizeof(struct aiocb));
+ bzero(kcb, sizeof(struct aiocb));
error = copyin(ujob, &job32, sizeof(job32));
if (error)
return (error);
- CP(job32, *kjob, aio_fildes);
- CP(job32, *kjob, aio_offset);
- PTRIN_CP(job32, *kjob, aio_buf);
- CP(job32, *kjob, aio_nbytes);
- CP(job32, *kjob, aio_lio_opcode);
- CP(job32, *kjob, aio_reqprio);
- CP(job32, *kjob, _aiocb_private.status);
- CP(job32, *kjob, _aiocb_private.error);
- PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
+ /* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */
+
+ CP(job32, *kcb, aio_fildes);
+ CP(job32, *kcb, aio_offset);
+ PTRIN_CP(job32, *kcb, aio_buf);
+ CP(job32, *kcb, aio_nbytes);
+ CP(job32, *kcb, aio_lio_opcode);
+ CP(job32, *kcb, aio_reqprio);
+ CP(job32, *kcb, _aiocb_private.status);
+ CP(job32, *kcb, _aiocb_private.error);
+ PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
return (convert_old_sigevent32(&job32.aio_sigevent,
- &kjob->aio_sigevent));
+ &kcb->aio_sigevent));
}
#endif
static int
-aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob)
+aiocb32_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
{
struct aiocb32 job32;
+ struct aiocb *kcb = &kjob->uaiocb;
+ struct iovec32 *iov32;
int error;
error = copyin(ujob, &job32, sizeof(job32));
if (error)
return (error);
- CP(job32, *kjob, aio_fildes);
- CP(job32, *kjob, aio_offset);
- PTRIN_CP(job32, *kjob, aio_buf);
- CP(job32, *kjob, aio_nbytes);
- CP(job32, *kjob, aio_lio_opcode);
- CP(job32, *kjob, aio_reqprio);
- CP(job32, *kjob, _aiocb_private.status);
- CP(job32, *kjob, _aiocb_private.error);
- PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
- return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent));
+ CP(job32, *kcb, aio_fildes);
+ CP(job32, *kcb, aio_offset);
+ CP(job32, *kcb, aio_lio_opcode);
+ if (type == LIO_READV || type == LIO_WRITEV) {
+ iov32 = PTRIN(job32.aio_iov);
+ CP(job32, *kcb, aio_iovcnt);
+ /* malloc a uio and copy in the iovec */
+ error = freebsd32_copyinuio(iov32,
+ kcb->aio_iovcnt, &kjob->uiop);
+ if (error)
+ return (error);
+ } else {
+ PTRIN_CP(job32, *kcb, aio_buf);
+ CP(job32, *kcb, aio_nbytes);
+ }
+ CP(job32, *kcb, aio_reqprio);
+ CP(job32, *kcb, _aiocb_private.status);
+ CP(job32, *kcb, _aiocb_private.error);
+ PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
+ error = convert_sigevent32(&job32.aio_sigevent, &kcb->aio_sigevent);
+
+ return (error);
}
static long
@@ -2840,6 +2971,14 @@ freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap)
&aiocb32_ops));
}
+int
+freebsd32_aio_readv(struct thread *td, struct freebsd32_aio_readv_args *uap)
+{
+
+ return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READV,
+ &aiocb32_ops));
+}
+
#ifdef COMPAT_FREEBSD6
int
freebsd6_freebsd32_aio_write(struct thread *td,
@@ -2860,6 +2999,14 @@ freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap)
}
int
+freebsd32_aio_writev(struct thread *td, struct freebsd32_aio_writev_args *uap)
+{
+
+ return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITEV,
+ &aiocb32_ops));
+}
+
+int
freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap)
{