aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/bsm/audit_kevents.h2
-rw-r--r--sys/compat/freebsd32/freebsd32_misc.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_util.h2
-rw-r--r--sys/compat/freebsd32/syscalls.master6
-rw-r--r--sys/kern/capabilities.conf2
-rw-r--r--sys/kern/sys_socket.c35
-rw-r--r--sys/kern/syscalls.master12
-rw-r--r--sys/kern/vfs_aio.c497
-rw-r--r--sys/sys/aio.h22
9 files changed, 380 insertions, 200 deletions
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
index 5b37329078a1..eeb928ecafdc 100644
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -660,6 +660,8 @@
#define AUE_REALPATHAT 43264 /* FreeBSD-specific. */
#define AUE_CLOSERANGE 43265 /* FreeBSD-specific. */
#define AUE_SPECIALFD 43266 /* FreeBSD-specific. */
+#define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */
+#define AUE_AIO_READV 43268 /* FreeBSD-specific. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 62fab95c68d1..14afd433d9f1 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -1070,7 +1070,7 @@ freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
return (error);
}
-static int
+int
freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop)
{
struct iovec32 iov32;
diff --git a/sys/compat/freebsd32/freebsd32_util.h b/sys/compat/freebsd32/freebsd32_util.h
index a66038d4d36a..b126fbde0857 100644
--- a/sys/compat/freebsd32/freebsd32_util.h
+++ b/sys/compat/freebsd32/freebsd32_util.h
@@ -116,6 +116,8 @@ int freebsd32_copyout_strings(struct image_params *imgp,
uintptr_t *stack_base);
int freebsd32_copyiniov(struct iovec32 *iovp, u_int iovcnt,
struct iovec **iov, int error);
+int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt,
+ struct uio **uiop);
void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32);
struct image_args;
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index f4339795781a..ca0db9a76b1e 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -493,8 +493,10 @@
257 AUE_LIO_LISTIO STD { int freebsd32_lio_listio(int mode, \
struct aiocb32 * const *acb_list, \
int nent, struct sigevent32 *sig); }
-258 AUE_NULL UNIMPL nosys
-259 AUE_NULL UNIMPL nosys
+258 AUE_AIO_WRITEV STD { int freebsd32_aio_writev( \
+ struct aiocb32 *aiocbp); }
+259 AUE_AIO_READV STD { int freebsd32_aio_readv( \
+ struct aiocb32 *aiocbp); }
260 AUE_NULL UNIMPL nosys
261 AUE_NULL UNIMPL nosys
262 AUE_NULL UNIMPL nosys
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
index 3d552255d823..602ec7088fc6 100644
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -100,6 +100,8 @@ aio_return
aio_suspend
aio_waitcomplete
aio_write
+aio_writev
+aio_readv
##
## audit(2) is a global operation, submitting to the global trail, but it is
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 0fe200c119d2..18803b6a5ac0 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -600,9 +600,7 @@ soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job)
struct ucred *td_savedcred;
struct thread *td;
struct file *fp;
- struct uio uio;
- struct iovec iov;
- size_t cnt, done;
+ size_t cnt, done, job_total_nbytes;
long ru_before;
int error, flags;
@@ -614,16 +612,11 @@ retry:
td_savedcred = td->td_ucred;
td->td_ucred = job->cred;
+ job_total_nbytes = job->uiop->uio_resid + job->aio_done;
done = job->aio_done;
- cnt = job->uaiocb.aio_nbytes - done;
- iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done);
- iov.iov_len = cnt;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = 0;
- uio.uio_resid = cnt;
- uio.uio_segflg = UIO_USERSPACE;
- uio.uio_td = td;
+ cnt = job->uiop->uio_resid;
+ job->uiop->uio_offset = 0;
+ job->uiop->uio_td = td;
flags = MSG_NBIO;
/*
@@ -633,26 +626,26 @@ retry:
*/
if (sb == &so->so_rcv) {
- uio.uio_rw = UIO_READ;
ru_before = td->td_ru.ru_msgrcv;
#ifdef MAC
error = mac_socket_check_receive(fp->f_cred, so);
if (error == 0)
#endif
- error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
+ error = soreceive(so, NULL, job->uiop, NULL, NULL,
+ &flags);
if (td->td_ru.ru_msgrcv != ru_before)
job->msgrcv = 1;
} else {
if (!TAILQ_EMPTY(&sb->sb_aiojobq))
flags |= MSG_MORETOCOME;
- uio.uio_rw = UIO_WRITE;
ru_before = td->td_ru.ru_msgsnd;
#ifdef MAC
error = mac_socket_check_send(fp->f_cred, so);
if (error == 0)
#endif
- error = sosend(so, NULL, &uio, NULL, NULL, flags, td);
+ error = sosend(so, NULL, job->uiop, NULL, NULL, flags,
+ td);
if (td->td_ru.ru_msgsnd != ru_before)
job->msgsnd = 1;
if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
@@ -662,7 +655,7 @@ retry:
}
}
- done += cnt - uio.uio_resid;
+ done += cnt - job->uiop->uio_resid;
job->aio_done = done;
td->td_ucred = td_savedcred;
@@ -676,7 +669,7 @@ retry:
* been made, requeue this request at the head of the
* queue to try again when the socket is ready.
*/
- MPASS(done != job->uaiocb.aio_nbytes);
+ MPASS(done != job_total_nbytes);
SOCKBUF_LOCK(sb);
if (done == 0 || !(so->so_state & SS_NBIO)) {
empty_results++;
@@ -782,10 +775,10 @@ soo_aio_cancel(struct kaiocb *job)
so = job->fd_file->f_data;
opcode = job->uaiocb.aio_lio_opcode;
- if (opcode == LIO_READ)
+ if (opcode == LIO_READ || opcode == LIO_READV)
sb = &so->so_rcv;
else {
- MPASS(opcode == LIO_WRITE);
+ MPASS(opcode == LIO_WRITE || opcode == LIO_WRITEV);
sb = &so->so_snd;
}
@@ -817,9 +810,11 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
switch (job->uaiocb.aio_lio_opcode) {
case LIO_READ:
+ case LIO_READV:
sb = &so->so_rcv;
break;
case LIO_WRITE:
+ case LIO_WRITEV:
sb = &so->so_snd;
break;
default:
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index b7ea5e939635..aaa0a1277461 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -1477,7 +1477,17 @@
_In_opt_ struct sigevent *sig
);
}
-258-271 AUE_NULL UNIMPL nosys
+258 AUE_AIO_WRITEV STD {
+ int aio_writev(
+ _Inout_ struct aiocb *aiocbp
+ );
+ }
+259 AUE_AIO_READV STD {
+ int aio_readv(
+ _Inout_ struct aiocb *aiocbp
+ );
+ }
+260-271 AUE_NULL UNIMPL nosys
272 AUE_O_GETDENTS COMPAT11 {
int getdents(
int fd,
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 37e19557d807..d83c9d725e68 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -292,7 +292,7 @@ struct kaioinfo {
* Different ABIs provide their own operations.
*/
struct aiocb_ops {
- int (*aio_copyin)(struct aiocb *ujob, struct aiocb *kjob);
+ int (*aio_copyin)(struct aiocb *ujob, struct kaiocb *kjob, int ty);
long (*fetch_status)(struct aiocb *ujob);
long (*fetch_error)(struct aiocb *ujob);
int (*store_status)(struct aiocb *ujob, long status);
@@ -307,6 +307,7 @@ static struct mtx aio_job_mtx;
static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */
static struct unrhdr *aiod_unr;
+static void aio_biocleanup(struct bio *bp);
void aio_init_aioinfo(struct proc *p);
static int aio_onceonly(void);
static int aio_free_entry(struct kaiocb *job);
@@ -559,6 +560,8 @@ aio_free_entry(struct kaiocb *job)
if (job->fd_file)
fdrop(job->fd_file, curthread);
crfree(job->cred);
+ if (job->uiop != &job->uio)
+ free(job->uiop, M_IOV);
uma_zfree(aiocb_zone, job);
AIO_LOCK(ki);
@@ -754,36 +757,29 @@ aio_process_rw(struct kaiocb *job)
struct thread *td;
struct aiocb *cb;
struct file *fp;
- struct uio auio;
- struct iovec aiov;
ssize_t cnt;
long msgsnd_st, msgsnd_end;
long msgrcv_st, msgrcv_end;
long oublock_st, oublock_end;
long inblock_st, inblock_end;
- int error;
+ int error, opcode;
KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ ||
- job->uaiocb.aio_lio_opcode == LIO_WRITE,
+ job->uaiocb.aio_lio_opcode == LIO_READV ||
+ job->uaiocb.aio_lio_opcode == LIO_WRITE ||
+ job->uaiocb.aio_lio_opcode == LIO_WRITEV,
("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
aio_switch_vmspace(job);
td = curthread;
td_savedcred = td->td_ucred;
td->td_ucred = job->cred;
+ job->uiop->uio_td = td;
cb = &job->uaiocb;
fp = job->fd_file;
- aiov.iov_base = (void *)(uintptr_t)cb->aio_buf;
- aiov.iov_len = cb->aio_nbytes;
-
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = cb->aio_offset;
- auio.uio_resid = cb->aio_nbytes;
- cnt = cb->aio_nbytes;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_td = td;
+ opcode = job->uaiocb.aio_lio_opcode;
+ cnt = job->uiop->uio_resid;
msgrcv_st = td->td_ru.ru_msgrcv;
msgsnd_st = td->td_ru.ru_msgsnd;
@@ -794,17 +790,16 @@ aio_process_rw(struct kaiocb *job)
* aio_aqueue() acquires a reference to the file that is
* released in aio_free_entry().
*/
- if (cb->aio_lio_opcode == LIO_READ) {
- auio.uio_rw = UIO_READ;
- if (auio.uio_resid == 0)
+ if (opcode == LIO_READ || opcode == LIO_READV) {
+ if (job->uiop->uio_resid == 0)
error = 0;
else
- error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td);
+ error = fo_read(fp, job->uiop, fp->f_cred, FOF_OFFSET,
+ td);
} else {
if (fp->f_type == DTYPE_VNODE)
bwillwrite();
- auio.uio_rw = UIO_WRITE;
- error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
+ error = fo_write(fp, job->uiop, fp->f_cred, FOF_OFFSET, td);
}
msgrcv_end = td->td_ru.ru_msgrcv;
msgsnd_end = td->td_ru.ru_msgsnd;
@@ -816,17 +811,18 @@ aio_process_rw(struct kaiocb *job)
job->inblock = inblock_end - inblock_st;
job->outblock = oublock_end - oublock_st;
- if ((error) && (auio.uio_resid != cnt)) {
+ if (error != 0 && job->uiop->uio_resid != cnt) {
if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
error = 0;
- if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) {
+ if (error == EPIPE &&
+ (opcode == LIO_WRITE || opcode == LIO_WRITEV)) {
PROC_LOCK(job->userproc);
kern_psignal(job->userproc, SIGPIPE);
PROC_UNLOCK(job->userproc);
}
}
- cnt -= auio.uio_resid;
+ cnt -= job->uiop->uio_resid;
td->td_ucred = td_savedcred;
if (error)
aio_complete(job, -1, error);
@@ -1210,21 +1206,23 @@ aio_qbio(struct proc *p, struct kaiocb *job)
{
struct aiocb *cb;
struct file *fp;
- struct bio *bp;
struct buf *pbuf;
struct vnode *vp;
struct cdevsw *csw;
struct cdev *dev;
struct kaioinfo *ki;
- struct vm_page **pages;
- int error, npages, poff, ref;
+ struct bio **bios = NULL;
+ off_t offset;
+ int bio_cmd, error, i, iovcnt, opcode, poff, ref;
vm_prot_t prot;
+ bool use_unmapped;
cb = &job->uaiocb;
fp = job->fd_file;
+ opcode = cb->aio_lio_opcode;
- if (!(cb->aio_lio_opcode == LIO_WRITE ||
- cb->aio_lio_opcode == LIO_READ))
+ if (!(opcode == LIO_WRITE || opcode == LIO_WRITEV ||
+ opcode == LIO_READ || opcode == LIO_READV))
return (-1);
if (fp == NULL || fp->f_type != DTYPE_VNODE)
return (-1);
@@ -1234,8 +1232,21 @@ aio_qbio(struct proc *p, struct kaiocb *job)
return (-1);
if (vp->v_bufobj.bo_bsize == 0)
return (-1);
- if (cb->aio_nbytes % vp->v_bufobj.bo_bsize)
+
+ bio_cmd = opcode == LIO_WRITE || opcode == LIO_WRITEV ? BIO_WRITE :
+ BIO_READ;
+ iovcnt = job->uiop->uio_iovcnt;
+ if (iovcnt > max_buf_aio)
return (-1);
+ for (i = 0; i < iovcnt; i++) {
+ if (job->uiop->uio_iov[i].iov_len % vp->v_bufobj.bo_bsize != 0)
+ return (-1);
+ if (job->uiop->uio_iov[i].iov_len > maxphys) {
+ error = -1;
+ return (-1);
+ }
+ }
+ offset = cb->aio_offset;
ref = 0;
csw = devvn_refthread(vp, &dev, &ref);
@@ -1246,89 +1257,106 @@ aio_qbio(struct proc *p, struct kaiocb *job)
error = -1;
goto unref;
}
- if (cb->aio_nbytes > dev->si_iosize_max) {
+ if (job->uiop->uio_resid > dev->si_iosize_max) {
error = -1;
goto unref;
}
ki = p->p_aioinfo;
- poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
- if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
- if (cb->aio_nbytes > maxphys) {
- error = -1;
- goto unref;
- }
+ job->error = 0;
- pbuf = NULL;
- pages = malloc(sizeof(vm_page_t) * (atop(round_page(
- cb->aio_nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO);
- } else {
- if (cb->aio_nbytes > maxphys) {
- error = -1;
- goto unref;
- }
- if (ki->kaio_buffer_count >= max_buf_aio) {
+ use_unmapped = (dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed;
+ if (!use_unmapped) {
+ AIO_LOCK(ki);
+ if (ki->kaio_buffer_count + iovcnt > max_buf_aio) {
+ AIO_UNLOCK(ki);
error = EAGAIN;
goto unref;
}
-
- pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
- BUF_KERNPROC(pbuf);
- AIO_LOCK(ki);
- ki->kaio_buffer_count++;
+ ki->kaio_buffer_count += iovcnt;
AIO_UNLOCK(ki);
- pages = pbuf->b_pages;
- }
- bp = g_alloc_bio();
-
- bp->bio_length = cb->aio_nbytes;
- bp->bio_bcount = cb->aio_nbytes;
- bp->bio_done = aio_biowakeup;
- bp->bio_offset = cb->aio_offset;
- bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ;
- bp->bio_dev = dev;
- bp->bio_caller1 = job;
- bp->bio_caller2 = pbuf;
-
- prot = VM_PROT_READ;
- if (cb->aio_lio_opcode == LIO_READ)
- prot |= VM_PROT_WRITE; /* Less backwards than it looks */
- npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
- (vm_offset_t)cb->aio_buf, bp->bio_length, prot, pages,
- atop(maxphys) + 1);
- if (npages < 0) {
- error = EFAULT;
- goto doerror;
}
- if (pbuf != NULL) {
- pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages);
- bp->bio_data = pbuf->b_data + poff;
- atomic_add_int(&num_buf_aio, 1);
- pbuf->b_npages = npages;
- } else {
- bp->bio_ma = pages;
- bp->bio_ma_n = npages;
- bp->bio_ma_offset = poff;
- bp->bio_data = unmapped_buf;
- bp->bio_flags |= BIO_UNMAPPED;
- atomic_add_int(&num_unmapped_aio, 1);
+
+ bios = malloc(sizeof(struct bio *) * iovcnt, M_TEMP, M_WAITOK);
+ atomic_store_int(&job->nbio, iovcnt);
+ for (i = 0; i < iovcnt; i++) {
+ struct vm_page** pages;
+ struct bio *bp;
+ void *buf;
+ size_t nbytes;
+ int npages;
+
+ buf = job->uiop->uio_iov[i].iov_base;
+ nbytes = job->uiop->uio_iov[i].iov_len;
+
+ bios[i] = g_alloc_bio();
+ bp = bios[i];
+
+ poff = (vm_offset_t)buf & PAGE_MASK;
+ if (use_unmapped) {
+ pbuf = NULL;
+ pages = malloc(sizeof(vm_page_t) * (atop(round_page(
+ nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO);
+ } else {
+ pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
+ BUF_KERNPROC(pbuf);
+ pages = pbuf->b_pages;
+ }
+
+ bp->bio_length = nbytes;
+ bp->bio_bcount = nbytes;
+ bp->bio_done = aio_biowakeup;
+ bp->bio_offset = offset;
+ bp->bio_cmd = bio_cmd;
+ bp->bio_dev = dev;
+ bp->bio_caller1 = job;
+ bp->bio_caller2 = pbuf;
+
+ prot = VM_PROT_READ;
+ if (opcode == LIO_READ || opcode == LIO_READV)
+ prot |= VM_PROT_WRITE; /* Less backwards than it looks */
+ npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ (vm_offset_t)buf, bp->bio_length, prot, pages,
+ atop(maxphys) + 1);
+ if (npages < 0) {
+ if (pbuf != NULL)
+ uma_zfree(pbuf_zone, pbuf);
+ else
+ free(pages, M_TEMP);
+ error = EFAULT;
+ g_destroy_bio(bp);
+ i--;
+ goto destroy_bios;
+ }
+ if (pbuf != NULL) {
+ pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages);
+ bp->bio_data = pbuf->b_data + poff;
+ pbuf->b_npages = npages;
+ atomic_add_int(&num_buf_aio, 1);
+ } else {
+ bp->bio_ma = pages;
+ bp->bio_ma_n = npages;
+ bp->bio_ma_offset = poff;
+ bp->bio_data = unmapped_buf;
+ bp->bio_flags |= BIO_UNMAPPED;
+ atomic_add_int(&num_unmapped_aio, 1);
+ }
+
+ offset += nbytes;
}
/* Perform transfer. */
- csw->d_strategy(bp);
+ for (i = 0; i < iovcnt; i++)
+ csw->d_strategy(bios[i]);
+ free(bios, M_TEMP);
+
dev_relthread(dev, ref);
return (0);
-doerror:
- if (pbuf != NULL) {
- AIO_LOCK(ki);
- ki->kaio_buffer_count--;
- AIO_UNLOCK(ki);
- uma_zfree(pbuf_zone, pbuf);
- } else {
- free(pages, M_TEMP);
- }
- g_destroy_bio(bp);
+destroy_bios:
+ for (; i >= 0; i--)
+ aio_biocleanup(bios[i]);
+ free(bios, M_TEMP);
unref:
dev_relthread(dev, ref);
return (error);
@@ -1362,25 +1390,39 @@ convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig)
}
static int
-aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
+aiocb_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
+ int type __unused)
{
struct oaiocb *ojob;
+ struct aiocb *kcb = &kjob->uaiocb;
int error;
- bzero(kjob, sizeof(struct aiocb));
- error = copyin(ujob, kjob, sizeof(struct oaiocb));
+ bzero(kcb, sizeof(struct aiocb));
+ error = copyin(ujob, kcb, sizeof(struct oaiocb));
if (error)
return (error);
- ojob = (struct oaiocb *)kjob;
- return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent));
+ /* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */
+ ojob = (struct oaiocb *)kcb;
+ return (convert_old_sigevent(&ojob->aio_sigevent, &kcb->aio_sigevent));
}
#endif
static int
-aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob)
+aiocb_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
{
+ struct aiocb *kcb = &kjob->uaiocb;
+ int error;
+
+ error = copyin(ujob, kcb, sizeof(struct aiocb));
+ if (error)
+ return (error);
+ if (type == LIO_READV || type == LIO_WRITEV) {
+ /* malloc a uio and copy in the iovec */
+ error = copyinuio(__DEVOLATILE(struct iovec*, kcb->aio_iov),
+ kcb->aio_iovcnt, &kjob->uiop);
+ }
- return (copyin(ujob, kjob, sizeof(struct aiocb)));
+ return (error);
}
static long
@@ -1456,7 +1498,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
int type, struct aiocb_ops *ops)
{
struct proc *p = td->td_proc;
- struct file *fp;
+ struct file *fp = NULL;
struct kaiocb *job;
struct kaioinfo *ki;
struct kevent kev;
@@ -1477,39 +1519,35 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
if (num_queue_count >= max_queue_count ||
ki->kaio_count >= max_aio_queue_per_proc) {
- ops->store_error(ujob, EAGAIN);
- return (EAGAIN);
+ error = EAGAIN;
+ goto err1;
}
job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
knlist_init_mtx(&job->klist, AIO_MTX(ki));
- error = ops->aio_copyin(ujob, &job->uaiocb);
- if (error) {
- ops->store_error(ujob, error);
- uma_zfree(aiocb_zone, job);
- return (error);
- }
+ error = ops->aio_copyin(ujob, job, type);
+ if (error)
+ goto err2;
if (job->uaiocb.aio_nbytes > IOSIZE_MAX) {
- uma_zfree(aiocb_zone, job);
- return (EINVAL);
+ error = EINVAL;
+ goto err2;
}
if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT &&
job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL &&
job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID &&
job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) {
- ops->store_error(ujob, EINVAL);
- uma_zfree(aiocb_zone, job);
- return (EINVAL);
+ error = EINVAL;
+ goto err2;
}
if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) &&
!_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) {
- uma_zfree(aiocb_zone, job);
- return (EINVAL);
+ error = EINVAL;
+ goto err2;
}
ksiginfo_init(&job->ksi);
@@ -1533,16 +1571,17 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
fd = job->uaiocb.aio_fildes;
switch (opcode) {
case LIO_WRITE:
+ case LIO_WRITEV:
error = fget_write(td, fd, &cap_pwrite_rights, &fp);
break;
case LIO_READ:
+ case LIO_READV:
error = fget_read(td, fd, &cap_pread_rights, &fp);
break;
case LIO_SYNC:
error = fget(td, fd, &cap_fsync_rights, &fp);
break;
case LIO_MLOCK:
- fp = NULL;
break;
case LIO_NOP:
error = fget(td, fd, &cap_no_rights, &fp);
@@ -1550,22 +1589,20 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
default:
error = EINVAL;
}
- if (error) {
- uma_zfree(aiocb_zone, job);
- ops->store_error(ujob, error);
- return (error);
- }
+ if (error)
+ goto err3;
if (opcode == LIO_SYNC && fp->f_vnode == NULL) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
- if ((opcode == LIO_READ || opcode == LIO_WRITE) &&
+ if ((opcode == LIO_READ || opcode == LIO_READV ||
+ opcode == LIO_WRITE || opcode == LIO_WRITEV) &&
job->uaiocb.aio_offset < 0 &&
(fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
job->fd_file = fp;
@@ -1577,12 +1614,13 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
error = ops->store_kernelinfo(ujob, jid);
if (error) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid;
if (opcode == LIO_NOP) {
fdrop(fp, td);
+ MPASS(job->uiop == &job->uio || job->uiop == NULL);
uma_zfree(aiocb_zone, job);
return (0);
}
@@ -1592,7 +1630,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags;
if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) {
error = EINVAL;
- goto aqueue_fail;
+ goto err3;
}
kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue;
memset(&kev, 0, sizeof(kev));
@@ -1603,7 +1641,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr;
error = kqfd_register(kqfd, &kev, td, M_WAITOK);
if (error)
- goto aqueue_fail;
+ goto err3;
no_kqueue:
@@ -1614,6 +1652,39 @@ no_kqueue:
job->jobflags = KAIOCB_QUEUEING;
job->lio = lj;
+ switch (opcode) {
+ case LIO_READV:
+ case LIO_WRITEV:
+ /* Use the uio copied in by aio_copyin */
+ MPASS(job->uiop != &job->uio && job->uiop != NULL);
+ break;
+ case LIO_READ:
+ case LIO_WRITE:
+ /* Setup the inline uio */
+ job->iov[0].iov_base = (void *)(uintptr_t)job->uaiocb.aio_buf;
+ job->iov[0].iov_len = job->uaiocb.aio_nbytes;
+ job->uio.uio_iov = job->iov;
+ job->uio.uio_iovcnt = 1;
+ job->uio.uio_resid = job->uaiocb.aio_nbytes;
+ job->uio.uio_segflg = UIO_USERSPACE;
+ /* FALLTHROUGH */
+ default:
+ job->uiop = &job->uio;
+ break;
+ }
+ switch (opcode) {
+ case LIO_READ:
+ case LIO_READV:
+ job->uiop->uio_rw = UIO_READ;
+ break;
+ case LIO_WRITE:
+ case LIO_WRITEV:
+ job->uiop->uio_rw = UIO_WRITE;
+ break;
+ }
+ job->uiop->uio_offset = job->uaiocb.aio_offset;
+ job->uiop->uio_td = td;
+
if (opcode == LIO_MLOCK) {
aio_schedule(job, aio_process_mlock);
error = 0;
@@ -1622,7 +1693,7 @@ no_kqueue:
else
error = fo_aio_queue(fp, job);
if (error)
- goto aqueue_fail;
+ goto err3;
AIO_LOCK(ki);
job->jobflags &= ~KAIOCB_QUEUEING;
@@ -1643,11 +1714,15 @@ no_kqueue:
AIO_UNLOCK(ki);
return (0);
-aqueue_fail:
- knlist_delete(&job->klist, curthread, 0);
+err3:
if (fp)
fdrop(fp, td);
+ knlist_delete(&job->klist, curthread, 0);
+err2:
+ if (job->uiop != &job->uio)
+ free(job->uiop, M_IOV);
uma_zfree(aiocb_zone, job);
+err1:
ops->store_error(ujob, error);
return (error);
}
@@ -1723,7 +1798,9 @@ aio_queue_file(struct file *fp, struct kaiocb *job)
switch (job->uaiocb.aio_lio_opcode) {
case LIO_READ:
+ case LIO_READV:
case LIO_WRITE:
+ case LIO_WRITEV:
aio_schedule(job, aio_process_rw);
error = 0;
break;
@@ -2097,6 +2174,13 @@ sys_aio_read(struct thread *td, struct aio_read_args *uap)
return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops));
}
+int
+sys_aio_readv(struct thread *td, struct aio_readv_args *uap)
+{
+
+ return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READV, &aiocb_ops));
+}
+
/* syscall - asynchronous write to a file (REALTIME) */
#ifdef COMPAT_FREEBSD6
int
@@ -2116,6 +2200,13 @@ sys_aio_write(struct thread *td, struct aio_write_args *uap)
}
int
+sys_aio_writev(struct thread *td, struct aio_writev_args *uap)
+{
+
+ return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITEV, &aiocb_ops));
+}
+
+int
sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap)
{
@@ -2337,13 +2428,11 @@ sys_lio_listio(struct thread *td, struct lio_listio_args *uap)
}
static void
-aio_biowakeup(struct bio *bp)
+aio_biocleanup(struct bio *bp)
{
struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
struct kaioinfo *ki;
- struct buf *pbuf = (struct buf*)bp->bio_caller2;
- size_t nbytes;
- int error, nblks;
+ struct buf *pbuf = (struct buf *)bp->bio_caller2;
/* Release mapping into kernel space. */
if (pbuf != NULL) {
@@ -2362,23 +2451,47 @@ aio_biowakeup(struct bio *bp)
free(bp->bio_ma, M_TEMP);
atomic_subtract_int(&num_unmapped_aio, 1);
}
+ g_destroy_bio(bp);
+}
- nbytes = job->uaiocb.aio_nbytes - bp->bio_resid;
- error = 0;
- if (bp->bio_flags & BIO_ERROR)
- error = bp->bio_error;
+static void
+aio_biowakeup(struct bio *bp)
+{
+ struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
+ size_t nbytes;
+ long bcount = bp->bio_bcount;
+ long resid = bp->bio_resid;
+ int error, opcode, nblks;
+ int bio_error = bp->bio_error;
+ uint16_t flags = bp->bio_flags;
+
+ opcode = job->uaiocb.aio_lio_opcode;
+
+ aio_biocleanup(bp);
+
+ nbytes =bcount - resid;
+ atomic_add_acq_long(&job->nbytes, nbytes);
nblks = btodb(nbytes);
- if (job->uaiocb.aio_lio_opcode == LIO_WRITE)
- job->outblock += nblks;
+ error = 0;
+ /*
+ * If multiple bios experienced an error, the job will reflect the
+ * error of whichever failed bio completed last.
+ */
+ if (flags & BIO_ERROR)
+ atomic_set_int(&job->error, bio_error);
+ if (opcode == LIO_WRITE || opcode == LIO_WRITEV)
+ atomic_add_int(&job->outblock, nblks);
else
- job->inblock += nblks;
+ atomic_add_int(&job->inblock, nblks);
+ atomic_subtract_int(&job->nbio, 1);
- if (error)
- aio_complete(job, -1, error);
- else
- aio_complete(job, nbytes, 0);
- g_destroy_bio(bp);
+ if (atomic_load_int(&job->nbio) == 0) {
+ if (atomic_load_int(&job->error))
+ aio_complete(job, -1, job->error);
+ else
+ aio_complete(job, atomic_load_long(&job->nbytes), 0);
+ }
}
/* syscall - wait for the next completion of an aio request */
@@ -2614,8 +2727,8 @@ typedef struct oaiocb32 {
typedef struct aiocb32 {
int32_t aio_fildes; /* File descriptor */
uint64_t aio_offset __packed; /* File offset for I/O */
- uint32_t aio_buf; /* I/O buffer in process space */
- uint32_t aio_nbytes; /* Number of bytes for I/O */
+ uint32_t aio_buf; /* I/O buffer in process space */
+ uint32_t aio_nbytes; /* Number of bytes for I/O */
int __spare__[2];
uint32_t __spare2__;
int aio_lio_opcode; /* LIO opcode */
@@ -2652,49 +2765,67 @@ convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig)
}
static int
-aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
+aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
+ int type __unused)
{
struct oaiocb32 job32;
+ struct aiocb *kcb = &kjob->uaiocb;
int error;
- bzero(kjob, sizeof(struct aiocb));
+ bzero(kcb, sizeof(struct aiocb));
error = copyin(ujob, &job32, sizeof(job32));
if (error)
return (error);
- CP(job32, *kjob, aio_fildes);
- CP(job32, *kjob, aio_offset);
- PTRIN_CP(job32, *kjob, aio_buf);
- CP(job32, *kjob, aio_nbytes);
- CP(job32, *kjob, aio_lio_opcode);
- CP(job32, *kjob, aio_reqprio);
- CP(job32, *kjob, _aiocb_private.status);
- CP(job32, *kjob, _aiocb_private.error);
- PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
+ /* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */
+
+ CP(job32, *kcb, aio_fildes);
+ CP(job32, *kcb, aio_offset);
+ PTRIN_CP(job32, *kcb, aio_buf);
+ CP(job32, *kcb, aio_nbytes);
+ CP(job32, *kcb, aio_lio_opcode);
+ CP(job32, *kcb, aio_reqprio);
+ CP(job32, *kcb, _aiocb_private.status);
+ CP(job32, *kcb, _aiocb_private.error);
+ PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
return (convert_old_sigevent32(&job32.aio_sigevent,
- &kjob->aio_sigevent));
+ &kcb->aio_sigevent));
}
#endif
static int
-aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob)
+aiocb32_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
{
struct aiocb32 job32;
+ struct aiocb *kcb = &kjob->uaiocb;
+ struct iovec32 *iov32;
int error;
error = copyin(ujob, &job32, sizeof(job32));
if (error)
return (error);
- CP(job32, *kjob, aio_fildes);
- CP(job32, *kjob, aio_offset);
- PTRIN_CP(job32, *kjob, aio_buf);
- CP(job32, *kjob, aio_nbytes);
- CP(job32, *kjob, aio_lio_opcode);
- CP(job32, *kjob, aio_reqprio);
- CP(job32, *kjob, _aiocb_private.status);
- CP(job32, *kjob, _aiocb_private.error);
- PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
- return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent));
+ CP(job32, *kcb, aio_fildes);
+ CP(job32, *kcb, aio_offset);
+ CP(job32, *kcb, aio_lio_opcode);
+ if (type == LIO_READV || type == LIO_WRITEV) {
+ iov32 = PTRIN(job32.aio_iov);
+ CP(job32, *kcb, aio_iovcnt);
+ /* malloc a uio and copy in the iovec */
+ error = freebsd32_copyinuio(iov32,
+ kcb->aio_iovcnt, &kjob->uiop);
+ if (error)
+ return (error);
+ } else {
+ PTRIN_CP(job32, *kcb, aio_buf);
+ CP(job32, *kcb, aio_nbytes);
+ }
+ CP(job32, *kcb, aio_reqprio);
+ CP(job32, *kcb, _aiocb_private.status);
+ CP(job32, *kcb, _aiocb_private.error);
+ PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
+ error = convert_sigevent32(&job32.aio_sigevent, &kcb->aio_sigevent);
+
+ return (error);
}
static long
@@ -2840,6 +2971,14 @@ freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap)
&aiocb32_ops));
}
+int
+freebsd32_aio_readv(struct thread *td, struct freebsd32_aio_readv_args *uap)
+{
+
+ return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READV,
+ &aiocb32_ops));
+}
+
#ifdef COMPAT_FREEBSD6
int
freebsd6_freebsd32_aio_write(struct thread *td,
@@ -2860,6 +2999,14 @@ freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap)
}
int
+freebsd32_aio_writev(struct thread *td, struct freebsd32_aio_writev_args *uap)
+{
+
+ return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITEV,
+ &aiocb32_ops));
+}
+
+int
freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap)
{
diff --git a/sys/sys/aio.h b/sys/sys/aio.h
index 71afdbf31b35..c0e2b4eaaaf6 100644
--- a/sys/sys/aio.h
+++ b/sys/sys/aio.h
@@ -27,6 +27,7 @@
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/signalvar.h>
+#include <sys/uio.h>
#endif
/*
@@ -45,6 +46,8 @@
#ifdef _KERNEL
#define LIO_SYNC 0x3
#define LIO_MLOCK 0x4
+#define LIO_WRITEV 0x5
+#define LIO_READV 0x6
#endif
/*
@@ -92,7 +95,7 @@ struct __aiocb_private {
typedef struct aiocb {
int aio_fildes; /* File descriptor */
off_t aio_offset; /* File offset for I/O */
- volatile void *aio_buf; /* I/O buffer in process space */
+ volatile void *aio_buf; /* I/O buffer in process space */
size_t aio_nbytes; /* Number of bytes for I/O */
int __spare__[2];
void *__spare2__;
@@ -102,6 +105,9 @@ typedef struct aiocb {
struct sigevent aio_sigevent; /* Signal to deliver */
} aiocb_t;
+#define aio_iov aio_buf /* I/O scatter/gather list */
+#define aio_iovcnt aio_nbytes /* Length of aio_iov */
+
#ifdef _KERNEL
typedef void aio_cancel_fn_t(struct kaiocb *);
@@ -132,11 +138,19 @@ struct kaiocb {
struct aiocb *ujob; /* (*) pointer in userspace of aiocb */
struct knlist klist; /* (a) list of knotes */
struct aiocb uaiocb; /* (*) copy of user I/O control block */
+ struct uio uio; /* (*) storage for non-vectored uio */
+ struct iovec iov[1]; /* (*) storage for non-vectored uio */
+ struct uio *uiop; /* (*) Possibly malloced uio */
ksiginfo_t ksi; /* (a) realtime signal info */
uint64_t seqno; /* (*) job number */
aio_cancel_fn_t *cancel_fn; /* (a) backend cancel function */
aio_handle_fn_t *handle_fn; /* (c) backend handle function */
union { /* Backend-specific data fields */
+ struct { /* BIO backend */
+ int nbio; /* Number of remaining bios */
+ int error; /* Worst error of all bios */
+ long nbytes; /* Bytes completed so far */
+ };
struct { /* fsync() requests */
int pending; /* (a) number of pending I/O */
};
@@ -202,11 +216,17 @@ __BEGIN_DECLS
* Asynchronously read from a file
*/
int aio_read(struct aiocb *);
+#if __BSD_VISIBLE
+int aio_readv(struct aiocb *);
+#endif
/*
* Asynchronously write to file
*/
int aio_write(struct aiocb *);
+#if __BSD_VISIBLE
+int aio_writev(struct aiocb *);
+#endif
/*
* List I/O Asynchronously/synchronously read/write to/from file