diff options
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/capabilities.conf | 2 | ||||
-rw-r--r-- | sys/kern/sys_socket.c | 35 | ||||
-rw-r--r-- | sys/kern/syscalls.master | 12 | ||||
-rw-r--r-- | sys/kern/vfs_aio.c | 497 |
4 files changed, 350 insertions, 196 deletions
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf index 3d552255d823..602ec7088fc6 100644 --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -100,6 +100,8 @@ aio_return aio_suspend aio_waitcomplete aio_write +aio_writev +aio_readv ## ## audit(2) is a global operation, submitting to the global trail, but it is diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 0fe200c119d2..18803b6a5ac0 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -600,9 +600,7 @@ soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job) struct ucred *td_savedcred; struct thread *td; struct file *fp; - struct uio uio; - struct iovec iov; - size_t cnt, done; + size_t cnt, done, job_total_nbytes; long ru_before; int error, flags; @@ -614,16 +612,11 @@ retry: td_savedcred = td->td_ucred; td->td_ucred = job->cred; + job_total_nbytes = job->uiop->uio_resid + job->aio_done; done = job->aio_done; - cnt = job->uaiocb.aio_nbytes - done; - iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done); - iov.iov_len = cnt; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = 0; - uio.uio_resid = cnt; - uio.uio_segflg = UIO_USERSPACE; - uio.uio_td = td; + cnt = job->uiop->uio_resid; + job->uiop->uio_offset = 0; + job->uiop->uio_td = td; flags = MSG_NBIO; /* @@ -633,26 +626,26 @@ retry: */ if (sb == &so->so_rcv) { - uio.uio_rw = UIO_READ; ru_before = td->td_ru.ru_msgrcv; #ifdef MAC error = mac_socket_check_receive(fp->f_cred, so); if (error == 0) #endif - error = soreceive(so, NULL, &uio, NULL, NULL, &flags); + error = soreceive(so, NULL, job->uiop, NULL, NULL, + &flags); if (td->td_ru.ru_msgrcv != ru_before) job->msgrcv = 1; } else { if (!TAILQ_EMPTY(&sb->sb_aiojobq)) flags |= MSG_MORETOCOME; - uio.uio_rw = UIO_WRITE; ru_before = td->td_ru.ru_msgsnd; #ifdef MAC error = mac_socket_check_send(fp->f_cred, so); if (error == 0) #endif - error = sosend(so, NULL, &uio, NULL, NULL, flags, td); + error = sosend(so, NULL, job->uiop, NULL, NULL, flags, + td); if (td->td_ru.ru_msgsnd != ru_before) job->msgsnd = 1; if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) { @@ -662,7 +655,7 @@ retry: } } - done += cnt - uio.uio_resid; + done += cnt - job->uiop->uio_resid; job->aio_done = done; td->td_ucred = td_savedcred; @@ -676,7 +669,7 @@ retry: * been made, requeue this request at the head of the * queue to try again when the socket is ready. */ - MPASS(done != job->uaiocb.aio_nbytes); + MPASS(done != job_total_nbytes); SOCKBUF_LOCK(sb); if (done == 0 || !(so->so_state & SS_NBIO)) { empty_results++; @@ -782,10 +775,10 @@ soo_aio_cancel(struct kaiocb *job) so = job->fd_file->f_data; opcode = job->uaiocb.aio_lio_opcode; - if (opcode == LIO_READ) + if (opcode == LIO_READ || opcode == LIO_READV) sb = &so->so_rcv; else { - MPASS(opcode == LIO_WRITE); + MPASS(opcode == LIO_WRITE || opcode == LIO_WRITEV); sb = &so->so_snd; } @@ -817,9 +810,11 @@ soo_aio_queue(struct file *fp, struct kaiocb *job) switch (job->uaiocb.aio_lio_opcode) { case LIO_READ: + case LIO_READV: sb = &so->so_rcv; break; case LIO_WRITE: + case LIO_WRITEV: sb = &so->so_snd; break; default: diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index b7ea5e939635..aaa0a1277461 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1477,7 +1477,17 @@ _In_opt_ struct sigevent *sig ); } -258-271 AUE_NULL UNIMPL nosys +258 AUE_AIO_WRITEV STD { + int aio_writev( + _Inout_ struct aiocb *aiocbp + ); + } +259 AUE_AIO_READV STD { + int aio_readv( + _Inout_ struct aiocb *aiocbp + ); + } +260-271 AUE_NULL UNIMPL nosys 272 AUE_O_GETDENTS COMPAT11 { int getdents( int fd, diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 37e19557d807..d83c9d725e68 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -292,7 +292,7 @@ struct kaioinfo { * Different ABIs provide their own operations. */ struct aiocb_ops { - int (*aio_copyin)(struct aiocb *ujob, struct aiocb *kjob); + int (*aio_copyin)(struct aiocb *ujob, struct kaiocb *kjob, int ty); long (*fetch_status)(struct aiocb *ujob); long (*fetch_error)(struct aiocb *ujob); int (*store_status)(struct aiocb *ujob, long status); @@ -307,6 +307,7 @@ static struct mtx aio_job_mtx; static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */ static struct unrhdr *aiod_unr; +static void aio_biocleanup(struct bio *bp); void aio_init_aioinfo(struct proc *p); static int aio_onceonly(void); static int aio_free_entry(struct kaiocb *job); @@ -559,6 +560,8 @@ aio_free_entry(struct kaiocb *job) if (job->fd_file) fdrop(job->fd_file, curthread); crfree(job->cred); + if (job->uiop != &job->uio) + free(job->uiop, M_IOV); uma_zfree(aiocb_zone, job); AIO_LOCK(ki); @@ -754,36 +757,29 @@ aio_process_rw(struct kaiocb *job) struct thread *td; struct aiocb *cb; struct file *fp; - struct uio auio; - struct iovec aiov; ssize_t cnt; long msgsnd_st, msgsnd_end; long msgrcv_st, msgrcv_end; long oublock_st, oublock_end; long inblock_st, inblock_end; - int error; + int error, opcode; KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ || - job->uaiocb.aio_lio_opcode == LIO_WRITE, + job->uaiocb.aio_lio_opcode == LIO_READV || + job->uaiocb.aio_lio_opcode == LIO_WRITE || + job->uaiocb.aio_lio_opcode == LIO_WRITEV, ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); aio_switch_vmspace(job); td = curthread; td_savedcred = td->td_ucred; td->td_ucred = job->cred; + job->uiop->uio_td = td; cb = &job->uaiocb; fp = job->fd_file; - aiov.iov_base = (void *)(uintptr_t)cb->aio_buf; - aiov.iov_len = cb->aio_nbytes; - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = cb->aio_offset; - auio.uio_resid = cb->aio_nbytes; - cnt = cb->aio_nbytes; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_td = td; + opcode = job->uaiocb.aio_lio_opcode; + cnt = job->uiop->uio_resid; msgrcv_st = td->td_ru.ru_msgrcv; msgsnd_st = td->td_ru.ru_msgsnd; @@ -794,17 +790,16 @@ aio_process_rw(struct kaiocb *job) * aio_aqueue() acquires a reference to the file that is * released in aio_free_entry(). */ - if (cb->aio_lio_opcode == LIO_READ) { - auio.uio_rw = UIO_READ; - if (auio.uio_resid == 0) + if (opcode == LIO_READ || opcode == LIO_READV) { + if (job->uiop->uio_resid == 0) error = 0; else - error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td); + error = fo_read(fp, job->uiop, fp->f_cred, FOF_OFFSET, + td); } else { if (fp->f_type == DTYPE_VNODE) bwillwrite(); - auio.uio_rw = UIO_WRITE; - error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td); + error = fo_write(fp, job->uiop, fp->f_cred, FOF_OFFSET, td); } msgrcv_end = td->td_ru.ru_msgrcv; msgsnd_end = td->td_ru.ru_msgsnd; @@ -816,17 +811,18 @@ aio_process_rw(struct kaiocb *job) job->inblock = inblock_end - inblock_st; job->outblock = oublock_end - oublock_st; - if ((error) && (auio.uio_resid != cnt)) { + if (error != 0 && job->uiop->uio_resid != cnt) { if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) error = 0; - if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) { + if (error == EPIPE && + (opcode == LIO_WRITE || opcode == LIO_WRITEV)) { PROC_LOCK(job->userproc); kern_psignal(job->userproc, SIGPIPE); PROC_UNLOCK(job->userproc); } } - cnt -= auio.uio_resid; + cnt -= job->uiop->uio_resid; td->td_ucred = td_savedcred; if (error) aio_complete(job, -1, error); @@ -1210,21 +1206,23 @@ aio_qbio(struct proc *p, struct kaiocb *job) { struct aiocb *cb; struct file *fp; - struct bio *bp; struct buf *pbuf; struct vnode *vp; struct cdevsw *csw; struct cdev *dev; struct kaioinfo *ki; - struct vm_page **pages; - int error, npages, poff, ref; + struct bio **bios = NULL; + off_t offset; + int bio_cmd, error, i, iovcnt, opcode, poff, ref; vm_prot_t prot; + bool use_unmapped; cb = &job->uaiocb; fp = job->fd_file; + opcode = cb->aio_lio_opcode; - if (!(cb->aio_lio_opcode == LIO_WRITE || - cb->aio_lio_opcode == LIO_READ)) + if (!(opcode == LIO_WRITE || opcode == LIO_WRITEV || + opcode == LIO_READ || opcode == LIO_READV)) return (-1); if (fp == NULL || fp->f_type != DTYPE_VNODE) return (-1); @@ -1234,8 +1232,21 @@ aio_qbio(struct proc *p, struct kaiocb *job) return (-1); if (vp->v_bufobj.bo_bsize == 0) return (-1); - if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) + + bio_cmd = opcode == LIO_WRITE || opcode == LIO_WRITEV ? BIO_WRITE : + BIO_READ; + iovcnt = job->uiop->uio_iovcnt; + if (iovcnt > max_buf_aio) return (-1); + for (i = 0; i < iovcnt; i++) { + if (job->uiop->uio_iov[i].iov_len % vp->v_bufobj.bo_bsize != 0) + return (-1); + if (job->uiop->uio_iov[i].iov_len > maxphys) { + error = -1; + return (-1); + } + } + offset = cb->aio_offset; ref = 0; csw = devvn_refthread(vp, &dev, &ref); @@ -1246,89 +1257,106 @@ aio_qbio(struct proc *p, struct kaiocb *job) error = -1; goto unref; } - if (cb->aio_nbytes > dev->si_iosize_max) { + if (job->uiop->uio_resid > dev->si_iosize_max) { error = -1; goto unref; } ki = p->p_aioinfo; - poff = (vm_offset_t)cb->aio_buf & PAGE_MASK; - if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { - if (cb->aio_nbytes > maxphys) { - error = -1; - goto unref; - } + job->error = 0; - pbuf = NULL; - pages = malloc(sizeof(vm_page_t) * (atop(round_page( - cb->aio_nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO); - } else { - if (cb->aio_nbytes > maxphys) { - error = -1; - goto unref; - } - if (ki->kaio_buffer_count >= max_buf_aio) { + use_unmapped = (dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed; + if (!use_unmapped) { + AIO_LOCK(ki); + if (ki->kaio_buffer_count + iovcnt > max_buf_aio) { + AIO_UNLOCK(ki); error = EAGAIN; goto unref; } - - pbuf = uma_zalloc(pbuf_zone, M_WAITOK); - BUF_KERNPROC(pbuf); - AIO_LOCK(ki); - ki->kaio_buffer_count++; + ki->kaio_buffer_count += iovcnt; AIO_UNLOCK(ki); - pages = pbuf->b_pages; - } - bp = g_alloc_bio(); - - bp->bio_length = cb->aio_nbytes; - bp->bio_bcount = cb->aio_nbytes; - bp->bio_done = aio_biowakeup; - bp->bio_offset = cb->aio_offset; - bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; - bp->bio_dev = dev; - bp->bio_caller1 = job; - bp->bio_caller2 = pbuf; - - prot = VM_PROT_READ; - if (cb->aio_lio_opcode == LIO_READ) - prot |= VM_PROT_WRITE; /* Less backwards than it looks */ - npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, - (vm_offset_t)cb->aio_buf, bp->bio_length, prot, pages, - atop(maxphys) + 1); - if (npages < 0) { - error = EFAULT; - goto doerror; } - if (pbuf != NULL) { - pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages); - bp->bio_data = pbuf->b_data + poff; - atomic_add_int(&num_buf_aio, 1); - pbuf->b_npages = npages; - } else { - bp->bio_ma = pages; - bp->bio_ma_n = npages; - bp->bio_ma_offset = poff; - bp->bio_data = unmapped_buf; - bp->bio_flags |= BIO_UNMAPPED; - atomic_add_int(&num_unmapped_aio, 1); + + bios = malloc(sizeof(struct bio *) * iovcnt, M_TEMP, M_WAITOK); + atomic_store_int(&job->nbio, iovcnt); + for (i = 0; i < iovcnt; i++) { + struct vm_page** pages; + struct bio *bp; + void *buf; + size_t nbytes; + int npages; + + buf = job->uiop->uio_iov[i].iov_base; + nbytes = job->uiop->uio_iov[i].iov_len; + + bios[i] = g_alloc_bio(); + bp = bios[i]; + + poff = (vm_offset_t)buf & PAGE_MASK; + if (use_unmapped) { + pbuf = NULL; + pages = malloc(sizeof(vm_page_t) * (atop(round_page( + nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO); + } else { + pbuf = uma_zalloc(pbuf_zone, M_WAITOK); + BUF_KERNPROC(pbuf); + pages = pbuf->b_pages; + } + + bp->bio_length = nbytes; + bp->bio_bcount = nbytes; + bp->bio_done = aio_biowakeup; + bp->bio_offset = offset; + bp->bio_cmd = bio_cmd; + bp->bio_dev = dev; + bp->bio_caller1 = job; + bp->bio_caller2 = pbuf; + + prot = VM_PROT_READ; + if (opcode == LIO_READ || opcode == LIO_READV) + prot |= VM_PROT_WRITE; /* Less backwards than it looks */ + npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, + (vm_offset_t)buf, bp->bio_length, prot, pages, + atop(maxphys) + 1); + if (npages < 0) { + if (pbuf != NULL) + uma_zfree(pbuf_zone, pbuf); + else + free(pages, M_TEMP); + error = EFAULT; + g_destroy_bio(bp); + i--; + goto destroy_bios; + } + if (pbuf != NULL) { + pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages); + bp->bio_data = pbuf->b_data + poff; + pbuf->b_npages = npages; + atomic_add_int(&num_buf_aio, 1); + } else { + bp->bio_ma = pages; + bp->bio_ma_n = npages; + bp->bio_ma_offset = poff; + bp->bio_data = unmapped_buf; + bp->bio_flags |= BIO_UNMAPPED; + atomic_add_int(&num_unmapped_aio, 1); + } + + offset += nbytes; } /* Perform transfer. */ - csw->d_strategy(bp); + for (i = 0; i < iovcnt; i++) + csw->d_strategy(bios[i]); + free(bios, M_TEMP); + dev_relthread(dev, ref); return (0); -doerror: - if (pbuf != NULL) { - AIO_LOCK(ki); - ki->kaio_buffer_count--; - AIO_UNLOCK(ki); - uma_zfree(pbuf_zone, pbuf); - } else { - free(pages, M_TEMP); - } - g_destroy_bio(bp); +destroy_bios: + for (; i >= 0; i--) + aio_biocleanup(bios[i]); + free(bios, M_TEMP); unref: dev_relthread(dev, ref); return (error); @@ -1362,25 +1390,39 @@ convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) } static int -aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) +aiocb_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob, + int type __unused) { struct oaiocb *ojob; + struct aiocb *kcb = &kjob->uaiocb; int error; - bzero(kjob, sizeof(struct aiocb)); - error = copyin(ujob, kjob, sizeof(struct oaiocb)); + bzero(kcb, sizeof(struct aiocb)); + error = copyin(ujob, kcb, sizeof(struct oaiocb)); if (error) return (error); - ojob = (struct oaiocb *)kjob; - return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent)); + /* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */ + ojob = (struct oaiocb *)kcb; + return (convert_old_sigevent(&ojob->aio_sigevent, &kcb->aio_sigevent)); } #endif static int -aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob) +aiocb_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type) { + struct aiocb *kcb = &kjob->uaiocb; + int error; + + error = copyin(ujob, kcb, sizeof(struct aiocb)); + if (error) + return (error); + if (type == LIO_READV || type == LIO_WRITEV) { + /* malloc a uio and copy in the iovec */ + error = copyinuio(__DEVOLATILE(struct iovec*, kcb->aio_iov), + kcb->aio_iovcnt, &kjob->uiop); + } - return (copyin(ujob, kjob, sizeof(struct aiocb))); + return (error); } static long @@ -1456,7 +1498,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, int type, struct aiocb_ops *ops) { struct proc *p = td->td_proc; - struct file *fp; + struct file *fp = NULL; struct kaiocb *job; struct kaioinfo *ki; struct kevent kev; @@ -1477,39 +1519,35 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, if (num_queue_count >= max_queue_count || ki->kaio_count >= max_aio_queue_per_proc) { - ops->store_error(ujob, EAGAIN); - return (EAGAIN); + error = EAGAIN; + goto err1; } job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO); knlist_init_mtx(&job->klist, AIO_MTX(ki)); - error = ops->aio_copyin(ujob, &job->uaiocb); - if (error) { - ops->store_error(ujob, error); - uma_zfree(aiocb_zone, job); - return (error); - } + error = ops->aio_copyin(ujob, job, type); + if (error) + goto err2; if (job->uaiocb.aio_nbytes > IOSIZE_MAX) { - uma_zfree(aiocb_zone, job); - return (EINVAL); + error = EINVAL; + goto err2; } if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT && job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL && job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID && job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) { - ops->store_error(ujob, EINVAL); - uma_zfree(aiocb_zone, job); - return (EINVAL); + error = EINVAL; + goto err2; } if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) { - uma_zfree(aiocb_zone, job); - return (EINVAL); + error = EINVAL; + goto err2; } ksiginfo_init(&job->ksi); @@ -1533,16 +1571,17 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, fd = job->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: + case LIO_WRITEV: error = fget_write(td, fd, &cap_pwrite_rights, &fp); break; case LIO_READ: + case LIO_READV: error = fget_read(td, fd, &cap_pread_rights, &fp); break; case LIO_SYNC: error = fget(td, fd, &cap_fsync_rights, &fp); break; case LIO_MLOCK: - fp = NULL; break; case LIO_NOP: error = fget(td, fd, &cap_no_rights, &fp); @@ -1550,22 +1589,20 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, default: error = EINVAL; } - if (error) { - uma_zfree(aiocb_zone, job); - ops->store_error(ujob, error); - return (error); - } + if (error) + goto err3; if (opcode == LIO_SYNC && fp->f_vnode == NULL) { error = EINVAL; - goto aqueue_fail; + goto err3; } - if ((opcode == LIO_READ || opcode == LIO_WRITE) && + if ((opcode == LIO_READ || opcode == LIO_READV || + opcode == LIO_WRITE || opcode == LIO_WRITEV) && job->uaiocb.aio_offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) { error = EINVAL; - goto aqueue_fail; + goto err3; } job->fd_file = fp; @@ -1577,12 +1614,13 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, error = ops->store_kernelinfo(ujob, jid); if (error) { error = EINVAL; - goto aqueue_fail; + goto err3; } job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid; if (opcode == LIO_NOP) { fdrop(fp, td); + MPASS(job->uiop == &job->uio || job->uiop == NULL); uma_zfree(aiocb_zone, job); return (0); } @@ -1592,7 +1630,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags; if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) { error = EINVAL; - goto aqueue_fail; + goto err3; } kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue; memset(&kev, 0, sizeof(kev)); @@ -1603,7 +1641,7 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr; error = kqfd_register(kqfd, &kev, td, M_WAITOK); if (error) - goto aqueue_fail; + goto err3; no_kqueue: @@ -1614,6 +1652,39 @@ no_kqueue: job->jobflags = KAIOCB_QUEUEING; job->lio = lj; + switch (opcode) { + case LIO_READV: + case LIO_WRITEV: + /* Use the uio copied in by aio_copyin */ + MPASS(job->uiop != &job->uio && job->uiop != NULL); + break; + case LIO_READ: + case LIO_WRITE: + /* Setup the inline uio */ + job->iov[0].iov_base = (void *)(uintptr_t)job->uaiocb.aio_buf; + job->iov[0].iov_len = job->uaiocb.aio_nbytes; + job->uio.uio_iov = job->iov; + job->uio.uio_iovcnt = 1; + job->uio.uio_resid = job->uaiocb.aio_nbytes; + job->uio.uio_segflg = UIO_USERSPACE; + /* FALLTHROUGH */ + default: + job->uiop = &job->uio; + break; + } + switch (opcode) { + case LIO_READ: + case LIO_READV: + job->uiop->uio_rw = UIO_READ; + break; + case LIO_WRITE: + case LIO_WRITEV: + job->uiop->uio_rw = UIO_WRITE; + break; + } + job->uiop->uio_offset = job->uaiocb.aio_offset; + job->uiop->uio_td = td; + if (opcode == LIO_MLOCK) { aio_schedule(job, aio_process_mlock); error = 0; @@ -1622,7 +1693,7 @@ no_kqueue: else error = fo_aio_queue(fp, job); if (error) - goto aqueue_fail; + goto err3; AIO_LOCK(ki); job->jobflags &= ~KAIOCB_QUEUEING; @@ -1643,11 +1714,15 @@ no_kqueue: AIO_UNLOCK(ki); return (0); -aqueue_fail: - knlist_delete(&job->klist, curthread, 0); +err3: if (fp) fdrop(fp, td); + knlist_delete(&job->klist, curthread, 0); +err2: + if (job->uiop != &job->uio) + free(job->uiop, M_IOV); uma_zfree(aiocb_zone, job); +err1: ops->store_error(ujob, error); return (error); } @@ -1723,7 +1798,9 @@ aio_queue_file(struct file *fp, struct kaiocb *job) switch (job->uaiocb.aio_lio_opcode) { case LIO_READ: + case LIO_READV: case LIO_WRITE: + case LIO_WRITEV: aio_schedule(job, aio_process_rw); error = 0; break; @@ -2097,6 +2174,13 @@ sys_aio_read(struct thread *td, struct aio_read_args *uap) return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops)); } +int +sys_aio_readv(struct thread *td, struct aio_readv_args *uap) +{ + + return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READV, &aiocb_ops)); +} + /* syscall - asynchronous write to a file (REALTIME) */ #ifdef COMPAT_FREEBSD6 int @@ -2116,6 +2200,13 @@ sys_aio_write(struct thread *td, struct aio_write_args *uap) } int +sys_aio_writev(struct thread *td, struct aio_writev_args *uap) +{ + + return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITEV, &aiocb_ops)); +} + +int sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) { @@ -2337,13 +2428,11 @@ sys_lio_listio(struct thread *td, struct lio_listio_args *uap) } static void -aio_biowakeup(struct bio *bp) +aio_biocleanup(struct bio *bp) { struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; struct kaioinfo *ki; - struct buf *pbuf = (struct buf*)bp->bio_caller2; - size_t nbytes; - int error, nblks; + struct buf *pbuf = (struct buf *)bp->bio_caller2; /* Release mapping into kernel space. */ if (pbuf != NULL) { @@ -2362,23 +2451,47 @@ aio_biowakeup(struct bio *bp) free(bp->bio_ma, M_TEMP); atomic_subtract_int(&num_unmapped_aio, 1); } + g_destroy_bio(bp); +} - nbytes = job->uaiocb.aio_nbytes - bp->bio_resid; - error = 0; - if (bp->bio_flags & BIO_ERROR) - error = bp->bio_error; +static void +aio_biowakeup(struct bio *bp) +{ + struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; + size_t nbytes; + long bcount = bp->bio_bcount; + long resid = bp->bio_resid; + int error, opcode, nblks; + int bio_error = bp->bio_error; + uint16_t flags = bp->bio_flags; + + opcode = job->uaiocb.aio_lio_opcode; + + aio_biocleanup(bp); + + nbytes =bcount - resid; + atomic_add_acq_long(&job->nbytes, nbytes); nblks = btodb(nbytes); - if (job->uaiocb.aio_lio_opcode == LIO_WRITE) - job->outblock += nblks; + error = 0; + /* + * If multiple bios experienced an error, the job will reflect the + * error of whichever failed bio completed last. + */ + if (flags & BIO_ERROR) + atomic_set_int(&job->error, bio_error); + if (opcode == LIO_WRITE || opcode == LIO_WRITEV) + atomic_add_int(&job->outblock, nblks); else - job->inblock += nblks; + atomic_add_int(&job->inblock, nblks); + atomic_subtract_int(&job->nbio, 1); - if (error) - aio_complete(job, -1, error); - else - aio_complete(job, nbytes, 0); - g_destroy_bio(bp); + if (atomic_load_int(&job->nbio) == 0) { + if (atomic_load_int(&job->error)) + aio_complete(job, -1, job->error); + else + aio_complete(job, atomic_load_long(&job->nbytes), 0); + } } /* syscall - wait for the next completion of an aio request */ @@ -2614,8 +2727,8 @@ typedef struct oaiocb32 { typedef struct aiocb32 { int32_t aio_fildes; /* File descriptor */ uint64_t aio_offset __packed; /* File offset for I/O */ - uint32_t aio_buf; /* I/O buffer in process space */ - uint32_t aio_nbytes; /* Number of bytes for I/O */ + uint32_t aio_buf; /* I/O buffer in process space */ + uint32_t aio_nbytes; /* Number of bytes for I/O */ int __spare__[2]; uint32_t __spare2__; int aio_lio_opcode; /* LIO opcode */ @@ -2652,49 +2765,67 @@ convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig) } static int -aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) +aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob, + int type __unused) { struct oaiocb32 job32; + struct aiocb *kcb = &kjob->uaiocb; int error; - bzero(kjob, sizeof(struct aiocb)); + bzero(kcb, sizeof(struct aiocb)); error = copyin(ujob, &job32, sizeof(job32)); if (error) return (error); - CP(job32, *kjob, aio_fildes); - CP(job32, *kjob, aio_offset); - PTRIN_CP(job32, *kjob, aio_buf); - CP(job32, *kjob, aio_nbytes); - CP(job32, *kjob, aio_lio_opcode); - CP(job32, *kjob, aio_reqprio); - CP(job32, *kjob, _aiocb_private.status); - CP(job32, *kjob, _aiocb_private.error); - PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); + /* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */ + + CP(job32, *kcb, aio_fildes); + CP(job32, *kcb, aio_offset); + PTRIN_CP(job32, *kcb, aio_buf); + CP(job32, *kcb, aio_nbytes); + CP(job32, *kcb, aio_lio_opcode); + CP(job32, *kcb, aio_reqprio); + CP(job32, *kcb, _aiocb_private.status); + CP(job32, *kcb, _aiocb_private.error); + PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo); return (convert_old_sigevent32(&job32.aio_sigevent, - &kjob->aio_sigevent)); + &kcb->aio_sigevent)); } #endif static int -aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob) +aiocb32_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type) { struct aiocb32 job32; + struct aiocb *kcb = &kjob->uaiocb; + struct iovec32 *iov32; int error; error = copyin(ujob, &job32, sizeof(job32)); if (error) return (error); - CP(job32, *kjob, aio_fildes); - CP(job32, *kjob, aio_offset); - PTRIN_CP(job32, *kjob, aio_buf); - CP(job32, *kjob, aio_nbytes); - CP(job32, *kjob, aio_lio_opcode); - CP(job32, *kjob, aio_reqprio); - CP(job32, *kjob, _aiocb_private.status); - CP(job32, *kjob, _aiocb_private.error); - PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); - return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent)); + CP(job32, *kcb, aio_fildes); + CP(job32, *kcb, aio_offset); + CP(job32, *kcb, aio_lio_opcode); + if (type == LIO_READV || type == LIO_WRITEV) { + iov32 = PTRIN(job32.aio_iov); + CP(job32, *kcb, aio_iovcnt); + /* malloc a uio and copy in the iovec */ + error = freebsd32_copyinuio(iov32, + kcb->aio_iovcnt, &kjob->uiop); + if (error) + return (error); + } else { + PTRIN_CP(job32, *kcb, aio_buf); + CP(job32, *kcb, aio_nbytes); + } + CP(job32, *kcb, aio_reqprio); + CP(job32, *kcb, _aiocb_private.status); + CP(job32, *kcb, _aiocb_private.error); + PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo); + error = convert_sigevent32(&job32.aio_sigevent, &kcb->aio_sigevent); + + return (error); } static long @@ -2840,6 +2971,14 @@ freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap) &aiocb32_ops)); } +int +freebsd32_aio_readv(struct thread *td, struct freebsd32_aio_readv_args *uap) +{ + + return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READV, + &aiocb32_ops)); +} + #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_aio_write(struct thread *td, @@ -2860,6 +2999,14 @@ freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) } int +freebsd32_aio_writev(struct thread *td, struct freebsd32_aio_writev_args *uap) +{ + + return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITEV, + &aiocb32_ops)); +} + +int freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) { |