diff options
Diffstat (limited to 'sys/kern/uipc_usrreq.c')
| -rw-r--r-- | sys/kern/uipc_usrreq.c | 182 |
1 files changed, 110 insertions, 72 deletions
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 6138e543fae7..60736af5adf6 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -793,12 +793,18 @@ uipc_connect2(struct socket *so1, struct socket *so2) } static void +maybe_schedule_gc(void) +{ + if (atomic_load_int(&unp_rights) != 0) + taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); +} + +static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct mtx *vplock; struct vnode *vp; - int local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); @@ -854,7 +860,6 @@ uipc_detach(struct socket *so) UNP_REF_LIST_UNLOCK(); UNP_PCB_LOCK(unp); - local_unp_rights = unp_rights; unp->unp_socket->so_pcb = NULL; unp->unp_socket = NULL; free(unp->unp_addr, M_SONAME); @@ -865,8 +870,7 @@ uipc_detach(struct socket *so) mtx_unlock(vplock); vrele(vp); } - if (local_unp_rights) - taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); + maybe_schedule_gc(); switch (so->so_type) { case SOCK_STREAM: @@ -902,6 +906,18 @@ uipc_disconnect(struct socket *so) return (0); } +static void +uipc_fdclose(struct socket *so __unused) +{ + /* + * Ensure that userspace can't create orphaned file descriptors without + * triggering garbage collection. Triggering GC from uipc_detach() is + * not sufficient, since that's only closed once a socket reference + * count drops to zero. + */ + maybe_schedule_gc(); +} + static int uipc_listen(struct socket *so, int backlog, struct thread *td) { @@ -1069,6 +1085,21 @@ uipc_stream_sbspace(struct sockbuf *sb) return (min(space, mbspace)); } +/* + * UNIX version of generic sbwait() for writes. We wait on peer's receive + * buffer, using our timeout. + */ +static int +uipc_stream_sbwait(struct socket *so, sbintime_t timeo) +{ + struct sockbuf *sb = &so->so_rcv; + + SOCK_RECVBUF_LOCK_ASSERT(so); + sb->sb_flags |= SB_WAIT; + return (msleep_sbt(&sb->sb_acc, SOCK_RECVBUF_MTX(so), PSOCK | PCATCH, + "sbwait", timeo, 0, 0)); +} + static int uipc_sosend_stream_or_seqpacket(struct socket *so, struct sockaddr *addr, struct uio *uio0, struct mbuf *m, struct mbuf *c, int flags, @@ -1203,7 +1234,8 @@ restart: error = EWOULDBLOCK; goto out4; } - if ((error = sbwait(so2, SO_RCV)) != 0) { + if ((error = uipc_stream_sbwait(so2, + so->so_snd.sb_timeo)) != 0) { SOCK_RECVBUF_UNLOCK(so2); goto out4; } else @@ -1356,8 +1388,8 @@ uipc_soreceive_stream_or_seqpacket(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct sockbuf *sb = &so->so_rcv; - struct mbuf *control, *m, *first, *last, *next; - u_int ctl, space, datalen, mbcnt, lastlen; + struct mbuf *control, *m, *first, *part, *next; + u_int ctl, space, datalen, mbcnt, partlen; int error, flags; bool nonblock, waitall, peek; @@ -1441,22 +1473,16 @@ restart: control = NULL; /* - * Find split point for the next copyout. On exit from the loop: - * last == NULL - socket to be flushed - * last != NULL - * lastlen > last->m_len - uio to be filled, last to be adjusted - * lastlen == 0 - MT_CONTROL, M_EOR or M_NOTREADY encountered + * Find split point for the next copyout. On exit from the loop, + * 'next' points to the new head of the buffer STAILQ and 'datalen' + * contains the amount of data we will copy out at the end. The + * copyout is protected by the I/O lock only, as writers can only + * append to the buffer. We need to record the socket buffer state + * and do all length adjustments before dropping the socket buffer lock. */ - space = uio->uio_resid; - datalen = 0; - for (m = first, last = sb->uxst_fnrdy, lastlen = 0; - m != sb->uxst_fnrdy; + for (space = uio->uio_resid, m = next = first, part = NULL, datalen = 0; + space > 0 && m != sb->uxst_fnrdy && m->m_type == MT_DATA; m = STAILQ_NEXT(m, m_stailq)) { - if (m->m_type != MT_DATA) { - last = m; - lastlen = 0; - break; - } if (space >= m->m_len) { space -= m->m_len; datalen += m->m_len; @@ -1464,29 +1490,28 @@ restart: if (m->m_flags & M_EXT) mbcnt += m->m_ext.ext_size; if (m->m_flags & M_EOR) { - last = STAILQ_NEXT(m, m_stailq); - lastlen = 0; flags |= MSG_EOR; + next = STAILQ_NEXT(m, m_stailq); break; } } else { datalen += space; - last = m; - lastlen = space; + partlen = space; + if (!peek) { + m->m_len -= partlen; + m->m_data += partlen; + } + next = part = m; break; } + next = STAILQ_NEXT(m, m_stailq); } - UIPC_STREAM_SBCHECK(sb); if (!peek) { - if (last == NULL) + if (next == NULL) STAILQ_INIT(&sb->uxst_mbq); - else { - STAILQ_FIRST(&sb->uxst_mbq) = last; - MPASS(last->m_len > lastlen); - last->m_len -= lastlen; - last->m_data += lastlen; - } + else + STAILQ_FIRST(&sb->uxst_mbq) = next; MPASS(sb->sb_acc >= datalen); sb->sb_acc -= datalen; sb->sb_ccc -= datalen; @@ -1543,15 +1568,19 @@ restart: mc_init_m(&cmc, control); SOCK_RECVBUF_LOCK(so); - MPASS(!(sb->sb_state & SBS_CANTRCVMORE)); - - if (__predict_false(cmc.mc_len + sb->sb_ccc + - sb->sb_ctl > sb->sb_hiwat)) { + if (__predict_false( + (sb->sb_state & SBS_CANTRCVMORE) || + cmc.mc_len + sb->sb_ccc + sb->sb_ctl > + sb->sb_hiwat)) { /* - * Too bad, while unp_externalize() was - * failing, the other side had filled - * the buffer and we can't prepend data - * back. Losing data! + * While the lock was dropped and we + * were failing in unp_externalize(), + * the peer could has a) disconnected, + * b) filled the buffer so that we + * can't prepend data back. + * These are two edge conditions that + * we just can't handle, so lose the + * data and return the error. */ SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); @@ -1609,33 +1638,34 @@ restart: } } - for (m = first; m != last; m = next) { + for (m = first; datalen > 0; m = next) { + void *data; + u_int len; + next = STAILQ_NEXT(m, m_stailq); - error = uiomove(mtod(m, char *), m->m_len, uio); + if (m == part) { + data = peek ? + mtod(m, char *) : mtod(m, char *) - partlen; + len = partlen; + } else { + data = mtod(m, char *); + len = m->m_len; + } + error = uiomove(data, len, uio); if (__predict_false(error)) { - SOCK_IO_RECV_UNLOCK(so); if (!peek) - for (; m != last; m = next) { + for (; m != part && datalen > 0; m = next) { next = STAILQ_NEXT(m, m_stailq); + MPASS(datalen >= m->m_len); + datalen -= m->m_len; m_free(m); } - return (error); - } - if (!peek) - m_free(m); - } - if (last != NULL && lastlen > 0) { - if (!peek) { - MPASS(!(m->m_flags & M_PKTHDR)); - MPASS(last->m_data - M_START(last) >= lastlen); - error = uiomove(mtod(last, char *) - lastlen, - lastlen, uio); - } else - error = uiomove(mtod(last, char *), lastlen, uio); - if (__predict_false(error)) { SOCK_IO_RECV_UNLOCK(so); return (error); } + datalen -= len; + if (!peek && m != part) + m_free(m); } if (waitall && !(flags & MSG_EOR) && uio->uio_resid > 0) goto restart; @@ -1835,11 +1865,13 @@ static const struct filterops uipc_write_filtops = { .f_isfd = 1, .f_detach = uipc_filt_sowdetach, .f_event = uipc_filt_sowrite, + .f_copy = knote_triv_copy, }; static const struct filterops uipc_empty_filtops = { .f_isfd = 1, .f_detach = uipc_filt_sowdetach, .f_event = uipc_filt_soempty, + .f_copy = knote_triv_copy, }; static int @@ -2397,7 +2429,7 @@ uipc_sendfile_wait(struct socket *so, off_t need, int *space) } if (!sockref) soref(so2); - error = sbwait(so2, SO_RCV); + error = uipc_stream_sbwait(so2, so->so_snd.sb_timeo); if (error == 0 && __predict_false(sb->sb_state & SBS_CANTRCVMORE)) error = EPIPE; @@ -3194,11 +3226,9 @@ unp_disconnect(struct unpcb *unp, struct unpcb *unp2) #endif LIST_REMOVE(unp, unp_reflink); UNP_REF_LIST_UNLOCK(); - if (so) { - SOCK_LOCK(so); - so->so_state &= ~SS_ISCONNECTED; - SOCK_UNLOCK(so); - } + SOCK_LOCK(so); + so->so_state &= ~SS_ISCONNECTED; + SOCK_UNLOCK(so); break; case SOCK_STREAM: @@ -3667,11 +3697,14 @@ unp_internalize(struct mbuf *control, struct mchain *mc, struct thread *td) cmcred->cmcred_uid = td->td_ucred->cr_ruid; cmcred->cmcred_gid = td->td_ucred->cr_rgid; cmcred->cmcred_euid = td->td_ucred->cr_uid; - cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, + _Static_assert(CMGROUP_MAX >= 1, + "Room needed for the effective GID."); + cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups + 1, CMGROUP_MAX); - for (i = 0; i < cmcred->cmcred_ngroups; i++) + cmcred->cmcred_groups[0] = td->td_ucred->cr_gid; + for (i = 1; i < cmcred->cmcred_ngroups; i++) cmcred->cmcred_groups[i] = - td->td_ucred->cr_groups[i]; + td->td_ucred->cr_groups[i - 1]; break; case SCM_RIGHTS: @@ -4183,10 +4216,12 @@ unp_gc(__unused void *arg, int pending) struct socket *so; so = unref[i]->f_data; - CURVNET_SET(so->so_vnet); - socantrcvmore(so); - unp_dispose(so); - CURVNET_RESTORE(); + if (!SOLISTENING(so)) { + CURVNET_SET(so->so_vnet); + socantrcvmore(so); + unp_dispose(so); + CURVNET_RESTORE(); + } } /* @@ -4353,6 +4388,7 @@ static struct protosw streamproto = { .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_send = uipc_sendfile, @@ -4383,6 +4419,7 @@ static struct protosw dgramproto = { .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_peeraddr = uipc_peeraddr, .pr_sosend = uipc_sosend_dgram, .pr_sense = uipc_sense, @@ -4407,6 +4444,7 @@ static struct protosw seqpacketproto = { .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_sense = uipc_sense, |
