diff options
author | Gleb Smirnoff <glebius@FreeBSD.org> | 2017-06-08 21:30:34 +0000 |
---|---|---|
committer | Gleb Smirnoff <glebius@FreeBSD.org> | 2017-06-08 21:30:34 +0000 |
commit | 779f106aa169256b7010a1d8f963ff656b881e92 (patch) | |
tree | 01f96ac5827e2a95a385258086155b37b80ec6b9 /sys/rpc/svc_vc.c | |
parent | 4623e047a7c87148916e11e6c7b80409f6e45519 (diff) | |
download | src-779f106aa169256b7010a1d8f963ff656b881e92.tar.gz src-779f106aa169256b7010a1d8f963ff656b881e92.zip |
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from
fields that belong to normal dataflow, and unionize them. This
shrinks the structure a bit.
- Take out selinfo's from the socket buffers into the socket. The
first reason is to support braindamaged scenario when a socket is
added to kevent(2) and then listen(2) is cast on it. The second
reason is that there is future plan to make socket buffers pluggable,
so that for a dataflow socket a socket buffer can be changed, and
in this case we also want to keep same selinfos through the lifetime
of a socket.
- Remove struct so_accf. Since now listening stuff no longer
affects struct socket size, just move its fields into listening part
of the union.
- Provide sol_upcall field and enforce that so_upcall_set() may be called
only on a dataflow socket, which has buffers, and for listening sockets
provide solisten_upcall_set().
o Remove ACCEPT_LOCK() global.
- Add a mutex to socket, to be used instead of socket buffer lock to lock
fields of struct socket that don't belong to a socket buffer.
- Allow to acquire two socket locks, but the first one must belong to a
listening socket.
- Make soref()/sorele() to use atomic(9). This allows in some situations
to do soref() without owning socket lock. There is place for improvement
here, it is possible to make sorele() also to lock optionally.
- Most protocols aren't touched by this change, except UNIX local sockets.
See below for more information.
o Reduce copy-and-paste in kernel modules that accept connections from
listening sockets: provide function solisten_dequeue(), and use it in
the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4),
infiniband, rpc.
o UNIX local sockets.
- Removal of ACCEPT_LOCK() global uncovered several races in the UNIX
local sockets. Most races exist around spawning a new socket, when we
are connecting to a local listening socket. To cover them, we need to
hold locks on both PCBs when spawning a third one. This means holding
them across sonewconn(). This creates a LOR between pcb locks and
unp_list_lock.
- To fix the new LOR, abandon the global unp_list_lock in favor of global
unp_link_lock. Indeed, separating these two locks didn't provide us any
extra parallelism in the UNIX sockets.
- Now a call into uipc_attach() may happen with unp_link_lock held if we
are accepting, or without unp_link_lock if we are just creating
a socket.
- Another problem in UNIX sockets is that uipc_close() basically did nothing
for a listening socket. The vnode remained opened for connections. This
is fixed by removing vnode in uipc_close(). Maybe the right way would be
to do it for all sockets (not only listening), simply move the vnode
teardown from uipc_detach() to uipc_close()?
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D9770
Notes
Notes:
svn path=/head/; revision=319722
Diffstat (limited to 'sys/rpc/svc_vc.c')
-rw-r--r-- | sys/rpc/svc_vc.c | 91 |
1 files changed, 50 insertions, 41 deletions
diff --git a/sys/rpc/svc_vc.c b/sys/rpc/svc_vc.c index 731ba28ce9e3..8a8b2bdbc8ab 100644 --- a/sys/rpc/svc_vc.c +++ b/sys/rpc/svc_vc.c @@ -96,6 +96,7 @@ static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr); static int svc_vc_accept(struct socket *head, struct socket **sop); static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag); +static int svc_vc_rendezvous_soupcall(struct socket *, void *, int); static struct xp_ops svc_vc_rendezvous_ops = { .xp_recv = svc_vc_rendezvous_recv, @@ -183,10 +184,10 @@ svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize, solisten(so, -1, curthread); - SOCKBUF_LOCK(&so->so_rcv); + SOLISTEN_LOCK(so); xprt->xp_upcallset = 1; - soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt); - SOCKBUF_UNLOCK(&so->so_rcv); + solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt); + SOLISTEN_UNLOCK(so); return (xprt); @@ -316,9 +317,11 @@ svc_vc_create_backchannel(SVCPOOL *pool) int svc_vc_accept(struct socket *head, struct socket **sop) { - int error = 0; struct socket *so; + int error = 0; + short nbio; + /* XXXGL: shouldn't that be an assertion? */ if ((head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; goto done; @@ -328,38 +331,26 @@ svc_vc_accept(struct socket *head, struct socket **sop) if (error != 0) goto done; #endif - ACCEPT_LOCK(); - if (TAILQ_EMPTY(&head->so_comp)) { - ACCEPT_UNLOCK(); - error = EWOULDBLOCK; - goto done; - } - so = TAILQ_FIRST(&head->so_comp); - KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP")); - KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP")); - /* - * Before changing the flags on the socket, we have to bump the - * reference count. Otherwise, if the protocol calls sofree(), - * the socket will be released due to a zero refcount. - * XXX might not need soref() since this is simpler than kern_accept. + * XXXGL: we want non-blocking semantics. 
The socket could be a + * socket created by kernel as well as socket shared with userland, + * so we can't be sure about presense of SS_NBIO. We also shall not + * toggle it on the socket, since that may surprise userland. So we + * set SS_NBIO only temporarily. */ - SOCK_LOCK(so); /* soref() and so_state update */ - soref(so); /* file descriptor reference */ - - TAILQ_REMOVE(&head->so_comp, so, so_list); - head->so_qlen--; - so->so_state |= (head->so_state & SS_NBIO); - so->so_qstate &= ~SQ_COMP; - so->so_head = NULL; - - SOCK_UNLOCK(so); - ACCEPT_UNLOCK(); + SOLISTEN_LOCK(head); + nbio = head->so_state & SS_NBIO; + head->so_state |= SS_NBIO; + error = solisten_dequeue(head, &so, 0); + head->so_state &= (nbio & ~SS_NBIO); + if (error) + goto done; + so->so_state |= nbio; *sop = so; /* connection has been removed from the listen queue */ - KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); + KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0); done: return (error); } @@ -392,21 +383,21 @@ svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg, * connection arrives after our call to accept fails * with EWOULDBLOCK. 
*/ - ACCEPT_LOCK(); - if (TAILQ_EMPTY(&xprt->xp_socket->so_comp)) + SOLISTEN_LOCK(xprt->xp_socket); + if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp)) xprt_inactive_self(xprt); - ACCEPT_UNLOCK(); + SOLISTEN_UNLOCK(xprt->xp_socket); sx_xunlock(&xprt->xp_lock); return (FALSE); } if (error) { - SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); + SOLISTEN_LOCK(xprt->xp_socket); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } - SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); + SOLISTEN_UNLOCK(xprt->xp_socket); xprt_inactive_self(xprt); sx_xunlock(&xprt->xp_lock); return (FALSE); @@ -453,12 +444,6 @@ svc_vc_rendezvous_stat(SVCXPRT *xprt) static void svc_vc_destroy_common(SVCXPRT *xprt) { - SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); - if (xprt->xp_upcallset) { - xprt->xp_upcallset = 0; - soupcall_clear(xprt->xp_socket, SO_RCV); - } - SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_socket) (void)soclose(xprt->xp_socket); @@ -472,6 +457,13 @@ static void svc_vc_rendezvous_destroy(SVCXPRT *xprt) { + SOLISTEN_LOCK(xprt->xp_socket); + if (xprt->xp_upcallset) { + xprt->xp_upcallset = 0; + solisten_upcall_set(xprt->xp_socket, NULL, NULL); + } + SOLISTEN_UNLOCK(xprt->xp_socket); + svc_vc_destroy_common(xprt); } @@ -480,6 +472,13 @@ svc_vc_destroy(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1; + SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); + if (xprt->xp_upcallset) { + xprt->xp_upcallset = 0; + soupcall_clear(xprt->xp_socket, SO_RCV); + } + SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); + svc_vc_destroy_common(xprt); if (cd->mreq) @@ -958,6 +957,16 @@ svc_vc_soupcall(struct socket *so, void *arg, int waitflag) return (SU_OK); } +static int +svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag) +{ + SVCXPRT *xprt = (SVCXPRT *) arg; + + if (!TAILQ_EMPTY(&head->sol_comp)) + xprt_active(xprt); + return (SU_OK); +} + #if 0 /* * Get the effective UID of the sending process. Used by rpcbind, keyserv |