aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_offload.h
diff options
context:
space:
mode:
authorKip Macy <kmacy@FreeBSD.org>2007-12-17 07:56:27 +0000
committerKip Macy <kmacy@FreeBSD.org>2007-12-17 07:56:27 +0000
commit8b5709dfabd4d4889e1866188b19f6114327ed69 (patch)
treeb9000c4541f5e9666b6711cf6933c727f6eb15b1 /sys/netinet/tcp_offload.h
parent29910a5a77c2902cf80841a74a9446576dc19a17 (diff)
downloadsrc-8b5709dfabd4d4889e1866188b19f6114327ed69.tar.gz
src-8b5709dfabd4d4889e1866188b19f6114327ed69.zip
incorporate feedback since initial commit
- rename tcp_ofld.[ch] to tcp_offload.[ch] - document usage and locking conventions of the functions in the toe_usrreqs function vector - document tcpcb, inpcb, and socket fields used by toe - widen the listen interface into 2 functions - rename DISABLE_TCP_OFFLOAD to TCP_OFFLOAD_DISABLE - shrink conditional compilation to reduce the likelihood of bitrot - replace sc->sc_toepcb checks in tcp_syncache.c with TOEPCB_ISSET
Notes
Notes: svn path=/head/; revision=174704
Diffstat (limited to 'sys/netinet/tcp_offload.h')
-rw-r--r--sys/netinet/tcp_offload.h328
1 files changed, 328 insertions, 0 deletions
diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h
new file mode 100644
index 000000000000..33d73ee553af
--- /dev/null
+++ b/sys/netinet/tcp_offload.h
@@ -0,0 +1,328 @@
+/*-
+ * Copyright (c) 2007, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_OFFLOAD_H_
+#define _NETINET_TCP_OFFLOAD_H_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+/*
+ * A driver publishes that it provides offload services
+ * by setting IFCAP_TOE in the ifnet. The offload connect
+ * will bypass any further work if the interface that a
+ * connection would use does not support TCP offload.
+ *
+ * The TOE API assumes that the tcp offload engine can offload the
+ * entire connection from set up to teardown, with some provision
+ * being made to allow the software stack to handle time wait. If
+ * the device does not meet these criteria, it is the driver's responsibility
+ * to overload the functions that it needs to in tcp_usrreqs and make
+ * its own calls to tcp_output if it needs to do so.
+ *
+ * There is currently no provision for the device advertising the congestion
+ * control algorithms it supports as there is currently no API for querying
+ * an operating system for the protocols that it has loaded. This is a desirable
+ * future extension.
+ *
+ *
+ *
+ * It is assumed that individuals deploying TOE will want connections
+ * to be offloaded without software changes so all connections on an
+ * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
+ * flag is set on the socket.
+ *
+ *
+ * The toe_usrreqs structure constitutes the TOE driver's
+ * interface to the TCP stack for functionality that doesn't
+ * interact directly with userspace. If one wants to provide
+ * (optional) functionality to do zero-copy to/from
+ * userspace one still needs to override soreceive/sosend
+ * with functions that fault in and pin the user buffers.
+ *
+ * + tu_send
+ * - tells the driver that new data may have been added to the
+ * socket's send buffer - the driver should not fail if the
+ * buffer is in fact unchanged
+ * - the driver is responsible for providing credits (bytes in the send window)
+ * back to the socket by calling sbdrop() as segments are acknowledged.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_rcvd
+ * - returns credits to the driver and triggers window updates
+ * to the peer (a credit as used here is a byte in the peer's receive window)
+ * - the driver is expected to determine how many bytes have been
+ * consumed and credit that back to the card so that it can grow
+ * the window again by maintaining its own state between invocations.
+ * - In principle this could be used to shrink the window as well as
+ * grow the window, although it is not used for that now.
+ * - this function needs to correctly handle being called any number of
+ * times without any bytes being consumed from the receive buffer.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_disconnect
+ * - tells the driver to send FIN to peer
+ * - driver is expected to send the remaining data and then do a clean half close
+ * - disconnect implies at least half-close so only send, reset, and detach
+ * are legal
+ * - the driver is expected to handle transition through the shutdown
+ * state machine and allow the stack to support SO_LINGER.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_reset
+ * - closes the connection and sends a RST to peer
+ * - driver is expected to trigger an RST and detach the toepcb
+ * - no further calls are legal after reset
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * The following fields in the tcpcb are expected to be referenced by the driver:
+ * + iss
+ * + rcv_nxt
+ * + rcv_wnd
+ * + snd_isn
+ * + snd_max
+ * + snd_nxt
+ * + snd_una
+ * + t_flags
+ * + t_inpcb
+ * + t_maxseg
+ * + t_toe
+ *
+ * The following fields in the inpcb are expected to be referenced by the driver:
+ * + inp_lport
+ * + inp_fport
+ * + inp_laddr
+ * + inp_faddr
+ * + inp_socket
+ * + inp_ip_tos
+ *
+ * The following fields in the socket are expected to be referenced by the
+ * driver:
+ * + so_comp
+ * + so_error
+ * + so_linger
+ * + so_options
+ * + so_rcv
+ * + so_snd
+ * + so_state
+ * + so_timeo
+ *
+ * The four functions above (the ones returning int) all return 0 on success
+ * and can return the following errors as appropriate:
+ * + EPERM:
+ * + ENOBUFS: memory allocation failed
+ * + EMSGSIZE: MTU changed during the call
+ * + EHOSTDOWN:
+ * + EHOSTUNREACH:
+ * + ENETDOWN:
+ * + ENETUNREACH: the peer is no longer reachable
+ *
+ * + tu_detach
+ * - tells driver that the socket is going away so disconnect
+ * the toepcb and free appropriate resources
+ * - allows the driver to cleanly handle the case of connection state
+ * outliving the socket
+ * - no further calls are legal after detach
+ * - the driver is expected to provide its own synchronization between
+ * detach and receiving new data.
+ *
+ * + tu_syncache_event
+ * - even if it is not actually needed, the driver is expected to
+ * call syncache_add for the initial SYN and then syncache_expand
+ * for the SYN,ACK
+ * - tells driver that a connection either has not been added or has
+ * been dropped from the syncache
+ * - the driver is expected to maintain state that lives outside the
+ * software stack so the syncache needs to be able to notify the
+ * toe driver that the software stack is not going to create a connection
+ * for a received SYN
+ * - The driver is responsible for any synchronization required between
+ * the syncache dropping an entry and the driver processing the SYN,ACK.
+ *
+ */
+struct toe_usrreqs {
+ int (*tu_send)(struct tcpcb *tp); /* send buffer may hold new data */
+ int (*tu_rcvd)(struct tcpcb *tp); /* return receive credits to driver */
+ int (*tu_disconnect)(struct tcpcb *tp); /* send FIN, clean half close */
+ int (*tu_reset)(struct tcpcb *tp); /* send RST and detach the toepcb */
+ void (*tu_detach)(struct tcpcb *tp); /* socket is going away */
+ void (*tu_syncache_event)(int event, void *toep); /* event: presumably TOE_SC_* */
+};
+
+#define TOE_SC_ENTRY_PRESENT 1 /* entry not added: 4-tuple already present */
+#define TOE_SC_DROP 2 /* entry dropped: connection was timed out */
+
+/*
+ * Because listen is a one-to-many relationship (a socket can be listening
+ * on all interfaces on a machine some of which may be using different TCP
+ * offload devices), listen uses a publish/subscribe mechanism. The TCP
+ * offload driver registers a listen notification function with the stack.
+ * When a listen socket is created all TCP offload devices are notified
+ * so that they can do the appropriate set up to offload connections on the
+ * port to which the socket is bound. When the listen socket is closed,
+ * the offload devices are notified so that they will stop listening on that
+ * port and free any associated resources as well as sending RSTs on any
+ * connections in the SYN_RCVD state.
+ *
+ */
+
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+/* Event lists invoked by tcp_gen_listen_open() and tcp_gen_listen_close(). */
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+/*
+ * Check if the socket can be offloaded by the following steps:
+ * - determine the egress interface
+ * - check that the interface has TOE capability and that TOE is enabled
+ * - check if the device has resources to offload the connection
+ */
+int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
+
+/*
+ * The tcp_gen_* routines are wrappers around the toe_usrreqs calls;
+ * in the non-offloaded case those that return a value call tcp_output.
+ *
+ * Listen is a special case because it is a 1 to many relationship
+ * and there can be more than one offload driver in the system.
+ */
+
+/*
+ * Nonzero when the connection is offloaded (TF_TOE is set in t_flags)
+ */
+#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
+/*
+ * True when the socket has not been marked as "do not offload"
+ */
+#define SO_OFFLOADABLE(so) (((so)->so_options & SO_NO_OFFLOAD) == 0)
+
+static __inline int
+tcp_gen_connect(struct socket *so, struct sockaddr *nam)
+{
+	struct tcpcb *tp = sototcpcb(so);
+
+#ifndef TCP_OFFLOAD_DISABLE
+	/*
+	 * Try to offload the connection unless the socket has opted
+	 * out.  If tcp_offload_connect() succeeds we are done and
+	 * tcp_output is not called.
+	 */
+	if (SO_OFFLOADABLE(so) && tcp_offload_connect(so, nam) == 0)
+		return (0);
+#endif
+	/* Not offloaded: call tcp_output to start the TCP state machine. */
+	return (tcp_output(tp));
+}
+
+static __inline int
+tcp_gen_send(struct tcpcb *tp)
+{
+	/* Offloaded connections call the driver's tu_send, others tcp_output. */
+#ifdef TCP_OFFLOAD_DISABLE
+	return (tcp_output(tp));
+#else
+	return (tp_offload(tp) ? tp->t_tu->tu_send(tp) : tcp_output(tp));
+#endif
+}
+
+static __inline int
+tcp_gen_rcvd(struct tcpcb *tp)
+{
+	/* Offloaded connections call the driver's tu_rcvd, others tcp_output. */
+#ifdef TCP_OFFLOAD_DISABLE
+	return (tcp_output(tp));
+#else
+	return (tp_offload(tp) ? tp->t_tu->tu_rcvd(tp) : tcp_output(tp));
+#endif
+}
+
+static __inline int
+tcp_gen_disconnect(struct tcpcb *tp)
+{
+	/* Offloaded connections call tu_disconnect, others tcp_output. */
+#ifdef TCP_OFFLOAD_DISABLE
+	return (tcp_output(tp));
+#else
+	return (tp_offload(tp) ? tp->t_tu->tu_disconnect(tp) : tcp_output(tp));
+#endif
+}
+
+static __inline int
+tcp_gen_reset(struct tcpcb *tp)
+{
+	/* Offloaded connections call the driver's tu_reset, others tcp_output. */
+#ifdef TCP_OFFLOAD_DISABLE
+	return (tcp_output(tp));
+#else
+	return (tp_offload(tp) ? tp->t_tu->tu_reset(tp) : tcp_output(tp));
+#endif
+}
+
+static __inline void
+tcp_gen_detach(struct tcpcb *tp)
+{
+#ifndef TCP_OFFLOAD_DISABLE
+	if (!tp_offload(tp))
+		return;		/* not offloaded: no driver to notify */
+	tp->t_tu->tu_detach(tp);
+#endif
+}
+
+static __inline void
+tcp_gen_listen_open(struct tcpcb *tp)
+{
+#ifndef TCP_OFFLOAD_DISABLE
+	if (!SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
+		return;		/* socket is marked "do not offload" */
+	EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+#endif
+}
+
+static __inline void
+tcp_gen_listen_close(struct tcpcb *tp)
+{
+ /* Invoke tcp_offload_listen_stop handlers; not gated on SO_OFFLOADABLE. */
+#ifndef TCP_OFFLOAD_DISABLE
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
+#endif
+}
+
+#undef tp_offload
+#undef SO_OFFLOADABLE
+#endif /* _NETINET_TCP_OFFLOAD_H_ */