aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet6
diff options
context:
space:
mode:
authorHans Petter Selasky <hselasky@FreeBSD.org>2017-01-18 13:31:17 +0000
committerHans Petter Selasky <hselasky@FreeBSD.org>2017-01-18 13:31:17 +0000
commitf3e7afe2d7b262ab55ab818445d4dfdb6e0c70a9 (patch)
treef99cc015b93fe941bf2c9800511d277156c375d9 /sys/netinet6
parentae69172343e8e6940510bf3cf7512fceaa479929 (diff)
downloadsrc-f3e7afe2d7b262ab55ab818445d4dfdb6e0c70a9.tar.gz
src-f3e7afe2d7b262ab55ab818445d4dfdb6e0c70a9.zip
Implement kernel support for hardware rate limited sockets.
- Add the RATELIMIT kernel configuration keyword, which must be set to enable the new functionality. - Add support for hardware driven, Receive Side Scaling (RSS) aware, rate limited sendqueues and expose the functionality through the already established SO_MAX_PACING_RATE setsockopt(). The API supports rates in the range from 1 to 4Gbytes/s, which are suitable for regular TCP and UDP streams. The setsockopt(2) manual page has been updated. - Add a rate limit function callback API to "struct ifnet" which supports the following operations: if_snd_tag_alloc(), if_snd_tag_modify(), if_snd_tag_query() and if_snd_tag_free(). - Add support to ifconfig to view, set and clear the IFCAP_TXRTLMT flag, which indicates whether a network driver supports rate limiting. - This patch also adds support for rate limiting through VLAN and LAGG intermediate network devices. - How rate limiting works: 1) The userspace application calls setsockopt() after accepting or making a new connection to set the rate, which is then stored in the socket structure in the kernel. Later on, when packets are transmitted, a check is made in the transmit path for rate changes. A rate change implies that a non-blocking ifp->if_snd_tag_alloc() call will be made to the destination network interface, which then sets up a custom sendqueue with the given rate limitation parameter. A "struct m_snd_tag" pointer is returned which serves as a "snd_tag" hint in the m_pkthdr for the subsequently transmitted mbufs. 2) When the network driver sees that "m->m_pkthdr.snd_tag" is not NULL, it will move the packets into a designated rate limited sendqueue given by the snd_tag pointer. It is up to the individual drivers how the rate limited traffic will be rate limited. 3) Route changes are detected by the NIC drivers in the ifp->if_transmit() routine when the ifnet pointer in the incoming snd_tag does not match that of the network interface. 
The network adapter frees the mbuf and returns EAGAIN, which causes ip_output() to release and clear the send tag. Upon the next ip_output() call, allocation of a new "snd_tag" will be attempted. 4) When the PCB is detached the custom sendqueue will be released by a non-blocking ifp->if_snd_tag_free() call to the currently bound network interface. Reviewed by: wblock (manpages), adrian, gallatin, scottl (network) Differential Revision: https://reviews.freebsd.org/D3687 Sponsored by: Mellanox Technologies MFC after: 3 months
Notes
Notes: svn path=/head/; revision=312379
Diffstat (limited to 'sys/netinet6')
-rw-r--r--sys/netinet6/ip6_output.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index 49b89564b978..be05c8ade522 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ratelimit.h"
#include "opt_ipsec.h"
#include "opt_sctp.h"
#include "opt_route.h"
@@ -954,8 +955,23 @@ passout:
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ } else {
+ m->m_pkthdr.snd_tag = NULL;
+ }
+#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
+#ifdef RATELIMIT
+ /* check for route change */
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
goto done;
}
@@ -1054,8 +1070,23 @@ sendorfree:
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
+ in_pcboutput_txrtlmt(inp, ifp, m);
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ } else {
+ m->m_pkthdr.snd_tag = NULL;
+ }
+#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
+#ifdef RATELIMIT
+ /* check for route change */
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
} else
m_freem(m);
}
@@ -1441,6 +1472,16 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(in6p);
error = 0;
break;
+ case SO_MAX_PACING_RATE:
+#ifdef RATELIMIT
+ INP_WLOCK(in6p);
+ in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ INP_WUNLOCK(in6p);
+ error = 0;
+#else
+ error = EOPNOTSUPP;
+#endif
+ break;
default:
break;
}