aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet6
diff options
context:
space:
mode:
author: Alexander V. Chernikov <melifaro@FreeBSD.org> 2020-04-12 14:30:00 +0000
committer: Alexander V. Chernikov <melifaro@FreeBSD.org> 2020-04-12 14:30:00 +0000
commit: a666325282eaed4b044459d121f339b2d6d0224b (patch)
tree: 64aab98b0911750e1f0625db916b74583e682bdf /sys/netinet6
parent: 07ddae2822b0e0cb4b1b63307dfa422e82297e15 (diff)
download: src-a666325282eaed4b044459d121f339b2d6d0224b.tar.gz
src-a666325282eaed4b044459d121f339b2d6d0224b.zip
Introduce nexthop objects and new routing KPI.
This is the foundational change for the routing subsystem rearchitecture. More details and goals are available in https://reviews.freebsd.org/D24141 . This patch introduces the concept of nexthop objects and a new nexthop-based routing KPI. Nexthops are objects containing all the information necessary for performing the packet output decision: the output interface, MTU, flags and gateway address go there. In most cases, these objects will serve the same role that struct rtentry currently serves. Typically there will be low tens of such objects on a router, even with multiple BGP full-views, as these objects will be shared between routing entries. This allows storing more information in the nexthop. New KPI: struct nhop_object *fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid); struct nhop_object *fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, uint32_t flowid); These 2 functions are intended to replace all flavours of <in_|in6_>rtalloc[1]<_ign><_fib>, the mpath functions and the previous fib[46]-generation functions. Upon successful lookup, they return a nexthop object which is guaranteed to exist within the current NET_EPOCH. If a longer lifetime is desired, one can specify NHR_REF as a flag and get a referenced version of the nexthop. The reference semantics closely resemble those of rtentry, allowing sed-style conversion. Additionally, another 2 functions are introduced to support uRPF functionality inside a variety of our firewalls. 
Their primary goal is to hide the multipath implementation details inside the routing subsystem, greatly simplifying firewall implementations: int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); All functions have a separate scopeid argument, paving the way for eliminating IPv6 scope embedding and for supporting IPv4 link-locals in the future. Structure changes: * rtentry gets a new 'rt_nhop' pointer, slightly growing the overall size. * rib_head gets a new 'rnh_preadd' callback pointer, slightly growing the overall size. Old KPI: During the transition period the old and new KPIs will coexist. As there are another 4-5 decent-sized conversion patches, it will probably take a couple of weeks. To support both KPIs, fields not required by the new KPI (most of rtentry) have to be kept, resulting in a temporary size increase. Once the conversion is finished, rtentry will notably shrink. More details: * architectural overview: https://reviews.freebsd.org/D24141 * list of the next changes: https://reviews.freebsd.org/D24232 Reviewed by: ae, glebius (initial version) Differential Revision: https://reviews.freebsd.org/D24232
Notes
Notes: svn path=/head/; revision=359823
Diffstat (limited to 'sys/netinet6')
-rw-r--r-- sys/netinet6/in6_fib.c 221
-rw-r--r-- sys/netinet6/in6_fib.h 6
-rw-r--r-- sys/netinet6/in6_rmx.c 40
3 files changed, 213 insertions, 54 deletions
diff --git a/sys/netinet6/in6_fib.c b/sys/netinet6/in6_fib.c
index ae4beab3b5ce..b3effb2b422e 100644
--- a/sys/netinet6/in6_fib.c
+++ b/sys/netinet6/in6_fib.c
@@ -50,6 +50,8 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
+#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@@ -68,94 +70,63 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#ifdef INET6
-static void fib6_rte_to_nh_extended(struct rtentry *rte,
+static void fib6_rte_to_nh_extended(const struct nhop_object *nh,
const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
-static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_basic *pnh6);
-static struct ifnet *fib6_get_ifaifp(struct rtentry *rte);
#define RNTORT(p) ((struct rtentry *)(p))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst);
-/*
- * Gets real interface for the @rte.
- * Returns rt_ifp for !IFF_LOOPBACK routers.
- * Extracts "real" address interface from interface address
- * loopback routes.
- */
-static struct ifnet *
-fib6_get_ifaifp(struct rtentry *rte)
-{
- struct ifnet *ifp;
- struct sockaddr_dl *sdl;
-
- ifp = rte->rt_ifp;
- if ((ifp->if_flags & IFF_LOOPBACK) &&
- rte->rt_gateway->sa_family == AF_LINK) {
- sdl = (struct sockaddr_dl *)rte->rt_gateway;
- return (ifnet_byindex(sdl->sdl_index));
- }
- return (ifp);
-}
static void
-fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_basic *pnh6)
{
- struct sockaddr_in6 *gw;
/* Do explicit nexthop zero unless we're copying it */
memset(pnh6, 0, sizeof(*pnh6));
if ((flags & NHR_IFAIF) != 0)
- pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ pnh6->nh_ifp = nh->nh_aifp;
else
- pnh6->nh_ifp = rte->rt_ifp;
+ pnh6->nh_ifp = nh->nh_ifp;
- pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
- if (rte->rt_flags & RTF_GATEWAY) {
+ pnh6->nh_mtu = nh->nh_mtu;
+ if (nh->nh_flags & NHF_GATEWAY) {
/* Return address with embedded scope. */
- gw = (struct sockaddr_in6 *)rte->rt_gateway;
- pnh6->nh_addr = gw->sin6_addr;
+ pnh6->nh_addr = nh->gw6_sa.sin6_addr;
} else
pnh6->nh_addr = *dst;
/* Set flags */
- pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
- gw = (struct sockaddr_in6 *)rt_key(rte);
- if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
- pnh6->nh_flags |= NHF_DEFAULT;
+ pnh6->nh_flags = nh->nh_flags;
}
static void
-fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst,
+fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_extended *pnh6)
{
- struct sockaddr_in6 *gw;
/* Do explicit nexthop zero unless we're copying it */
memset(pnh6, 0, sizeof(*pnh6));
if ((flags & NHR_IFAIF) != 0)
- pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ pnh6->nh_ifp = nh->nh_aifp;
else
- pnh6->nh_ifp = rte->rt_ifp;
+ pnh6->nh_ifp = nh->nh_ifp;
- pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
- if (rte->rt_flags & RTF_GATEWAY) {
+ pnh6->nh_mtu = nh->nh_mtu;
+ if (nh->nh_flags & NHF_GATEWAY) {
/* Return address with embedded scope. */
- gw = (struct sockaddr_in6 *)rte->rt_gateway;
- pnh6->nh_addr = gw->sin6_addr;
+ pnh6->nh_addr = nh->gw6_sa.sin6_addr;
} else
pnh6->nh_addr = *dst;
/* Set flags */
- pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
- gw = (struct sockaddr_in6 *)rt_key(rte);
- if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
- pnh6->nh_flags |= NHF_DEFAULT;
- pnh6->nh_ia = ifatoia6(rte->rt_ifa);
+ pnh6->nh_flags = nh->nh_flags;
+ pnh6->nh_ia = ifatoia6(nh->nh_ifa);
}
/*
@@ -180,7 +151,7 @@ fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scope
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
- struct rtentry *rte;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@@ -198,10 +169,10 @@ fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scope
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- rte = RNTORT(rn);
+ nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(rte->rt_ifp)) {
- fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6);
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ fib6_rte_to_nh_basic(nh, &sin6.sin6_addr, flags, pnh6);
RIB_RUNLOCK(rh);
return (0);
}
@@ -231,6 +202,7 @@ fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
struct radix_node *rn;
struct sockaddr_in6 sin6;
struct rtentry *rte;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@@ -256,9 +228,10 @@ fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
return (ENOENT);
}
#endif
+ nh = rte->rt_nhop;
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(rte->rt_ifp)) {
- fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags,
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ fib6_rte_to_nh_extended(nh, &sin6.sin6_addr, flags,
pnh6);
if ((flags & NHR_REF) != 0) {
/* TODO: Do lwref on egress ifp's */
@@ -279,5 +252,145 @@ fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6)
}
+/*
+ * Looks up path in fib @fibnum specified by @dst.
+ * Assumes scope is deembedded and provided in @scopeid.
+ *
+ * Returns path nexthop on success. Nexthop is safe to use
+ * within the current network epoch. If longer lifetime is required,
+ * one needs to pass NHR_REF as a flag. This will return referenced
+ * nexthop.
+ */
+struct nhop_object *
+fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid)
+{
+ RIB_RLOCK_TRACKER;
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct rtentry *rt;
+ struct nhop_object *nh;
+ struct sockaddr_in6 sin6;
+
+ KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET6);
+ if (rh == NULL)
+ return (NULL);
+
+ /* TODO: radix changes */
+ //addr = *dst6;
+ /* Prepare lookup key */
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr = *dst6;
+
+ /* Assume scopeid is valid and embed it directly */
+ if (IN6_IS_SCOPE_LINKLOCAL(dst6))
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rt = RNTORT(rn);
+#ifdef RADIX_MPATH
+ if (rt_mpath_next(rt) != NULL)
+ rt = rt_mpath_selectrte(rt, flowid);
+#endif
+ nh = rt->rt_nhop;
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ if (flags & NHR_REF)
+ nhop_ref_object(nh);
+ RIB_RUNLOCK(rh);
+ return (nh);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ RTSTAT_INC(rts_unreach);
+ return (NULL);
+}
+
+inline static int
+check_urpf(const struct nhop_object *nh, uint32_t flags,
+ const struct ifnet *src_if)
+{
+
+ if (src_if != NULL && nh->nh_aifp == src_if) {
+ return (1);
+ }
+ if (src_if == NULL) {
+ if ((flags & NHR_NODEFAULT) == 0)
+ return (1);
+ else if ((nh->nh_flags & NHF_DEFAULT) == 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+#ifdef RADIX_MPATH
+inline static int
+check_urpf_mpath(struct rtentry *rt, uint32_t flags,
+ const struct ifnet *src_if)
+{
+
+ while (rt != NULL) {
+ if (check_urpf(rt->rt_nhop, flags, src_if) != 0)
+ return (1);
+ rt = rt_mpath_next(rt);
+ }
+
+ return (0);
+}
+#endif
+
+/*
+ * Performs reverse path forwarding lookup.
+ * If @src_if is non-zero, verifies that at least 1 path goes via
+ * this interface.
+ * If @src_if is zero, verifies that route exists.
+ * If @flags contains NHR_NODEFAULT, do not consider default route.
+ *
+ * Returns 1 if route matching conditions is found, 0 otherwise.
+ */
+int
+fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if)
+{
+ RIB_RLOCK_TRACKER;
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct rtentry *rt;
+ struct in6_addr addr;
+ int ret;
+
+ KASSERT((fibnum < rt_numfibs), ("fib6_check_urpf: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET6);
+ if (rh == NULL)
+ return (0);
+
+ addr = *dst6;
+ /* Assume scopeid is valid and embed it directly */
+ if (IN6_IS_SCOPE_LINKLOCAL(dst6))
+ addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&addr, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rt = RNTORT(rn);
+#ifdef RADIX_MPATH
+ ret = check_urpf_mpath(rt, flags, src_if);
+#else
+ ret = check_urpf(rt->rt_nhop, flags, src_if);
+#endif
+ RIB_RUNLOCK(rh);
+ return (ret);
+ }
+ RIB_RUNLOCK(rh);
+
+ return (0);
+}
+
#endif
diff --git a/sys/netinet6/in6_fib.h b/sys/netinet6/in6_fib.h
index fa07a5ce9a3e..bf8d367309cc 100644
--- a/sys/netinet6/in6_fib.h
+++ b/sys/netinet6/in6_fib.h
@@ -58,5 +58,11 @@ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,
uint32_t scopeid, uint32_t flags, uint32_t flowid,
struct nhop6_extended *pnh6);
void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6);
+
+struct nhop_object *fib6_lookup(uint32_t fibnum,
+ const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags,
+ uint32_t flowid);
+int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if);
#endif
diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c
index 35756cf95868..7f10b290309b 100644
--- a/sys/netinet6/in6_rmx.c
+++ b/sys/netinet6/in6_rmx.c
@@ -82,6 +82,8 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
+#include <net/route/shared.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
@@ -103,6 +105,43 @@ extern int in6_inithead(void **head, int off, u_int fibnum);
extern int in6_detachhead(void **head, int off);
#endif
+static int
+rib6_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
+ struct nhop_object *nh)
+{
+ uint16_t nh_type;
+
+ /* XXX: RTF_LOCAL */
+
+ /*
+ * Check route MTU:
+ * inherit interface MTU if not set or
+ * check if MTU is too large.
+ */
+ if (nh->nh_mtu == 0) {
+ nh->nh_mtu = IN6_LINKMTU(nh->nh_ifp);
+ } else if (nh->nh_mtu > IN6_LINKMTU(nh->nh_ifp))
+ nh->nh_mtu = IN6_LINKMTU(nh->nh_ifp);
+
+ /* Ensure that default route nhop has special flag */
+ const struct sockaddr_in6 *mask6 = (const struct sockaddr_in6 *)mask;
+ if ((nhop_get_rtflags(nh) & RTF_HOST) == 0 &&
+ IN6_IS_ADDR_UNSPECIFIED(&mask6->sin6_addr))
+ nh->nh_flags |= NHF_DEFAULT;
+
+ /* Set nexthop type */
+ if (nhop_get_type(nh) == 0) {
+ if (nh->nh_flags & NHF_GATEWAY)
+ nh_type = NH_TYPE_IPV6_ETHER_NHOP;
+ else
+ nh_type = NH_TYPE_IPV6_ETHER_RSLV;
+
+ nhop_set_type(nh, nh_type);
+ }
+
+ return (0);
+}
+
/*
* Do what we need to do when inserting a route.
*/
@@ -169,6 +208,7 @@ in6_inithead(void **head, int off, u_int fibnum)
return (0);
rh->rnh_addaddr = in6_addroute;
+ rh->rnh_preadd = rib6_preadd;
#ifdef RADIX_MPATH
rt_mpath_init_rnh(rh);
#endif