aboutsummaryrefslogtreecommitdiff
path: root/sys/net/rtsock.c
diff options
context:
space:
mode:
authorAlexander V. Chernikov <melifaro@FreeBSD.org>2020-04-23 08:04:20 +0000
committerAlexander V. Chernikov <melifaro@FreeBSD.org>2020-04-23 08:04:20 +0000
commitaaad3c4fcaa558ec3b2c7db9bb26f2843585a122 (patch)
tree3e7477aad71b6fe5569c4af1c7b4e4a5dec00cc0 /sys/net/rtsock.c
parent9e88f47c8f402b87fb36bdea673ad80a988b7d9d (diff)
downloadsrc-aaad3c4fcaa558ec3b2c7db9bb26f2843585a122.tar.gz
src-aaad3c4fcaa558ec3b2c7db9bb26f2843585a122.zip
Convert rtentry field accesses into nhop field accesses.
One of the goals of the new routing KPI defined in r359823 is to entirely hide `struct rtentry` from the consumers. This will make it possible to improve the routing subsystem internals and deliver more features much faster. This commit is a mostly mechanical change to eliminate direct struct rtentry field accesses. The only notable difference is the AF_LINK gateway encoding. An AF_LINK gw is used in the routing stack for operations with interface routes and host loopback routes. In the former case it indicates _some_ non-NULL gateway, as the interface is the same as in rt_ifp in the kernel and rtm_ifindex in rtsock reporting. In the latter case the interface index inside the gateway was used by the IPv6 datapath to verify address scope for link-local interfaces. The kernel uses struct sockaddr_dl for this type of gateway. This structure allows for specifying rich interface data, such as the MAC address and interface name. However, this results in a relatively large structure size - 52 bytes. The routing stack fills in only 2 fields - sdl_index and sdl_type, which reside in the first 8 bytes of the structure. In the new KPI, struct nhop_object tries to be cache-efficient, hence it embeds the gateway address inside the structure. In the AF_LINK case it stores a shortened version of the structure - struct sockaddr_dl_short, which occupies 16 bytes. After the D24340 changes, the data inside the AF_LINK gateway will not be used in the kernel at all, leaving rtsock as the only potential concern. The difference in rtsock reporting: (old) got message of size 240 on Thu Apr 16 03:12:13 2020 RTM_ADD: Add Route: len 240, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED> locks: inits: sockaddrs: <DST,GATEWAY,NETMASK> 10.0.0.0 link#5 255.255.255.0 (new) got message of size 200 on Sun Apr 19 09:46:32 2020 RTM_ADD: Add Route: len 200, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED> locks: inits: sockaddrs: <DST,GATEWAY,NETMASK> 10.0.0.0 link#5 255.255.255.0 Note the 40-byte difference (52-16 + alignment).
However, the gateway is still a valid AF_LINK gateway with proper data filled in. It is worth noting that these particular messages (interface routes) are mostly ignored by routing daemons: * bird/quagga/frr use RTM_NEWADDR and ignore prefix route addition messages. * quagga/frr ignore routes without a gateway. A more detailed overview of how rtsock messages are used by the routing daemons to reconstruct the kernel view can be found in D22974. Differential Revision: https://reviews.freebsd.org/D24519
Notes
Notes: svn path=/head/; revision=360218
Diffstat (limited to 'sys/net/rtsock.c')
-rw-r--r--sys/net/rtsock.c43
1 files changed, 24 insertions, 19 deletions
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index 38bf677e6b78..471ca8f82241 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -457,15 +457,15 @@ union sockaddr_union {
static int
rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
- struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
+ struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
{
#if defined(INET) || defined(INET6)
struct epoch_tracker et;
#endif
/* First, see if the returned address is part of the jail. */
- if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
- info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+ if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
+ info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
return (0);
}
@@ -499,7 +499,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*
* As a last resort return the 'default' jail address.
*/
- ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
+ ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
sin_addr;
if (prison_get_ip4(cred, &ia) != 0)
return (ESRCH);
@@ -542,7 +542,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*
* As a last resort return the 'default' jail address.
*/
- ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
+ ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
sin6_addr;
if (prison_get_ip6(cred, &ia6) != 0)
return (ESRCH);
@@ -635,7 +635,7 @@ fill_addrinfo(struct rt_msghdr *rtm, int len, u_int fibnum, struct rt_addrinfo *
* is not reachable locally. This behavior is fixed as
* part of the new L2/L3 redesign and rewrite work. The
* signature of this interface address route is the
- * AF_LINK sa_family type of the rt_gateway, and the
+ * AF_LINK sa_family type of the gateway, and the
* rt_ifp has the IFF_LOOPBACK flag set.
*/
if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
@@ -711,12 +711,15 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
* the actual PPP host entry is found, perform
* another search to retrieve the prefix route of
* the local end point of the PPP link.
+ * TODO: move this logic to userland.
*/
if (rtm->rtm_flags & RTF_ANNOUNCE) {
struct sockaddr laddr;
+ struct nhop_object *nh;
- if (rt->rt_ifp != NULL &&
- rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
+ nh = rt->rt_nhop;
+ if (nh->nh_ifp != NULL &&
+ nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
struct epoch_tracker et;
struct ifaddr *ifa;
@@ -729,9 +732,9 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
&laddr,
ifa->ifa_netmask);
} else
- rt_maskedcopy(rt->rt_ifa->ifa_addr,
+ rt_maskedcopy(nh->nh_ifa->ifa_addr,
&laddr,
- rt->rt_ifa->ifa_netmask);
+ nh->nh_ifa->ifa_netmask);
/*
* refactor rt and no lock operation necessary
*/
@@ -741,7 +744,7 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
RIB_RUNLOCK(rnh);
return (ESRCH);
}
- }
+ }
RT_LOCK(rt);
RT_ADDREF(rt);
RIB_RUNLOCK(rnh);
@@ -768,6 +771,7 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
struct walkarg w;
union sockaddr_union saun;
struct rt_msghdr *rtm, *orig_rtm = NULL;
+ struct nhop_object *nh;
struct ifnet *ifp;
int error, len;
@@ -775,23 +779,24 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
rtm = *prtm;
+ nh = rt->rt_nhop;
info->rti_info[RTAX_DST] = rt_key(rt);
- info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
rt_mask(rt), &netmask_ss);
info->rti_info[RTAX_GENMASK] = 0;
- ifp = rt->rt_ifp;
+ ifp = nh->nh_ifp;
if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
if (ifp) {
info->rti_info[RTAX_IFP] =
ifp->if_addr->ifa_addr;
- error = rtm_get_jailed(info, ifp, rt,
+ error = rtm_get_jailed(info, ifp, nh,
&saun, curthread->td_ucred);
if (error != 0)
return (error);
if (ifp->if_flags & IFF_POINTOPOINT)
info->rti_info[RTAX_BRD] =
- rt->rt_ifa->ifa_dstaddr;
+ nh->nh_ifa->ifa_dstaddr;
rtm->rtm_index = ifp->if_index;
} else {
info->rti_info[RTAX_IFP] = NULL;
@@ -1075,7 +1080,7 @@ rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
{
bzero(out, sizeof(*out));
- out->rmx_mtu = rt->rt_mtu;
+ out->rmx_mtu = rt->rt_nhop->nh_mtu;
out->rmx_weight = rt->rt_weight;
out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
out->rmx_nhidx = nhop_get_idx(rt->rt_nhop);
@@ -1496,7 +1501,7 @@ rtsock_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs,
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
info.rti_flags = rt->rt_flags;
info.rti_ifp = ifp;
@@ -1725,7 +1730,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
return (0);
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
rt_mask(rt), &ss);
info.rti_info[RTAX_GENMASK] = 0;
@@ -2244,7 +2249,7 @@ rt_dumpentry_ddb(struct radix_node *rn, void *arg __unused)
rt = (void *)rn;
rt_dumpaddr_ddb("dst", rt_key(rt));
- rt_dumpaddr_ddb("gateway", rt->rt_gateway);
+ rt_dumpaddr_ddb("gateway", &rt->rt_nhop->gw_sa);
rt_dumpaddr_ddb("netmask", rtsock_fix_netmask(rt_key(rt), rt_mask(rt),
&ss));
if (rt->rt_ifp != NULL && (rt->rt_ifp->if_flags & IFF_DYING) == 0) {