diff options
author | Alexander V. Chernikov <melifaro@FreeBSD.org> | 2020-04-23 08:04:20 +0000 |
---|---|---|
committer | Alexander V. Chernikov <melifaro@FreeBSD.org> | 2020-04-23 08:04:20 +0000 |
commit | aaad3c4fcaa558ec3b2c7db9bb26f2843585a122 (patch) | |
tree | 3e7477aad71b6fe5569c4af1c7b4e4a5dec00cc0 /sys/net/rtsock.c | |
parent | 9e88f47c8f402b87fb36bdea673ad80a988b7d9d (diff) | |
download | src-aaad3c4fcaa558ec3b2c7db9bb26f2843585a122.tar.gz src-aaad3c4fcaa558ec3b2c7db9bb26f2843585a122.zip |
Convert rtentry field accesses into nhop field accesses.
One of the goals of the new routing KPI defined in r359823 is to entirely
hide `struct rtentry` from the consumers. This will make it possible to improve
the routing subsystem internals and deliver more features much faster.
This commit is a mostly mechanical change to eliminate direct struct rtentry
field accesses.
The only notable difference is AF_LINK gateway encoding.
AF_LINK gw is used in routing stack for operations with interface routes
and host loopback routes.
In the former case it indicates _some_ non-NULL gateway, as the interface
is the same as in rt_ifp in kernel and rtm_ifindex in rtsock reporting.
In the latter case the interface index inside gateway was used by the IPv6
datapath to verify address scope for link-local interfaces.
Kernel uses struct sockaddr_dl for this type of gateway. This structure
allows for specifying rich interface data, such as mac address and interface
name. However, this results in relatively large structure size - 52 bytes.
The routing stack fills in only 2 fields - sdl_index and sdl_type, which reside
in the first 8 bytes of the structure.
In the new KPI, struct nhop_object tries to be cache-efficient, hence
embodies gateway address inside the structure. In the AF_LINK case it
stores a shortened version of the structure - struct sockaddr_dl_short,
which occupies 16 bytes. After D24340 changes, the data inside AF_LINK
gateway will not be used in the kernel at all, leaving rtsock as the only
potential concern.
The difference in rtsock reporting:
(old)
got message of size 240 on Thu Apr 16 03:12:13 2020
RTM_ADD: Add Route: len 240, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED>
locks: inits:
sockaddrs: <DST,GATEWAY,NETMASK>
10.0.0.0 link#5 255.255.255.0
(new)
got message of size 200 on Sun Apr 19 09:46:32 2020
RTM_ADD: Add Route: len 200, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED>
locks: inits:
sockaddrs: <DST,GATEWAY,NETMASK>
10.0.0.0 link#5 255.255.255.0
Note the 40-byte difference (52-16 + alignment).
However, gateway is still a valid AF_LINK gateway with proper data filled in.
It is worth noting that these particular messages (interface routes) are mostly
ignored by routing daemons:
* bird/quagga/frr uses RTM_NEWADDR and ignores prefix route addition messages.
* quagga/frr ignores routes without gateway
More detailed overview on how rtsock messages are used by the
routing daemons to reconstruct the kernel view, can be found in D22974.
Differential Revision: https://reviews.freebsd.org/D24519
Notes
Notes:
svn path=/head/; revision=360218
Diffstat (limited to 'sys/net/rtsock.c')
-rw-r--r-- | sys/net/rtsock.c | 43 |
1 files changed, 24 insertions, 19 deletions
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 38bf677e6b78..471ca8f82241 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -457,15 +457,15 @@ union sockaddr_union { static int rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, - struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred) + struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred) { #if defined(INET) || defined(INET6) struct epoch_tracker et; #endif /* First, see if the returned address is part of the jail. */ - if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) { - info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; + if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) { + info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr; return (0); } @@ -499,7 +499,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, /* * As a last resort return the 'default' jail address. */ - ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)-> + ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)-> sin_addr; if (prison_get_ip4(cred, &ia) != 0) return (ESRCH); @@ -542,7 +542,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, /* * As a last resort return the 'default' jail address. */ - ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)-> + ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)-> sin6_addr; if (prison_get_ip6(cred, &ia6) != 0) return (ESRCH); @@ -635,7 +635,7 @@ fill_addrinfo(struct rt_msghdr *rtm, int len, u_int fibnum, struct rt_addrinfo * * is not reachable locally. This behavior is fixed as * part of the new L2/L3 redesign and rewrite work. The * signature of this interface address route is the - * AF_LINK sa_family type of the rt_gateway, and the + * AF_LINK sa_family type of the gateway, and the * rt_ifp has the IFF_LOOPBACK flag set. 
*/ if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) { @@ -711,12 +711,15 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, * the actual PPP host entry is found, perform * another search to retrieve the prefix route of * the local end point of the PPP link. + * TODO: move this logic to userland. */ if (rtm->rtm_flags & RTF_ANNOUNCE) { struct sockaddr laddr; + struct nhop_object *nh; - if (rt->rt_ifp != NULL && - rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { + nh = rt->rt_nhop; + if (nh->nh_ifp != NULL && + nh->nh_ifp->if_type == IFT_PROPVIRTUAL) { struct epoch_tracker et; struct ifaddr *ifa; @@ -729,9 +732,9 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, &laddr, ifa->ifa_netmask); } else - rt_maskedcopy(rt->rt_ifa->ifa_addr, + rt_maskedcopy(nh->nh_ifa->ifa_addr, &laddr, - rt->rt_ifa->ifa_netmask); + nh->nh_ifa->ifa_netmask); /* * refactor rt and no lock operation necessary */ @@ -741,7 +744,7 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, RIB_RUNLOCK(rnh); return (ESRCH); } - } + } RT_LOCK(rt); RT_ADDREF(rt); RIB_RUNLOCK(rnh); @@ -768,6 +771,7 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm, struct walkarg w; union sockaddr_union saun; struct rt_msghdr *rtm, *orig_rtm = NULL; + struct nhop_object *nh; struct ifnet *ifp; int error, len; @@ -775,23 +779,24 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm, rtm = *prtm; + nh = rt->rt_nhop; info->rti_info[RTAX_DST] = rt_key(rt); - info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info->rti_info[RTAX_GATEWAY] = &nh->gw_sa; info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &netmask_ss); info->rti_info[RTAX_GENMASK] = 0; - ifp = rt->rt_ifp; + ifp = nh->nh_ifp; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { if (ifp) { info->rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; - error = rtm_get_jailed(info, ifp, rt, + error = rtm_get_jailed(info, ifp, nh, &saun, curthread->td_ucred); if (error != 0) return (error); if 
(ifp->if_flags & IFF_POINTOPOINT) info->rti_info[RTAX_BRD] = - rt->rt_ifa->ifa_dstaddr; + nh->nh_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info->rti_info[RTAX_IFP] = NULL; @@ -1075,7 +1080,7 @@ rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { bzero(out, sizeof(*out)); - out->rmx_mtu = rt->rt_mtu; + out->rmx_mtu = rt->rt_nhop->nh_mtu; out->rmx_weight = rt->rt_weight; out->rmx_pksent = counter_u64_fetch(rt->rt_pksent); out->rmx_nhidx = nhop_get_idx(rt->rt_nhop); @@ -1496,7 +1501,7 @@ rtsock_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs, bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss); - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; info.rti_flags = rt->rt_flags; info.rti_ifp = ifp; @@ -1725,7 +1730,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss); info.rti_info[RTAX_GENMASK] = 0; @@ -2244,7 +2249,7 @@ rt_dumpentry_ddb(struct radix_node *rn, void *arg __unused) rt = (void *)rn; rt_dumpaddr_ddb("dst", rt_key(rt)); - rt_dumpaddr_ddb("gateway", rt->rt_gateway); + rt_dumpaddr_ddb("gateway", &rt->rt_nhop->gw_sa); rt_dumpaddr_ddb("netmask", rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss)); if (rt->rt_ifp != NULL && (rt->rt_ifp->if_flags & IFF_DYING) == 0) { |