diff options
Diffstat (limited to 'sys/netlink/route/iface.c')
-rw-r--r-- | sys/netlink/route/iface.c | 1530 |
1 files changed, 1530 insertions, 0 deletions
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c new file mode 100644 index 000000000000..8b871576d0b2 --- /dev/null +++ b/sys/netlink/route/iface.c @@ -0,0 +1,1530 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include "opt_inet.h" +#include "opt_inet6.h" +#include <sys/types.h> +#include <sys/eventhandler.h> +#include <sys/kernel.h> +#include <sys/jail.h> +#include <sys/malloc.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/syslog.h> + +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_var.h> +#include <net/if_clone.h> +#include <net/route.h> +#include <net/route/nhop.h> +#include <net/route/route_ctl.h> +#include <netinet/in_var.h> +#include <netinet6/in6_var.h> +#include <netinet6/scope6_var.h> /* scope deembedding */ +#include <netlink/netlink.h> +#include <netlink/netlink_ctl.h> +#include <netlink/netlink_route.h> +#include <netlink/route/route_var.h> + +#define DEBUG_MOD_NAME nl_iface +#define DEBUG_MAX_LEVEL LOG_DEBUG3 +#include <netlink/netlink_debug.h> +_DECLARE_DEBUG(LOG_INFO); + +struct netlink_walkargs { + struct nl_writer *nw; + struct nlmsghdr hdr; + struct nlpcb *so; + struct ucred *cred; + uint32_t fibnum; + int family; + int error; + int count; + int dumped; +}; + +static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event; + +static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners); + +static struct sx rtnl_cloner_lock; +SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock"); + +/* These are external hooks for CARP. */ +extern int (*carp_get_vhid_p)(struct ifaddr *); + +/* + * RTM_GETLINK request + * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0}, + * {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32 + * + * Reply: + * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, +{{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"} + +[ +{{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"}, +{{nla_len=8, nla_type=IFLA_TXQLEN}, 1000}, +{{nla_len=5, nla_type=IFLA_OPERSTATE}, 6}, +{{nla_len=5, nla_type=IFLA_LINKMODE}, 0}, +{{nla_len=8, nla_type=IFLA_MTU}, 1500}, +{{nla_len=8, nla_type=IFLA_MIN_MTU}, 68}, + {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000}, +{{nla_len=8, nla_type=IFLA_GROUP}, 0}, +{{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0}, +{{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1}, +{{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535}, +{{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536}, +{{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1}, +{{nla_len=5, nla_type=IFLA_CARRIER}, 1}, +{{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"}, +{{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2}, +{{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0}, +{{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1}, +{{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1}, + */ + +struct if_state { + uint8_t ifla_operstate; + uint8_t ifla_carrier; +}; + +static void +get_operstate_ether(if_t ifp, struct if_state *pstate) +{ + struct ifmediareq ifmr = {}; + int error; + error = if_ioctl(ifp, SIOCGIFMEDIA, (void *)&ifmr); + + if (error != 0) { + NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d", + if_name(ifp), error); + return; + } + + switch (IFM_TYPE(ifmr.ifm_active)) { + case IFM_ETHER: + if (ifmr.ifm_status & IFM_ACTIVE) { + pstate->ifla_carrier = 1; + if (if_getflags(ifp) & IFF_MONITOR) + pstate->ifla_operstate = IF_OPER_DORMANT; + else + pstate->ifla_operstate = IF_OPER_UP; + } else + pstate->ifla_operstate = IF_OPER_DOWN; + } +} + +static bool +get_stats(struct nl_writer *nw, if_t ifp) +{ + struct rtnl_link_stats64 *stats; + + int nla_len = sizeof(struct nlattr) + sizeof(*stats); + struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); + if (nla == NULL) + return (false); + nla->nla_type = IFLA_STATS64; + nla->nla_len = nla_len; + stats = (struct rtnl_link_stats64 *)(nla + 1); + + stats->rx_packets = if_getcounter(ifp, IFCOUNTER_IPACKETS); + stats->tx_packets = if_getcounter(ifp, IFCOUNTER_OPACKETS); + stats->rx_bytes = if_getcounter(ifp, IFCOUNTER_IBYTES); + stats->tx_bytes = if_getcounter(ifp, IFCOUNTER_OBYTES); + stats->rx_errors = if_getcounter(ifp, IFCOUNTER_IERRORS); + stats->tx_errors = if_getcounter(ifp, IFCOUNTER_OERRORS); + stats->rx_dropped = if_getcounter(ifp, IFCOUNTER_IQDROPS); + stats->tx_dropped = if_getcounter(ifp, IFCOUNTER_OQDROPS); + stats->multicast = if_getcounter(ifp, IFCOUNTER_IMCASTS); + stats->rx_nohandler = if_getcounter(ifp, IFCOUNTER_NOPROTO); + + return (true); +} + +static void +get_operstate(if_t ifp, struct if_state *pstate) +{ + pstate->ifla_operstate = IF_OPER_UNKNOWN; + pstate->ifla_carrier = 0; /* no carrier */ + + switch (if_gettype(ifp)) { + case IFT_ETHER: + case IFT_L2VLAN: + get_operstate_ether(ifp, pstate); + break; + default: + /* Map admin state to the operstate */ + if (if_getflags(ifp) & IFF_UP) { + pstate->ifla_operstate = IF_OPER_UP; + pstate->ifla_carrier = 1; + } else + pstate->ifla_operstate = IF_OPER_DOWN; + break; + } +} + +static void +get_hwaddr(struct nl_writer *nw, if_t ifp) +{ + struct ifreq ifr = {}; + + if (if_gethwaddr(ifp, &ifr) == 0) { + nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp), + ifr.ifr_addr.sa_data); + } +} + +static unsigned +ifp_flags_to_netlink(const if_t ifp) +{ + return (if_getflags(ifp) | if_getdrvflags(ifp)); +} + +#define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen)) +static bool +dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa) +{ + uint32_t addr_len = 0; + const void *addr_data = NULL; +#ifdef INET6 + struct in6_addr addr6; +#endif + + if (sa == NULL) + return (true); + + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + addr_len = sizeof(struct in_addr); + addr_data = &((const struct sockaddr_in *)sa)->sin_addr; + break; +#endif +#ifdef INET6 + case AF_INET6: + in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len); + addr_len = sizeof(struct in6_addr); + addr_data = &addr6; + break; +#endif + case AF_LINK: + addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen; + addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa); + break; + case AF_UNSPEC: + /* Ignore empty SAs without warning */ + return (true); + default: + NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family); + return (true); + } + + return (nlattr_add(nw, attr, addr_len, addr_data)); +} + +static bool +dump_iface_caps(struct nl_writer *nw, struct ifnet *ifp) +{ + int off = nlattr_add_nested(nw, IFLAF_CAPS); + uint32_t active_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {}; + uint32_t all_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {}; + + MPASS(sizeof(active_caps) >= 8); + MPASS(sizeof(all_caps) >= 8); + + if (off == 0) + return (false); + + active_caps[0] = (uint32_t)if_getcapabilities(ifp); + all_caps[0] = (uint32_t)if_getcapenable(ifp); + active_caps[1] = (uint32_t)if_getcapabilities2(ifp); + all_caps[1] = (uint32_t)if_getcapenable2(ifp); + + nlattr_add_u32(nw, NLA_BITSET_SIZE, IFCAP_B_SIZE); + nlattr_add(nw, NLA_BITSET_MASK, sizeof(all_caps), all_caps); + nlattr_add(nw, NLA_BITSET_VALUE, sizeof(active_caps), active_caps); + + nlattr_set_len(nw, off); + + return (true); +} + +/* + * Dumps interface state, properties and metrics. + * @nw: message writer + * @ifp: target interface + * @hdr: template header + * @if_flags_mask: changed if_[drv]_flags bitmask + * + * This function is called without epoch and MAY sleep. + */ +static bool +dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr, + int if_flags_mask) +{ + struct epoch_tracker et; + struct ifinfomsg *ifinfo; + + NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp)); + + if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg))) + goto enomem; + + ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg); + ifinfo->ifi_family = AF_UNSPEC; + ifinfo->__ifi_pad = 0; + ifinfo->ifi_type = if_gettype(ifp); + ifinfo->ifi_index = if_getindex(ifp); + ifinfo->ifi_flags = ifp_flags_to_netlink(ifp); + ifinfo->ifi_change = if_flags_mask; + + struct if_state ifs = {}; + get_operstate(ifp, &ifs); + + if (ifs.ifla_operstate == IF_OPER_UP) + ifinfo->ifi_flags |= IFF_LOWER_UP; + + nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp)); + nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate); + nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier); + +/* + nlattr_add_u8(nw, IFLA_PROTO_DOWN, val); + nlattr_add_u8(nw, IFLA_LINKMODE, val); +*/ + if (if_getaddrlen(ifp) != 0) { + struct ifaddr *ifa; + struct ifa_iter it; + + NET_EPOCH_ENTER(et); + ifa = ifa_iter_start(ifp, &it); + if (ifa != NULL) + dump_sa(nw, IFLA_ADDRESS, ifa->ifa_addr); + ifa_iter_finish(&it); + NET_EPOCH_EXIT(et); + } + + if ((if_getbroadcastaddr(ifp) != NULL)) { + nlattr_add(nw, IFLA_BROADCAST, if_getaddrlen(ifp), + if_getbroadcastaddr(ifp)); + } + + nlattr_add_u32(nw, IFLA_MTU, if_getmtu(ifp)); +/* + nlattr_add_u32(nw, IFLA_MIN_MTU, 60); + nlattr_add_u32(nw, IFLA_MAX_MTU, 9000); + nlattr_add_u32(nw, IFLA_GROUP, 0); +*/ + + if (if_getdescr(ifp) != NULL) + nlattr_add_string(nw, IFLA_IFALIAS, if_getdescr(ifp)); + + /* Store FreeBSD-specific attributes */ + int off = nlattr_add_nested(nw, IFLA_FREEBSD); + if (off != 0) { + get_hwaddr(nw, ifp); + dump_iface_caps(nw, ifp); + + nlattr_set_len(nw, off); + } + + get_stats(nw, ifp); + + uint32_t val = (if_getflags(ifp) & IFF_PROMISC) != 0; + nlattr_add_u32(nw, IFLA_PROMISCUITY, val); + + ifc_dump_ifp_nl(ifp, nw); + + if (nlmsg_end(nw)) + return (true); + +enomem: + NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp)); + nlmsg_abort(nw); + return (false); +} + +static bool +check_ifmsg(void *hdr, struct nl_pstate *npt) +{ + struct ifinfomsg *ifm = hdr; + + if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 || + ifm->ifi_flags != 0 || ifm->ifi_change != 0) { + nlmsg_report_err_msg(npt, + "strict checking: non-zero values in ifinfomsg header"); + return (false); + } + + return (true); +} + +#define _IN(_field) offsetof(struct ifinfomsg, _field) +#define _OUT(_field) offsetof(struct nl_parsed_link, _field) +static const struct nlfield_parser nlf_p_if[] = { + { .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 }, + { .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 }, + { .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 }, + { .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 }, +}; + +static const struct nlattr_parser nla_p_linfo[] = { + { .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn }, + { .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla }, +}; +NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo); + +static const struct nlattr_parser nla_p_if[] = { + { .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, + { .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 }, + { .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 }, + { .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested }, + { .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string }, + { .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string }, + { .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, +}; +#undef _IN +#undef _OUT +NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if); + +static bool +match_iface(if_t ifp, void *_arg) +{ + struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg; + + if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp)) + return (false); + if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp)) + return (false); + if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp))) + return (false); + /* TODO: add group match */ + + return (true); +} + +static int +dump_cb(if_t ifp, void *_arg) +{ + struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; + if (!dump_iface(wa->nw, ifp, &wa->hdr, 0)) + return (ENOMEM); + return (0); +} + +/* + * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0}, + * {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, + * [ + * [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"], + * [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF] + * ] + */ +static int +rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct epoch_tracker et; + if_t ifp; + int error = 0; + + struct nl_parsed_link attrs = {}; + error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); + if (error != 0) + return (error); + + struct netlink_walkargs wa = { + .so = nlp, + .nw = npt->nw, + .hdr.nlmsg_pid = hdr->nlmsg_pid, + .hdr.nlmsg_seq = hdr->nlmsg_seq, + .hdr.nlmsg_flags = hdr->nlmsg_flags, + .hdr.nlmsg_type = NL_RTM_NEWLINK, + }; + + /* Fast track for an interface w/ explicit name or index match */ + if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) { + if (attrs.ifi_index != 0) { + NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", + attrs.ifi_index); + NET_EPOCH_ENTER(et); + ifp = ifnet_byindex_ref(attrs.ifi_index); + NET_EPOCH_EXIT(et); + } else { + NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s", + attrs.ifla_ifname); + ifp = ifunit_ref(attrs.ifla_ifname); + } + + if (ifp != NULL) { + if (match_iface(ifp, &attrs)) { + if (!dump_iface(wa.nw, ifp, &wa.hdr, 0)) + error = ENOMEM; + } else + error = ENODEV; + if_rele(ifp); + } else + error = ENODEV; + return (error); + } + + /* Always treat non-direct-match as a multipart message */ + wa.hdr.nlmsg_flags |= NLM_F_MULTI; + + /* + * Fetching some link properties require performing ioctl's that may be blocking. + * Address it by saving referenced pointers of the matching links, + * exiting from epoch and going through the list one-by-one. + */ + + NL_LOG(LOG_DEBUG2, "Start dump"); + if_foreach_sleep(match_iface, &attrs, dump_cb, &wa); + NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); + + if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { + NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); + return (ENOMEM); + } + + return (error); +} + +/* + * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[ + * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0}, + * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, + * {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"], + * [ + * {nla_len=16, nla_type=IFLA_LINKINFO}, + * [ + * {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"... + * ] + * ] + */ + +static int +rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct epoch_tracker et; + if_t ifp; + int error; + + struct nl_parsed_link attrs = {}; + error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); + if (error != 0) + return (error); + + NET_EPOCH_ENTER(et); + ifp = ifnet_byindex_ref(attrs.ifi_index); + NET_EPOCH_EXIT(et); + if (ifp == NULL) { + NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index); + return (ENOENT); + } + NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp)); + + sx_xlock(&ifnet_detach_sxlock); + error = if_clone_destroy(if_name(ifp)); + sx_xunlock(&ifnet_detach_sxlock); + + NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error); + + if_rele(ifp); + return (error); +} + +/* + * New link: + * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0}, + * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0} + * [ + * {{nla_len=8, nla_type=IFLA_MTU}, 123}, + * {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"}, + * {{nla_len=24, nla_type=IFLA_LINKINFO}, + * [ + * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, + * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]} + * + * Update link: + * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0}, + * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0}, + * {{nla_len=8, nla_type=IFLA_MTU}, 123}} + * + * + * Check command availability: + * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, + * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0} + */ + + +static int +create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs, + struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt) +{ + if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) { + NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute"); + return (EINVAL); + } + if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) { + NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute"); + return (EINVAL); + } + + struct ifc_data_nl ifd = { + .flags = IFC_F_CREATE, + .lattrs = lattrs, + .bm = bm, + .npt = npt, + }; + if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0) + nl_store_ifp_cookie(npt, ifd.ifp); + + return (ifd.error); +} + +static int +modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs, + struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt) +{ + if_t ifp = NULL; + struct epoch_tracker et; + + if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) { + /* + * Applications like ip(8) verify RTM_NEWLINK command + * existence by calling it with empty arguments. Always + * return "innocent" error in that case. + */ + NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field"); + return (EPERM); + } + + if (lattrs->ifi_index != 0) { + NET_EPOCH_ENTER(et); + ifp = ifnet_byindex_ref(lattrs->ifi_index); + NET_EPOCH_EXIT(et); + if (ifp == NULL) { + NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u", + lattrs->ifi_index); + return (ENOENT); + } + } + + if (ifp == NULL && lattrs->ifla_ifname != NULL) { + ifp = ifunit_ref(lattrs->ifla_ifname); + if (ifp == NULL) { + NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s", + lattrs->ifla_ifname); + return (ENOENT); + } + } + + MPASS(ifp != NULL); + + /* + * Modification request can address either + * 1) cloned interface, in which case we call the cloner-specific + * modification routine + * or + * 2) non-cloned (e.g. "physical") interface, in which case we call + * generic modification routine + */ + struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt }; + if (!ifc_modify_ifp_nl(ifp, &ifd)) + ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt); + + if_rele(ifp); + + return (ifd.error); +} + + +static int +rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct nlattr_bmask bm; + int error; + + struct nl_parsed_link attrs = {}; + error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); + if (error != 0) + return (error); + nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm); + + if (hdr->nlmsg_flags & NLM_F_CREATE) + return (create_link(hdr, &attrs, &bm, nlp, npt)); + else + return (modify_link(hdr, &attrs, &bm, nlp, npt)); +} + +static void +set_scope6(struct sockaddr *sa, uint32_t ifindex) +{ +#ifdef INET6 + if (sa != NULL && sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; + + if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) + in6_set_unicast_scopeid(&sa6->sin6_addr, ifindex); + } +#endif +} + +static bool +check_sa_family(const struct sockaddr *sa, int family, const char *attr_name, + struct nl_pstate *npt) +{ + if (sa == NULL || sa->sa_family == family) + return (true); + + nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d", + attr_name, family, sa->sa_family); + return (false); +} + +struct nl_parsed_ifa { + uint8_t ifa_family; + uint8_t ifa_prefixlen; + uint8_t ifa_scope; + uint32_t ifa_index; + uint32_t ifa_flags; + uint32_t ifaf_vhid; + uint32_t ifaf_flags; + struct sockaddr *ifa_address; + struct sockaddr *ifa_local; + struct sockaddr *ifa_broadcast; + struct ifa_cacheinfo *ifa_cacheinfo; + struct sockaddr *f_ifa_addr; + struct sockaddr *f_ifa_dst; +}; + +static int +nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt, + const void *arg __unused, void *target) +{ + if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) { + NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo", + nla->nla_type, NLA_DATA_LEN(nla)); + return (EINVAL); + } + *((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla); + return (0); +} + +#define _IN(_field) offsetof(struct ifaddrmsg, _field) +#define _OUT(_field) offsetof(struct nl_parsed_ifa, _field) +static const struct nlfield_parser nlf_p_ifa[] = { + { .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 }, + { .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 }, + { .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 }, + { .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 }, + { .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 }, +}; + +static const struct nlattr_parser nla_p_ifa_fbsd[] = { + { .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 }, + { .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 }, +}; +NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd); + +static const struct nlattr_parser nla_p_ifa[] = { + { .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip }, + { .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip }, + { .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip }, + { .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo }, + { .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 }, + { .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested }, +}; +#undef _IN +#undef _OUT + +static bool +post_p_ifa(void *_attrs, struct nl_pstate *npt) +{ + struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs; + + if (!check_sa_family(attrs->ifa_address, attrs->ifa_family, "IFA_ADDRESS", npt)) + return (false); + if (!check_sa_family(attrs->ifa_local, attrs->ifa_family, "IFA_LOCAL", npt)) + return (false); + if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt)) + return (false); + + set_scope6(attrs->ifa_address, attrs->ifa_index); + set_scope6(attrs->ifa_local, attrs->ifa_index); + + return (true); +} + +NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa); + + +/* + +{ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")}, + [ + {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")}, + {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")}, + {{nla_len=7, nla_type=IFA_LABEL}, "lo"}, + {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}, + {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]}, +--- + +{{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735}, + {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")}, + [ + {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")}, + {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}}, + {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]}, +*/ + +static uint8_t +ifa_get_scope(const struct ifaddr *ifa) +{ + const struct sockaddr *sa; + uint8_t addr_scope = RT_SCOPE_UNIVERSE; + + sa = ifa->ifa_addr; + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + { + struct in_addr addr; + addr = ((const struct sockaddr_in *)sa)->sin_addr; + if (IN_LOOPBACK(ntohl(addr.s_addr))) + addr_scope = RT_SCOPE_HOST; + else if (IN_LINKLOCAL(ntohl(addr.s_addr))) + addr_scope = RT_SCOPE_LINK; + break; + } +#endif +#ifdef INET6 + case AF_INET6: + { + const struct in6_addr *addr; + addr = &((const struct sockaddr_in6 *)sa)->sin6_addr; + if (IN6_IS_ADDR_LOOPBACK(addr)) + addr_scope = RT_SCOPE_HOST; + else if (IN6_IS_ADDR_LINKLOCAL(addr)) + addr_scope = RT_SCOPE_LINK; + break; + } +#endif + } + + return (addr_scope); +} + +#ifdef INET6 +static uint8_t +inet6_get_plen(const struct in6_addr *addr) +{ + + return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + + bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); +} +#endif + +static uint8_t +get_sa_plen(const struct sockaddr *sa) +{ +#ifdef INET + const struct in_addr *paddr; +#endif +#ifdef INET6 + const struct in6_addr *paddr6; +#endif + + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + paddr = &(((const struct sockaddr_in *)sa)->sin_addr); + return bitcount32(paddr->s_addr); +#endif +#ifdef INET6 + case AF_INET6: + paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr); + return inet6_get_plen(paddr6); +#endif + } + + return (0); +} + +#ifdef INET6 +static uint32_t +in6_flags_to_nl(uint32_t flags) +{ + uint32_t nl_flags = 0; + + if (flags & IN6_IFF_TEMPORARY) + nl_flags |= IFA_F_TEMPORARY; + if (flags & IN6_IFF_NODAD) + nl_flags |= IFA_F_NODAD; + if (flags & IN6_IFF_DEPRECATED) + nl_flags |= IFA_F_DEPRECATED; + if (flags & IN6_IFF_TENTATIVE) + nl_flags |= IFA_F_TENTATIVE; + if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0) + flags |= IFA_F_PERMANENT; + if (flags & IN6_IFF_DUPLICATED) + flags |= IFA_F_DADFAILED; + return (nl_flags); +} + +static uint32_t +nl_flags_to_in6(uint32_t flags) +{ + uint32_t in6_flags = 0; + + if (flags & IFA_F_TEMPORARY) + in6_flags |= IN6_IFF_TEMPORARY; + if (flags & IFA_F_NODAD) + in6_flags |= IN6_IFF_NODAD; + if (flags & IFA_F_DEPRECATED) + in6_flags |= IN6_IFF_DEPRECATED; + if (flags & IFA_F_TENTATIVE) + in6_flags |= IN6_IFF_TENTATIVE; + if (flags & IFA_F_DADFAILED) + in6_flags |= IN6_IFF_DUPLICATED; + + return (in6_flags); +} + +static void +export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia) +{ + struct ifa_cacheinfo ci = { + .cstamp = ia->ia6_createtime * 1000, + .tstamp = ia->ia6_updatetime * 1000, + .ifa_prefered = ia->ia6_lifetime.ia6t_pltime, + .ifa_valid = ia->ia6_lifetime.ia6t_vltime, + }; + + nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci); +} +#endif + +static void +export_cache_info(struct nl_writer *nw, struct ifaddr *ifa) +{ + switch (ifa->ifa_addr->sa_family) { +#ifdef INET6 + case AF_INET6: + export_cache_info6(nw, (struct in6_ifaddr *)ifa); + break; +#endif + } +} + +/* + * {'attrs': [('IFA_ADDRESS', '12.0.0.1'), + ('IFA_LOCAL', '12.0.0.1'), + ('IFA_LABEL', 'eth10'), + ('IFA_FLAGS', 128), + ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})], + */ +static bool +dump_iface_addr(struct nl_writer *nw, if_t ifp, struct ifaddr *ifa, + const struct nlmsghdr *hdr) +{ + struct ifaddrmsg *ifamsg; + struct sockaddr *sa = ifa->ifa_addr; + struct sockaddr *sa_dst = ifa->ifa_dstaddr; + + NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s", + ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); + + if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg))) + goto enomem; + + ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg); + ifamsg->ifa_family = sa->sa_family; + ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask); + ifamsg->ifa_flags = 0; // ifa_flags is useless + ifamsg->ifa_scope = ifa_get_scope(ifa); + ifamsg->ifa_index = if_getindex(ifp); + + if ((if_getflags(ifp) & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) { + /* P2P interface may have IPv6 LL with no dst address */ + dump_sa(nw, IFA_ADDRESS, sa_dst); + dump_sa(nw, IFA_LOCAL, sa); + } else { + dump_sa(nw, IFA_ADDRESS, sa); +#ifdef INET + /* + * In most cases, IFA_ADDRESS == IFA_LOCAL + * Skip IFA_LOCAL for anything except INET + */ + if (sa->sa_family == AF_INET) + dump_sa(nw, IFA_LOCAL, sa); +#endif + } + if (if_getflags(ifp) & IFF_BROADCAST) + dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr); + + nlattr_add_string(nw, IFA_LABEL, if_name(ifp)); + + uint32_t nl_ifa_flags = 0; +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; + nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags); + } +#endif + nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags); + + export_cache_info(nw, ifa); + + /* Store FreeBSD-specific attributes */ + int off = nlattr_add_nested(nw, IFA_FREEBSD); + if (off != 0) { + if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) { + uint32_t vhid = (uint32_t)(*carp_get_vhid_p)(ifa); + nlattr_add_u32(nw, IFAF_VHID, vhid); + } +#ifdef INET6 + if (sa->sa_family == AF_INET6) { + uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags; + + nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags); + } +#endif + + nlattr_set_len(nw, off); + } + + if (nlmsg_end(nw)) + return (true); +enomem: + NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s", + rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); + nlmsg_abort(nw); + return (false); +} + +static int +dump_iface_addrs(struct netlink_walkargs *wa, if_t ifp) +{ + struct ifaddr *ifa; + struct ifa_iter it; + int error = 0; + + for (ifa = ifa_iter_start(ifp, &it); ifa != NULL; ifa = ifa_iter_next(&it)) { + if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family) + continue; + if (ifa->ifa_addr->sa_family == AF_LINK) + continue; + if (prison_if(wa->cred, ifa->ifa_addr) != 0) + continue; + wa->count++; + if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr)) { + error = ENOMEM; + break; + } + wa->dumped++; + } + ifa_iter_finish(&it); + + return (error); +} + +static int +rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) +{ + if_t ifp; + int error = 0; + + struct nl_parsed_ifa attrs = {}; + error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs); + if (error != 0) + return (error); + + struct netlink_walkargs wa = { + .so = nlp, + .nw = npt->nw, + .cred = nlp_get_cred(nlp), + .family = attrs.ifa_family, + .hdr.nlmsg_pid = hdr->nlmsg_pid, + .hdr.nlmsg_seq = hdr->nlmsg_seq, + .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, + .hdr.nlmsg_type = NL_RTM_NEWADDR, + }; + + NL_LOG(LOG_DEBUG2, "Start dump"); + + if (attrs.ifa_index != 0) { + ifp = ifnet_byindex(attrs.ifa_index); + if (ifp == NULL) + error = ENOENT; + else + error = dump_iface_addrs(&wa, ifp); + } else { + struct if_iter it; + + for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) { + error = dump_iface_addrs(&wa, ifp); + if (error != 0) + break; + } + if_iter_finish(&it); + } + + NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); + + if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { + NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); + return (ENOMEM); + } + + return (error); +} + +#ifdef INET +static int +handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, + if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) +{ + int plen = attrs->ifa_prefixlen; + int if_flags = if_getflags(ifp); + struct sockaddr_in *addr, *dst; + + if (plen > 32) { + nlmsg_report_err_msg(npt, "invalid ifa_prefixlen"); + return (EINVAL); + }; + + if (if_flags & IFF_POINTOPOINT) { + /* + * Only P2P IFAs are allowed by the implementation. + */ + if (attrs->ifa_address == NULL || attrs->ifa_local == NULL) { + nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); + return (EINVAL); + } + addr = (struct sockaddr_in *)attrs->ifa_local; + dst = (struct sockaddr_in *)attrs->ifa_address; + } else { + /* + * Map the Netlink attributes to FreeBSD ifa layout. + * If only IFA_ADDRESS or IFA_LOCAL is set OR + * both are set to the same value => ifa is not p2p + * and the attribute value contains interface address. + * + * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and + * different), IFA_LOCAL contains an interface address and + * IFA_ADDRESS contains peer address. + */ + addr = (struct sockaddr_in *)attrs->ifa_local; + if (addr == NULL) + addr = (struct sockaddr_in *)attrs->ifa_address; + + if (addr == NULL) { + nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); + return (EINVAL); + } + + /* Generate broadcast address if not set */ + if ((if_flags & IFF_BROADCAST) && attrs->ifa_broadcast == NULL) { + uint32_t s_baddr; + struct sockaddr_in *sin_brd; + + if (plen == 31) + s_baddr = INADDR_BROADCAST; /* RFC 3021 */ + else { + uint32_t s_mask; + + s_mask = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0); + s_baddr = addr->sin_addr.s_addr | ~s_mask; + } + + sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd)); + if (sin_brd == NULL) + return (ENOMEM); + sin_brd->sin_family = AF_INET; + sin_brd->sin_len = sizeof(*sin_brd); + sin_brd->sin_addr.s_addr = s_baddr; + attrs->ifa_broadcast = (struct sockaddr *)sin_brd; + } + dst = (struct sockaddr_in *)attrs->ifa_broadcast; + } + + struct sockaddr_in mask = { + .sin_len = sizeof(struct sockaddr_in), + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0), + }; + struct in_aliasreq req = { + .ifra_addr = *addr, + .ifra_mask = mask, + .ifra_vhid = attrs->ifaf_vhid, + }; + if (dst != NULL) + req.ifra_dstaddr = *dst; + + return (in_control_ioctl(SIOCAIFADDR, &req, ifp, nlp_get_cred(nlp))); +} + +static int +handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, + if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct sockaddr *addr = attrs->ifa_local; + + if (addr == NULL) + addr = attrs->ifa_address; + + if (addr == NULL) { + nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL"); + return (EINVAL); + } + + struct ifreq req = { .ifr_addr = *addr }; + + return (in_control_ioctl(SIOCDIFADDR, &req, ifp, nlp_get_cred(nlp))); +} +#endif + +#ifdef INET6 +static int +handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, + if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct sockaddr_in6 *addr, *dst; + + if (attrs->ifa_prefixlen > 128) { + nlmsg_report_err_msg(npt, "invalid ifa_prefixlen"); + return (EINVAL); + } + + /* + * In IPv6 implementation, adding non-P2P address to the P2P interface + * is allowed. + */ + addr = (struct sockaddr_in6 *)(attrs->ifa_local); + dst = (struct sockaddr_in6 *)(attrs->ifa_address); + + if (addr == NULL) { + addr = dst; + dst = NULL; + } else if (dst != NULL) { + if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, &dst->sin6_addr)) { + /* + * Sometimes Netlink users fills in both attributes + * with the same address. It still means "non-p2p". + */ + dst = NULL; + } + } + + if (addr == NULL) { + nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); + return (EINVAL); + } + + uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags; + + uint32_t pltime = 0, vltime = 0; + if (attrs->ifa_cacheinfo != 0) { + pltime = attrs->ifa_cacheinfo->ifa_prefered; + vltime = attrs->ifa_cacheinfo->ifa_valid; + } + + struct sockaddr_in6 mask = { + .sin6_len = sizeof(struct sockaddr_in6), + .sin6_family = AF_INET6, + }; + ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen); + + struct in6_aliasreq req = { + .ifra_addr = *addr, + .ifra_prefixmask = mask, + .ifra_flags = flags, + .ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime }, + .ifra_vhid = attrs->ifaf_vhid, + }; + if (dst != NULL) + req.ifra_dstaddr = *dst; + + return (in6_control_ioctl(SIOCAIFADDR_IN6, &req, ifp, nlp_get_cred(nlp))); +} + +static int +handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, + if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct sockaddr_in6 *addr = (struct sockaddr_in6 *)attrs->ifa_local; + + if (addr == NULL) + addr = (struct sockaddr_in6 *)(attrs->ifa_address); + + if (addr == NULL) { + nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); + return (EINVAL); + } + + struct in6_ifreq req = { .ifr_addr = *addr }; + + return (in6_control_ioctl(SIOCDIFADDR_IN6, &req, ifp, nlp_get_cred(nlp))); +} +#endif + + +static int +rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) +{ + struct epoch_tracker et; + int error; + + struct nl_parsed_ifa attrs = {}; + error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs); + if (error != 0) + return (error); + + NET_EPOCH_ENTER(et); + if_t ifp = ifnet_byindex_ref(attrs.ifa_index); + NET_EPOCH_EXIT(et); + + if (ifp == NULL) { + nlmsg_report_err_msg(npt, "Unable to find interface with index %u", + attrs.ifa_index); + return (ENOENT); + } + int if_flags = if_getflags(ifp); + +#if defined(INET) || defined(INET6) + bool new = hdr->nlmsg_type == NL_RTM_NEWADDR; +#endif + + /* + * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL. + * The current ioctl-based KPI always does an implicit create-or-replace. + * It is not possible to specify fine-grained options. + */ + + switch (attrs.ifa_family) { +#ifdef INET + case AF_INET: + if (new) + error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt); + else + error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt); + break; +#endif +#ifdef INET6 + case AF_INET6: + if (new) + error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt); + else + error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt); + break; +#endif + default: + error = EAFNOSUPPORT; + } + + if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP)) + if_up(ifp); + + if_rele(ifp); + + return (error); +} + + +static void +rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd) +{ + struct nlmsghdr hdr = {}; + struct nl_writer nw; + uint32_t group = 0; + + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + group = RTNLGRP_IPV4_IFADDR; + break; +#endif +#ifdef INET6 + case AF_INET6: + group = RTNLGRP_IPV6_IFADDR; + break; +#endif + default: + NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d", + ifa->ifa_addr->sa_family); + return; + } + + if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, group, 0, + false)) { + NL_LOG(LOG_DEBUG, "error allocating group writer"); + return; + } + + hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR; + + dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr); + nlmsg_flush(&nw); +} + +static void +rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask) +{ + struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type }; + struct nl_writer nw; + + if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK, 0, + false)) { + NL_LOG(LOG_DEBUG, "error allocating group writer"); + return; + } + dump_iface(&nw, ifp, &hdr, if_flags_mask); + nlmsg_flush(&nw); +} + +static void +rtnl_handle_ifattach(void *arg, if_t ifp) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0); +} + +static void +rtnl_handle_ifdetach(void *arg, if_t ifp) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0); +} + +static void +rtnl_handle_iflink(void *arg, if_t ifp, int link_state __unused) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0); +} + +void +rtnl_handle_ifnet_event(if_t ifp, int if_flags_mask) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask); +} + +static const struct rtnl_cmd_handler cmd_handlers[] = { + { + .cmd = NL_RTM_GETLINK, + .name = "RTM_GETLINK", + .cb = &rtnl_handle_getlink, + .flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL, + }, + { + .cmd = NL_RTM_DELLINK, + .name = "RTM_DELLINK", + .cb = &rtnl_handle_dellink, + .priv = PRIV_NET_IFDESTROY, + .flags = RTNL_F_NOEPOCH, + }, + { + .cmd = NL_RTM_NEWLINK, + .name = "RTM_NEWLINK", + .cb = &rtnl_handle_newlink, + .priv = PRIV_NET_IFCREATE, + .flags = RTNL_F_NOEPOCH, + }, + { + .cmd = NL_RTM_GETADDR, + .name = "RTM_GETADDR", + .cb = &rtnl_handle_getaddr, + .flags = RTNL_F_ALLOW_NONVNET_JAIL, + }, + { + .cmd = NL_RTM_NEWADDR, + .name = "RTM_NEWADDR", + .cb = &rtnl_handle_addr, + .priv = PRIV_NET_ADDIFADDR, + .flags = RTNL_F_NOEPOCH, + }, + { + .cmd = NL_RTM_DELADDR, + .name = "RTM_DELADDR", + .cb = &rtnl_handle_addr, + .priv = PRIV_NET_DELIFADDR, + .flags = RTNL_F_NOEPOCH, + }, +}; + +static const struct nlhdr_parser *all_parsers[] = { + &ifmsg_parser, &ifa_parser, &ifa_fbsd_parser, +}; + +void +rtnl_iface_add_cloner(struct nl_cloner *cloner) +{ + sx_xlock(&rtnl_cloner_lock); + SLIST_INSERT_HEAD(&nl_cloners, cloner, next); + sx_xunlock(&rtnl_cloner_lock); +} + +void +rtnl_iface_del_cloner(struct nl_cloner *cloner) +{ + sx_xlock(&rtnl_cloner_lock); + SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next); + sx_xunlock(&rtnl_cloner_lock); +} + +void +rtnl_ifaces_init(void) +{ + ifattach_event = EVENTHANDLER_REGISTER( + ifnet_arrival_event, rtnl_handle_ifattach, NULL, + EVENTHANDLER_PRI_ANY); + ifdetach_event = EVENTHANDLER_REGISTER( + ifnet_departure_event, rtnl_handle_ifdetach, NULL, + EVENTHANDLER_PRI_ANY); + ifaddr_event = EVENTHANDLER_REGISTER( + rt_addrmsg, rtnl_handle_ifaddr, NULL, + EVENTHANDLER_PRI_ANY); + iflink_event = EVENTHANDLER_REGISTER( + ifnet_link_event, rtnl_handle_iflink, NULL, + EVENTHANDLER_PRI_ANY); + NL_VERIFY_PARSERS(all_parsers); + rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); +} + +void +rtnl_ifaces_destroy(void) +{ + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event); + EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event); + EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event); +} |