diff options
Diffstat (limited to 'sys/netpfil')
-rw-r--r-- | sys/netpfil/ipfw/ip_fw2.c | 7 | ||||
-rw-r--r-- | sys/netpfil/pf/if_pfsync.c | 235 | ||||
-rw-r--r-- | sys/netpfil/pf/pf.c | 366 | ||||
-rw-r--r-- | sys/netpfil/pf/pf.h | 9 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_if.c | 6 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_ioctl.c | 395 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_lb.c | 151 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_nl.c | 14 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_nl.h | 5 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_norm.c | 24 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_nv.c | 7 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_osfp.c | 2 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_ruleset.c | 10 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_syncookies.c | 8 |
14 files changed, 879 insertions, 360 deletions
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 3f810533b7fc..b59d8d08bf80 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -67,6 +67,7 @@ #include <net/route/nhop.h> #include <net/pfil.h> #include <net/vnet.h> +#include <net/if_gif.h> #include <net/if_pfsync.h> #include <netpfil/pf/pf_mtag.h> @@ -1757,6 +1758,12 @@ do { \ PULLUP_TO(hlen, ulp, struct ip); break; + case IPPROTO_ETHERIP: /* RFC 3378 */ + PULLUP_LEN(hlen, ulp, + sizeof(struct etherip_header) + + sizeof(struct ether_header)); + break; + case IPPROTO_PFSYNC: PULLUP_TO(hlen, ulp, struct pfsync_header); break; diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 585c196391c0..66bc99df2afa 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -153,6 +153,8 @@ static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { pfsync_in_eof, /* PFSYNC_ACT_EOF */ pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ + pfsync_in_ins, /* PFSYNC_ACT_INS_1500 */ + pfsync_in_upd, /* PFSYNC_ACT_UPD_1500 */ }; struct pfsync_q { @@ -165,9 +167,11 @@ struct pfsync_q { enum pfsync_q_id { PFSYNC_Q_INS_1301, PFSYNC_Q_INS_1400, + PFSYNC_Q_INS_1500, PFSYNC_Q_IACK, PFSYNC_Q_UPD_1301, PFSYNC_Q_UPD_1400, + PFSYNC_Q_UPD_1500, PFSYNC_Q_UPD_C, PFSYNC_Q_DEL_C, PFSYNC_Q_COUNT, @@ -176,6 +180,7 @@ enum pfsync_q_id { /* Functions for building messages for given queue */ static void pfsync_out_state_1301(struct pf_kstate *, void *); static void pfsync_out_state_1400(struct pf_kstate *, void *); +static void pfsync_out_state_1500(struct pf_kstate *, void *); static void pfsync_out_iack(struct pf_kstate *, void *); static void pfsync_out_upd_c(struct pf_kstate *, void *); static void pfsync_out_del_c(struct pf_kstate *, void *); @@ -184,9 +189,11 @@ static void pfsync_out_del_c(struct pf_kstate *, void *); static struct pfsync_q pfsync_qs[] = { { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, + { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, + { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; @@ -195,9 +202,11 @@ static struct pfsync_q pfsync_qs[] = { static u_int8_t pfsync_qid_sstate[] = { PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ + PFSYNC_S_INS, /* PFSYNC_Q_INS_1500 */ PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ + PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1500 */ PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ }; @@ -525,13 +534,15 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) struct pf_kstate *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_krule *r = NULL; - struct pfi_kkif *kif; + struct pfi_kkif *kif, *orig_kif; struct pfi_kkif *rt_kif = NULL; struct pf_kpooladdr *rpool_first; int error; + int n = 0; sa_family_t rt_af = 0; uint8_t rt = 0; - int n = 0; + sa_family_t wire_af, stack_af; + u_int8_t wire_proto, stack_proto; PF_RULES_RASSERT(); @@ -542,7 +553,11 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) return (EINVAL); } - if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { + /* + * Check interfaces early on. Do it before allocating memory etc. + * Because there is a high chance there will be a lot more such states. + */ + if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->pfs_1301.ifname); @@ -552,6 +567,30 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) } /* + * States created with floating interface policy can be synchronized to + * hosts with different interfaces, because they are bound to V_pfi_all. + * But s->orig_kif still points to a real interface. Don't abort + * importing the state if orig_kif does not exists on the importing host + * but the state is not interface-bound. + */ + if (msg_version == PFSYNC_MSG_VERSION_1500) { + orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname); + if (orig_kif == NULL) { + if (kif == V_pfi_all) { + orig_kif = kif; + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: unknown original interface:" + " %s\n", __func__, + sp->pfs_1500.orig_ifname); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ + } + } + } + + /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. */ @@ -565,10 +604,6 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) } else r = &V_pf_default_rule; - /* - * Check routing interface early on. Do it before allocating memory etc. - * because there is a high chance there will be a lot more such states. - */ switch (msg_version) { case PFSYNC_MSG_VERSION_1301: /* @@ -605,7 +640,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) rt_kif = rpool_first->kif; /* * Guess the AF of the route address, FreeBSD 13 does - * not support af-to so it should be safe. + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. */ rt_af = r->af; } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { @@ -618,10 +654,12 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) "because of different ruleset", __func__); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } + wire_af = stack_af = sp->pfs_1301.af; + wire_proto = stack_proto = sp->pfs_1301.proto; break; case PFSYNC_MSG_VERSION_1400: /* - * On FreeBSD 14 and above we're not taking any chances. + * On FreeBSD 14 we're not taking any chances. * We use the information synced to us. */ if (sp->pfs_1400.rt) { @@ -634,11 +672,35 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) } rt = sp->pfs_1400.rt; /* - * Guess the AF of the route address, FreeBSD 13 does - * not support af-to so it should be safe. + * Guess the AF of the route address, FreeBSD 14 does + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. */ rt_af = sp->pfs_1400.af; } + wire_af = stack_af = sp->pfs_1400.af; + wire_proto = stack_proto = sp->pfs_1400.proto; + break; + case PFSYNC_MSG_VERSION_1500: + /* + * On FreeBSD 15 and above we're not taking any chances. + * We use the information synced to us. + */ + if (sp->pfs_1500.rt) { + rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname); + if (rt_kif == NULL) { + DPFPRINTF(PF_DEBUG_MISC, + "%s: unknown route interface: %s", + __func__, sp->pfs_1500.rt_ifname); + return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); + } + rt = sp->pfs_1500.rt; + rt_af = sp->pfs_1500.rt_af; + } + wire_af = sp->pfs_1500.wire_af; + stack_af = sp->pfs_1500.stack_af; + wire_proto = sp->pfs_1500.wire_proto; + stack_proto = sp->pfs_1500.stack_proto; break; } @@ -665,8 +727,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) ks = &sp->pfs_1301.key[PF_SK_STACK]; #endif - if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || - PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || + if (wire_af != stack_af || + PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) || + PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); @@ -685,36 +748,19 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; - skw->proto = sp->pfs_1301.proto; - skw->af = sp->pfs_1301.af; + skw->proto = wire_proto; + skw->af = wire_af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; - sks->proto = sp->pfs_1301.proto; - sks->af = sp->pfs_1301.af; + sks->proto = stack_proto; + sks->af = stack_af; } /* copy to state */ - bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, sizeof(st->act.rt_addr)); st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; - st->expire = pf_get_uptime(); - if (sp->pfs_1301.expire) { - uint32_t timeout; - - timeout = r->timeout[sp->pfs_1301.timeout]; - if (!timeout) - timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; - - /* sp->expire may have been adaptively scaled by export. */ - st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; - } - - st->direction = sp->pfs_1301.direction; - st->act.log = sp->pfs_1301.log; - st->timeout = sp->pfs_1301.timeout; - st->act.rt = rt; st->act.rt_kif = rt_kif; st->act.rt_af = rt_af; @@ -722,6 +768,12 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) switch (msg_version) { case PFSYNC_MSG_VERSION_1301: st->state_flags = sp->pfs_1301.state_flags; + st->direction = sp->pfs_1301.direction; + st->act.log = sp->pfs_1301.log; + st->timeout = sp->pfs_1301.timeout; + if (rt) + bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); /* * In FreeBSD 13 pfsync lacks many attributes. Copy them * from the rule if possible. If rule can't be matched @@ -760,6 +812,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) break; case PFSYNC_MSG_VERSION_1400: st->state_flags = ntohs(sp->pfs_1400.state_flags); + st->direction = sp->pfs_1400.direction; + st->act.log = sp->pfs_1400.log; + st->timeout = sp->pfs_1400.timeout; st->act.qid = ntohs(sp->pfs_1400.qid); st->act.pqid = ntohs(sp->pfs_1400.pqid); st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); @@ -770,12 +825,47 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) st->act.max_mss = ntohs(sp->pfs_1400.max_mss); st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; + if (rt) + bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); + break; + case PFSYNC_MSG_VERSION_1500: + st->state_flags = ntohs(sp->pfs_1500.state_flags); + st->direction = sp->pfs_1500.direction; + st->act.log = sp->pfs_1500.log; + st->timeout = sp->pfs_1500.timeout; + st->act.qid = ntohs(sp->pfs_1500.qid); + st->act.pqid = ntohs(sp->pfs_1500.pqid); + st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe); + st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe); + st->act.rtableid = ntohl(sp->pfs_1500.rtableid); + st->act.min_ttl = sp->pfs_1500.min_ttl; + st->act.set_tos = sp->pfs_1500.set_tos; + st->act.max_mss = ntohs(sp->pfs_1500.max_mss); + st->act.set_prio[0] = sp->pfs_1500.set_prio[0]; + st->act.set_prio[1] = sp->pfs_1500.set_prio[1]; + if (rt) + bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); + if (sp->pfs_1500.tagname[0] != 0) + st->tag = pf_tagname2tag(sp->pfs_1500.tagname); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } + st->expire = pf_get_uptime(); + if (sp->pfs_1301.expire) { + uint32_t timeout; + timeout = r->timeout[st->timeout]; + if (!timeout) + timeout = V_pf_default_rule.timeout[st->timeout]; + + /* sp->expire may have been adaptively scaled by export. */ + st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; + } + if (! (st->act.rtableid == -1 || (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) goto cleanup; @@ -795,7 +885,7 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; - if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) + if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ @@ -1087,23 +1177,29 @@ pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) struct mbuf *mp; union pfsync_state_union *sa, *sp; int i, offp, total_len, msg_version, msg_len; + u_int8_t timeout, direction; + sa_family_t af; switch (action) { case PFSYNC_ACT_INS_1301: msg_len = sizeof(struct pfsync_state_1301); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_INS_1400: msg_len = sizeof(struct pfsync_state_1400); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1400; break; + case PFSYNC_ACT_INS_1500: + msg_len = sizeof(struct pfsync_state_1500); + msg_version = PFSYNC_MSG_VERSION_1500; + break; default: V_pfsyncstats.pfsyncs_badver++; return (-1); } + total_len = msg_len * count; + mp = m_pulldown(m, offset, total_len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; @@ -1114,13 +1210,26 @@ pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) for (i = 0; i < count; i++) { sp = (union pfsync_state_union *)((char *)sa + msg_len * i); + switch (msg_version) { + case PFSYNC_MSG_VERSION_1301: + case PFSYNC_MSG_VERSION_1400: + af = sp->pfs_1301.af; + timeout = sp->pfs_1301.timeout; + direction = sp->pfs_1301.direction; + break; + case PFSYNC_MSG_VERSION_1500: + af = sp->pfs_1500.wire_af; + timeout = sp->pfs_1500.timeout; + direction = sp->pfs_1500.direction; + break; + } + /* Check for invalid values. */ - if (sp->pfs_1301.timeout >= PFTM_MAX || + if (timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || - sp->pfs_1301.direction > PF_OUT || - (sp->pfs_1301.af != AF_INET && - sp->pfs_1301.af != AF_INET6)) { + direction > PF_OUT || + (af != AF_INET && af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; @@ -1213,23 +1322,28 @@ pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) struct pf_kstate *st; struct mbuf *mp; int sync, offp, i, total_len, msg_len, msg_version; + u_int8_t timeout; switch (action) { case PFSYNC_ACT_UPD_1301: msg_len = sizeof(struct pfsync_state_1301); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_UPD_1400: msg_len = sizeof(struct pfsync_state_1400); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1400; break; + case PFSYNC_ACT_UPD_1500: + msg_len = sizeof(struct pfsync_state_1500); + msg_version = PFSYNC_MSG_VERSION_1500; + break; default: V_pfsyncstats.pfsyncs_badact++; return (-1); } + total_len = msg_len * count; + mp = m_pulldown(m, offset, total_len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; @@ -1240,8 +1354,18 @@ pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) for (i = 0; i < count; i++) { sp = (union pfsync_state_union *)((char *)sa + msg_len * i); + switch (msg_version) { + case PFSYNC_MSG_VERSION_1301: + case PFSYNC_MSG_VERSION_1400: + timeout = sp->pfs_1301.timeout; + break; + case PFSYNC_MSG_VERSION_1500: + timeout = sp->pfs_1500.timeout; + break; + } + /* check for invalid values */ - if (sp->pfs_1301.timeout >= PFTM_MAX || + if (timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { @@ -1286,7 +1410,7 @@ pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); st->expire = pf_get_uptime(); - st->timeout = sp->pfs_1301.timeout; + st->timeout = timeout; } st->pfsync_time = time_uptime; @@ -1741,16 +1865,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) return (EINVAL); - data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); + data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); if ((error = copyin(ifr->ifr_cap_nv.buffer, data, ifr->ifr_cap_nv.length)) != 0) { - free(data, M_TEMP); + free(data, M_PF); return (error); } if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { - free(data, M_TEMP); + free(data, M_PF); return (EINVAL); } @@ -1758,7 +1882,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) pfsync_nvstatus_to_kstatus(nvl, &status); nvlist_destroy(nvl); - free(data, M_TEMP); + free(data, M_PF); error = pfsync_kstatus_to_softc(&status, sc); return (error); @@ -1787,6 +1911,14 @@ pfsync_out_state_1400(struct pf_kstate *st, void *buf) } static void +pfsync_out_state_1500(struct pf_kstate *st, void *buf) +{ + union pfsync_state_union *sp = buf; + + pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1500); +} + +static void pfsync_out_iack(struct pf_kstate *st, void *buf) { struct pfsync_ins_ack *iack = buf; @@ -2453,6 +2585,8 @@ pfsync_sstate_to_qid(u_int8_t sync_state) return PFSYNC_Q_INS_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_INS_1400; + case PFSYNC_MSG_VERSION_1500: + return PFSYNC_Q_INS_1500; } break; case PFSYNC_S_IACK: @@ -2463,6 +2597,8 @@ pfsync_sstate_to_qid(u_int8_t sync_state) return PFSYNC_Q_UPD_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_UPD_1400; + case PFSYNC_MSG_VERSION_1500: + return PFSYNC_Q_UPD_1500; } break; case PFSYNC_S_UPD_C: @@ -3019,6 +3155,7 @@ pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) break; case PFSYNC_MSG_VERSION_1301: case PFSYNC_MSG_VERSION_1400: + case PFSYNC_MSG_VERSION_1500: sc->sc_version = status->version; break; default: diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 8cd4fff95b15..ec6960180413 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -1667,7 +1667,6 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, #ifdef INET6 struct nd_neighbor_solicit nd; struct pf_addr *target; - u_short action, reason; if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6) goto copy; @@ -1676,7 +1675,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, case ND_NEIGHBOR_SOLICIT: if (multi) return (-1); - if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) + if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL, + pd->af)) return (-1); target = (struct pf_addr *)&nd.nd_ns_target; daddr = target; @@ -1684,7 +1684,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, case ND_NEIGHBOR_ADVERT: if (multi) return (-1); - if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) + if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL, + pd->af)) return (-1); target = (struct pf_addr *)&nd.nd_ns_target; saddr = target; @@ -2833,7 +2834,7 @@ pf_remove_state(struct pf_kstate *s) s->key[PF_SK_WIRE]->port[0], s->src.seqhi, s->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0, - s->act.rtableid); + s->act.rtableid, NULL); } LIST_REMOVE(s, entry); @@ -3632,6 +3633,18 @@ pf_translate_af(struct pf_pdesc *pd) pd->src = (struct pf_addr *)&ip4->ip_src; pd->dst = (struct pf_addr *)&ip4->ip_dst; pd->off = sizeof(struct ip); + if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_TCP; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_UDP; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_SCTP; + } break; case AF_INET6: ip6 = mtod(pd->m, struct ip6_hdr *); @@ -3649,6 +3662,18 @@ pf_translate_af(struct pf_pdesc *pd) pd->src = (struct pf_addr *)&ip6->ip6_src; pd->dst = (struct pf_addr *)&ip6->ip6_dst; pd->off = sizeof(struct ip6_hdr); + if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP; + pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP; + pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6; + } /* * If we're dealing with a reassembled packet we need to adjust @@ -4019,7 +4044,7 @@ pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th, optsoff = pd->off + sizeof(struct tcphdr); #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) if (olen < TCPOLEN_MINSACK || - !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) return (0); eoh = opts + olen; @@ -4055,7 +4080,7 @@ pf_build_tcp(const struct pf_krule *r, sa_family_t af, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, u_int sack, - int rtableid) + int rtableid, u_short *reason) { struct mbuf *m; int len, tlen; @@ -4095,13 +4120,16 @@ pf_build_tcp(const struct pf_krule *r, sa_family_t af, } m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) + if (m == NULL) { + REASON_SET(reason, PFRES_MEMORY); return (NULL); + } #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { + REASON_SET(reason, PFRES_MEMORY); m_freem(m); return (NULL); } @@ -4321,13 +4349,14 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, - int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) + int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid, + u_short *reason) { struct pf_send_entry *pfse; struct mbuf *m; m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags, - win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid); + win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid, reason); if (m == NULL) return; @@ -4335,6 +4364,7 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af, pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) { m_freem(m); + REASON_SET(reason, PFRES_MEMORY); return; } @@ -4396,9 +4426,10 @@ pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd, if (tcp_get_flags(th) & TH_FIN) ack++; pf_send_tcp(r, pd->af, pd->dst, - pd->src, th->th_dport, th->th_sport, - ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid); + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid, + reason); } } else if (pd->proto == IPPROTO_SCTP && (r->rule_flag & PFRULE_RETURN)) { @@ -4449,7 +4480,8 @@ pf_icmp_to_bandlim(uint8_t type) static void pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s, - struct pf_state_peer *src, struct pf_state_peer *dst) + struct pf_state_peer *src, struct pf_state_peer *dst, + u_short *reason) { /* * We are sending challenge ACK as a response to SYN packet, which @@ -4463,7 +4495,7 @@ pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s, pf_send_tcp(s->rule, pd->af, pd->dst, pd->src, pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0, - s->rule->rtableid); + s->rule->rtableid, reason); } static void @@ -5082,7 +5114,7 @@ pf_get_wscale(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, - pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -5107,7 +5139,7 @@ pf_get_mss(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, - pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -5601,6 +5633,9 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) *ctx->rm = ctx->pd->related_rule; break; } + PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED, + TAILQ_NEXT(r, entries)); + /* Don't count expired rule evaluations. */ pf_counter_u64_add(&r->evaluations, 1); PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot, r->skip[PF_SKIP_IFP]); @@ -5704,6 +5739,21 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) if (r->tag) ctx->tag = r->tag; if (r->anchor == NULL) { + + if (r->rule_flag & PFRULE_ONCE) { + uint32_t rule_flag; + + rule_flag = r->rule_flag; + if ((rule_flag & PFRULE_EXPIRED) == 0 && + atomic_cmpset_int(&r->rule_flag, rule_flag, + rule_flag | PFRULE_EXPIRED)) { + r->exptime = time_uptime; + } else { + r = TAILQ_NEXT(r, entries); + continue; + } + } + if (r->action == PF_MATCH) { /* * Apply translations before increasing counters, @@ -5781,6 +5831,7 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) r = TAILQ_NEXT(r, entries); } + return (ctx->test_status); } @@ -5960,7 +6011,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, if (r->rt) { /* * Set act.rt here instead of in pf_rule_to_actions() because - * it is applied only from the last pass rule. + * it is applied only from the last pass rule. For rules + * with the prefer-ipv6-nexthop option act.rt_af is a hint + * about AF of the forwarded packet and might be changed. */ pd->act.rt = r->rt; if (r->rt == PF_REPLYTO) @@ -6293,7 +6346,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0, - pd->act.rtableid); + pd->act.rtableid, &ctx->reason); REASON_SET(&ctx->reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -6741,8 +6794,12 @@ pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd, (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo || - (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || /* Require an exact/+1 sequence match on resets when possible */ + (SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) && + SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) { + /* Allow resets to match sequence window if ack is perfect match */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(pd, reason, th, @@ -6883,7 +6940,7 @@ pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, state->rule->return_ttl, M_SKIP_FIREWALL, - 0, 0, state->act.rtableid); + 0, 0, state->act.rtableid, reason); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; @@ -7008,7 +7065,8 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) pd->src, th->th_dport, th->th_sport, state->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, state->src.mss, 0, - M_SKIP_FIREWALL, 0, 0, state->act.rtableid); + M_SKIP_FIREWALL, 0, 0, state->act.rtableid, + reason); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || @@ -7041,7 +7099,8 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) state->dst.seqhi, 0, TH_SYN, 0, state->src.mss, 0, state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0, - state->tag, 0, state->act.rtableid); + state->tag, 0, state->act.rtableid, + reason); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != @@ -7056,13 +7115,15 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, state->src.max_win, 0, 0, 0, - state->tag, 0, state->act.rtableid); + state->tag, 0, state->act.rtableid, + reason); pf_send_tcp(state->rule, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], state->src.seqhi + 1, state->src.seqlo + 1, TH_ACK, state->dst.max_win, 0, 0, - M_SKIP_FIREWALL, 0, 0, state->act.rtableid); + M_SKIP_FIREWALL, 0, 0, state->act.rtableid, + reason); state->src.seqdiff = state->dst.seqhi - state->src.seqlo; state->dst.seqdiff = state->src.seqhi - @@ -7162,7 +7223,7 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) * ACK enables all parties (firewall and peers) * to get in sync again. */ - pf_send_challenge_ack(pd, *state, src, dst); + pf_send_challenge_ack(pd, *state, src, dst, reason); return (PF_DROP); } } @@ -7633,7 +7694,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) while (off < len) { struct sctp_paramhdr h; - if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL, + if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, pd->af)) return (PF_DROP); @@ -7653,7 +7714,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) return (PF_DROP); if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); if (in_nullhost(t)) @@ -7697,7 +7758,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) return (PF_DROP); if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0) break; @@ -7727,7 +7788,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) struct sctp_asconf_paramhdr ah; if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); ret = pf_multihome_scan(start + off + sizeof(ah), @@ -7742,7 +7803,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) struct sctp_asconf_paramhdr ah; if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); ret = pf_multihome_scan(start + off + sizeof(ah), ntohs(ah.ph.param_length) - sizeof(ah), pd, @@ -8024,7 +8085,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, ipoff2 = pd->off + ICMP_MINLEN; if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(ip)"); @@ -8045,6 +8106,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, return (PF_DROP); pd2.tot_len = ntohs(h2.ip_len); + pd2.ttl = h2.ip_ttl; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; @@ -8055,7 +8117,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, ipoff2 = pd->off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(ip6)"); @@ -8067,6 +8129,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, pd2.tot_len = ntohs(h2_6.ip6_plen) + sizeof(struct ip6_hdr); + pd2.ttl = h2_6.ip6_hlim; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; @@ -8107,7 +8170,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ - if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason, + if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " @@ -8303,7 +8366,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, int action; if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(udp)"); @@ -8434,7 +8497,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, int copyback = 0; int action; - if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason, + if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " @@ -8590,7 +8653,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, } if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN, - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short i" "(icmp)"); @@ -8710,7 +8773,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, } if (!pf_pull_hdr(pd->m, pd2.off, iih, - sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { + sizeof(struct icmp6_hdr), reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, "pf: ICMP error message too short " "(icmp6)"); @@ -8825,6 +8888,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, default: { int action; + /* + * Placeholder value, so future calls to pf_change_ap() + * don't try to update a NULL checksum pointer. + */ + pd->pcksum = &pd->sctp_dummy_sum; key.af = pd2.af; key.proto = pd2.proto; pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); @@ -8887,7 +8955,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, */ void * pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, - u_short *actionp, u_short *reasonp, sa_family_t af) + u_short *reasonp, sa_family_t af) { int iplen = 0; switch (af) { @@ -8897,12 +8965,7 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { - if (fragoff >= len) - ACTION_SET(actionp, PF_PASS); - else { - ACTION_SET(actionp, PF_DROP); - REASON_SET(reasonp, PFRES_FRAG); - } + REASON_SET(reasonp, PFRES_FRAG); return (NULL); } iplen = ntohs(h->ip_len); @@ -8919,7 +8982,6 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, #endif /* INET6 */ } if (m->m_pkthdr.len < off + len || iplen < off + len) { - ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } @@ -8974,9 +9036,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *m1, *md; - struct route ro; - const struct sockaddr *gw = &ro.ro_dst; - struct sockaddr_in *dst; + struct route_in6 ro; + union sockaddr_union rt_gw; + const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst; + union sockaddr_union *dst; struct ip *ip; struct ifnet *ifp = NULL; int error = 0; @@ -9071,10 +9134,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, ip = mtod(m0, struct ip *); bzero(&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(struct sockaddr_in); - dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + dst = (union sockaddr_union *)&ro.ro_dst; + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_addr = ip->ip_dst; + if (ifp) { /* Only needed in forward direction and route-to */ + bzero(&rt_gw, sizeof(rt_gw)); + ro.ro_flags |= RT_HAS_GW; + gw = &rt_gw; + switch (pd->act.rt_af) { +#ifdef INET + case AF_INET: + rt_gw.sin.sin_family = AF_INET; + rt_gw.sin.sin_len = sizeof(struct sockaddr_in); + rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + rt_gw.sin6.sin6_family = AF_INET6; + rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6); + pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr, + &pd->act.rt_addr, AF_INET6); + break; +#endif /* INET6 */ + default: + /* Normal af-to without route-to */ + break; + } + } if (pd->dir == PF_IN) { if (ip->ip_ttl <= IPTTLDEC) { @@ -9098,34 +9186,19 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, /* Use the gateway if needed. */ if (nh->nh_flags & NHF_GATEWAY) { - gw = &nh->gw_sa; + gw = (const union sockaddr_union *)&nh->gw_sa; ro.ro_flags |= RT_HAS_GW; } else { - dst->sin_addr = ip->ip_dst; + dst->sin.sin_addr = ip->ip_dst; } - - /* - * Bind to the correct interface if we're - * if-bound. We don't know which interface - * that will be until here, so we've inserted - * the state on V_pf_all. Fix that now. - */ - if (s->kif == V_pfi_all && ifp != NULL && - r->rule_flag & PFRULE_IFBOUND) - s->kif = ifp->if_pf_kif; } } - - if (r->rule_flag & PFRULE_IFBOUND && - pd->act.rt == PF_REPLYTO && - s->kif == V_pfi_all) { - s->kif = pd->act.rt_kif; - s->orig_kif = oifp->if_pf_kif; - } - PF_STATE_UNLOCK(s); } + /* It must have been either set from rt_af or from fib4_lookup */ + KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__)); + if (ifp == NULL) { m0 = pd->m; pd->m = NULL; @@ -9134,6 +9207,20 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, goto bad; } + /* + * Bind to the correct interface if we're if-bound. We don't know which + * interface that will be until here, so we've inserted the state + * on V_pf_all. Fix that now. + */ + if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) { + /* Verify that we're here because of BOUND_IFACE */ + MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN)); + s->kif = ifp->if_pf_kif; + if (pd->act.rt == PF_REPLYTO) { + s->orig_kif = oifp->if_pf_kif; + } + } + if (r->rt == PF_DUPTO) skip_test = true; @@ -9210,9 +9297,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, m_clrprotoflags(m0); /* Avoid confusing lower layers. */ md = m0; - error = pf_dummynet_route(pd, s, r, ifp, gw, &md); + error = pf_dummynet_route(pd, s, r, ifp, + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } goto done; @@ -9253,9 +9342,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, md = m0; pd->pf_mtag = pf_find_mtag(md); error = pf_dummynet_route(pd, s, r, ifp, - gw, &md); + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, + (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } } else @@ -9410,26 +9501,8 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, sizeof(dst.sin6_addr)); else dst.sin6_addr = ip6->ip6_dst; - - /* - * Bind to the correct interface if we're - * if-bound. We don't know which interface - * that will be until here, so we've inserted - * the state on V_pf_all. Fix that now. - */ - if (s->kif == V_pfi_all && ifp != NULL && - r->rule_flag & PFRULE_IFBOUND) - s->kif = ifp->if_pf_kif; } } - - if (r->rule_flag & PFRULE_IFBOUND && - pd->act.rt == PF_REPLYTO && - s->kif == V_pfi_all) { - s->kif = pd->act.rt_kif; - s->orig_kif = oifp->if_pf_kif; - } - PF_STATE_UNLOCK(s); } @@ -9451,6 +9524,20 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, goto bad; } + /* + * Bind to the correct interface if we're if-bound. We don't know which + * interface that will be until here, so we've inserted the state + * on V_pf_all. Fix that now. + */ + if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) { + /* Verify that we're here because of BOUND_IFACE */ + MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN)); + s->kif = ifp->if_pf_kif; + if (pd->act.rt == PF_REPLYTO) { + s->orig_kif = oifp->if_pf_kif; + } + } + if (r->rt == PF_DUPTO) skip_test = true; @@ -9692,6 +9779,7 @@ pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r, const struct pf_kstate *s, struct ip_fw_args *dnflow) { int dndir = r->direction; + sa_family_t af = pd->naf; if (s && dndir == PF_INOUT) { dndir = s->direction; @@ -9732,20 +9820,46 @@ pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r, dnflow->f_id.proto = pd->proto; dnflow->f_id.extra = dnflow->rule.info; - switch (pd->naf) { + if (s) + af = s->key[PF_SK_STACK]->af; + + switch (af) { case AF_INET: dnflow->f_id.addr_type = 4; - dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); - dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); + if (s) { + dnflow->f_id.src_ip = htonl( + s->key[PF_SK_STACK]->addr[pd->sidx].v4.s_addr); + dnflow->f_id.dst_ip = htonl( + s->key[PF_SK_STACK]->addr[pd->didx].v4.s_addr); + } else { + dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); + dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); + } break; case AF_INET6: - dnflow->flags |= IPFW_ARGS_IP6; dnflow->f_id.addr_type = 6; - dnflow->f_id.src_ip6 = pd->src->v6; - dnflow->f_id.dst_ip6 = pd->dst->v6; + + if (s) { + dnflow->f_id.src_ip6 = + s->key[PF_SK_STACK]->addr[pd->sidx].v6; + dnflow->f_id.dst_ip6 = + s->key[PF_SK_STACK]->addr[pd->didx].v6; + } else { + dnflow->f_id.src_ip6 = pd->src->v6; + dnflow->f_id.dst_ip6 = pd->dst->v6; + } break; } + /* + * Separate this out, because while we pass the pre-NAT addresses to + * dummynet we want the post-nat address family in case of nat64. + * Dummynet may call ip_output/ip6_output itself, and we need it to + * call the correct one. + */ + if (pd->naf == AF_INET6) + dnflow->flags |= IPFW_ARGS_IP6; + return (true); } @@ -9962,9 +10076,12 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) pd->proto = h->ip_p; /* IGMP packets have router alert options, allow them */ if (pd->proto == IPPROTO_IGMP) { - /* According to RFC 1112 ttl must be set to 1. */ - if ((h->ip_ttl != 1) || - !IN_MULTICAST(ntohl(h->ip_dst.s_addr))) { + /* + * According to RFC 1112 ttl must be set to 1 in all IGMP + * packets sent to 224.0.0.1 + */ + if ((h->ip_ttl != 1) && + (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) { DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); @@ -9982,7 +10099,7 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) end < pd->off + sizeof(ext)) return (PF_PASS); if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET)) { + reason, AF_INET)) { DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr"); return (PF_DROP); } @@ -10008,7 +10125,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, while (off < end) { if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, - sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { + sizeof(opt.ip6o_type), reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type"); return (PF_DROP); } @@ -10016,7 +10133,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, off++; continue; } - if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL, + if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt"); return (PF_DROP); @@ -10041,7 +10158,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } - if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL, + if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo"); return (PF_DROP); @@ -10090,7 +10207,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) break; case IPPROTO_HOPOPTS: if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); return (PF_DROP); } @@ -10117,7 +10234,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_DROP); } if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment"); return (PF_DROP); } @@ -10145,7 +10262,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr"); return (PF_DROP); } @@ -10166,7 +10283,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) case IPPROTO_AH: case IPPROTO_DSTOPTS: if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); return (PF_DROP); } @@ -10199,7 +10316,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), - NULL, reason, AF_INET6)) { + reason, AF_INET6)) { DPFPRINTF(PF_DEBUG_MISC, "IPv6 short icmp6hdr"); return (PF_DROP); @@ -10432,7 +10549,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, case IPPROTO_TCP: { struct tcphdr *th = &pd->hdr.tcp; - if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action, + if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); @@ -10448,7 +10565,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, case IPPROTO_UDP: { struct udphdr *uh = &pd->hdr.udp; - if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action, + if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); @@ -10469,7 +10586,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } case IPPROTO_SCTP: { if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp), - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10499,7 +10616,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } case IPPROTO_ICMP: { if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10513,7 +10630,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, size_t icmp_hlen = sizeof(struct icmp6_hdr); if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10539,7 +10656,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } if (icmp_hlen > sizeof(struct icmp6_hdr) && !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10549,6 +10666,13 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, break; } #endif /* INET6 */ + default: + /* + * Placeholder value, so future calls to pf_change_ap() don't + * try to update a NULL checksum pointer. + */ + pd->pcksum = &pd->sctp_dummy_sum; + break; } if (pd->sport) @@ -10556,6 +10680,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (pd->dport) pd->odport = pd->ndport = *pd->dport; + MPASS(pd->pcksum != NULL); + return (0); } @@ -10826,7 +10952,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 /* Respond to SYN with a syncookie. */ if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN && pd.dir == PF_IN && pf_synflood_check(&pd)) { - pf_syncookie_send(&pd); + pf_syncookie_send(&pd, &reason); action = PF_DROP; break; } @@ -10850,7 +10976,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 pd.dir == PF_IN) { struct mbuf *msyn; - msyn = pf_syncookie_recreate_syn(&pd); + msyn = pf_syncookie_recreate_syn(&pd, &reason); if (msyn == NULL) { action = PF_DROP; break; diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 51b3fd6390e1..bcd66fd17d5d 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -131,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_POOL_ENDPI 0x40 +#define PF_POOL_IPV6NH 0x80 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f @@ -246,6 +247,12 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define SCNT_SRC_NODE_REMOVALS 2 #define SCNT_MAX 3 +/* fragment counters */ +#define NCNT_FRAG_SEARCH 0 +#define NCNT_FRAG_INSERT 1 +#define NCNT_FRAG_REMOVALS 2 +#define NCNT_MAX 3 + #define PF_TABLE_NAME_SIZE 32 #define PF_QNAME_SIZE 64 @@ -630,6 +637,8 @@ struct pf_rule { #define PFRULE_PFLOW 0x00040000 #define PFRULE_ALLOW_RELATED 0x00080000 #define PFRULE_AFTO 0x00200000 /* af-to rule */ +#define PFRULE_ONCE 0x00400000 /* one shot rule */ +#define PFRULE_EXPIRED 0x00800000 /* one shot rule hit by pkt */ #ifdef _KERNEL #define PFRULE_REFS 0x0080 /* rule has references */ diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c index e2200c15c704..f3be036ef745 100644 --- a/sys/netpfil/pf/pf_if.c +++ b/sys/netpfil/pf/pf_if.c @@ -655,8 +655,10 @@ pfi_kkif_update(struct pfi_kkif *kif) /* again for all groups kif is member of */ if (kif->pfik_ifp != NULL) { CK_STAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) - pfi_kkif_update((struct pfi_kkif *) - ifgl->ifgl_group->ifg_pf_kif); + if (ifgl->ifgl_group->ifg_pf_kif) { + pfi_kkif_update((struct pfi_kkif *) + ifgl->ifgl_group->ifg_pf_kif); + } } } diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index e5da05a958f6..d58af6e5ec4d 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -116,7 +116,6 @@ static int pf_rollback_altq(u_int32_t); static int pf_commit_altq(u_int32_t); static int pf_enable_altq(struct pf_altq *); static int pf_disable_altq(struct pf_altq *); -static uint16_t pf_qname2qid(const char *); static void pf_qid_unref(uint16_t); #endif /* ALTQ */ static int pf_begin_rules(u_int32_t *, int, const char *); @@ -187,6 +186,7 @@ VNET_DEFINE(uma_zone_t, pf_tag_z); #define V_pf_tag_z VNET(pf_tag_z) static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); +MALLOC_DEFINE(M_PF, "pf", "pf(4)"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE @@ -213,8 +213,7 @@ static void pf_init_tagset(struct pf_tagset *, unsigned int *, static void pf_cleanup_tagset(struct pf_tagset *); static uint16_t tagname2hashindex(const struct pf_tagset *, const char *); static uint16_t tag2hashindex(const struct pf_tagset *, uint16_t); -static u_int16_t tagname2tag(struct pf_tagset *, const char *); -static u_int16_t pf_tagname2tag(const char *); +static u_int16_t tagname2tag(struct pf_tagset *, const char *, bool); static void tag_unref(struct pf_tagset *, u_int16_t); struct cdev *pf_dev; @@ -285,6 +284,7 @@ int pf_end_threads; struct proc *pf_purge_proc; VNET_DEFINE(struct rmlock, pf_rules_lock); +VNET_DEFINE(struct rmlock, pf_tags_lock); VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock); #define V_pf_ioctl_lock VNET(pf_ioctl_lock) struct sx pf_end_lock; @@ -420,6 +420,8 @@ pfattach_vnet(void) pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK); for (int i = 0; i < SCNT_MAX; i++) V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < NCNT_MAX; i++) + V_pf_status.ncounters[i] = counter_u64_alloc(M_WAITOK); if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET, INTR_MPSAFE, &V_pf_swi_cookie) != 0) @@ -684,19 +686,50 @@ tag2hashindex(const struct pf_tagset *ts, uint16_t tag) } static u_int16_t -tagname2tag(struct pf_tagset *ts, const char *tagname) +tagname2tag(struct pf_tagset *ts, const char *tagname, bool add_new) { struct pf_tagname *tag; u_int32_t index; u_int16_t new_tagid; - PF_RULES_WASSERT(); + PF_TAGS_RLOCK_TRACKER; + + PF_TAGS_RLOCK(); index = tagname2hashindex(ts, tagname); TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; - return (tag->tag); + new_tagid = tag->tag; + PF_TAGS_RUNLOCK(); + return (new_tagid); + } + + /* + * When used for pfsync with queues we must not create new entries. + * Pf tags can be created just fine by this function, but queues + * require additional configuration. If they are missing on the target + * system we just ignore them + */ + if (add_new == false) { + printf("%s: Not creating a new tag\n", __func__); + PF_TAGS_RUNLOCK(); + return (0); + } + + /* + * If a new entry must be created do it under a write lock. + * But first search again, somebody could have created the tag + * between unlocking the read lock and locking the write lock. + */ + PF_TAGS_RUNLOCK(); + PF_TAGS_WLOCK(); + TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + new_tagid = tag->tag; + PF_TAGS_WUNLOCK(); + return (new_tagid); } /* @@ -713,16 +746,20 @@ tagname2tag(struct pf_tagset *ts, const char *tagname) * to rounding of the number of bits in the vector up to a multiple * of the vector word size at declaration/allocation time. */ - if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) + if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) { + PF_TAGS_WUNLOCK(); return (0); + } /* Mark the tag as in use. Bits are 0-based for BIT_CLR() */ BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail); /* allocate and fill new struct pf_tagname */ tag = uma_zalloc(V_pf_tag_z, M_NOWAIT); - if (tag == NULL) + if (tag == NULL) { + PF_TAGS_WUNLOCK(); return (0); + } strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref = 1; @@ -734,7 +771,29 @@ tagname2tag(struct pf_tagset *ts, const char *tagname) index = tag2hashindex(ts, new_tagid); TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries); - return (tag->tag); + PF_TAGS_WUNLOCK(); + return (new_tagid); +} + +static char * +tag2tagname(struct pf_tagset *ts, u_int16_t tag) +{ + struct pf_tagname *t; + uint16_t index; + + PF_TAGS_RLOCK_TRACKER; + + PF_TAGS_RLOCK(); + + index = tag2hashindex(ts, tag); + TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) + if (tag == t->tag) { + PF_TAGS_RUNLOCK(); + return (t->name); + } + + PF_TAGS_RUNLOCK(); + return (NULL); } static void @@ -743,7 +802,7 @@ tag_unref(struct pf_tagset *ts, u_int16_t tag) struct pf_tagname *t; uint16_t index; - PF_RULES_WASSERT(); + PF_TAGS_WLOCK(); index = tag2hashindex(ts, tag); TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) @@ -760,12 +819,20 @@ tag_unref(struct pf_tagset *ts, u_int16_t tag) } break; } + + PF_TAGS_WUNLOCK(); } -static uint16_t +uint16_t pf_tagname2tag(const char *tagname) { - return (tagname2tag(&V_pf_tags, tagname)); + return (tagname2tag(&V_pf_tags, tagname, true)); +} + +static const char * +pf_tag2tagname(uint16_t tag) +{ + return (tag2tagname(&V_pf_tags, tag)); } static int @@ -896,10 +963,16 @@ pf_commit_eth(uint32_t ticket, const char *anchor) } #ifdef ALTQ -static uint16_t -pf_qname2qid(const char *qname) +uint16_t +pf_qname2qid(const char *qname, bool add_new) { - return (tagname2tag(&V_pf_qids, qname)); + return (tagname2tag(&V_pf_qids, qname, add_new)); +} + +static const char * +pf_qid2qname(uint16_t qid) +{ + return (tag2tagname(&V_pf_qids, qid)); } static void @@ -1148,7 +1221,7 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) } bcopy(a1, a2, sizeof(struct pf_altq)); - if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + if ((a2->qid = pf_qname2qid(a2->qname, true)) == 0) { error = EBUSY; free(a2, M_PFALTQ); break; @@ -1181,18 +1254,18 @@ pf_rule_tree_alloc(int flags) { struct pf_krule_global *tree; - tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags); + tree = malloc(sizeof(struct pf_krule_global), M_PF, flags); if (tree == NULL) return (NULL); RB_INIT(tree); return (tree); } -static void +void pf_rule_tree_free(struct pf_krule_global *tree) { - free(tree, M_TEMP); + free(tree, M_PF); } static int @@ -1211,7 +1284,7 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) return (ENOMEM); rs = pf_find_or_create_kruleset(anchor); if (rs == NULL) { - free(tree, M_TEMP); + pf_rule_tree_free(tree); return (EINVAL); } pf_rule_tree_free(rs->rules[rs_num].inactive.tree); @@ -1432,7 +1505,7 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); - free(old_tree, M_TEMP); + pf_rule_tree_free(old_tree); return (0); } @@ -1603,7 +1676,7 @@ pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) #define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x #define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \ SATU32(q->pq_u.hfsc_opts.x) - + ASSIGN_OPT_SATU32(rtsc_m1); ASSIGN_OPT(rtsc_d); ASSIGN_OPT_SATU32(rtsc_m2); @@ -1617,7 +1690,7 @@ pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) ASSIGN_OPT_SATU32(ulsc_m2); ASSIGN_OPT(flags); - + #undef ASSIGN_OPT #undef ASSIGN_OPT_SATU32 } else @@ -1725,7 +1798,7 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) ASSIGN_OPT(ulsc_m2); ASSIGN_OPT(flags); - + #undef ASSIGN_OPT } else COPY(pq_u); @@ -1757,7 +1830,7 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) ASSIGN(qid); break; } - default: + default: panic("%s: unhandled struct pfioc_altq version", __func__); break; } @@ -2188,11 +2261,11 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { - if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0) ERROUT(EBUSY); else if (rule->pqname[0] != 0) { if ((rule->pqid = - pf_qname2qid(rule->pqname)) == 0) + pf_qname2qid(rule->pqname, true)) == 0) ERROUT(EBUSY); } else rule->pqid = rule->qid; @@ -2276,6 +2349,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, rule->nat.cur = TAILQ_FIRST(&rule->nat.list); rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list); rule->route.cur = TAILQ_FIRST(&rule->route.list); + rule->route.ipv6_nexthop_af = AF_INET6; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; @@ -2506,6 +2580,8 @@ pf_ioctl_clear_status(void) pf_counter_u64_zero(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_zero(V_pf_status.scounters[i]); + for (int i = 0; i < NCNT_MAX; i++) + counter_u64_zero(V_pf_status.ncounters[i]); for (int i = 0; i < KLCNT_MAX; i++) counter_u64_zero(V_pf_status.lcounters[i]); V_pf_status.since = time_uptime; @@ -3308,7 +3384,7 @@ DIOCGETETHRULE_error: #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { - if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0) error = EBUSY; else rule->qid = rule->qid; @@ -3859,11 +3935,11 @@ DIOCGETRULENV_error: /* set queue IDs */ if (newrule->qname[0] != 0) { if ((newrule->qid = - pf_qname2qid(newrule->qname)) == 0) + pf_qname2qid(newrule->qname, true)) == 0) error = EBUSY; else if (newrule->pqname[0] != 0) { if ((newrule->pqid = - pf_qname2qid(newrule->pqname)) == 0) + pf_qname2qid(newrule->pqname, true)) == 0) error = EBUSY; } else newrule->pqid = newrule->qid; @@ -4076,7 +4152,7 @@ DIOCCHANGERULE_error: out = ps->ps_states; pstore = mallocarray(slice_count, - sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { @@ -4098,10 +4174,10 @@ DIOCGETSTATES_retry: if (count > slice_count) { PF_HASHROW_UNLOCK(ih); - free(pstore, M_TEMP); + free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, - sizeof(struct pfsync_state_1301), M_TEMP, + sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATES_retry; } @@ -4123,13 +4199,15 @@ DIOCGETSTATES_retry: PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pfsync_state_1301) * count); - if (error) + if (error) { + free(pstore, M_PF); goto fail; + } out = ps->ps_states + nr; } DIOCGETSTATES_full: ps->ps_len = sizeof(struct pfsync_state_1301) * nr; - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -4155,7 +4233,7 @@ DIOCGETSTATES_full: out = ps->ps_states; pstore = mallocarray(slice_count, - sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { @@ -4177,10 +4255,10 @@ DIOCGETSTATESV2_retry: if (count > slice_count) { PF_HASHROW_UNLOCK(ih); - free(pstore, M_TEMP); + free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, - sizeof(struct pf_state_export), M_TEMP, + sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATESV2_retry; } @@ -4201,13 +4279,15 @@ DIOCGETSTATESV2_retry: PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pf_state_export) * count); - if (error) + if (error) { + free(pstore, M_PF); goto fail; + } out = ps->ps_states + nr; } DIOCGETSTATESV2_full: ps->ps_len = nr * sizeof(struct pf_state_export); - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -4390,7 +4470,7 @@ DIOCGETSTATESV2_full: * copy the necessary fields */ if (altq->qname[0] != 0) { - if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + if ((altq->qid = pf_qname2qid(altq->qname, true)) == 0) { PF_RULES_WUNLOCK(); error = EBUSY; free(altq, M_PFALTQ); @@ -4737,17 +4817,17 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4769,17 +4849,17 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4805,7 +4885,7 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_NOWAIT | M_ZERO); + M_PF, M_NOWAIT | M_ZERO); if (pfrts == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); @@ -4816,7 +4896,7 @@ DIOCCHANGEADDR_error: PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrts, io->pfrio_buffer, totlen); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4843,7 +4923,7 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_tstats); pfrtstats = mallocarray(io->pfrio_size, - sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); + sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); if (pfrtstats == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); @@ -4856,7 +4936,7 @@ DIOCCHANGEADDR_error: PF_TABLE_STATS_UNLOCK(); if (error == 0) error = copyout(pfrtstats, io->pfrio_buffer, totlen); - free(pfrtstats, M_TEMP); + free(pfrtstats, M_PF); break; } @@ -4881,10 +4961,10 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } @@ -4894,7 +4974,7 @@ DIOCCHANGEADDR_error: &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4922,10 +5002,10 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); + free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -4933,7 +5013,7 @@ DIOCCHANGEADDR_error: io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4968,10 +5048,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -4982,7 +5062,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5003,10 +5083,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5016,7 +5096,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5040,11 +5120,11 @@ DIOCCHANGEADDR_error: goto fail; } totlen = count * sizeof(struct pfr_addr); - pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP, + pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5055,7 +5135,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5076,14 +5156,14 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK | M_ZERO); + M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5104,14 +5184,14 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_astats); pfrastats = mallocarray(io->pfrio_size, - sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrastats, io->pfrio_buffer, totlen); - free(pfrastats, M_TEMP); + free(pfrastats, M_PF); break; } @@ -5132,10 +5212,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5145,7 +5225,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5166,10 +5246,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_RLOCK(); @@ -5179,7 +5259,7 @@ DIOCCHANGEADDR_error: PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5200,10 +5280,10 @@ DIOCCHANGEADDR_error: } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); + free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5211,7 +5291,7 @@ DIOCCHANGEADDR_error: io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5249,10 +5329,10 @@ DIOCCHANGEADDR_error: } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5262,7 +5342,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ETH: if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5270,13 +5350,13 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5291,7 +5371,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5300,7 +5380,7 @@ DIOCCHANGEADDR_error: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5308,7 +5388,7 @@ DIOCCHANGEADDR_error: } PF_RULES_WUNLOCK(); error = copyout(ioes, io->array, totlen); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5330,10 +5410,10 @@ DIOCCHANGEADDR_error: } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5344,7 +5424,7 @@ DIOCCHANGEADDR_error: if ((error = pf_rollback_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5352,13 +5432,13 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5373,7 +5453,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5382,14 +5462,14 @@ DIOCCHANGEADDR_error: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5415,10 +5495,10 @@ DIOCCHANGEADDR_error: totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); @@ -5431,7 +5511,7 @@ DIOCCHANGEADDR_error: if (ers == NULL || ioe->ticket == 0 || ioe->ticket != ers->inactive.ticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } @@ -5440,14 +5520,14 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5458,7 +5538,7 @@ DIOCCHANGEADDR_error: if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5467,7 +5547,7 @@ DIOCCHANGEADDR_error: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } @@ -5477,7 +5557,7 @@ DIOCCHANGEADDR_error: rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5490,7 +5570,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ETH: if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5498,7 +5578,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5513,7 +5593,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_commit(&table, ioe->ticket, NULL, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5522,7 +5602,7 @@ DIOCCHANGEADDR_error: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5536,7 +5616,7 @@ DIOCCHANGEADDR_error: else dehook_pf_eth(); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5565,7 +5645,7 @@ DIOCCHANGEADDR_error: nr = 0; - p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO); + p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO); for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); @@ -5584,11 +5664,11 @@ DIOCCHANGEADDR_error: error = copyout(pstore, psn->psn_src_nodes, sizeof(struct pf_src_node) * nr); if (error) { - free(pstore, M_TEMP); + free(pstore, M_PF); goto fail; } psn->psn_len = sizeof(struct pf_src_node) * nr; - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -5655,13 +5735,13 @@ DIOCCHANGEADDR_error: bufsiz = io->pfiio_size * sizeof(struct pfi_kif); ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif), - M_TEMP, M_WAITOK | M_ZERO); + M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); PF_RULES_RUNLOCK(); error = copyout(ifstore, io->pfiio_buffer, bufsiz); - free(ifstore, M_TEMP); + free(ifstore, M_PF); break; } @@ -5713,6 +5793,7 @@ fail: void pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version) { + const char *tagname; bzero(sp, sizeof(union pfsync_state_union)); /* copy from state key */ @@ -5724,8 +5805,6 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_ sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; - sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname)); @@ -5737,16 +5816,31 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_ else sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime); - sp->pfs_1301.direction = st->direction; - sp->pfs_1301.log = st->act.log; - sp->pfs_1301.timeout = st->timeout; - switch (msg_version) { case PFSYNC_MSG_VERSION_1301: sp->pfs_1301.state_flags = st->state_flags; + sp->pfs_1301.direction = st->direction; + sp->pfs_1301.log = st->act.log; + sp->pfs_1301.timeout = st->timeout; + sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; + /* + * XXX Why do we bother pfsyncing source node information if source + * nodes are not synced? Showing users that there is source tracking + * when there is none seems useless. + */ + if (st->sns[PF_SN_LIMIT] != NULL) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; break; case PFSYNC_MSG_VERSION_1400: sp->pfs_1400.state_flags = htons(st->state_flags); + sp->pfs_1400.direction = st->direction; + sp->pfs_1400.log = st->act.log; + sp->pfs_1400.timeout = st->timeout; + sp->pfs_1400.proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1400.af = st->key[PF_SK_WIRE]->af; sp->pfs_1400.qid = htons(st->act.qid); sp->pfs_1400.pqid = htons(st->act.pqid); sp->pfs_1400.dnpipe = htons(st->act.dnpipe); @@ -5762,22 +5856,53 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_ strlcpy(sp->pfs_1400.rt_ifname, st->act.rt_kif->pfik_name, sizeof(sp->pfs_1400.rt_ifname)); + /* + * XXX Why do we bother pfsyncing source node information if source + * nodes are not synced? Showing users that there is source tracking + * when there is none seems useless. + */ + if (st->sns[PF_SN_LIMIT] != NULL) + sp->pfs_1400.sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) + sp->pfs_1400.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + break; + case PFSYNC_MSG_VERSION_1500: + sp->pfs_1500.state_flags = htons(st->state_flags); + sp->pfs_1500.direction = st->direction; + sp->pfs_1500.log = st->act.log; + sp->pfs_1500.timeout = st->timeout; + sp->pfs_1500.wire_proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1500.wire_af = st->key[PF_SK_WIRE]->af; + sp->pfs_1500.stack_proto = st->key[PF_SK_STACK]->proto; + sp->pfs_1500.stack_af = st->key[PF_SK_STACK]->af; + sp->pfs_1500.qid = htons(st->act.qid); + sp->pfs_1500.pqid = htons(st->act.pqid); + sp->pfs_1500.dnpipe = htons(st->act.dnpipe); + sp->pfs_1500.dnrpipe = htons(st->act.dnrpipe); + sp->pfs_1500.rtableid = htonl(st->act.rtableid); + sp->pfs_1500.min_ttl = st->act.min_ttl; + sp->pfs_1500.set_tos = st->act.set_tos; + sp->pfs_1500.max_mss = htons(st->act.max_mss); + sp->pfs_1500.set_prio[0] = st->act.set_prio[0]; + sp->pfs_1500.set_prio[1] = st->act.set_prio[1]; + sp->pfs_1500.rt = st->act.rt; + sp->pfs_1500.rt_af = st->act.rt_af; + if (st->act.rt_kif) + strlcpy(sp->pfs_1500.rt_ifname, + st->act.rt_kif->pfik_name, + sizeof(sp->pfs_1500.rt_ifname)); + strlcpy(sp->pfs_1500.orig_ifname, + st->orig_kif->pfik_name, + sizeof(sp->pfs_1500.orig_ifname)); + if ((tagname = pf_tag2tagname(st->tag)) != NULL) + strlcpy(sp->pfs_1500.tagname, tagname, + sizeof(sp->pfs_1500.tagname)); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } - /* - * XXX Why do we bother pfsyncing source node information if source - * nodes are not synced? Showing users that there is source tracking - * when there is none seems useless. - */ - if (st->sns[PF_SN_LIMIT] != NULL) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; - sp->pfs_1301.id = st->id; sp->pfs_1301.creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->pfs_1301.src); @@ -6541,6 +6666,11 @@ shutdown_pf(void) pf_kill_srcnodes(NULL); + for (int i = 0; i < PF_RULESET_MAX; i++) { + pf_rule_tree_free(pf_main_ruleset.rules[i].active.tree); + pf_rule_tree_free(pf_main_ruleset.rules[i].inactive.tree); + } + /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ } while(0); @@ -6827,6 +6957,7 @@ pf_load_vnet(void) NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE); + rm_init_flags(&V_pf_tags_lock, "pf tags and queues", RM_RECURSE); sx_init(&V_pf_ioctl_lock, "pf ioctl"); pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize, @@ -6943,6 +7074,8 @@ pf_unload_vnet(void) pf_counter_u64_deinit(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_free(V_pf_status.scounters[i]); + for (int i = 0; i < NCNT_MAX; i++) + counter_u64_free(V_pf_status.ncounters[i]); rm_destroy(&V_pf_rules_lock); sx_destroy(&V_pf_ioctl_lock); @@ -6976,7 +7109,7 @@ vnet_pf_init(void *unused __unused) pf_load_vnet(); } -VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, +VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_init, NULL); static void @@ -6984,7 +7117,7 @@ vnet_pf_uninit(const void *unused __unused) { pf_unload_vnet(); -} +} SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL); VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_uninit, NULL); diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index bc9e1dc72902..b8b5157c9b15 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -545,11 +545,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, uint64_t hashidx; int cnt; sa_family_t wanted_af; + u_int8_t pool_type; + bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH; KASSERT(saf != 0, ("%s: saf == 0", __func__)); KASSERT(naf != NULL, ("%s: naf = NULL", __func__)); KASSERT((*naf) != 0, ("%s: *naf = 0", __func__)); + /* + * Given (*naf) is a hint about AF of the forwarded packet. + * It might be changed if prefer_ipv6_nexthop is enabled and + * the combination of nexthop AF and packet AF allows for it. + */ wanted_af = (*naf); mtx_lock(&rpool->mtx); @@ -594,19 +601,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - /* - * For single addresses check their address family. Unless they - * have none, which happens when addresses are added with - * the old ioctl mechanism. In such case trust that the address - * has the proper AF. - */ - if (rpool->cur->af && rpool->cur->af != wanted_af) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; + } + + /* + * For pools with a single host with the prefer-ipv6-nexthop option + * we can return pool address of any AF, unless the forwarded packet + * is IPv6, then we can return only if pool address is IPv6. + * For non-prefer-ipv6-nexthop we can return pool address only + * of wanted AF, unless the pool address'es AF is unknown, which + * happens in case old ioctls have been used to set up the pool. + * + * Round-robin pools have their own logic for retrying next addresses. + */ + pool_type = rpool->opts & PF_POOL_TYPEMASK; + if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK || + ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) && + rpool->cur->addr.type != PF_ADDR_TABLE && + rpool->cur->addr.type != PF_ADDR_DYNIFTL)) { + if (prefer_ipv6_nexthop) { + if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + wanted_af = rpool->cur->af; + } else { + if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } } - switch (rpool->opts & PF_POOL_TYPEMASK) { + switch (pool_type) { case PF_POOL_NONE: pf_addrcpy(naddr, raddr, wanted_af); break; @@ -631,10 +657,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_random: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_random; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else if (init_addr != NULL && PF_AZERO(init_addr, @@ -702,10 +740,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_srchash: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_srchash; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else { @@ -718,6 +768,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, { struct pf_kpooladdr *acur = rpool->cur; + retry_other_af_rr: + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, @@ -728,46 +781,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, true)) goto get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, - wanted_af)) + } else if (rpool->cur->af == wanted_af && + pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af)) goto get_addr; - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + /* Reset table index when changing wanted AF. */ + rpool->tblidx = -1; + rpool->ipv6_nexthop_af = AF_INET; + goto retry_other_af_rr; + } try_next: + /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */ + rpool->ipv6_nexthop_af = AF_INET6; if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + try_next_ipv6_nexthop_rr: + /* Reset table index when iterating pools or changing wanted AF. */ rpool->tblidx = -1; + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if (pfr_pool_get(rpool->cur->addr.p.tbl, + if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, NULL, - true)) { - /* table contains no address of type 'wanted_af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + true)) + goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, wanted_af, - pf_islinklocal, true)) { - /* interface has no address of type 'wanted_af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, + true)) + goto get_addr; } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - if (rpool->cur->af && rpool->cur->af != wanted_af) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; + if (rpool->cur->af == wanted_af) { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + pf_addrcpy(&rpool->counter, raddr, wanted_af); + goto get_addr; } - pf_addrcpy(&rpool->counter, raddr, wanted_af); } - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + rpool->ipv6_nexthop_af = AF_INET; + goto try_next_ipv6_nexthop_rr; + } + if (rpool->cur != acur) + goto try_next; + reason = PFRES_MAPFAILED; + goto done_pool_mtx; get_addr: pf_addrcpy(naddr, &rpool->counter, wanted_af); if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) @@ -777,9 +839,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, } } + if (wanted_af == 0) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + if (nkif) *nkif = rpool->cur->kif; + (*naf) = wanted_af; + done_pool_mtx: mtx_unlock(&rpool->mtx); diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 45b5b8dd5fef..082b9b565153 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -763,6 +763,8 @@ static const struct nlattr_parser nla_p_rule[] = { { .type = PF_RT_RCV_IFNOT, .off = _OUT(rcvifnot), .cb = nlattr_get_bool }, { .type = PF_RT_PKTRATE, .off = _OUT(pktrate), .arg = &threshold_parser, .cb = nlattr_get_nested }, { .type = PF_RT_MAX_PKT_SIZE, .off = _OUT(max_pkt_size), .cb = nlattr_get_uint16 }, + { .type = PF_RT_TYPE_2, .off = _OUT(type), .cb = nlattr_get_uint16 }, + { .type = PF_RT_CODE_2, .off = _OUT(code), .cb = nlattr_get_uint16 }, }; NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule); #undef _OUT @@ -984,8 +986,12 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u8(nw, PF_RT_AF, rule->af); nlattr_add_u8(nw, PF_RT_NAF, rule->naf); nlattr_add_u8(nw, PF_RT_PROTO, rule->proto); + nlattr_add_u8(nw, PF_RT_TYPE, rule->type); nlattr_add_u8(nw, PF_RT_CODE, rule->code); + nlattr_add_u16(nw, PF_RT_TYPE_2, rule->type); + nlattr_add_u16(nw, PF_RT_CODE_2, rule->code); + nlattr_add_u8(nw, PF_RT_FLAGS, rule->flags); nlattr_add_u8(nw, PF_RT_FLAGSET, rule->flagset); nlattr_add_u8(nw, PF_RT_MIN_TTL, rule->min_ttl); @@ -1019,6 +1025,7 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u64(nw, PF_RT_SRC_NODES_NAT, counter_u64_fetch(rule->src_nodes[PF_SN_NAT])); nlattr_add_u64(nw, PF_RT_SRC_NODES_ROUTE, counter_u64_fetch(rule->src_nodes[PF_SN_ROUTE])); nlattr_add_pf_threshold(nw, PF_RT_PKTRATE, &rule->pktrate); + nlattr_add_time_t(nw, PF_RT_EXPTIME, time_second - (time_uptime - rule->exptime)); error = pf_kanchor_copyout(ruleset, rule, anchor_call, sizeof(anchor_call)); MPASS(error == 0); @@ -1228,6 +1235,9 @@ pf_handle_get_status(struct nlmsghdr *hdr, struct nl_pstate *npt) V_pf_status.fcounters); nlattr_add_counters(nw, PF_GS_SCOUNTERS, SCNT_MAX, pf_fcounter, V_pf_status.scounters); + nlattr_add_counters(nw, PF_GS_NCOUNTERS, NCNT_MAX, pf_fcounter, + V_pf_status.ncounters); + nlattr_add_u64(nw, PF_GS_FRAGMENTS, pf_normalize_get_frag_count()); pfi_update_status(V_pf_status.ifname, &s); nlattr_add_u64_array(nw, PF_GS_BCOUNTERS, 2 * 2, (uint64_t *)s.bcounters); @@ -1945,7 +1955,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt) n = pfr_table_count(&attrs.pfrio_table, attrs.pfrio_flags); pfrtstats = mallocarray(n, - sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); + sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); error = pfr_get_tstats(&attrs.pfrio_table, pfrtstats, &n, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); @@ -1997,7 +2007,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt) } } } - free(pfrtstats, M_TEMP); + free(pfrtstats, M_PF); if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h index 87daac393821..c46c8f2b2592 100644 --- a/sys/netpfil/pf/pf_nl.h +++ b/sys/netpfil/pf/pf_nl.h @@ -283,6 +283,9 @@ enum pf_rule_type_t { PF_RT_SRC_NODES_ROUTE = 81, /* u64 */ PF_RT_PKTRATE = 82, /* nested, pf_threshold_type_t */ PF_RT_MAX_PKT_SIZE = 83, /* u16 */ + PF_RT_TYPE_2 = 84, /* u16 */ + PF_RT_CODE_2 = 85, /* u16 */ + PF_RT_EXPTIME = 86, /* time_t */ }; enum pf_addrule_type_t { @@ -350,6 +353,8 @@ enum pf_get_status_types_t { PF_GS_CHKSUM = 14, /* byte array */ PF_GS_PCOUNTERS = 15, /* u64 array */ PF_GS_BCOUNTERS = 16, /* u64 array */ + PF_GS_NCOUNTERS = 17, /* nested, */ + PF_GS_FRAGMENTS = 18, /* u64, */ }; enum pf_natlook_types_t { diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index a684d778ab42..53010222dd07 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -211,6 +211,12 @@ pf_normalize_cleanup(void) mtx_destroy(&V_pf_frag_mtx); } +uint64_t +pf_normalize_get_frag_count(void) +{ + return (uma_zone_get_cur(V_pf_frent_z)); +} + static int pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b) { @@ -314,6 +320,7 @@ pf_free_fragment(struct pf_fragment *frag) /* Free all fragment entries */ while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); m_freem(frent->fe_m); uma_zfree(V_pf_frent_z, frent); @@ -331,6 +338,7 @@ pf_find_fragment(struct pf_frnode *key, uint32_t id) PF_FRAG_ASSERT(); frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_SEARCH], 1); if (frnode == NULL) return (NULL); MPASS(frnode->fn_fragments >= 1); @@ -438,6 +446,7 @@ pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent, ("overlapping fragment")); TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); } + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_INSERT], 1); if (frag->fr_firstoff[index] == NULL) { KASSERT(prev == NULL || pf_frent_index(prev) < index, @@ -496,6 +505,7 @@ pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent) } TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); KASSERT(frag->fr_entries[index] > 0, ("No fragments remaining")); frag->fr_entries[index]--; @@ -768,6 +778,7 @@ pf_join_fragment(struct pf_fragment *frag) frent = TAILQ_FIRST(&frag->fr_queue); TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); m = frent->fe_m; if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len) @@ -775,6 +786,7 @@ pf_join_fragment(struct pf_fragment *frag) uma_zfree(V_pf_frent_z, frent); while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); m2 = frent->fe_m; /* Strip off ip header. */ @@ -1354,7 +1366,7 @@ pf_normalize_ip6(int off, u_short *reason, pf_rule_to_actions(r, &pd->act); } - if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), NULL, reason, AF_INET6)) + if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), reason, AF_INET6)) return (PF_DROP); /* Offset now points to data portion. */ @@ -1542,7 +1554,7 @@ pf_normalize_tcp_init(struct pf_pdesc *pd, struct tcphdr *th, olen = (th->th_off << 2) - sizeof(*th); if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m, - pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(*th), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -1645,7 +1657,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (olen >= TCPOLEN_TIMESTAMP && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && - pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) { + pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, pd->af)) { /* Modulate the timestamps. Can be used for NAT detection, OS * uptime determination or reboot detection. */ @@ -1975,7 +1987,7 @@ pf_normalize_mss(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); optsoff = pd->off + sizeof(struct tcphdr); if (olen < TCPOLEN_MAXSEG || - !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -2009,7 +2021,7 @@ pf_scan_sctp(struct pf_pdesc *pd) int ret; while (pd->off + chunk_off < pd->tot_len) { - if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL, + if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL, pd->af)) return (PF_DROP); @@ -2026,7 +2038,7 @@ pf_scan_sctp(struct pf_pdesc *pd) struct sctp_init_chunk init; if (!pf_pull_hdr(pd->m, pd->off + chunk_start, &init, - sizeof(init), NULL, NULL, pd->af)) + sizeof(init), NULL, pd->af)) return (PF_DROP); /* diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c index 89486928e6e1..2f484e2dabc6 100644 --- a/sys/netpfil/pf/pf_nv.c +++ b/sys/netpfil/pf/pf_nv.c @@ -505,6 +505,7 @@ int pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule) { int error = 0; + uint8_t tmp; #define ERROUT(x) ERROUT_FUNCTION(errout, x) @@ -610,8 +611,10 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule) PFNV_CHK(pf_nvuint8(nvl, "keep_state", &rule->keep_state)); PFNV_CHK(pf_nvuint8(nvl, "af", &rule->af)); PFNV_CHK(pf_nvuint8(nvl, "proto", &rule->proto)); - PFNV_CHK(pf_nvuint8(nvl, "type", &rule->type)); - PFNV_CHK(pf_nvuint8(nvl, "code", &rule->code)); + PFNV_CHK(pf_nvuint8(nvl, "type", &tmp)); + rule->type = tmp; + PFNV_CHK(pf_nvuint8(nvl, "code", &tmp)); + rule->code = tmp; PFNV_CHK(pf_nvuint8(nvl, "flags", &rule->flags)); PFNV_CHK(pf_nvuint8(nvl, "flagset", &rule->flagset)); PFNV_CHK(pf_nvuint8(nvl, "min_ttl", &rule->min_ttl)); diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c index 150626c5f3fb..8c041d45eae8 100644 --- a/sys/netpfil/pf/pf_osfp.c +++ b/sys/netpfil/pf/pf_osfp.c @@ -82,7 +82,7 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, const struct tcphdr *tcp) ip6 = mtod(pd->m, struct ip6_hdr *); break; } - if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, NULL, + if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, pd->af)) return (NULL); return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 039908a53126..4e16eaa76f9d 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -59,8 +59,8 @@ #error "Kernel only file. Please use sbin/pfctl/pf_ruleset.c instead." #endif -#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) -#define rs_free(x) free(x, M_TEMP) +#define rs_malloc(x) malloc(x, M_PF, M_NOWAIT|M_ZERO) +#define rs_free(x) free(x, M_PF) VNET_DEFINE(struct pf_kanchor_global, pf_anchors); VNET_DEFINE(struct pf_kanchor, pf_main_anchor); @@ -346,6 +346,12 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset) !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || ruleset->rules[i].inactive.open) return; + for (int i = 0; i < PF_RULESET_MAX; i++) { + pf_rule_tree_free(ruleset->rules[i].active.tree); + ruleset->rules[i].active.tree = NULL; + pf_rule_tree_free(ruleset->rules[i].inactive.tree); + ruleset->rules[i].inactive.tree = NULL; + } RB_REMOVE(pf_kanchor_global, &V_pf_anchors, ruleset->anchor); if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_kanchor_node, &parent->children, diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c index 4a935bc65767..d11551ffb6ae 100644 --- a/sys/netpfil/pf/pf_syncookies.c +++ b/sys/netpfil/pf/pf_syncookies.c @@ -287,7 +287,7 @@ pf_synflood_check(struct pf_pdesc *pd) } void -pf_syncookie_send(struct pf_pdesc *pd) +pf_syncookie_send(struct pf_pdesc *pd, u_short *reason) { uint16_t mss; uint32_t iss; @@ -297,7 +297,7 @@ pf_syncookie_send(struct pf_pdesc *pd) pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport, iss, ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss, 0, M_SKIP_FIREWALL | (pd->m->m_flags & M_LOOP), 0, 0, - pd->act.rtableid); + pd->act.rtableid, reason); counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_SENT], 1); /* XXX Maybe only in adaptive mode? */ atomic_add_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven], @@ -495,7 +495,7 @@ pf_syncookie_generate(struct pf_pdesc *pd, uint16_t mss) } struct mbuf * -pf_syncookie_recreate_syn(struct pf_pdesc *pd) +pf_syncookie_recreate_syn(struct pf_pdesc *pd, u_short *reason) { uint8_t wscale; uint16_t mss; @@ -516,5 +516,5 @@ pf_syncookie_recreate_syn(struct pf_pdesc *pd) return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport, *pd->dport, seq, 0, TH_SYN, wscale, mss, pd->ttl, (pd->m->m_flags & M_LOOP), 0, PF_MTAG_FLAG_SYNCOOKIE_RECREATED, - cookie.flags.sack_ok, pd->act.rtableid)); + cookie.flags.sack_ok, pd->act.rtableid, reason)); } |