diff options
Diffstat (limited to 'sys/netpfil')
29 files changed, 2715 insertions, 2108 deletions
diff --git a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c index 04850549db98..6eb6cf2a7a47 100644 --- a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c @@ -463,13 +463,14 @@ ipf_send_ip(fr_info_t *fin, mb_t *m) int ipf_send_icmp_err(int type, fr_info_t *fin, int dst) { - int err, hlen, xtra, iclen, ohlen, avail, code; + int err, hlen, xtra, iclen, ohlen, avail; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 + int code; ip6_t *ip6; #endif ip_t *ip, *ip2; @@ -477,8 +478,8 @@ ipf_send_icmp_err(int type, fr_info_t *fin, int dst) if ((type < 0) || (type >= ICMP_MAXTYPE)) return (-1); - code = fin->fin_icode; #ifdef USE_INET6 + code = fin->fin_icode; /* See NetBSD ip_fil_netbsd.c r1.4: */ if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return (-1); diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.c b/sys/netpfil/ipfilter/netinet/ip_htable.c index 91b375f80db1..5f5c04732d69 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.c +++ b/sys/netpfil/ipfilter/netinet/ip_htable.c @@ -96,6 +96,8 @@ typedef struct ipf_htable_softc_s { u_long ipf_nhtnodes[LOOKUP_POOL_SZ]; iphtable_t *ipf_htables[LOOKUP_POOL_SZ]; iphtent_t *ipf_node_explist; + ipftuneable_t *ipf_htable_tune; + u_int ipf_htable_size_max; } ipf_htable_softc_t; ipf_lookup_t ipf_htable_backend = { @@ -122,6 +124,18 @@ ipf_lookup_t ipf_htable_backend = { }; +static ipftuneable_t ipf_htable_tuneables[] = { + { { (void *)offsetof(ipf_htable_softc_t, ipf_htable_size_max) }, + "htable_size_max", 1, 0x7fffffff, + stsizeof(ipf_htable_softc_t, ipf_htable_size_max), + 0, NULL, NULL }, + { { NULL }, + NULL, 0, 0, + 0, + 0, NULL, NULL } +}; + + /* ------------------------------------------------------------------------ */ /* Function: ipf_htable_soft_create */ /* Returns: void * - NULL = failure, else pointer to local context */ @@ -142,6 +156,18 @@ 
ipf_htable_soft_create(ipf_main_softc_t *softc) bzero((char *)softh, sizeof(*softh)); + softh->ipf_htable_tune = ipf_tune_array_copy(softh, + sizeof(ipf_htable_tuneables), + ipf_htable_tuneables); + if (softh->ipf_htable_tune == NULL) { + ipf_htable_soft_destroy(softc, softh); + return (NULL); + } + if (ipf_tune_array_link(softc, softh->ipf_htable_tune) == -1) { + ipf_htable_soft_destroy(softc, softh); + return (NULL); + } + return (softh); } @@ -160,6 +186,12 @@ ipf_htable_soft_destroy(ipf_main_softc_t *softc, void *arg) { ipf_htable_softc_t *softh = arg; + if (softh->ipf_htable_tune != NULL) { + ipf_tune_array_unlink(softc, softh->ipf_htable_tune); + KFREES(softh->ipf_htable_tune, sizeof(ipf_htable_tuneables)); + softh->ipf_htable_tune = NULL; + } + KFREE(softh); } @@ -179,6 +211,8 @@ ipf_htable_soft_init(ipf_main_softc_t *softc, void *arg) bzero((char *)softh, sizeof(*softh)); + softh->ipf_htable_size_max = IPHTABLE_MAX_SIZE; + return (0); } @@ -230,6 +264,8 @@ ipf_htable_stats_get(ipf_main_softc_t *softc, void *arg, iplookupop_t *op) return (EINVAL); } + bzero(&stats, sizeof(stats)); + stats.iphs_tables = softh->ipf_htables[op->iplo_unit + 1]; stats.iphs_numtables = softh->ipf_nhtables[op->iplo_unit + 1]; stats.iphs_numnodes = softh->ipf_nhtnodes[op->iplo_unit + 1]; @@ -325,6 +361,15 @@ ipf_htable_create(ipf_main_softc_t *softc, void *arg, iplookupop_t *op) iph->iph_name[sizeof(iph->iph_name) - 1] = '\0'; } + if ((iph->iph_size == 0) || + (iph->iph_size > softh->ipf_htable_size_max)) { + IPFERROR(30027); + return (EINVAL); + } + if (iph->iph_size > ( SIZE_MAX / sizeof(*iph->iph_table))) { + IPFERROR(30028); + return (EINVAL); + } KMALLOCS(iph->iph_table, iphtent_t **, iph->iph_size * sizeof(*iph->iph_table)); if (iph->iph_table == NULL) { diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.h b/sys/netpfil/ipfilter/netinet/ip_htable.h index 55c289e57ff6..3a8782ccd4b2 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.h +++ 
b/sys/netpfil/ipfilter/netinet/ip_htable.h @@ -55,6 +55,8 @@ typedef struct iphtable_s { char iph_name[FR_GROUPLEN]; /* hash table number */ } iphtable_t; +#define IPHTABLE_MAX_SIZE 1024 + /* iph_type */ #define IPHASH_LOOKUP 0 #define IPHASH_GROUPMAP 1 diff --git a/sys/netpfil/ipfilter/netinet/ip_nat.c b/sys/netpfil/ipfilter/netinet/ip_nat.c index 972511f43bd5..53c180cdfbca 100644 --- a/sys/netpfil/ipfilter/netinet/ip_nat.c +++ b/sys/netpfil/ipfilter/netinet/ip_nat.c @@ -1771,6 +1771,7 @@ ipf_nat_getent(ipf_main_softc_t *softc, caddr_t data, int getlock) IPFERROR(60029); return (ENOMEM); } + bzero(ipn, ipns.ipn_dsize); if (getlock) { READ_ENTER(&softc->ipf_nat); diff --git a/sys/netpfil/ipfw/ip_dn_glue.c b/sys/netpfil/ipfw/ip_dn_glue.c deleted file mode 100644 index 0412b730e4df..000000000000 --- a/sys/netpfil/ipfw/ip_dn_glue.c +++ /dev/null @@ -1,858 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * - * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8 - */ - -#include "opt_inet6.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/time.h> -#include <sys/taskqueue.h> -#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ -#include <netinet/ip_fw.h> -#include <netinet/ip_dummynet.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#ifdef NEW_AQM -#include <netpfil/ipfw/dn_aqm.h> -#endif -#include <netpfil/ipfw/dn_sched.h> - -/* FREEBSD7.2 ip_dummynet.h r191715*/ - -struct dn_heap_entry7 { - int64_t key; /* sorting key. 
Topmost element is smallest one */ - void *object; /* object pointer */ -}; - -struct dn_heap7 { - int size; - int elements; - int offset; /* XXX if > 0 this is the offset of direct ptr to obj */ - struct dn_heap_entry7 *p; /* really an array of "size" entries */ -}; - -/* Common to 7.2 and 8 */ -struct dn_flow_set { - SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ - - u_short fs_nr ; /* flow_set number */ - u_short flags_fs; -#define DNOLD_HAVE_FLOW_MASK 0x0001 -#define DNOLD_IS_RED 0x0002 -#define DNOLD_IS_GENTLE_RED 0x0004 -#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ -#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */ -#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ -#define DNOLD_IS_PIPE 0x4000 -#define DNOLD_IS_QUEUE 0x8000 - - struct dn_pipe7 *pipe ; /* pointer to parent pipe */ - u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ - - int weight ; /* WFQ queue weight */ - int qsize ; /* queue size in slots or bytes */ - int plr[4] ; /* pkt loss rate (2^31-1 means 100%) */ - - struct ipfw_flow_id flow_mask ; - - /* hash table of queues onto this flow_set */ - int rq_size ; /* number of slots */ - int rq_elements ; /* active elements */ - struct dn_flow_queue7 **rq ; /* array of rq_size entries */ - - u_int32_t last_expired ; /* do not expire too frequently */ - int backlogged ; /* #active queues for this flowset */ - - /* RED parameters */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int 
c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ - u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ -}; -SLIST_HEAD(dn_flow_set_head, dn_flow_set); - -#define DN_IS_PIPE 0x4000 -#define DN_IS_QUEUE 0x8000 -struct dn_flow_queue7 { - struct dn_flow_queue7 *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - u_long numbytes; - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - u_int32_t q_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - int64_t sched_time ; /* current time when queue enters ready_heap */ - - int64_t S,F ; /* start time, finish time */ -}; - -struct dn_pipe7 { /* a pipe */ - SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - uint32_t bandwidth; /* really, bytes/tick. 
*/ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap7 not_eligible_heap; /* top extract- key Start time */ - struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */ - - int64_t V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - int numbytes; - - int64_t sched_time ; /* time pipe was scheduled in ready_heap */ - - /* - * When the tx clock come from an interface (if_name[0] != '\0'), its name - * is stored below, whereas the ifp is filled when the rule is configured. - */ - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ -}; -SLIST_HEAD(dn_pipe_head7, dn_pipe7); - -/* FREEBSD8 ip_dummynet.h r196045 */ -struct dn_flow_queue8 { - struct dn_flow_queue8 *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - uint64_t numbytes ; /* credit for transmission (dynamic queues) */ - int64_t extra_bits; /* extra bits simulating unavailable channel */ - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. 
(scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - int64_t idle_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - int64_t sched_time ; /* current time when queue enters ready_heap */ - - int64_t S,F ; /* start time, finish time */ -}; - -struct dn_pipe8 { /* a pipe */ - SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - uint32_t bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap7 not_eligible_heap; /* top extract- key Start time */ - struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */ - - int64_t V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - /* Same as in dn_flow_queue, numbytes can become large */ - int64_t numbytes; /* bits I can transmit (more or less). 
*/ - uint64_t burst; /* burst size, scaled: bits * hz */ - - int64_t sched_time ; /* time pipe was scheduled in ready_heap */ - int64_t idle_time; /* start of pipe idle time */ - - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ - - /* fields to simulate a delay profile */ -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int loss_level; - int samples_no; - int *samples; -}; - -#define ED_MAX_SAMPLES_NO 1024 -struct dn_pipe_max8 { - struct dn_pipe8 pipe; - int samples[ED_MAX_SAMPLES_NO]; -}; -SLIST_HEAD(dn_pipe_head8, dn_pipe8); - -/* - * Changes from 7.2 to 8: - * dn_pipe: - * numbytes from int to int64_t - * add burst (int64_t) - * add idle_time (int64_t) - * add profile - * add struct dn_pipe_max - * add flag DN_HAS_PROFILE - * - * dn_flow_queue - * numbytes from u_long to int64_t - * add extra_bits (int64_t) - * q_time from u_int32_t to int64_t and name idle_time - * - * dn_flow_set unchanged - * - */ - -/* NOTE:XXX copied from dummynet.c */ -#define O_NEXT(p, len) ((void *)((char *)p + len)) -static void -oid_fill(struct dn_id *oid, int len, int type, uintptr_t id) -{ - oid->len = len; - oid->type = type; - oid->subtype = 0; - oid->id = id; -} -/* make room in the buffer and move the pointer forward */ -static void * -o_next(struct dn_id **o, int len, int type) -{ - struct dn_id *ret = *o; - oid_fill(ret, len, type, 0); - *o = O_NEXT(*o, len); - return ret; -} - -static size_t pipesize7 = sizeof(struct dn_pipe7); -static size_t pipesize8 = sizeof(struct dn_pipe8); -static size_t pipesizemax8 = sizeof(struct dn_pipe_max8); - -/* Indicate 'ipfw' version - * 1: from FreeBSD 7.2 - * 0: from FreeBSD 8 - * -1: unknown (for now is unused) - * - * It is update when a IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives - * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown, - * it is suppose to be the FreeBSD 8 
version. - */ -static int is7 = 0; - -static int -convertflags2new(int src) -{ - int dst = 0; - - if (src & DNOLD_HAVE_FLOW_MASK) - dst |= DN_HAVE_MASK; - if (src & DNOLD_QSIZE_IS_BYTES) - dst |= DN_QSIZE_BYTES; - if (src & DNOLD_NOERROR) - dst |= DN_NOERROR; - if (src & DNOLD_IS_RED) - dst |= DN_IS_RED; - if (src & DNOLD_IS_GENTLE_RED) - dst |= DN_IS_GENTLE_RED; - if (src & DNOLD_HAS_PROFILE) - dst |= DN_HAS_PROFILE; - - return dst; -} - -static int -convertflags2old(int src) -{ - int dst = 0; - - if (src & DN_HAVE_MASK) - dst |= DNOLD_HAVE_FLOW_MASK; - if (src & DN_IS_RED) - dst |= DNOLD_IS_RED; - if (src & DN_IS_GENTLE_RED) - dst |= DNOLD_IS_GENTLE_RED; - if (src & DN_NOERROR) - dst |= DNOLD_NOERROR; - if (src & DN_HAS_PROFILE) - dst |= DNOLD_HAS_PROFILE; - if (src & DN_QSIZE_BYTES) - dst |= DNOLD_QSIZE_IS_BYTES; - - return dst; -} - -static int -dn_compat_del(void *v) -{ - struct dn_pipe7 *p = (struct dn_pipe7 *) v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *) v; - struct { - struct dn_id oid; - uintptr_t a[1]; /* add more if we want a list */ - } cmd; - - /* XXX DN_API_VERSION ??? */ - oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION); - - if (is7) { - if (p->pipe_nr == 0 && p->fs.fs_nr == 0) - return EINVAL; - if (p->pipe_nr != 0 && p->fs.fs_nr != 0) - return EINVAL; - } else { - if (p8->pipe_nr == 0 && p8->fs.fs_nr == 0) - return EINVAL; - if (p8->pipe_nr != 0 && p8->fs.fs_nr != 0) - return EINVAL; - } - - if (p->pipe_nr != 0) { /* pipe x delete */ - cmd.a[0] = p->pipe_nr; - cmd.oid.subtype = DN_LINK; - } else { /* queue x delete */ - cmd.oid.subtype = DN_FS; - cmd.a[0] = (is7) ? 
p->fs.fs_nr : p8->fs.fs_nr; - } - - return do_config(&cmd, cmd.oid.len); -} - -static int -dn_compat_config_queue(struct dn_fs *fs, void* v) -{ - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - struct dn_flow_set *f; - - if (is7) - f = &p7->fs; - else - f = &p8->fs; - - fs->fs_nr = f->fs_nr; - fs->sched_nr = f->parent_nr; - fs->flow_mask = f->flow_mask; - fs->buckets = f->rq_size; - fs->qsize = f->qsize; - fs->plr[0] = f->plr[0]; - fs->plr[1] = f->plr[1]; - fs->plr[2] = f->plr[2]; - fs->plr[3] = f->plr[3]; - fs->par[0] = f->weight; - fs->flags = convertflags2new(f->flags_fs); - if (fs->flags & DN_IS_GENTLE_RED || fs->flags & DN_IS_RED) { - fs->w_q = f->w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->max_p; - } - - return 0; -} - -static int -dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p, - struct dn_fs *fs, void* v) -{ - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - int i = p7->pipe_nr; - - sch->sched_nr = i; - sch->oid.subtype = 0; - p->link_nr = i; - fs->fs_nr = i + 2*DN_MAX_ID; - fs->sched_nr = i + DN_MAX_ID; - - /* Common to 7 and 8 */ - p->bandwidth = p7->bandwidth; - p->delay = p7->delay; - if (!is7) { - /* FreeBSD 8 has burst */ - p->burst = p8->burst; - } - - /* fill the fifo flowset */ - dn_compat_config_queue(fs, v); - fs->fs_nr = i + 2*DN_MAX_ID; - fs->sched_nr = i + DN_MAX_ID; - - /* Move scheduler related parameter from fs to sch */ - sch->buckets = fs->buckets; /*XXX*/ - fs->buckets = 0; - if (fs->flags & DN_HAVE_MASK) { - sch->flags |= DN_HAVE_MASK; - fs->flags &= ~DN_HAVE_MASK; - sch->sched_mask = fs->flow_mask; - bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id)); - } - - return 0; -} - -static int -dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p, - void *v) -{ - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - - p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]); - - pf->link_nr = p->link_nr; - 
pf->loss_level = p8->loss_level; -// pf->bandwidth = p->bandwidth; //XXX bandwidth redundant? - pf->samples_no = p8->samples_no; - strncpy(pf->name, p8->name,sizeof(pf->name)); - bcopy(p8->samples, pf->samples, sizeof(pf->samples)); - - return 0; -} - -/* - * If p->pipe_nr != 0 the command is 'pipe x config', so need to create - * the three main struct, else only a flowset is created - */ -static int -dn_compat_configure(void *v) -{ - struct dn_id *buf = NULL, *base; - struct dn_sch *sch = NULL; - struct dn_link *p = NULL; - struct dn_fs *fs = NULL; - struct dn_profile *pf = NULL; - int lmax; - int error; - - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - - int i; /* number of object to configure */ - - lmax = sizeof(struct dn_id); /* command header */ - lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) + - sizeof(struct dn_fs) + sizeof(struct dn_profile); - - base = buf = malloc(lmax, M_DUMMYNET, M_WAITOK|M_ZERO); - o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG); - base->id = DN_API_VERSION; - - /* pipe_nr is the same in p7 and p8 */ - i = p7->pipe_nr; - if (i != 0) { /* pipe config */ - sch = o_next(&buf, sizeof(*sch), DN_SCH); - p = o_next(&buf, sizeof(*p), DN_LINK); - fs = o_next(&buf, sizeof(*fs), DN_FS); - - error = dn_compat_config_pipe(sch, p, fs, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - if (!is7 && p8->samples_no > 0) { - /* Add profiles*/ - pf = o_next(&buf, sizeof(*pf), DN_PROFILE); - error = dn_compat_config_profile(pf, p, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - } - } else { /* queue config */ - fs = o_next(&buf, sizeof(*fs), DN_FS); - error = dn_compat_config_queue(fs, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - } - error = do_config(base, (char *)buf - (char *)base); - - if (buf) - free(buf, M_DUMMYNET); - return error; -} - -int -dn_compat_calc_size(void) -{ - int need = 0; - /* XXX use FreeBSD 8 struct size */ - /* NOTE: - * 
- half scheduler: schk_count/2 - * - all flowset: fsk_count - * - all flowset queues: queue_count - * - all pipe queue: si_count - */ - need += V_dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2; - need += V_dn_cfg.fsk_count * sizeof(struct dn_flow_set); - need += V_dn_cfg.si_count * sizeof(struct dn_flow_queue8); - need += V_dn_cfg.queue_count * sizeof(struct dn_flow_queue8); - - return need; -} - -int -dn_c_copy_q (void *_ni, void *arg) -{ - struct copy_args *a = arg; - struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start; - struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)*a->start; - struct dn_flow *ni = (struct dn_flow *)_ni; - int size = 0; - - /* XXX hash slot not set */ - /* No difference between 7.2/8 */ - fq7->len = ni->length; - fq7->len_bytes = ni->len_bytes; - fq7->id = ni->fid; - - if (is7) { - size = sizeof(struct dn_flow_queue7); - fq7->tot_pkts = ni->tot_pkts; - fq7->tot_bytes = ni->tot_bytes; - fq7->drops = ni->drops; - } else { - size = sizeof(struct dn_flow_queue8); - fq8->tot_pkts = ni->tot_pkts; - fq8->tot_bytes = ni->tot_bytes; - fq8->drops = ni->drops; - } - - *a->start += size; - return 0; -} - -int -dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq) -{ - struct dn_link *l = &s->link; - struct dn_fsk *f = s->fs; - - struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start; - struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start; - struct dn_flow_set *fs; - int size = 0; - - if (is7) { - fs = &pipe7->fs; - size = sizeof(struct dn_pipe7); - } else { - fs = &pipe8->fs; - size = sizeof(struct dn_pipe8); - } - - /* These 4 field are the same in pipe7 and pipe8 */ - pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE; - pipe7->bandwidth = l->bandwidth; - pipe7->delay = l->delay * 1000 / hz; - pipe7->pipe_nr = l->link_nr - DN_MAX_ID; - - if (!is7) { - if (s->profile) { - struct dn_profile *pf = s->profile; - strncpy(pipe8->name, pf->name, sizeof(pf->name)); - pipe8->loss_level = pf->loss_level; - pipe8->samples_no = 
pf->samples_no; - } - pipe8->burst = div64(l->burst , 8 * hz); - } - - fs->flow_mask = s->sch.sched_mask; - fs->rq_size = s->sch.buckets ? s->sch.buckets : 1; - - fs->parent_nr = l->link_nr - DN_MAX_ID; - fs->qsize = f->fs.qsize; - fs->plr[0] = f->fs.plr[0]; - fs->plr[1] = f->fs.plr[1]; - fs->plr[2] = f->fs.plr[2]; - fs->plr[3] = f->fs.plr[3]; - fs->w_q = f->fs.w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->fs.max_p; - fs->rq_elements = nq; - - fs->flags_fs = convertflags2old(f->fs.flags); - - *a->start += size; - return 0; -} - -int -dn_compat_copy_pipe(struct copy_args *a, void *_o) -{ - int have = a->end - *a->start; - int need = 0; - int pipe_size = sizeof(struct dn_pipe8); - int queue_size = sizeof(struct dn_flow_queue8); - int n_queue = 0; /* number of queues */ - - struct dn_schk *s = (struct dn_schk *)_o; - /* calculate needed space: - * - struct dn_pipe - * - if there are instances, dn_queue * n_instances - */ - n_queue = (s->sch.flags & DN_HAVE_MASK ? dn_ht_entries(s->siht) : - (s->siht ? 1 : 0)); - need = pipe_size + queue_size * n_queue; - if (have < need) { - D("have %d < need %d", have, need); - return 1; - } - /* copy pipe */ - dn_c_copy_pipe(s, a, n_queue); - - /* copy queues */ - if (s->sch.flags & DN_HAVE_MASK) - dn_ht_scan(s->siht, dn_c_copy_q, a); - else if (s->siht) - dn_c_copy_q(s->siht, a); - return 0; -} - -int -dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq) -{ - struct dn_flow_set *fs = (struct dn_flow_set *)*a->start; - - fs->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE; - fs->fs_nr = f->fs.fs_nr; - fs->qsize = f->fs.qsize; - fs->plr[0] = f->fs.plr[0]; - fs->plr[1] = f->fs.plr[1]; - fs->plr[2] = f->fs.plr[2]; - fs->plr[3] = f->fs.plr[3]; - fs->w_q = f->fs.w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->fs.max_p; - fs->flow_mask = f->fs.flow_mask; - fs->rq_elements = nq; - fs->rq_size = (f->fs.buckets ? 
f->fs.buckets : 1); - fs->parent_nr = f->fs.sched_nr; - fs->weight = f->fs.par[0]; - - fs->flags_fs = convertflags2old(f->fs.flags); - *a->start += sizeof(struct dn_flow_set); - return 0; -} - -int -dn_compat_copy_queue(struct copy_args *a, void *_o) -{ - int have = a->end - *a->start; - int need = 0; - int fs_size = sizeof(struct dn_flow_set); - int queue_size = sizeof(struct dn_flow_queue8); - - struct dn_fsk *fs = (struct dn_fsk *)_o; - int n_queue = 0; /* number of queues */ - - n_queue = (fs->fs.flags & DN_HAVE_MASK ? dn_ht_entries(fs->qht) : - (fs->qht ? 1 : 0)); - - need = fs_size + queue_size * n_queue; - if (have < need) { - D("have < need"); - return 1; - } - - /* copy flowset */ - dn_c_copy_fs(fs, a, n_queue); - - /* copy queues */ - if (fs->fs.flags & DN_HAVE_MASK) - dn_ht_scan(fs->qht, dn_c_copy_q, a); - else if (fs->qht) - dn_c_copy_q(fs->qht, a); - - return 0; -} - -int -copy_data_helper_compat(void *_o, void *_arg) -{ - struct copy_args *a = _arg; - - if (a->type == DN_COMPAT_PIPE) { - struct dn_schk *s = _o; - if (s->sch.oid.subtype != 1 || s->sch.sched_nr <= DN_MAX_ID) { - return 0; /* not old type */ - } - /* copy pipe parameters, and if instance exists, copy - * other parameters and eventually queues. - */ - if(dn_compat_copy_pipe(a, _o)) - return DNHT_SCAN_END; - } else if (a->type == DN_COMPAT_QUEUE) { - struct dn_fsk *fs = _o; - if (fs->fs.fs_nr >= DN_MAX_ID) - return 0; - if (dn_compat_copy_queue(a, _o)) - return DNHT_SCAN_END; - } - return 0; -} - -/* Main function to manage old requests */ -int -ip_dummynet_compat(struct sockopt *sopt) -{ - int error=0; - void *v = NULL; - struct dn_id oid; - - /* Length of data, used to found ipfw version... 
*/ - int len = sopt->sopt_valsize; - - /* len can be 0 if command was dummynet_flush */ - if (len == pipesize7) { - D("setting compatibility with FreeBSD 7.2"); - is7 = 1; - } - else if (len == pipesize8 || len == pipesizemax8) { - D("setting compatibility with FreeBSD 8"); - is7 = 0; - } - - switch (sopt->sopt_name) { - default: - printf("dummynet: -- unknown option %d", sopt->sopt_name); - error = EINVAL; - break; - - case IP_DUMMYNET_FLUSH: - oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION); - do_config(&oid, oid.len); - break; - - case IP_DUMMYNET_DEL: - v = malloc(len, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, v, len, len); - if (error) - break; - error = dn_compat_del(v); - free(v, M_TEMP); - break; - - case IP_DUMMYNET_CONFIGURE: - v = malloc(len, M_TEMP, M_NOWAIT); - if (v == NULL) { - error = ENOMEM; - break; - } - error = sooptcopyin(sopt, v, len, len); - if (error) - break; - error = dn_compat_configure(v); - free(v, M_TEMP); - break; - - case IP_DUMMYNET_GET: { - void *buf; - int ret; - int original_size = sopt->sopt_valsize; - int size; - - ret = dummynet_get(sopt, &buf); - if (ret) - return 0;//XXX ? 
- size = sopt->sopt_valsize; - sopt->sopt_valsize = original_size; - D("size=%d, buf=%p", size, buf); - ret = sooptcopyout(sopt, buf, size); - if (ret) - printf(" %s ERROR sooptcopyout\n", __FUNCTION__); - if (buf) - free(buf, M_DUMMYNET); - } - } - - return error; -} diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index 03116cb0641c..3a8de2b2bfee 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -43,6 +43,7 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/rwlock.h> +#include <sys/sdt.h> #include <sys/socket.h> #include <sys/time.h> #include <sys/sysctl.h> @@ -70,6 +71,9 @@ #endif #include <netpfil/ipfw/dn_sched.h> +SDT_PROVIDER_DEFINE(dummynet); +SDT_PROBE_DEFINE2(dummynet, , , drop, "struct mbuf *", "struct dn_queue *"); + /* * We keep a private variable for the simulation time, but we could * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) @@ -545,6 +549,7 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop) drop: V_dn_cfg.io_pkt_drop++; + SDT_PROBE2(dummynet, , , drop, m, q); q->ni.drops++; ni->drops++; FREE_PKT(m); @@ -1001,6 +1006,7 @@ done: dropit: V_dn_cfg.io_pkt_drop++; + SDT_PROBE2(dummynet, , , drop, m, q); DN_BH_WUNLOCK(); if (m) FREE_PKT(m); diff --git a/sys/netpfil/ipfw/ip_dn_private.h b/sys/netpfil/ipfw/ip_dn_private.h index 756a997b6ec3..9a43b86791e0 100644 --- a/sys/netpfil/ipfw/ip_dn_private.h +++ b/sys/netpfil/ipfw/ip_dn_private.h @@ -437,15 +437,7 @@ struct copy_args { }; struct sockopt; -int ip_dummynet_compat(struct sockopt *sopt); -int dummynet_get(struct sockopt *sopt, void **compat); -int dn_c_copy_q (void *_ni, void *arg); -int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq); -int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq); -int dn_compat_copy_queue(struct copy_args *a, void *_o); -int dn_compat_copy_pipe(struct copy_args *a, void *_o); -int copy_data_helper_compat(void *_o, void *_arg); -int 
dn_compat_calc_size(void); +int dummynet_get(struct sockopt *sopt); int do_config(void *p, size_t l); /* function to drain idle object */ diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index b3f52322425f..61442c617753 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -1150,7 +1150,7 @@ copy_data_helper(void *_o, void *_arg) return 0; /* not a pipe */ /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { + for (; r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; /* Found a valid entry, copy and we are done */ @@ -1183,7 +1183,7 @@ copy_data_helper(void *_o, void *_arg) if (n >= DN_MAX_ID) return 0; /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { + for (; r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; if (copy_flowset(a, fs, 0)) @@ -2198,9 +2198,6 @@ compute_space(struct dn_id *cmd, struct copy_args *a) case DN_FS: /* queue show */ x = DN_C_FS | DN_C_QUEUE; break; - case DN_GET_COMPAT: /* compatibility mode */ - need = dn_compat_calc_size(); - break; } a->flags = x; if (x & DN_C_SCH) { @@ -2226,11 +2223,9 @@ compute_space(struct dn_id *cmd, struct copy_args *a) } /* - * If compat != NULL dummynet_get is called in compatibility mode. 
- * *compat will be the pointer to the buffer to pass to ipfw */ int -dummynet_get(struct sockopt *sopt, void **compat) +dummynet_get(struct sockopt *sopt) { int have, i, need, error; char *start = NULL, *buf; @@ -2248,37 +2243,28 @@ dummynet_get(struct sockopt *sopt, void **compat) cmd = &r.o; - if (!compat) { - /* copy at least an oid, and possibly a full object */ - error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); - sopt->sopt_valsize = sopt_valsize; - if (error) - goto done; - l = cmd->len; + /* copy at least an oid, and possibly a full object */ + error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); + sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; + l = cmd->len; #ifdef EMULATE_SYSCTL - /* sysctl emulation. */ - if (cmd->type == DN_SYSCTL_GET) - return kesysctl_emu_get(sopt); + /* sysctl emulation. */ + if (cmd->type == DN_SYSCTL_GET) + return kesysctl_emu_get(sopt); #endif - if (l > sizeof(r)) { - /* request larger than default, allocate buffer */ - cmd = malloc(l, M_DUMMYNET, M_NOWAIT); - if (cmd == NULL) { - error = ENOMEM; - goto done; - } - error = sooptcopyin(sopt, cmd, l, l); - sopt->sopt_valsize = sopt_valsize; - if (error) - goto done; + if (l > sizeof(r)) { + /* request larger than default, allocate buffer */ + cmd = malloc(l, M_DUMMYNET, M_NOWAIT); + if (cmd == NULL) { + error = ENOMEM; + goto done; } - } else { /* compatibility */ - error = 0; - cmd->type = DN_CMD_GET; - cmd->len = sizeof(struct dn_id); - cmd->subtype = DN_GET_COMPAT; - // cmd->id = sopt_valsize; - D("compatibility mode"); + error = sooptcopyin(sopt, cmd, l, l); + sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; } #ifdef NEW_AQM @@ -2337,12 +2323,7 @@ dummynet_get(struct sockopt *sopt, void **compat) } if (start == NULL) { - if (compat) { - *compat = NULL; - error = 1; // XXX - } else { - error = sooptcopyout(sopt, cmd, sizeof(*cmd)); - } + error = sooptcopyout(sopt, cmd, sizeof(*cmd)); goto done; } ND("have %d:%d sched %d, %d:%d links %d, 
%d:%d flowsets %d, " @@ -2355,35 +2336,20 @@ dummynet_get(struct sockopt *sopt, void **compat) sopt->sopt_valsize = sopt_valsize; a.type = cmd->subtype; - if (compat == NULL) { - memcpy(start, cmd, sizeof(*cmd)); - ((struct dn_id*)(start))->len = sizeof(struct dn_id); - buf = start + sizeof(*cmd); - } else - buf = start; + memcpy(start, cmd, sizeof(*cmd)); + ((struct dn_id*)(start))->len = sizeof(struct dn_id); + buf = start + sizeof(*cmd); a.start = &buf; a.end = start + have; /* start copying other objects */ - if (compat) { - a.type = DN_COMPAT_PIPE; - dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper_compat, &a); - a.type = DN_COMPAT_QUEUE; - dn_ht_scan(V_dn_cfg.fshash, copy_data_helper_compat, &a); - } else if (a.type == DN_FS) { + if (a.type == DN_FS) { dn_ht_scan(V_dn_cfg.fshash, copy_data_helper, &a); } else { dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper, &a); } DN_BH_WUNLOCK(); - if (compat) { - *compat = start; - sopt->sopt_valsize = buf - start; - /* free() is done by ip_dummynet_compat() */ - start = NULL; //XXX hack - } else { - error = sooptcopyout(sopt, start, buf - start); - } + error = sooptcopyout(sopt, start, buf - start); done: if (cmd != &r.o) free(cmd, M_DUMMYNET); @@ -2519,17 +2485,9 @@ ip_dn_ctl(struct sockopt *sopt) error = EINVAL; break; - case IP_DUMMYNET_FLUSH: - case IP_DUMMYNET_CONFIGURE: - case IP_DUMMYNET_DEL: /* remove a pipe or queue */ - case IP_DUMMYNET_GET: - D("dummynet: compat option %d", sopt->sopt_name); - error = ip_dummynet_compat(sopt); - break; - case IP_DUMMYNET3: if (sopt->sopt_dir == SOPT_GET) { - error = dummynet_get(sopt, NULL); + error = dummynet_get(sopt); break; } l = sopt->sopt_valsize; diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 923633d76df7..d15d7760d7f1 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -67,6 +67,7 @@ #include <net/route/nhop.h> #include <net/pfil.h> #include <net/vnet.h> +#include <net/if_gif.h> #include <net/if_pfsync.h> #include 
<netpfil/pf/pf_mtag.h> @@ -196,7 +197,7 @@ SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Firewall"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, - "Only do a single pass through ipfw when using dummynet(4)"); + "Only do a single pass through ipfw when using dummynet(4), ipfw_nat or other divert(4)-like interfaces"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, "Rule number auto-increment step"); @@ -1757,6 +1758,12 @@ do { \ PULLUP_TO(hlen, ulp, struct ip); break; + case IPPROTO_ETHERIP: /* RFC 3378 */ + PULLUP_LEN(hlen, ulp, + sizeof(struct etherip_header) + + sizeof(struct ether_header)); + break; + case IPPROTO_PFSYNC: PULLUP_TO(hlen, ulp, struct pfsync_header); break; @@ -3571,11 +3578,9 @@ sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) /* * Stuff that must be initialised only on boot or module load */ -static int -ipfw_init(void) +static void +ipfw_init(void *dummy __unused) { - int error = 0; - /* * Only print out this stuff the first time around, * when called from the sysinit code. @@ -3620,14 +3625,13 @@ ipfw_init(void) ipfw_init_sopt_handler(); ipfw_init_obj_rewriter(); ipfw_iface_init(); - return (error); } /* * Called for the removal of the last instance only on module unload. 
*/ static void -ipfw_destroy(void) +ipfw_destroy(void *dummy __unused) { ipfw_iface_destroy(); @@ -3680,6 +3684,7 @@ vnet_ipfw_init(const void *unused) IPFW_LOCK_INIT(chain); + ipfw_dyn_init(chain); /* fill and insert the default rule */ rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); rule->flags |= IPFW_RULE_NOOPT; @@ -3689,7 +3694,6 @@ vnet_ipfw_init(const void *unused) chain->default_rule = rule; ipfw_add_protected_rule(chain, rule, 0); - ipfw_dyn_init(chain); ipfw_eaction_init(chain, first); ipfw_init_skipto_cache(chain); ipfw_bpf_init(first); diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index 40598cef8076..cfb686594c7c 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -1323,6 +1323,33 @@ dyn_lookup_ipv6_parent_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid, #endif /* INET6 */ +static int +dyn_handle_orphaned(struct ip_fw *old_rule, struct dyn_data *data) +{ + struct ip_fw *rule; + const ipfw_insn *cmd, *old_cmd; + + old_cmd = ACTION_PTR(old_rule); + switch (old_cmd->opcode) { + case O_SETMARK: + case O_SKIPTO: + /* + * Rule pointer was changed. For O_SKIPTO action it can be + * dangerous to keep use old rule. If new rule has the same + * action and the same destination number, then use this dynamic + * state. Otherwise it is better to create new one. + */ + rule = V_layer3_chain.map[data->f_pos]; + cmd = ACTION_PTR(rule); + if (cmd->opcode != old_cmd->opcode || + cmd->len != old_cmd->len || cmd->arg1 != old_cmd->arg1 || + insntoc(cmd, u32)->d[0] != insntoc(old_cmd, u32)->d[0]) + return (-1); + break; + } + return (0); +} + /* * Lookup dynamic state. * pkt - filled by ipfw_chk() ipfw_flow_id; @@ -1426,8 +1453,13 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp, * changed to point to the penultimate rule. 
*/ MPASS(V_layer3_chain.n_rules > 1); - data->chain_id = V_layer3_chain.id; - data->f_pos = V_layer3_chain.n_rules - 2; + if (dyn_handle_orphaned(rule, data) == 0) { + data->chain_id = V_layer3_chain.id; + data->f_pos = V_layer3_chain.n_rules - 2; + } else { + rule = NULL; + info->direction = MATCH_NONE; + } } else { rule = NULL; info->direction = MATCH_NONE; @@ -3109,6 +3141,43 @@ ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) #undef DYN_EXPORT_STATES } +/* + * When we have enabled V_dyn_keep_states, states that become ORPHANED + * will keep pointer to original rule. Then this rule pointer is used + * to apply rule action after ipfw_dyn_lookup_state(). + * Some rule actions use IPFW_INC_RULE_COUNTER() directly to this rule + * pointer, but other actions use chain->map[f_pos] instead. The last + * case leads to incrementing counters on the wrong rule, because + * ORPHANED states have not parent rule in chain->map[]. + * To solve this we add protected rule: + * count ip from any to any not // comment + * It will be matched only by packets that are handled by ORPHANED states. 
+ */ +static void +dyn_add_protected_rule(struct ip_fw_chain *chain) +{ + static const char *comment = + "orphaned dynamic states counter"; + struct ip_fw *rule; + ipfw_insn *cmd; + size_t l; + + l = roundup(strlen(comment) + 1, sizeof(uint32_t)); + rule = ipfw_alloc_rule(chain, sizeof(*rule) + sizeof(ipfw_insn) + l); + cmd = rule->cmd; + cmd->opcode = O_NOP; + cmd->len = 1 + l/sizeof(uint32_t); + cmd->len |= F_NOT; /* make rule to be not matched */ + strcpy((char *)(cmd + 1), comment); + cmd += F_LEN(cmd); + + cmd->len = 1; + cmd->opcode = O_COUNT; + rule->act_ofs = cmd - rule->cmd; + rule->cmd_len = rule->act_ofs + 1; + ipfw_add_protected_rule(chain, rule, 0); +} + void ipfw_dyn_init(struct ip_fw_chain *chain) { @@ -3171,6 +3240,8 @@ ipfw_dyn_init(struct ip_fw_chain *chain) callout_init(&V_dyn_timeout, 1); callout_reset(&V_dyn_timeout, hz, dyn_tick, curvnet); IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); + + dyn_add_protected_rule(chain); } void diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 1e2ff1bca290..8bd27f6885ab 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -999,9 +999,11 @@ ipfw_nat_del(struct sockopt *sopt) { struct cfg_nat *ptr; struct ip_fw_chain *chain = &V_layer3_chain; - int i; + int error, i; - sooptcopyin(sopt, &i, sizeof i, sizeof i); + error = sooptcopyin(sopt, &i, sizeof i, sizeof i); + if (error != 0) + return (error); /* XXX validate i */ IPFW_UH_WLOCK(chain); ptr = lookup_nat(&chain->nat, i); @@ -1104,7 +1106,7 @@ ipfw_nat_get_log(struct sockopt *sopt) { uint8_t *data; struct cfg_nat *ptr; - int i, size; + int error, i, size; struct ip_fw_chain *chain; IPFW_RLOCK_TRACKER; @@ -1134,9 +1136,9 @@ ipfw_nat_get_log(struct sockopt *sopt) i += LIBALIAS_BUF_SIZE; } IPFW_RUNLOCK(chain); - sooptcopyout(sopt, data, size); + error = sooptcopyout(sopt, data, size); free(data, M_IPFW); - return(0); + return (error); } static int @@ -1166,7 +1168,7 @@ 
vnet_ipfw_nat_uninit(const void *arg __unused) } static void -ipfw_nat_init(void) +ipfw_nat_init(void *dummy __unused) { /* init ipfw hooks */ @@ -1183,7 +1185,7 @@ ipfw_nat_init(void) } static void -ipfw_nat_destroy(void) +ipfw_nat_destroy(void *dummy __unused) { EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag); diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index 79b3ed43f63b..c490d2849a7d 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -489,6 +489,7 @@ struct obj_idx { struct rule_check_info { uint16_t flags; /* rule-specific check flags */ +#define IPFW_RCIFLAG_HAS_STATE 0x0001 uint16_t object_opcodes; /* num of opcodes referencing objects */ uint16_t urule_numoff; /* offset of rulenum in bytes */ uint8_t version; /* rule version */ diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index 19f5fff2749a..5d57759ffb00 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -1311,6 +1311,9 @@ ipfw_check_rule(struct ip_fw_rule *rule, size_t size, return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); } +#define CHECK_TARG(a, c) \ + ((a) == IP_FW_TARG && ((c)->flags & IPFW_RCIFLAG_HAS_STATE)) + enum ipfw_opcheck_result ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) { @@ -1326,6 +1329,7 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) if (cmdlen != F_INSN_SIZE(ipfw_insn_kidx)) return (BAD_SIZE); ci->object_opcodes++; + ci->flags |= IPFW_RCIFLAG_HAS_STATE; break; case O_PROTO: case O_IP_SRC_ME: @@ -1410,6 +1414,8 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) cmd->arg1 & 0x7FFF); return (FAILED); } + if (CHECK_TARG(cmd->arg1, ci)) + goto bad_targ; return (CHECK_ACTION); case O_UID: @@ -1518,11 +1524,16 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) case O_QUEUE: if (cmdlen != F_INSN_SIZE(ipfw_insn)) return 
(BAD_SIZE); + if (CHECK_TARG(cmd->arg1, ci)) + goto bad_targ; return (CHECK_ACTION); case O_FORWARD_IP: if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) return (BAD_SIZE); + if (insntoc(cmd, sa)->sa.sin_addr.s_addr == INADDR_ANY && + (ci->flags & IPFW_RCIFLAG_HAS_STATE)) + goto bad_targ; return (CHECK_ACTION); #ifdef INET6 case O_FORWARD_IP6: @@ -1537,6 +1548,8 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) return (FAILED); if (cmdlen != F_INSN_SIZE(ipfw_insn)) return (BAD_SIZE); + if (CHECK_TARG(cmd->arg1, ci)) + goto bad_targ; return (CHECK_ACTION); case O_NETGRAPH: case O_NGTEE: @@ -1544,12 +1557,16 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) return (FAILED); if (cmdlen != F_INSN_SIZE(ipfw_insn)) return (BAD_SIZE); + if (CHECK_TARG(cmd->arg1, ci)) + goto bad_targ; return (CHECK_ACTION); case O_NAT: if (!IPFW_NAT_LOADED) return (FAILED); if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) return (BAD_SIZE); + if (CHECK_TARG(cmd->arg1, ci)) + goto bad_targ; return (CHECK_ACTION); case O_SKIPTO: @@ -1557,6 +1574,11 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) case O_SETMARK: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) return (BAD_SIZE); + /* O_CALLRETURN + F_NOT means 'return' opcode. 
*/ + if (cmd->opcode != O_CALLRETURN || (cmd->len & F_NOT) == 0) { + if (CHECK_TARG(insntoc(cmd, u32)->d[0], ci)) + goto bad_targ; + } return (CHECK_ACTION); case O_CHECK_STATE: @@ -1577,6 +1599,8 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) case O_REASS: if (cmdlen != F_INSN_SIZE(ipfw_insn)) return (BAD_SIZE); + if (cmd->opcode == O_SETDSCP && CHECK_TARG(cmd->arg1, ci)) + goto bad_targ; return (CHECK_ACTION); #ifdef INET6 case O_IP6_SRC: @@ -1627,6 +1651,13 @@ ipfw_check_opcode(ipfw_insn **pcmd, int *plen, struct rule_check_info *ci) } } return (SUCCESS); +bad_targ: + /* + * For dynamic states we can not correctly initialize tablearg value, + * because we don't go through rule's opcodes except rule action. + */ + printf("ipfw: tablearg is not allowed with dynamic states\n"); + return (FAILED); } static __noinline int diff --git a/sys/netpfil/ipfw/pmod/tcpmod.c b/sys/netpfil/ipfw/pmod/tcpmod.c index 0338dc792c64..50074ee98cca 100644 --- a/sys/netpfil/ipfw/pmod/tcpmod.c +++ b/sys/netpfil/ipfw/pmod/tcpmod.c @@ -57,7 +57,8 @@ VNET_DEFINE_STATIC(uint32_t, tcpmod_setmss_eid) = 0; #define V_tcpmod_setmss_eid VNET(tcpmod_setmss_eid) static int -tcpmod_setmss(struct mbuf **mp, struct tcphdr *tcp, int tlen, uint16_t mss) +tcpmod_setmss(struct mbuf **mp, struct tcphdr *tcp, int tlen, uint16_t mss, + int *done) { struct mbuf *m; u_char *cp; @@ -72,8 +73,10 @@ tcpmod_setmss(struct mbuf **mp, struct tcphdr *tcp, int tlen, uint16_t mss) * TCP header with options. */ *mp = m = m_pullup(m, m->m_pkthdr.len); - if (m == NULL) + if (m == NULL) { + *done = 1; return (ret); + } } /* Parse TCP options. 
*/ for (tlen -= sizeof(struct tcphdr), cp = (u_char *)(tcp + 1); @@ -114,7 +117,7 @@ tcpmod_setmss(struct mbuf **mp, struct tcphdr *tcp, int tlen, uint16_t mss) #ifdef INET6 static int -tcpmod_ipv6_setmss(struct mbuf **mp, uint16_t mss) +tcpmod_ipv6_setmss(struct mbuf **mp, uint16_t mss, int *done) { struct ip6_hdr *ip6; struct ip6_hbh *hbh; @@ -142,13 +145,13 @@ tcpmod_ipv6_setmss(struct mbuf **mp, uint16_t mss) /* We must have TCP options and enough data in a packet. */ if (hlen <= sizeof(struct tcphdr) || hlen > plen) return (IP_FW_DENY); - return (tcpmod_setmss(mp, tcp, hlen, mss)); + return (tcpmod_setmss(mp, tcp, hlen, mss, done)); } #endif /* INET6 */ #ifdef INET static int -tcpmod_ipv4_setmss(struct mbuf **mp, uint16_t mss) +tcpmod_ipv4_setmss(struct mbuf **mp, uint16_t mss, int *done) { struct tcphdr *tcp; struct ip *ip; @@ -162,7 +165,7 @@ tcpmod_ipv4_setmss(struct mbuf **mp, uint16_t mss) /* We must have TCP options and enough data in a packet. */ if (hlen <= sizeof(struct tcphdr) || hlen > plen) return (IP_FW_DENY); - return (tcpmod_setmss(mp, tcp, hlen, mss)); + return (tcpmod_setmss(mp, tcp, hlen, mss, done)); } #endif /* INET */ @@ -206,19 +209,23 @@ ipfw_tcpmod(struct ip_fw_chain *chain, struct ip_fw_args *args, switch (args->f_id.addr_type) { #ifdef INET case 4: - ret = tcpmod_ipv4_setmss(&args->m, htons(icmd->arg1)); + ret = tcpmod_ipv4_setmss(&args->m, htons(icmd->arg1), + done); break; #endif #ifdef INET6 case 6: - ret = tcpmod_ipv6_setmss(&args->m, htons(icmd->arg1)); + ret = tcpmod_ipv6_setmss(&args->m, htons(icmd->arg1), + done); break; #endif } /* * We return zero in both @ret and @done on success, and ipfw_chk() * will update rule counters. Otherwise a packet will not be matched - * by rule. + * by rule. We passed @done around above in case we hit a fatal error + * somewhere, we'll return non-zero but signal that rule processing + * cannot succeed. 
*/ return (ret); } diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c index 0a84f9d680ac..cb96d2fcc44c 100644 --- a/sys/netpfil/pf/if_pflog.c +++ b/sys/netpfil/pf/if_pflog.c @@ -284,9 +284,9 @@ pflog_packet(uint8_t action, u_int8_t reason, * state lock, since this leads to unsafe LOR. * These conditions are very very rare, however. */ - if (trigger->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe) + if (trigger->log & PF_LOG_USER && !pd->lookup.done && lookupsafe) pd->lookup.done = pf_socket_lookup(pd); - if (pd->lookup.done > 0) + if (trigger->log & PF_LOG_USER && pd->lookup.done > 0) hdr.uid = pd->lookup.uid; else hdr.uid = -1; diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 2391edaf1a5a..de69ecbb0985 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -110,8 +110,6 @@ #include <netpfil/pf/pfsync_nv.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - struct pfsync_bucket; struct pfsync_softc; @@ -125,8 +123,8 @@ union inet_template { sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) -static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, - struct pfsync_state_peer *); +static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, + struct pf_state_peer_export *); static int pfsync_in_clr(struct mbuf *, int, int, int, int); static int pfsync_in_ins(struct mbuf *, int, int, int, int); static int pfsync_in_iack(struct mbuf *, int, int, int, int); @@ -155,6 +153,8 @@ static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { pfsync_in_eof, /* PFSYNC_ACT_EOF */ pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ + pfsync_in_ins, /* PFSYNC_ACT_INS_1500 */ + pfsync_in_upd, /* PFSYNC_ACT_UPD_1500 */ }; struct pfsync_q { @@ -167,9 +167,11 @@ struct pfsync_q { enum pfsync_q_id { PFSYNC_Q_INS_1301, PFSYNC_Q_INS_1400, + PFSYNC_Q_INS_1500, PFSYNC_Q_IACK, PFSYNC_Q_UPD_1301, PFSYNC_Q_UPD_1400, + 
PFSYNC_Q_UPD_1500, PFSYNC_Q_UPD_C, PFSYNC_Q_DEL_C, PFSYNC_Q_COUNT, @@ -178,6 +180,7 @@ enum pfsync_q_id { /* Functions for building messages for given queue */ static void pfsync_out_state_1301(struct pf_kstate *, void *); static void pfsync_out_state_1400(struct pf_kstate *, void *); +static void pfsync_out_state_1500(struct pf_kstate *, void *); static void pfsync_out_iack(struct pf_kstate *, void *); static void pfsync_out_upd_c(struct pf_kstate *, void *); static void pfsync_out_del_c(struct pf_kstate *, void *); @@ -186,9 +189,11 @@ static void pfsync_out_del_c(struct pf_kstate *, void *); static struct pfsync_q pfsync_qs[] = { { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, + { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, + { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; @@ -197,9 +202,11 @@ static struct pfsync_q pfsync_qs[] = { static u_int8_t pfsync_qid_sstate[] = { PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ + PFSYNC_S_INS, /* PFSYNC_Q_INS_1500 */ PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ + PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1500 */ PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ }; @@ -332,7 +339,7 @@ SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet 
*); -static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, +static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); @@ -504,7 +511,7 @@ pfsync_clone_destroy(struct ifnet *ifp) } static int -pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, +pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { @@ -527,14 +534,21 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) struct pf_kstate *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_krule *r = NULL; - struct pfi_kkif *kif; + struct pfi_kkif *kif, *orig_kif; struct pfi_kkif *rt_kif = NULL; struct pf_kpooladdr *rpool_first; int error; + int n = 0; + sa_family_t rt_af = 0; uint8_t rt = 0; + sa_family_t wire_af, stack_af; + u_int8_t wire_proto, stack_proto; PF_RULES_RASSERT(); + if (strnlen(sp->pfs_1301.ifname, IFNAMSIZ) == IFNAMSIZ) + return (EINVAL); + if (sp->pfs_1301.creatorid == 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid creator id: %08x\n", __func__, @@ -542,7 +556,11 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) return (EINVAL); } - if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { + /* + * Check interfaces early on. Do it before allocating memory etc. + * Because there is a high chance there will be a lot more such states. + */ + if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->pfs_1301.ifname); @@ -552,21 +570,43 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) } /* + * States created with floating interface policy can be synchronized to + * hosts with different interfaces, because they are bound to V_pfi_all. 
+ * But s->orig_kif still points to a real interface. Don't abort + * importing the state if orig_kif does not exists on the importing host + * but the state is not interface-bound. + */ + if (msg_version == PFSYNC_MSG_VERSION_1500) { + orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname); + if (orig_kif == NULL) { + if (kif == V_pfi_all) { + orig_kif = kif; + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: unknown original interface:" + " %s\n", __func__, + sp->pfs_1500.orig_ifname); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ + } + } + } + + /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. */ if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < - pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) - r = pf_main_ruleset.rules[ - PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)]; - else + pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { + TAILQ_FOREACH(r, pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr, entries) + if (ntohl(sp->pfs_1301.rule) == n++) + break; + } else r = &V_pf_default_rule; - /* - * Check routing interface early on. Do it before allocating memory etc. - * because there is a high chance there will be a lot more such states. - */ switch (msg_version) { case PFSYNC_MSG_VERSION_1301: /* @@ -594,39 +634,76 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) if ((rpool_first == NULL) || (TAILQ_NEXT(rpool_first, entries) != NULL)) { DPFPRINTF(PF_DEBUG_MISC, - ("%s: can't recover routing information " - "because of empty or bad redirection pool\n", - __func__)); + "%s: can't recover routing information " + "because of empty or bad redirection pool", + __func__); return ((flags & PFSYNC_SI_IOCTL) ? 
EINVAL : 0); } rt = r->rt; rt_kif = rpool_first->kif; + /* + * Guess the AF of the route address, FreeBSD 13 does + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. + */ + rt_af = r->af; } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { /* * Ruleset different, routing *supposedly* requested, * give up on recovering. */ DPFPRINTF(PF_DEBUG_MISC, - ("%s: can't recover routing information " - "because of different ruleset\n", __func__)); + "%s: can't recover routing information " + "because of different ruleset", __func__); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } + wire_af = stack_af = sp->pfs_1301.af; + wire_proto = stack_proto = sp->pfs_1301.proto; break; case PFSYNC_MSG_VERSION_1400: /* - * On FreeBSD 14 and above we're not taking any chances. + * On FreeBSD 14 we're not taking any chances. * We use the information synced to us. */ if (sp->pfs_1400.rt) { rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); if (rt_kif == NULL) { DPFPRINTF(PF_DEBUG_MISC, - ("%s: unknown route interface: %s\n", - __func__, sp->pfs_1400.rt_ifname)); + "%s: unknown route interface: %s", + __func__, sp->pfs_1400.rt_ifname); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } rt = sp->pfs_1400.rt; + /* + * Guess the AF of the route address, FreeBSD 14 does + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. + */ + rt_af = sp->pfs_1400.af; } + wire_af = stack_af = sp->pfs_1400.af; + wire_proto = stack_proto = sp->pfs_1400.proto; + break; + case PFSYNC_MSG_VERSION_1500: + /* + * On FreeBSD 15 and above we're not taking any chances. + * We use the information synced to us. + */ + if (sp->pfs_1500.rt) { + rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname); + if (rt_kif == NULL) { + DPFPRINTF(PF_DEBUG_MISC, + "%s: unknown route interface: %s", + __func__, sp->pfs_1500.rt_ifname); + return ((flags & PFSYNC_SI_IOCTL) ? 
EINVAL : 0); + } + rt = sp->pfs_1500.rt; + rt_af = sp->pfs_1500.rt_af; + } + wire_af = sp->pfs_1500.wire_af; + stack_af = sp->pfs_1500.stack_af; + wire_proto = sp->pfs_1500.wire_proto; + stack_proto = sp->pfs_1500.stack_proto; break; } @@ -653,8 +730,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) ks = &sp->pfs_1301.key[PF_SK_STACK]; #endif - if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || - PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || + if (wire_af != stack_af || + PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) || + PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); @@ -673,42 +751,32 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; - skw->proto = sp->pfs_1301.proto; - skw->af = sp->pfs_1301.af; + skw->proto = wire_proto; + skw->af = wire_af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; - sks->proto = sp->pfs_1301.proto; - sks->af = sp->pfs_1301.af; + sks->proto = stack_proto; + sks->af = stack_af; } /* copy to state */ - bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, sizeof(st->act.rt_addr)); st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; - st->expire = pf_get_uptime(); - if (sp->pfs_1301.expire) { - uint32_t timeout; - - timeout = r->timeout[sp->pfs_1301.timeout]; - if (!timeout) - timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; - - /* sp->expire may have been adaptively scaled by export. 
*/ - st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; - } - - st->direction = sp->pfs_1301.direction; - st->act.log = sp->pfs_1301.log; - st->timeout = sp->pfs_1301.timeout; - st->act.rt = rt; st->act.rt_kif = rt_kif; + st->act.rt_af = rt_af; switch (msg_version) { case PFSYNC_MSG_VERSION_1301: st->state_flags = sp->pfs_1301.state_flags; + st->direction = sp->pfs_1301.direction; + st->act.log = sp->pfs_1301.log; + st->timeout = sp->pfs_1301.timeout; + if (rt) + bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); /* * In FreeBSD 13 pfsync lacks many attributes. Copy them * from the rule if possible. If rule can't be matched @@ -747,6 +815,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) break; case PFSYNC_MSG_VERSION_1400: st->state_flags = ntohs(sp->pfs_1400.state_flags); + st->direction = sp->pfs_1400.direction; + st->act.log = sp->pfs_1400.log; + st->timeout = sp->pfs_1400.timeout; st->act.qid = ntohs(sp->pfs_1400.qid); st->act.pqid = ntohs(sp->pfs_1400.pqid); st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); @@ -757,12 +828,47 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) st->act.max_mss = ntohs(sp->pfs_1400.max_mss); st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; + if (rt) + bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); + break; + case PFSYNC_MSG_VERSION_1500: + st->state_flags = ntohs(sp->pfs_1500.state_flags); + st->direction = sp->pfs_1500.direction; + st->act.log = sp->pfs_1500.log; + st->timeout = sp->pfs_1500.timeout; + st->act.qid = ntohs(sp->pfs_1500.qid); + st->act.pqid = ntohs(sp->pfs_1500.pqid); + st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe); + st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe); + st->act.rtableid = ntohl(sp->pfs_1500.rtableid); + st->act.min_ttl = sp->pfs_1500.min_ttl; + st->act.set_tos = sp->pfs_1500.set_tos; + st->act.max_mss = ntohs(sp->pfs_1500.max_mss); + 
st->act.set_prio[0] = sp->pfs_1500.set_prio[0]; + st->act.set_prio[1] = sp->pfs_1500.set_prio[1]; + if (rt) + bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); + if (sp->pfs_1500.tagname[0] != 0) + st->tag = pf_tagname2tag(sp->pfs_1500.tagname); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } + st->expire = pf_get_uptime(); + if (sp->pfs_1301.expire) { + uint32_t timeout; + timeout = r->timeout[st->timeout]; + if (!timeout) + timeout = V_pf_default_rule.timeout[st->timeout]; + + /* sp->expire may have been adaptively scaled by export. */ + st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; + } + if (! (st->act.rtableid == -1 || (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) goto cleanup; @@ -782,7 +888,7 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; - if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) + if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ @@ -1074,23 +1180,29 @@ pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) struct mbuf *mp; union pfsync_state_union *sa, *sp; int i, offp, total_len, msg_version, msg_len; + u_int8_t timeout, direction; + sa_family_t af; switch (action) { case PFSYNC_ACT_INS_1301: msg_len = sizeof(struct pfsync_state_1301); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_INS_1400: msg_len = sizeof(struct pfsync_state_1400); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1400; break; + case PFSYNC_ACT_INS_1500: + msg_len = sizeof(struct pfsync_state_1500); + msg_version = PFSYNC_MSG_VERSION_1500; + break; default: V_pfsyncstats.pfsyncs_badver++; return (-1); } + total_len = msg_len * count; + mp = m_pulldown(m, offset, total_len, &offp); if (mp == NULL) { 
V_pfsyncstats.pfsyncs_badlen++; @@ -1101,13 +1213,26 @@ pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) for (i = 0; i < count; i++) { sp = (union pfsync_state_union *)((char *)sa + msg_len * i); + switch (msg_version) { + case PFSYNC_MSG_VERSION_1301: + case PFSYNC_MSG_VERSION_1400: + af = sp->pfs_1301.af; + timeout = sp->pfs_1301.timeout; + direction = sp->pfs_1301.direction; + break; + case PFSYNC_MSG_VERSION_1500: + af = sp->pfs_1500.wire_af; + timeout = sp->pfs_1500.timeout; + direction = sp->pfs_1500.direction; + break; + } + /* Check for invalid values. */ - if (sp->pfs_1301.timeout >= PFTM_MAX || + if (timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || - sp->pfs_1301.direction > PF_OUT || - (sp->pfs_1301.af != AF_INET && - sp->pfs_1301.af != AF_INET6)) { + direction > PF_OUT || + (af != AF_INET && af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; @@ -1159,8 +1284,8 @@ pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) } static int -pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, - struct pfsync_state_peer *dst) +pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, + struct pf_state_peer_export *dst) { int sync = 0; @@ -1200,23 +1325,28 @@ pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) struct pf_kstate *st; struct mbuf *mp; int sync, offp, i, total_len, msg_len, msg_version; + u_int8_t timeout; switch (action) { case PFSYNC_ACT_UPD_1301: msg_len = sizeof(struct pfsync_state_1301); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_UPD_1400: msg_len = sizeof(struct pfsync_state_1400); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1400; break; + case PFSYNC_ACT_UPD_1500: + msg_len = sizeof(struct pfsync_state_1500); + msg_version = 
PFSYNC_MSG_VERSION_1500; + break; default: V_pfsyncstats.pfsyncs_badact++; return (-1); } + total_len = msg_len * count; + mp = m_pulldown(m, offset, total_len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; @@ -1227,8 +1357,18 @@ pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) for (i = 0; i < count; i++) { sp = (union pfsync_state_union *)((char *)sa + msg_len * i); + switch (msg_version) { + case PFSYNC_MSG_VERSION_1301: + case PFSYNC_MSG_VERSION_1400: + timeout = sp->pfs_1301.timeout; + break; + case PFSYNC_MSG_VERSION_1500: + timeout = sp->pfs_1500.timeout; + break; + } + /* check for invalid values */ - if (sp->pfs_1301.timeout >= PFTM_MAX || + if (timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { @@ -1273,7 +1413,7 @@ pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); st->expire = pf_get_uptime(); - st->timeout = sp->pfs_1301.timeout; + st->timeout = timeout; } st->pfsync_time = time_uptime; @@ -1728,16 +1868,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) return (EINVAL); - data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); + data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); if ((error = copyin(ifr->ifr_cap_nv.buffer, data, ifr->ifr_cap_nv.length)) != 0) { - free(data, M_TEMP); + free(data, M_PF); return (error); } if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { - free(data, M_TEMP); + free(data, M_PF); return (EINVAL); } @@ -1745,7 +1885,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) pfsync_nvstatus_to_kstatus(nvl, &status); nvlist_destroy(nvl); - free(data, M_TEMP); + free(data, M_PF); error = pfsync_kstatus_to_softc(&status, sc); return (error); @@ -1774,6 +1914,14 @@ 
pfsync_out_state_1400(struct pf_kstate *st, void *buf) } static void +pfsync_out_state_1500(struct pf_kstate *st, void *buf) +{ + union pfsync_state_union *sp = buf; + + pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1500); +} + +static void pfsync_out_iack(struct pf_kstate *st, void *buf) { struct pfsync_ins_ack *iack = buf; @@ -2440,6 +2588,8 @@ pfsync_sstate_to_qid(u_int8_t sync_state) return PFSYNC_Q_INS_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_INS_1400; + case PFSYNC_MSG_VERSION_1500: + return PFSYNC_Q_INS_1500; } break; case PFSYNC_S_IACK: @@ -2450,6 +2600,8 @@ pfsync_sstate_to_qid(u_int8_t sync_state) return PFSYNC_Q_UPD_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_UPD_1400; + case PFSYNC_MSG_VERSION_1500: + return PFSYNC_Q_UPD_1500; } break; case PFSYNC_S_UPD_C: @@ -3006,6 +3158,7 @@ pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) break; case PFSYNC_MSG_VERSION_1301: case PFSYNC_MSG_VERSION_1400: + case PFSYNC_MSG_VERSION_1500: sc->sc_version = status->version; break; default: diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index d5f01e5c4956..a39f5fe58cd6 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -119,8 +119,6 @@ #include <machine/in_cksum.h> #include <security/mac/mac_framework.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - SDT_PROVIDER_DEFINE(pf); SDT_PROBE_DEFINE2(pf, , test, reason_set, "int", "int"); SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *", @@ -161,6 +159,7 @@ SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *"); SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match, "int", "struct pf_keth_rule *"); SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t"); +SDT_PROBE_DEFINE2(pf, , log, log, "int", "const char *"); /* * Global variables @@ -345,10 +344,12 @@ static int pf_test_eth_rule(int, struct pfi_kkif *, struct mbuf **); static int pf_test_rule(struct pf_krule **, struct pf_kstate **, struct 
pf_pdesc *, struct pf_krule **, - struct pf_kruleset **, u_short *, struct inpcb *); + struct pf_kruleset **, u_short *, struct inpcb *, + struct pf_krule_slist *); static int pf_create_state(struct pf_krule *, struct pf_test_ctx *, - struct pf_kstate **, u_int16_t, u_int16_t); + struct pf_kstate **, u_int16_t, u_int16_t, + struct pf_krule_slist *match_rules); static int pf_state_key_addr_setup(struct pf_pdesc *, struct pf_state_key_cmp *, int); static int pf_tcp_track_full(struct pf_kstate *, @@ -375,6 +376,8 @@ static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); +static int pf_walk_option(struct pf_pdesc *, struct ip *, + int, int, u_short *); static int pf_walk_header(struct pf_pdesc *, struct ip *, u_short *); #ifdef INET6 static int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, @@ -392,7 +395,7 @@ static bool pf_src_connlimit(struct pf_kstate *); static int pf_match_rcvif(struct mbuf *, struct pf_krule *); static void pf_counters_inc(int, struct pf_pdesc *, struct pf_kstate *, struct pf_krule *, - struct pf_krule *); + struct pf_krule *, struct pf_krule_slist *); static void pf_log_matches(struct pf_pdesc *, struct pf_krule *, struct pf_krule *, struct pf_kruleset *, struct pf_krule_slist *); @@ -400,7 +403,7 @@ static void pf_overload_task(void *v, int pending); static u_short pf_insert_src_node(struct pf_ksrc_node *[PF_SN_MAX], struct pf_srchash *[PF_SN_MAX], struct pf_krule *, struct pf_addr *, sa_family_t, struct pf_addr *, - struct pfi_kkif *, pf_sn_types_t); + struct pfi_kkif *, sa_family_t, pf_sn_types_t); static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); @@ -408,14 +411,14 @@ static void pf_mtag_free(struct m_tag *); static void pf_packet_rework_nat(struct pf_pdesc *, int, struct pf_state_key *); #ifdef INET -static void pf_route(struct pf_krule *, 
+static int pf_route(struct pf_krule *, struct ifnet *, struct pf_kstate *, struct pf_pdesc *, struct inpcb *); #endif /* INET */ #ifdef INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); -static void pf_route6(struct pf_krule *, +static int pf_route6(struct pf_krule *, struct ifnet *, struct pf_kstate *, struct pf_pdesc *, struct inpcb *); #endif /* INET6 */ @@ -488,26 +491,30 @@ BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd) counter_u64_add(s->anchor->states_cur, 1); \ counter_u64_add(s->anchor->states_tot, 1); \ } \ - if (s->nat_rule != NULL) { \ - counter_u64_add(s->nat_rule->states_cur, 1);\ - counter_u64_add(s->nat_rule->states_tot, 1);\ + if (s->nat_rule != NULL && s->nat_rule != s->rule) { \ + counter_u64_add(s->nat_rule->states_cur, 1); \ + counter_u64_add(s->nat_rule->states_tot, 1); \ } \ SLIST_FOREACH(mrm, &s->match_rules, entry) { \ - counter_u64_add(mrm->r->states_cur, 1); \ - counter_u64_add(mrm->r->states_tot, 1); \ + if (s->nat_rule != mrm->r) { \ + counter_u64_add(mrm->r->states_cur, 1); \ + counter_u64_add(mrm->r->states_tot, 1); \ + } \ } \ } while (0) #define STATE_DEC_COUNTERS(s) \ do { \ struct pf_krule_item *mrm; \ - if (s->nat_rule != NULL) \ - counter_u64_add(s->nat_rule->states_cur, -1);\ - if (s->anchor != NULL) \ - counter_u64_add(s->anchor->states_cur, -1); \ counter_u64_add(s->rule->states_cur, -1); \ + if (s->anchor != NULL) \ + counter_u64_add(s->anchor->states_cur, -1); \ + if (s->nat_rule != NULL && s->nat_rule != s->rule) \ + counter_u64_add(s->nat_rule->states_cur, -1); \ SLIST_FOREACH(mrm, &s->match_rules, entry) \ - counter_u64_add(mrm->r->states_cur, -1); \ + if (s->nat_rule != mrm->r) { \ + counter_u64_add(mrm->r->states_cur, -1);\ + } \ } while (0) MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); @@ -620,7 +627,7 @@ static void pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) { - switch (pd->proto) { + switch (pd->virtual_proto) { 
case IPPROTO_TCP: { struct tcphdr *th = &pd->hdr.tcp; @@ -1000,7 +1007,7 @@ pf_src_node_exists(struct pf_ksrc_node **sn, struct pf_srchash *sh) return (false); } -static void +void pf_free_src_node(struct pf_ksrc_node *sn) { @@ -1016,7 +1023,7 @@ static u_short pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX], struct pf_srchash *snhs[PF_SN_MAX], struct pf_krule *rule, struct pf_addr *src, sa_family_t af, struct pf_addr *raddr, - struct pfi_kkif *rkif, pf_sn_types_t sn_type) + struct pfi_kkif *rkif, sa_family_t raf, pf_sn_types_t sn_type) { u_short reason = 0; struct pf_krule *r_track = rule; @@ -1088,8 +1095,9 @@ pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX], (*sn)->rule = r_track; pf_addrcpy(&(*sn)->addr, src, af); if (raddr != NULL) - pf_addrcpy(&(*sn)->raddr, raddr, af); + pf_addrcpy(&(*sn)->raddr, raddr, raf); (*sn)->rkif = rkif; + (*sn)->raf = raf; LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry); (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; @@ -1252,6 +1260,21 @@ pf_initialize(void) MTX_DEF | MTX_DUPOK); } + /* Anchors */ + V_pf_anchor_z = uma_zcreate("pf anchors", + sizeof(struct pf_kanchor), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_ANCHORS].zone = V_pf_anchor_z; + uma_zone_set_max(V_pf_anchor_z, PF_ANCHOR_HIWAT); + uma_zone_set_warning(V_pf_anchor_z, "PF anchor limit reached"); + + V_pf_eth_anchor_z = uma_zcreate("pf Ethernet anchors", + sizeof(struct pf_keth_anchor), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_ETH_ANCHORS].zone = V_pf_eth_anchor_z; + uma_zone_set_max(V_pf_eth_anchor_z, PF_ANCHOR_HIWAT); + uma_zone_set_warning(V_pf_eth_anchor_z, "PF Ethernet anchor limit reached"); + /* ALTQ */ TAILQ_INIT(&V_pf_altqs[0]); TAILQ_INIT(&V_pf_altqs[1]); @@ -1330,6 +1353,8 @@ pf_cleanup(void) uma_zdestroy(V_pf_state_z); uma_zdestroy(V_pf_state_key_z); uma_zdestroy(V_pf_udp_mapping_z); + uma_zdestroy(V_pf_anchor_z); + uma_zdestroy(V_pf_eth_anchor_z); } static int @@ -1648,7 +1673,6 @@ 
pf_state_key_addr_setup(struct pf_pdesc *pd, #ifdef INET6 struct nd_neighbor_solicit nd; struct pf_addr *target; - u_short action, reason; if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6) goto copy; @@ -1657,7 +1681,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, case ND_NEIGHBOR_SOLICIT: if (multi) return (-1); - if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) + if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL, + pd->af)) return (-1); target = (struct pf_addr *)&nd.nd_ns_target; daddr = target; @@ -1665,7 +1690,8 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, case ND_NEIGHBOR_ADVERT: if (multi) return (-1); - if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) + if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), NULL, + pd->af)) return (-1); target = (struct pf_addr *)&nd.nd_ns_target; saddr = target; @@ -2050,6 +2076,44 @@ pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir) return (false); } +void +pf_state_peer_hton(const struct pf_state_peer *s, struct pf_state_peer_export *d) +{ + d->seqlo = htonl(s->seqlo); + d->seqhi = htonl(s->seqhi); + d->seqdiff = htonl(s->seqdiff); + d->max_win = htons(s->max_win); + d->mss = htons(s->mss); + d->state = s->state; + d->wscale = s->wscale; + if (s->scrub) { + d->scrub.pfss_flags = htons( + s->scrub->pfss_flags & PFSS_TIMESTAMP); + d->scrub.pfss_ttl = (s)->scrub->pfss_ttl; + d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod); + d->scrub.scrub_flag = PF_SCRUB_FLAG_VALID; + } +} + +void +pf_state_peer_ntoh(const struct pf_state_peer_export *s, struct pf_state_peer *d) +{ + d->seqlo = ntohl(s->seqlo); + d->seqhi = ntohl(s->seqhi); + d->seqdiff = ntohl(s->seqdiff); + d->max_win = ntohs(s->max_win); + d->mss = ntohs(s->mss); + d->state = s->state; + d->wscale = s->wscale; + if (s->scrub.scrub_flag == PF_SCRUB_FLAG_VALID && + d->scrub != NULL) { + d->scrub->pfss_flags = ntohs(s->scrub.pfss_flags) & + PFSS_TIMESTAMP; + d->scrub->pfss_ttl = 
s->scrub.pfss_ttl; + d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod); + } +} + struct pf_udp_mapping * pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port, struct pf_addr *nat_addr, uint16_t nat_port) @@ -2776,7 +2840,7 @@ pf_remove_state(struct pf_kstate *s) s->key[PF_SK_WIRE]->port[0], s->src.seqhi, s->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0, - s->act.rtableid); + s->act.rtableid, NULL); } LIST_REMOVE(s, entry); @@ -2811,20 +2875,24 @@ pf_alloc_state(int flags) return (uma_zalloc(V_pf_state_z, flags | M_ZERO)); } +static __inline void +pf_free_match_rules(struct pf_krule_slist *match_rules) { + struct pf_krule_item *ri; + + while ((ri = SLIST_FIRST(match_rules))) { + SLIST_REMOVE_HEAD(match_rules, entry); + free(ri, M_PF_RULE_ITEM); + } +} + void pf_free_state(struct pf_kstate *cur) { - struct pf_krule_item *ri; - KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur)); KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__, cur->timeout)); - while ((ri = SLIST_FIRST(&cur->match_rules))) { - SLIST_REMOVE_HEAD(&cur->match_rules, entry); - free(ri, M_PF_RULE_ITEM); - } - + pf_free_match_rules(&(cur->match_rules)); pf_normalize_tcp_cleanup(cur); uma_zfree(V_pf_state_z, cur); pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1); @@ -3296,7 +3364,7 @@ pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p, u_int16_t po; uint8_t u = pd->virtual_proto == IPPROTO_UDP; - MPASS(pd->pcksum); + MPASS(pd->pcksum != NULL); if (pd->af == AF_INET) { MPASS(pd->ip_sum); } @@ -3575,6 +3643,18 @@ pf_translate_af(struct pf_pdesc *pd) pd->src = (struct pf_addr *)&ip4->ip_src; pd->dst = (struct pf_addr *)&ip4->ip_dst; pd->off = sizeof(struct ip); + if (pd->m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_TCP; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { + pd->m->m_pkthdr.csum_flags &= 
~CSUM_UDP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_UDP; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + pd->m->m_pkthdr.csum_flags |= CSUM_SCTP; + } break; case AF_INET6: ip6 = mtod(pd->m, struct ip6_hdr *); @@ -3592,6 +3672,18 @@ pf_translate_af(struct pf_pdesc *pd) pd->src = (struct pf_addr *)&ip6->ip6_src; pd->dst = (struct pf_addr *)&ip6->ip6_dst; pd->off = sizeof(struct ip6_hdr); + if (pd->m->m_pkthdr.csum_flags & CSUM_TCP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_TCP; + pd->m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_UDP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_UDP; + pd->m->m_pkthdr.csum_flags |= CSUM_UDP_IPV6; + } + if (pd->m->m_pkthdr.csum_flags & CSUM_SCTP) { + pd->m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + pd->m->m_pkthdr.csum_flags |= CSUM_SCTP_IPV6; + } /* * If we're dealing with a reassembled packet we need to adjust @@ -3962,7 +4054,7 @@ pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th, optsoff = pd->off + sizeof(struct tcphdr); #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) if (olen < TCPOLEN_MINSACK || - !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) return (0); eoh = opts + olen; @@ -3998,7 +4090,7 @@ pf_build_tcp(const struct pf_krule *r, sa_family_t af, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, u_int sack, - int rtableid) + int rtableid, u_short *reason) { struct mbuf *m; int len, tlen; @@ -4038,13 +4130,16 @@ pf_build_tcp(const struct pf_krule *r, sa_family_t af, } m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) + if (m == NULL) { + REASON_SET(reason, PFRES_MEMORY); return (NULL); + } #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { + REASON_SET(reason, PFRES_MEMORY); m_freem(m); return 
(NULL); } @@ -4264,13 +4359,14 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, - int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) + int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid, + u_short *reason) { struct pf_send_entry *pfse; struct mbuf *m; m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags, - win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid); + win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid, reason); if (m == NULL) return; @@ -4278,6 +4374,7 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af, pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) { m_freem(m); + REASON_SET(reason, PFRES_MEMORY); return; } @@ -4339,9 +4436,10 @@ pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd, if (tcp_get_flags(th) & TH_FIN) ack++; pf_send_tcp(r, pd->af, pd->dst, - pd->src, th->th_dport, th->th_sport, - ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid); + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid, + reason); } } else if (pd->proto == IPPROTO_SCTP && (r->rule_flag & PFRULE_RETURN)) { @@ -4392,7 +4490,8 @@ pf_icmp_to_bandlim(uint8_t type) static void pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s, - struct pf_state_peer *src, struct pf_state_peer *dst) + struct pf_state_peer *src, struct pf_state_peer *dst, + u_short *reason) { /* * We are sending challenge ACK as a response to SYN packet, which @@ -4406,7 +4505,7 @@ pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s, pf_send_tcp(s->rule, pd->af, pd->dst, pd->src, pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, src->seqlo, TH_ACK, 0, 0, 
s->rule->return_ttl, 0, 0, 0, - s->rule->rtableid); + s->rule->rtableid, reason); } static void @@ -4615,8 +4714,8 @@ pf_match_rcvif(struct mbuf *m, struct pf_krule *r) if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: kif == NULL, @%d via %s\n", __func__, r->nr, - r->rcv_ifname)); + "%s: kif == NULL, @%d via %s", __func__, r->nr, + r->rcv_ifname); return (0); } @@ -4651,7 +4750,8 @@ pf_tag_packet(struct pf_pdesc *pd, int tag) } while (0) enum pf_test_status -pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r) +pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r, + struct pf_krule_slist *match_rules) { enum pf_test_status rv; @@ -4669,7 +4769,7 @@ pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r) struct pf_kanchor *child; rv = PF_TEST_OK; RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) { - rv = pf_match_rule(ctx, &child->ruleset); + rv = pf_match_rule(ctx, &child->ruleset, match_rules); if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { /* * we either hit a rule with quick action @@ -4680,7 +4780,7 @@ pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r) } } } else { - rv = pf_match_rule(ctx, &r->anchor->ruleset); + rv = pf_match_rule(ctx, &r->anchor->ruleset, match_rules); /* * Unless errors occured, stop iff any rule matched * within quick anchors. 
@@ -4975,7 +5075,7 @@ pf_socket_lookup(struct pf_pdesc *pd) } INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; - pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.gid = inp->inp_cred->cr_gid; INP_RUNLOCK(inp); return (1); @@ -5025,7 +5125,7 @@ pf_get_wscale(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, - pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -5050,7 +5150,7 @@ pf_get_mss(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, - pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -5242,8 +5342,8 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) if (__predict_false(m->m_len < sizeof(struct ether_header)) && (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m_len < sizeof(struct ether_header)" - ", pullup failed\n", __func__)); + "%s: m_len < sizeof(struct ether_header)" + ", pullup failed", __func__); return (PF_DROP); } e = mtod(m, struct ether_header *); @@ -5529,9 +5629,10 @@ pf_rule_apply_nat(struct pf_test_ctx *ctx, struct pf_krule *r) } enum pf_test_status -pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) +pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset, + struct pf_krule_slist *match_rules) { - struct pf_krule_item *ri; + struct pf_krule_item *ri, *rt; struct pf_krule *r; struct pf_krule *save_a; struct pf_kruleset *save_aruleset; @@ -5544,6 +5645,9 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) *ctx->rm = ctx->pd->related_rule; break; } + PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED, + TAILQ_NEXT(r, entries)); + /* Don't count expired rule 
evaluations. */ pf_counter_u64_add(&r->evaluations, 1); PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot, r->skip[PF_SKIP_IFP]); @@ -5647,6 +5751,21 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) if (r->tag) ctx->tag = r->tag; if (r->anchor == NULL) { + + if (r->rule_flag & PFRULE_ONCE) { + uint32_t rule_flag; + + rule_flag = r->rule_flag; + if ((rule_flag & PFRULE_EXPIRED) == 0 && + atomic_cmpset_int(&r->rule_flag, rule_flag, + rule_flag | PFRULE_EXPIRED)) { + r->exptime = time_uptime; + } else { + r = TAILQ_NEXT(r, entries); + continue; + } + } + if (r->action == PF_MATCH) { /* * Apply translations before increasing counters, @@ -5670,11 +5789,14 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) return (PF_TEST_FAIL); } ri->r = r; - SLIST_INSERT_HEAD(&ctx->rules, ri, entry); - pf_counter_u64_critical_enter(); - pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); - pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); - pf_counter_u64_critical_exit(); + + if (SLIST_EMPTY(match_rules)) { + SLIST_INSERT_HEAD(match_rules, ri, entry); + } else { + SLIST_INSERT_AFTER(rt, ri, entry); + } + rt = ri; + pf_rule_to_actions(r, &pd->act); if (r->log) PFLOG_PACKET(r->action, PFRES_MATCH, r, @@ -5698,7 +5820,7 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) ctx->arsm = ctx->aruleset; } if (pd->act.log & PF_LOG_MATCHES) - pf_log_matches(pd, r, ctx->a, ruleset, &ctx->rules); + pf_log_matches(pd, r, ctx->a, ruleset, match_rules); if (r->quick) { ctx->test_status = PF_TEST_QUICK; break; @@ -5715,7 +5837,7 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) * Note: we don't need to restore if we are not going * to continue with ruleset evaluation. 
*/ - if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) { + if (pf_step_into_anchor(ctx, r, match_rules) != PF_TEST_OK) { break; } ctx->a = save_a; @@ -5724,17 +5846,18 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) r = TAILQ_NEXT(r, entries); } + return (ctx->test_status); } static int pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pf_pdesc *pd, struct pf_krule **am, - struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp) + struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp, + struct pf_krule_slist *match_rules) { struct pf_krule *r = NULL; struct pf_kruleset *ruleset = NULL; - struct pf_krule_item *ri; struct pf_test_ctx ctx; u_short transerror; int action = PF_PASS; @@ -5751,7 +5874,6 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.rsm = rsm; ctx.th = &pd->hdr.tcp; ctx.reason = *reason; - SLIST_INIT(&ctx.rules); pf_addrcpy(&pd->nsaddr, pd->src, pd->af); pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); @@ -5759,7 +5881,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; - pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.gid = inp->inp_cred->cr_gid; pd->lookup.done = 1; } @@ -5843,44 +5965,50 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.nat_pool = &(ctx.nr->rdr); } - ruleset = &pf_main_ruleset; - rv = pf_match_rule(&ctx, ruleset); - if (rv == PF_TEST_FAIL) { - /* - * Reason has been set in pf_match_rule() already. 
- */ - goto cleanup; - } - - r = *ctx.rm; /* matching rule */ - ctx.a = *ctx.am; /* rule that defines an anchor containing 'r' */ - ruleset = *ctx.rsm; /* ruleset of the anchor defined by the rule 'a' */ - ctx.aruleset = ctx.arsm; /* ruleset of the 'a' rule itself */ + *ctx.rm = &V_pf_default_rule; + if (ctx.nr && ctx.nr->natpass) { + r = ctx.nr; + ruleset = *ctx.rsm; + } else { + ruleset = &pf_main_ruleset; + rv = pf_match_rule(&ctx, ruleset, match_rules); + if (rv == PF_TEST_FAIL) { + /* + * Reason has been set in pf_match_rule() already. + */ + goto cleanup; + } - REASON_SET(&ctx.reason, PFRES_MATCH); + r = *ctx.rm; /* matching rule */ + ctx.a = *ctx.am; /* rule that defines an anchor containing 'r' */ + ruleset = *ctx.rsm; /* ruleset of the anchor defined by the rule 'a' */ + ctx.aruleset = ctx.arsm; /* ruleset of the 'a' rule itself */ - /* apply actions for last matching pass/block rule */ - pf_rule_to_actions(r, &pd->act); - transerror = pf_rule_apply_nat(&ctx, r); - switch (transerror) { - case PFRES_MATCH: - /* Translation action found in rule and applied successfully */ - case PFRES_MAX: - /* No translation action found in rule */ - break; - default: - /* Translation action found in rule but failed to apply */ - REASON_SET(&ctx.reason, transerror); - goto cleanup; + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + transerror = pf_rule_apply_nat(&ctx, r); + switch (transerror) { + case PFRES_MATCH: + /* Translation action found in rule and applied successfully */ + case PFRES_MAX: + /* No translation action found in rule */ + break; + default: + /* Translation action found in rule but failed to apply */ + REASON_SET(&ctx.reason, transerror); + goto cleanup; + } } + REASON_SET(&ctx.reason, PFRES_MATCH); + if (r->log) { if (ctx.rewrite) m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL); } if (pd->act.log & PF_LOG_MATCHES) - pf_log_matches(pd, r, 
ctx.a, ruleset, &ctx.rules); + pf_log_matches(pd, r, ctx.a, ruleset, match_rules); if (pd->virtual_proto != PF_VPROTO_FRAGMENT && (r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNRST) || @@ -5901,18 +6029,23 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, M_SETFIB(pd->m, pd->act.rtableid); if (r->rt) { - struct pf_ksrc_node *sn = NULL; - struct pf_srchash *snh = NULL; /* * Set act.rt here instead of in pf_rule_to_actions() because - * it is applied only from the last pass rule. + * it is applied only from the last pass rule. For rules + * with the prefer-ipv6-nexthop option act.rt_af is a hint + * about AF of the forwarded packet and might be changed. */ pd->act.rt = r->rt; - /* Don't use REASON_SET, pf_map_addr increases the reason counters */ - ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr, - &pd->act.rt_kif, NULL, &sn, &snh, &(r->route), PF_SN_ROUTE); - if (ctx.reason != 0) + if (r->rt == PF_REPLYTO) + pd->act.rt_af = pd->af; + else + pd->act.rt_af = pd->naf; + if ((transerror = pf_map_addr_sn(pd->af, r, pd->src, + &pd->act.rt_addr, &pd->act.rt_af, &pd->act.rt_kif, NULL, + &(r->route), PF_SN_ROUTE)) != PFRES_MATCH) { + REASON_SET(&ctx.reason, transerror); goto cleanup; + } } if (pd->virtual_proto != PF_VPROTO_FRAGMENT && @@ -5920,7 +6053,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, (pd->flags & PFDESC_TCP_NORM)))) { bool nat64; - action = pf_create_state(r, &ctx, sm, bproto_sum, bip_sum); + action = pf_create_state(r, &ctx, sm, bproto_sum, bip_sum, + match_rules); ctx.sk = ctx.nk = NULL; if (action != PF_PASS) { pf_udp_mapping_release(ctx.udp_mapping); @@ -5966,11 +6100,6 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, action = PF_AFRT; } } else { - while ((ri = SLIST_FIRST(&ctx.rules))) { - SLIST_REMOVE_HEAD(&ctx.rules, entry); - free(ri, M_PF_RULE_ITEM); - } - uma_zfree(V_pf_state_key_z, ctx.sk); uma_zfree(V_pf_state_key_z, ctx.nk); ctx.sk = ctx.nk = NULL; @@ -5998,11 +6127,6 @@ 
pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, return (action); cleanup: - while ((ri = SLIST_FIRST(&ctx.rules))) { - SLIST_REMOVE_HEAD(&ctx.rules, entry); - free(ri, M_PF_RULE_ITEM); - } - uma_zfree(V_pf_state_key_z, ctx.sk); uma_zfree(V_pf_state_key_z, ctx.nk); pf_udp_mapping_release(ctx.udp_mapping); @@ -6013,7 +6137,8 @@ cleanup: static int pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, - struct pf_kstate **sm, u_int16_t bproto_sum, u_int16_t bip_sum) + struct pf_kstate **sm, u_int16_t bproto_sum, u_int16_t bip_sum, + struct pf_krule_slist *match_rules) { struct pf_pdesc *pd = ctx->pd; struct pf_kstate *s = NULL; @@ -6027,7 +6152,6 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, struct tcphdr *th = &pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; u_short sn_reason; - struct pf_krule_item *ri; /* check maximums */ if (r->max_states && @@ -6039,7 +6163,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, /* src node for limits */ if ((r->rule_flag & PFRULE_SRCTRACK) && (sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, pd->af, - NULL, NULL, PF_SN_LIMIT)) != 0) { + NULL, NULL, pd->af, PF_SN_LIMIT)) != 0) { REASON_SET(&ctx->reason, sn_reason); goto csfailed; } @@ -6047,7 +6171,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, if (r->rt) { if ((r->route.opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, - pd->af, &pd->act.rt_addr, pd->act.rt_kif, + pd->af, &pd->act.rt_addr, pd->act.rt_kif, pd->act.rt_af, PF_SN_ROUTE)) != 0) { REASON_SET(&ctx->reason, sn_reason); goto csfailed; @@ -6056,10 +6180,17 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, /* src node for translation rule */ if (ctx->nr != NULL) { KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__)); + /* + * The NAT addresses are chosen during ruleset parsing. + * The new afto code stores post-nat addresses in nsaddr. 
+ * The old nat code (also used for new nat-to rules) creates + * state keys and stores addresses in them. + */ if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(sns, snhs, ctx->nr, - &ctx->sk->addr[pd->sidx], pd->af, &ctx->nk->addr[1], NULL, - PF_SN_NAT)) != 0 ) { + ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af, + ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL, + pd->naf, PF_SN_NAT)) != 0 ) { REASON_SET(&ctx->reason, sn_reason); goto csfailed; } @@ -6072,7 +6203,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, s->rule = r; s->nat_rule = ctx->nr; s->anchor = ctx->a; - memcpy(&s->match_rules, &ctx->rules, sizeof(s->match_rules)); + s->match_rules = *match_rules; memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions)); if (pd->act.allow_opts) @@ -6162,8 +6293,8 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, &s->src, &s->dst, &ctx->rewrite)) { /* This really shouldn't happen!!! */ DPFPRINTF(PF_DEBUG_URGENT, - ("%s: tcp normalize failed on first " - "pkt\n", __func__)); + "%s: tcp normalize failed on first " + "pkt", __func__); goto csfailed; } } else if (pd->proto == IPPROTO_SCTP) { @@ -6213,7 +6344,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, if (ctx->tag > 0) s->tag = ctx->tag; if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) == - TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); pf_undo_nat(ctx->nr, pd, bip_sum); s->src.seqhi = arc4random(); @@ -6226,7 +6357,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0, - pd->act.rtableid); + pd->act.rtableid, &ctx->reason); REASON_SET(&ctx->reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -6236,11 
+6367,6 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, return (PF_PASS); csfailed: - while ((ri = SLIST_FIRST(&ctx->rules))) { - SLIST_REMOVE_HEAD(&ctx->rules, entry); - free(ri, M_PF_RULE_ITEM); - } - uma_zfree(V_pf_state_key_z, ctx->sk); uma_zfree(V_pf_state_key_z, ctx->nk); @@ -6364,7 +6490,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) KASSERT(ctx->sk != NULL, ("%s: null sk", __func__)); KASSERT(ctx->nk != NULL, ("%s: null nk", __func__)); - switch (pd->proto) { + switch (pd->virtual_proto) { case IPPROTO_TCP: if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != pd->nsport) { @@ -6674,8 +6800,12 @@ pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd, (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo || - (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || /* Require an exact/+1 sequence match on resets when possible */ + (SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) && + SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) { + /* Allow resets to match sequence window if ack is perfect match */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(pd, reason, th, @@ -6816,7 +6946,7 @@ pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, state->rule->return_ttl, M_SKIP_FIREWALL, - 0, 0, state->act.rtableid); + 0, 0, state->act.rtableid, reason); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; @@ -6941,7 +7071,8 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) pd->src, th->th_dport, th->th_sport, state->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, state->src.mss, 0, - M_SKIP_FIREWALL, 0, 0, state->act.rtableid); + M_SKIP_FIREWALL, 0, 0, state->act.rtableid, + reason); 
REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || @@ -6974,7 +7105,8 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) state->dst.seqhi, 0, TH_SYN, 0, state->src.mss, 0, state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0, - state->tag, 0, state->act.rtableid); + state->tag, 0, state->act.rtableid, + reason); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != @@ -6989,13 +7121,15 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, state->src.max_win, 0, 0, 0, - state->tag, 0, state->act.rtableid); + state->tag, 0, state->act.rtableid, + reason); pf_send_tcp(state->rule, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], state->src.seqhi + 1, state->src.seqlo + 1, TH_ACK, state->dst.max_win, 0, 0, - M_SKIP_FIREWALL, 0, 0, state->act.rtableid); + M_SKIP_FIREWALL, 0, 0, state->act.rtableid, + reason); state->src.seqdiff = state->dst.seqhi - state->src.seqlo; state->dst.seqdiff = state->src.seqhi - @@ -7095,7 +7229,7 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) * ACK enables all parties (firewall and peers) * to get in sync again. 
*/ - pf_send_challenge_ack(pd, *state, src, dst); + pf_send_challenge_ack(pd, *state, src, dst, reason); return (PF_DROP); } } @@ -7390,9 +7524,10 @@ static void pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif, struct pf_kstate *s, int action) { + struct pf_krule_slist match_rules; struct pf_sctp_multihome_job *j, *tmp; struct pf_sctp_source *i; - int ret __unused; + int ret; struct pf_kstate *sm = NULL; struct pf_krule *ra = NULL; struct pf_krule *r = &V_pf_default_rule; @@ -7437,8 +7572,14 @@ again: if (s->rule->rule_flag & PFRULE_ALLOW_RELATED) { j->pd.related_rule = s->rule; } + SLIST_INIT(&match_rules); ret = pf_test_rule(&r, &sm, - &j->pd, &ra, &rs, &reason, NULL); + &j->pd, &ra, &rs, &reason, NULL, &match_rules); + /* + * Nothing to do about match rules, the processed + * packet has already increased the counters. + */ + pf_free_match_rules(&match_rules); PF_RULES_RUNLOCK(); SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->pd.m, ret); if (ret != PF_DROP && sm != NULL) { @@ -7499,6 +7640,7 @@ again: nj->pd.m = j->pd.m; nj->op = j->op; + MPASS(nj->pd.pcksum); TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next); } PF_SCTP_ENDPOINTS_UNLOCK(); @@ -7566,7 +7708,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) while (off < len) { struct sctp_paramhdr h; - if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL, + if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, pd->af)) return (PF_DROP); @@ -7586,7 +7728,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) return (PF_DROP); if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); if (in_nullhost(t)) @@ -7618,6 +7760,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) job->pd.m = pd->m; job->op = op; + MPASS(job->pd.pcksum); TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next); break; } @@ -7630,7 +7773,7 @@ pf_multihome_scan(int start, int len, struct 
pf_pdesc *pd, int op) return (PF_DROP); if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0) break; @@ -7651,6 +7794,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) job->pd.m = pd->m; job->op = op; + MPASS(job->pd.pcksum); TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next); break; } @@ -7660,7 +7804,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) struct sctp_asconf_paramhdr ah; if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); ret = pf_multihome_scan(start + off + sizeof(ah), @@ -7675,7 +7819,7 @@ pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) struct sctp_asconf_paramhdr ah; if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), - NULL, NULL, pd->af)) + NULL, pd->af)) return (PF_DROP); ret = pf_multihome_scan(start + off + sizeof(ah), ntohs(ah.ph.param_length) - sizeof(ah), pd, @@ -7957,10 +8101,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, ipoff2 = pd->off + ICMP_MINLEN; if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(ip)\n")); + "pf: ICMP error message too short " + "(ip)"); return (PF_DROP); } /* @@ -7978,6 +8122,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, return (PF_DROP); pd2.tot_len = ntohs(h2.ip_len); + pd2.ttl = h2.ip_ttl; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; @@ -7988,10 +8133,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, ipoff2 = pd->off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(ip6)\n")); + "pf: ICMP 
error message too short " + "(ip6)"); return (PF_DROP); } pd2.off = ipoff2; @@ -8000,6 +8145,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, pd2.tot_len = ntohs(h2_6.ip6_plen) + sizeof(struct ip6_hdr); + pd2.ttl = h2_6.ip6_hlim; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; @@ -8040,11 +8186,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ - if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason, + if (!pf_pull_hdr(pd->m, pd2.off, th, 8, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(tcp)\n")); + "pf: ICMP error message too short " + "(tcp)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.tcp.th_sum; @@ -8236,10 +8382,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, int action; if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh), - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(udp)\n")); + "pf: ICMP error message too short " + "(udp)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.udp.uh_sum; @@ -8367,11 +8513,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, int copyback = 0; int action; - if (! pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason, + if (! 
pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(sctp)\n")); + "pf: ICMP error message too short " + "(sctp)"); return (PF_DROP); } pd2.pcksum = &pd2.sctp_dummy_sum; @@ -8401,8 +8547,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (src->scrub->pfss_v_tag != sh->v_tag) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message has incorrect " - "SCTP v_tag\n")); + "pf: ICMP error message has incorrect " + "SCTP v_tag"); return (PF_DROP); } @@ -8523,10 +8669,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, } if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN, - NULL, reason, pd2.af)) { + reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short i" - "(icmp)\n")); + "pf: ICMP error message too short i" + "(icmp)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.icmp.icmp_cksum; @@ -8643,10 +8789,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, } if (!pf_pull_hdr(pd->m, pd2.off, iih, - sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { + sizeof(struct icmp6_hdr), reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(icmp6)\n")); + "pf: ICMP error message too short " + "(icmp6)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.icmp6.icmp6_cksum; @@ -8758,6 +8904,11 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, default: { int action; + /* + * Placeholder value, so future calls to pf_change_ap() + * don't try to update a NULL checksum pointer. 
+ */ + pd->pcksum = &pd->sctp_dummy_sum; key.af = pd2.af; key.proto = pd2.proto; pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); @@ -8820,7 +8971,7 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, */ void * pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, - u_short *actionp, u_short *reasonp, sa_family_t af) + u_short *reasonp, sa_family_t af) { int iplen = 0; switch (af) { @@ -8830,12 +8981,7 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { - if (fragoff >= len) - ACTION_SET(actionp, PF_PASS); - else { - ACTION_SET(actionp, PF_DROP); - REASON_SET(reasonp, PFRES_FRAG); - } + REASON_SET(reasonp, PFRES_FRAG); return (NULL); } iplen = ntohs(h->ip_len); @@ -8852,7 +8998,6 @@ pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, #endif /* INET6 */ } if (m->m_pkthdr.len < off + len || iplen < off + len) { - ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } @@ -8902,14 +9047,15 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif, } #ifdef INET -static void +static int pf_route(struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *m1, *md; - struct route ro; - const struct sockaddr *gw = &ro.ro_dst; - struct sockaddr_in *dst; + struct route_in6 ro; + union sockaddr_union rt_gw; + const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst; + union sockaddr_union *dst; struct ip *ip; struct ifnet *ifp = NULL; int error = 0; @@ -8917,6 +9063,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, uint16_t tmp; int r_dir; bool skip_test = false; + int action = PF_PASS; KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__)); @@ -8938,6 +9085,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, m0 = pd->m; pd->m = NULL; SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + action = PF_DROP; goto bad_locked; } @@ 
-8951,11 +9099,12 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, } if (ifp == oifp) { /* When the 2nd interface is not skipped */ - return; + return (action); } else { m0 = pd->m; pd->m = NULL; SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + action = PF_DROP; goto bad; } } else { @@ -8963,7 +9112,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) { if (s) PF_STATE_UNLOCK(s); - return; + return (action); } } } else { @@ -8972,7 +9121,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, pf_dummynet(pd, s, r, &pd->m); if (s) PF_STATE_UNLOCK(s); - return; + return (action); } else { if (r_dir == PF_IN) { skip_test = true; @@ -9001,10 +9150,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, ip = mtod(m0, struct ip *); bzero(&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(struct sockaddr_in); - dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + dst = (union sockaddr_union *)&ro.ro_dst; + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_addr = ip->ip_dst; + if (ifp) { /* Only needed in forward direction and route-to */ + bzero(&rt_gw, sizeof(rt_gw)); + ro.ro_flags |= RT_HAS_GW; + gw = &rt_gw; + switch (pd->act.rt_af) { +#ifdef INET + case AF_INET: + rt_gw.sin.sin_family = AF_INET; + rt_gw.sin.sin_len = sizeof(struct sockaddr_in); + rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + rt_gw.sin6.sin6_family = AF_INET6; + rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6); + pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr, + &pd->act.rt_addr, AF_INET6); + break; +#endif /* INET6 */ + default: + /* Normal af-to without route-to */ + break; + } + } if (pd->dir == PF_IN) { if (ip->ip_ttl <= IPTTLDEC) { @@ -9012,6 +9186,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, pf_send_icmp(m0, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, pd->af, r, 
pd->act.rtableid); + action = PF_DROP; goto bad_locked; } ip->ip_ttl -= IPTTLDEC; @@ -9027,57 +9202,76 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, /* Use the gateway if needed. */ if (nh->nh_flags & NHF_GATEWAY) { - gw = &nh->gw_sa; + gw = (const union sockaddr_union *)&nh->gw_sa; ro.ro_flags |= RT_HAS_GW; } else { - dst->sin_addr = ip->ip_dst; + dst->sin.sin_addr = ip->ip_dst; } - - /* - * Bind to the correct interface if we're - * if-bound. We don't know which interface - * that will be until here, so we've inserted - * the state on V_pf_all. Fix that now. - */ - if (s->kif == V_pfi_all && ifp != NULL && - r->rule_flag & PFRULE_IFBOUND) - s->kif = ifp->if_pf_kif; } } - - if (r->rule_flag & PFRULE_IFBOUND && - pd->act.rt == PF_REPLYTO && - s->kif == V_pfi_all) { - s->kif = pd->act.rt_kif; - s->orig_kif = oifp->if_pf_kif; - } - PF_STATE_UNLOCK(s); } + /* It must have been either set from rt_af or from fib4_lookup */ + KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__)); + if (ifp == NULL) { m0 = pd->m; pd->m = NULL; + action = PF_DROP; SDT_PROBE1(pf, ip, route_to, drop, __LINE__); goto bad; } - if (pd->dir == PF_IN && !skip_test) { - if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp, - &pd->act) != PF_PASS) { - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - goto bad; - } else if (m0 == NULL) { - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - goto done; + /* + * Bind to the correct interface if we're if-bound. We don't know which + * interface that will be until here, so we've inserted the state + * on V_pf_all. Fix that now. 
+ */ + if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) { + /* Verify that we're here because of BOUND_IFACE */ + MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN)); + s->kif = ifp->if_pf_kif; + if (pd->act.rt == PF_REPLYTO) { + s->orig_kif = oifp->if_pf_kif; } - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - goto bad; + } + + if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN)) + skip_test = true; + + if (pd->dir == PF_IN) { + if (skip_test) { + struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif; + MPASS(s != NULL); + pf_counter_u64_critical_enter(); + pf_counter_u64_add_protected( + &out_kif->pfik_bytes[pd->naf == AF_INET6][1] + [action != PF_PASS && action != PF_AFRT], pd->tot_len); + pf_counter_u64_add_protected( + &out_kif->pfik_packets[pd->naf == AF_INET6][1] + [action != PF_PASS && action != PF_AFRT], 1); + pf_counter_u64_critical_exit(); + } else { + if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp, + &pd->act) != PF_PASS) { + action = PF_DROP; + SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + goto bad; + } else if (m0 == NULL) { + action = PF_DROP; + SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + goto done; + } + if (m0->m_len < sizeof(struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + "%s: m0->m_len < sizeof(struct ip)", __func__); + SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + action = PF_DROP; + goto bad; + } + ip = mtod(m0, struct ip *); } - ip = mtod(m0, struct ip *); } if (ifp->if_flags & IFF_LOOPBACK) @@ -9132,9 +9326,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, m_clrprotoflags(m0); /* Avoid confusing lower layers. 
*/ md = m0; - error = pf_dummynet_route(pd, s, r, ifp, gw, &md); + error = pf_dummynet_route(pd, s, r, ifp, + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } goto done; @@ -9156,12 +9352,14 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, ifp->if_mtu, pd->af, r, pd->act.rtableid); } SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + action = PF_DROP; goto bad; } error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist); if (error) { SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + action = PF_DROP; goto bad; } @@ -9173,9 +9371,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, md = m0; pd->pf_mtag = pf_find_mtag(md); error = pf_dummynet_route(pd, s, r, ifp, - gw, &md); + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, + (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } } else @@ -9188,7 +9388,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, done: if (pd->act.rt != PF_DUPTO) pd->m = NULL; - return; + else + action = PF_PASS; + return (action); bad_locked: if (s) @@ -9200,7 +9402,7 @@ bad: #endif /* INET */ #ifdef INET6 -static void +static int pf_route6(struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { @@ -9211,6 +9413,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, struct ifnet *ifp = NULL; int r_dir; bool skip_test = false; + int action = PF_PASS; KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__)); @@ -9231,6 +9434,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, pd->pf_mtag->routed++ > 3) { m0 = pd->m; pd->m = NULL; + action = PF_DROP; SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad_locked; } @@ -9245,10 +9449,11 @@ pf_route6(struct 
pf_krule *r, struct ifnet *oifp, } if (ifp == oifp) { /* When the 2nd interface is not skipped */ - return; + return (action); } else { m0 = pd->m; pd->m = NULL; + action = PF_DROP; SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad; } @@ -9257,7 +9462,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) { if (s) PF_STATE_UNLOCK(s); - return; + return (action); } } } else { @@ -9266,7 +9471,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, pf_dummynet(pd, s, r, &pd->m); if (s) PF_STATE_UNLOCK(s); - return; + return (action); } else { if (r_dir == PF_IN) { skip_test = true; @@ -9306,6 +9511,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r, pd->act.rtableid); + action = PF_DROP; goto bad_locked; } ip6->ip6_hlim -= IPV6_HLIMDEC; @@ -9324,26 +9530,8 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, sizeof(dst.sin6_addr)); else dst.sin6_addr = ip6->ip6_dst; - - /* - * Bind to the correct interface if we're - * if-bound. We don't know which interface - * that will be until here, so we've inserted - * the state on V_pf_all. Fix that now. 
- */ - if (s->kif == V_pfi_all && ifp != NULL && - r->rule_flag & PFRULE_IFBOUND) - s->kif = ifp->if_pf_kif; } } - - if (r->rule_flag & PFRULE_IFBOUND && - pd->act.rt == PF_REPLYTO && - s->kif == V_pfi_all) { - s->kif = pd->act.rt_kif; - s->orig_kif = oifp->if_pf_kif; - } - PF_STATE_UNLOCK(s); } @@ -9360,27 +9548,61 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, if (ifp == NULL) { m0 = pd->m; pd->m = NULL; + action = PF_DROP; SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad; } - if (pd->dir == PF_IN && !skip_test) { - if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT, - ifp, &m0, inp, &pd->act) != PF_PASS) { - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto bad; - } else if (m0 == NULL) { - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto done; + /* + * Bind to the correct interface if we're if-bound. We don't know which + * interface that will be until here, so we've inserted the state + * on V_pf_all. Fix that now. + */ + if (s != NULL && s->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) { + /* Verify that we're here because of BOUND_IFACE */ + MPASS(r->rt == PF_REPLYTO || (pd->af != pd->naf && s->direction == PF_IN)); + s->kif = ifp->if_pf_kif; + if (pd->act.rt == PF_REPLYTO) { + s->orig_kif = oifp->if_pf_kif; } - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", - __func__)); - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto bad; + } + + if (r->rt == PF_DUPTO || (pd->af != pd->naf && s->direction == PF_IN)) + skip_test = true; + + if (pd->dir == PF_IN) { + if (skip_test) { + struct pfi_kkif *out_kif = (struct pfi_kkif *)ifp->if_pf_kif; + MPASS(s != NULL); + pf_counter_u64_critical_enter(); + pf_counter_u64_add_protected( + &out_kif->pfik_bytes[pd->naf == AF_INET6][1] + [action != PF_PASS && action != PF_AFRT], pd->tot_len); + pf_counter_u64_add_protected( + &out_kif->pfik_packets[pd->naf == AF_INET6][1] + [action != PF_PASS && action != PF_AFRT], 
1); + pf_counter_u64_critical_exit(); + } else { + if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT, + ifp, &m0, inp, &pd->act) != PF_PASS) { + action = PF_DROP; + SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); + goto bad; + } else if (m0 == NULL) { + action = PF_DROP; + SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); + goto done; + } + if (m0->m_len < sizeof(struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + "%s: m0->m_len < sizeof(struct ip6_hdr)", + __func__); + action = PF_DROP; + SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); + goto bad; + } + ip6 = mtod(m0, struct ip6_hdr *); } - ip6 = mtod(m0, struct ip6_hdr *); } if (ifp->if_flags & IFF_LOOPBACK) @@ -9452,6 +9674,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu, pd->af, r, pd->act.rtableid); } + action = PF_DROP; SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad; } @@ -9459,7 +9682,9 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, done: if (pd->act.rt != PF_DUPTO) pd->m = NULL; - return; + else + action = PF_PASS; + return (action); bad_locked: if (s) @@ -9596,6 +9821,7 @@ pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r, const struct pf_kstate *s, struct ip_fw_args *dnflow) { int dndir = r->direction; + sa_family_t af = pd->naf; if (s && dndir == PF_INOUT) { dndir = s->direction; @@ -9636,20 +9862,46 @@ pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r, dnflow->f_id.proto = pd->proto; dnflow->f_id.extra = dnflow->rule.info; - switch (pd->naf) { + if (s) + af = s->key[PF_SK_STACK]->af; + + switch (af) { case AF_INET: dnflow->f_id.addr_type = 4; - dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); - dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); + if (s) { + dnflow->f_id.src_ip = htonl( + s->key[PF_SK_STACK]->addr[pd->sidx].v4.s_addr); + dnflow->f_id.dst_ip = htonl( + s->key[PF_SK_STACK]->addr[pd->didx].v4.s_addr); + } else { + dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); + 
dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); + } break; case AF_INET6: - dnflow->flags |= IPFW_ARGS_IP6; dnflow->f_id.addr_type = 6; - dnflow->f_id.src_ip6 = pd->src->v6; - dnflow->f_id.dst_ip6 = pd->dst->v6; + + if (s) { + dnflow->f_id.src_ip6 = + s->key[PF_SK_STACK]->addr[pd->sidx].v6; + dnflow->f_id.dst_ip6 = + s->key[PF_SK_STACK]->addr[pd->didx].v6; + } else { + dnflow->f_id.src_ip6 = pd->src->v6; + dnflow->f_id.dst_ip6 = pd->dst->v6; + } break; } + /* + * Separate this out, because while we pass the pre-NAT addresses to + * dummynet we want the post-nat address family in case of nat64. + * Dummynet may call ip_output/ip6_output itself, and we need it to + * call the correct one. + */ + if (pd->naf == AF_INET6) + dnflow->flags |= IPFW_ARGS_IP6; + return (true); } @@ -9671,7 +9923,7 @@ pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: kif == NULL, if_xname %s\n", __func__, ifp->if_xname)); + "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) @@ -9786,6 +10038,62 @@ pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s, } static int +pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, + u_short *reason) +{ + uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; + + /* IP header in payload of ICMP packet may be too short */ + if (pd->m->m_pkthdr.len < end) { + DPFPRINTF(PF_DEBUG_MISC, "IP option too short"); + REASON_SET(reason, PFRES_SHORT); + return (PF_DROP); + } + + MPASS(end - off <= sizeof(opts)); + m_copydata(pd->m, off, end - off, opts); + end -= off; + off = 0; + + while (off < end) { + type = opts[off]; + if (type == IPOPT_EOL) + break; + if (type == IPOPT_NOP) { + off++; + continue; + } + if (off + 2 > end) { + DPFPRINTF(PF_DEBUG_MISC, "IP length opt"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + length = opts[off + 1]; + if (length < 2) { + DPFPRINTF(PF_DEBUG_MISC, "IP 
short opt"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + if (off + length > end) { + DPFPRINTF(PF_DEBUG_MISC, "IP long opt"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + switch (type) { + case IPOPT_RA: + pd->badopts |= PF_OPT_ROUTER_ALERT; + break; + default: + pd->badopts |= PF_OPT_OTHER; + break; + } + off += length; + } + + return (PF_PASS); +} + +static int pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) { struct ah ext; @@ -9797,11 +10105,31 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) REASON_SET(reason, PFRES_SHORT); return (PF_DROP); } - if (hlen != sizeof(struct ip)) - pd->badopts++; + if (hlen != sizeof(struct ip)) { + if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), + pd->off + hlen, reason) != PF_PASS) + return (PF_DROP); + /* header options which contain only padding is fishy */ + if (pd->badopts == 0) + pd->badopts |= PF_OPT_OTHER; + } end = pd->off + ntohs(h->ip_len); pd->off += hlen; pd->proto = h->ip_p; + /* IGMP packets have router alert options, allow them */ + if (pd->proto == IPPROTO_IGMP) { + /* + * According to RFC 1112 ttl must be set to 1 in all IGMP + * packets sent to 224.0.0.1 + */ + if ((h->ip_ttl != 1) && + (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) { + DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + pd->badopts &= ~PF_OPT_ROUTER_ALERT; + } /* stop walking over non initial fragments */ if ((h->ip_off & htons(IP_OFFMASK)) != 0) return (PF_PASS); @@ -9813,8 +10141,8 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) end < pd->off + sizeof(ext)) return (PF_PASS); if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET)) { - DPFPRINTF(PF_DEBUG_MISC, ("IP short exthdr")); + reason, AF_INET)) { + DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr"); return (PF_DROP); } pd->off += (ext.ah_len + 2) * 4; @@ -9824,7 +10152,7 @@ pf_walk_header(struct pf_pdesc *pd, 
struct ip *h, u_short *reason) return (PF_PASS); } } - DPFPRINTF(PF_DEBUG_MISC, ("IPv4 nested authentication header limit")); + DPFPRINTF(PF_DEBUG_MISC, "IPv4 nested authentication header limit"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9839,51 +10167,58 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, while (off < end) { if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, - sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short opt type")); + sizeof(opt.ip6o_type), reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type"); return (PF_DROP); } if (opt.ip6o_type == IP6OPT_PAD1) { off++; continue; } - if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL, + if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short opt")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt"); return (PF_DROP); } if (off + sizeof(opt) + opt.ip6o_len > end) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 long opt")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 long opt"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } switch (opt.ip6o_type) { + case IP6OPT_PADN: + break; case IP6OPT_JUMBO: + pd->badopts |= PF_OPT_JUMBO; if (pd->jumbolen != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple jumbo")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple jumbo"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } if (ntohs(h->ip6_plen) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 bad jumbo plen")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 bad jumbo plen"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } - if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL, + if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short jumbo")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo"); return (PF_DROP); } memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, sizeof(pd->jumbolen)); pd->jumbolen = ntohl(pd->jumbolen); if (pd->jumbolen < 
IPV6_MAXPACKET) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short jumbolen")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbolen"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } break; + case IP6OPT_ROUTER_ALERT: + pd->badopts |= PF_OPT_ROUTER_ALERT; + break; default: + pd->badopts |= PF_OPT_OTHER; break; } off += sizeof(opt) + opt.ip6o_len; @@ -9897,6 +10232,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) { struct ip6_frag frag; struct ip6_ext ext; + struct icmp6_hdr icmp6; struct ip6_rthdr rthdr; uint32_t end; int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; @@ -9908,27 +10244,40 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) { switch (pd->proto) { case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: - pd->badopts++; + pd->badopts |= PF_OPT_OTHER; + break; + case IPPROTO_HOPOPTS: + if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), + reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); + return (PF_DROP); + } + if (pf_walk_option6(pd, h, pd->off + sizeof(ext), + pd->off + (ext.ip6e_len + 1) * 8, + reason) != PF_PASS) + return (PF_DROP); + /* option header which contains only padding is fishy */ + if (pd->badopts == 0) + pd->badopts |= PF_OPT_OTHER; break; } switch (pd->proto) { case IPPROTO_FRAGMENT: if (fraghdr_cnt++) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple fragment")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple fragment"); REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } /* jumbo payload packets cannot be fragmented */ if (pd->jumbolen != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 fragmented jumbo")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 fragmented jumbo"); REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), - NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short fragment")); + reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment"); return 
(PF_DROP); } /* stop walking over non initial fragments */ @@ -9944,7 +10293,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) break; case IPPROTO_ROUTING: if (rthdr_cnt++) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple rthdr")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple rthdr"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9955,12 +10304,12 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), - NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short rthdr")); + reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr"); return (PF_DROP); } if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 rthdr0")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 rthdr0"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9968,7 +10317,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) case IPPROTO_HOPOPTS: /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 hopopts not first")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 hopopts not first"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9976,8 +10325,8 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) case IPPROTO_AH: case IPPROTO_DSTOPTS: if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), - NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short exthdr")); + reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); return (PF_DROP); } /* fragments may be short */ @@ -9989,18 +10338,11 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) /* reassembly needs the ext header before the frag */ if (pd->fragoff == 0) pd->extoff = pd->off; - if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { - if (pf_walk_option6(pd, h, - pd->off + sizeof(ext), - pd->off + 
(ext.ip6e_len + 1) * 8, reason) - != PF_PASS) - return (PF_DROP); - if (ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { - DPFPRINTF(PF_DEBUG_MISC, - ("IPv6 missing jumbo")); - REASON_SET(reason, PFRES_IPOPTIONS); - return (PF_DROP); - } + if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 && + ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 missing jumbo"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); } if (pd->proto == IPPROTO_AH) pd->off += (ext.ip6e_len + 2) * 4; @@ -10008,10 +10350,45 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) pd->off += (ext.ip6e_len + 1) * 8; pd->proto = ext.ip6e_nxt; break; + case IPPROTO_ICMPV6: + /* fragments may be short, ignore inner header then */ + if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) { + pd->off = pd->fragoff; + pd->proto = IPPROTO_FRAGMENT; + return (PF_PASS); + } + if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), + reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, + "IPv6 short icmp6hdr"); + return (PF_DROP); + } + /* ICMP multicast packets have router alert options */ + switch (icmp6.icmp6_type) { + case MLD_LISTENER_QUERY: + case MLD_LISTENER_REPORT: + case MLD_LISTENER_DONE: + case MLDV2_LISTENER_REPORT: + /* + * According to RFC 2710 all MLD messages are + * sent with hop-limit (ttl) set to 1, and link + * local source address. If either one is + * missing then MLD message is invalid and + * should be discarded. + */ + if ((h->ip6_hlim != 1) || + !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) { + DPFPRINTF(PF_DEBUG_MISC, "Invalid MLD"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + pd->badopts &= ~PF_OPT_ROUTER_ALERT; + break; + } + return (PF_PASS); case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: - case IPPROTO_ICMPV6: /* fragments may be short, ignore inner header then */ if (pd->fragoff != 0 && end < pd->off + (pd->proto == IPPROTO_TCP ? 
sizeof(struct tcphdr) : @@ -10026,7 +10403,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } } - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 nested extension header limit")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 nested extension header limit"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -10052,6 +10429,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, pd->didx = (dir == PF_IN) ? 1 : 0; pd->af = pd->naf = af; + PF_RULES_ASSERT(); + TAILQ_INIT(&pd->sctp_multihome_jobs); if (default_actions != NULL) memcpy(&pd->act, default_actions, sizeof(pd->act)); @@ -10069,31 +10448,32 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (__predict_false((*m0)->m_len < sizeof(struct ip)) && (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m_len < sizeof(struct ip), pullup failed\n", - __func__)); + "%s: m_len < sizeof(struct ip), pullup failed", + __func__); *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); + } + + h = mtod(pd->m, struct ip *); + if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { + *action = PF_DROP; + REASON_SET(reason, PFRES_SHORT); + return (PF_DROP); } if (pf_normalize_ip(reason, pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ *m0 = pd->m; *action = PF_DROP; - return (-1); + return (PF_DROP); } *m0 = pd->m; - h = mtod(pd->m, struct ip *); - if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { - *action = PF_DROP; - REASON_SET(reason, PFRES_SHORT); - return (-1); - } if (pf_walk_header(pd, h, reason) != PF_PASS) { *action = PF_DROP; - return (-1); + return (PF_DROP); } pd->src = (struct pf_addr *)&h->ip_src; @@ -10119,18 +10499,33 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) && (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) { 
DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m_len < sizeof(struct ip6_hdr)" - ", pullup failed\n", __func__)); + "%s: m_len < sizeof(struct ip6_hdr)" + ", pullup failed", __func__); *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } h = mtod(pd->m, struct ip6_hdr *); + if (pd->m->m_pkthdr.len < + sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { + *action = PF_DROP; + REASON_SET(reason, PFRES_SHORT); + return (PF_DROP); + } + + /* + * we do not support jumbogram. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + *action = PF_DROP; + return (PF_DROP); + } if (pf_walk_header6(pd, h, reason) != PF_PASS) { *action = PF_DROP; - return (-1); + return (PF_DROP); } h = mtod(pd->m, struct ip6_hdr *); @@ -10147,27 +10542,18 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, pd->virtual_proto = (pd->fragoff != 0) ? PF_VPROTO_FRAGMENT : pd->proto; - /* - * we do not support jumbogram. if we keep going, zero ip6_plen - * will do something bad, so drop the packet for now. - */ - if (htons(h->ip6_plen) == 0) { - *action = PF_DROP; - return (-1); - } - /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(pd->fragoff, reason, pd) != PF_PASS) { *m0 = pd->m; *action = PF_DROP; - return (-1); + return (PF_DROP); } *m0 = pd->m; if (pd->m == NULL) { /* packet sits in reassembly queue, no error */ *action = PF_PASS; - return (-1); + return (PF_DROP); } /* Update pointers into the packet. 
*/ @@ -10179,7 +10565,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (pf_walk_header6(pd, h, reason) != PF_PASS) { *action = PF_DROP; - return (-1); + return (PF_DROP); } if (m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL) != NULL) { @@ -10205,11 +10591,11 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, case IPPROTO_TCP: { struct tcphdr *th = &pd->hdr.tcp; - if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action, + if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } pd->hdrlen = sizeof(*th); pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); @@ -10221,11 +10607,11 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, case IPPROTO_UDP: { struct udphdr *uh = &pd->hdr.udp; - if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action, + if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } pd->hdrlen = sizeof(*uh); if (uh->uh_dport == 0 || @@ -10233,7 +10619,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } pd->sport = &uh->uh_sport; pd->dport = &uh->uh_dport; @@ -10242,10 +10628,10 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } case IPPROTO_SCTP: { if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp), - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } pd->hdrlen = sizeof(pd->hdr.sctp); pd->p_len = pd->tot_len - pd->off; @@ -10255,27 +10641,31 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (pd->hdr.sctp.src_port == 0 || 
pd->hdr.sctp.dest_port == 0) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); - } - if (pf_scan_sctp(pd) != PF_PASS) { - *action = PF_DROP; - REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } + /* * Placeholder. The SCTP checksum is 32-bits, but * pf_test_state() expects to update a 16-bit checksum. * Provide a dummy value which we'll subsequently ignore. + * Do this before pf_scan_sctp() so any jobs we enqueue + * have a pcksum set. */ pd->pcksum = &pd->sctp_dummy_sum; + + if (pf_scan_sctp(pd) != PF_PASS) { + *action = PF_DROP; + REASON_SET(reason, PFRES_SHORT); + return (PF_DROP); + } break; } case IPPROTO_ICMP: { if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } pd->pcksum = &pd->hdr.icmp.icmp_cksum; pd->hdrlen = ICMP_MINLEN; @@ -10286,10 +10676,10 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, size_t icmp_hlen = sizeof(struct icmp6_hdr); if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } /* ICMP headers we look further into to match state */ switch (pd->hdr.icmp6.icmp6_type) { @@ -10312,16 +10702,23 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } if (icmp_hlen > sizeof(struct icmp6_hdr) && !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, - action, reason, af)) { + reason, af)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); - return (-1); + return (PF_DROP); } pd->hdrlen = icmp_hlen; pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; break; } #endif /* INET6 */ + default: + /* + * Placeholder value, so future calls to pf_change_ap() don't + * try to update a NULL checksum pointer. 
+ */ + pd->pcksum = &pd->sctp_dummy_sum; + break; } if (pd->sport) @@ -10329,111 +10726,175 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (pd->dport) pd->odport = pd->ndport = *pd->dport; - return (0); + MPASS(pd->pcksum != NULL); + + return (PF_PASS); +} + +static __inline void +pf_rule_counters_inc(struct pf_pdesc *pd, struct pf_krule *r, int dir_out, + int op_pass, sa_family_t af, struct pf_addr *src_host, + struct pf_addr *dst_host) +{ + pf_counter_u64_add_protected(&(r->packets[dir_out]), 1); + pf_counter_u64_add_protected(&(r->bytes[dir_out]), pd->tot_len); + pf_update_timestamp(r); + + if (r->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(r->src.addr.p.tbl, src_host, af, + pd->tot_len, dir_out, op_pass, r->src.neg); + if (r->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(r->dst.addr.p.tbl, dst_host, af, + pd->tot_len, dir_out, op_pass, r->dst.neg); } static void -pf_counters_inc(int action, struct pf_pdesc *pd, - struct pf_kstate *s, struct pf_krule *r, struct pf_krule *a) +pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_kstate *s, + struct pf_krule *r, struct pf_krule *a, struct pf_krule_slist *match_rules) { - struct pf_krule *tr; - int dir = pd->dir; - int dirndx; + struct pf_krule_slist *mr = match_rules; + struct pf_krule_item *ri; + struct pf_krule *nr = NULL; + struct pf_addr *src_host = pd->src; + struct pf_addr *dst_host = pd->dst; + struct pf_state_key *key; + int dir_out = (pd->dir == PF_OUT); + int op_r_pass = (r->action == PF_PASS); + int op_pass = (action == PF_PASS || action == PF_AFRT); + int s_dir_in, s_dir_out, s_dir_rev; + sa_family_t af = pd->af; pf_counter_u64_critical_enter(); + + /* + * Set AF for interface counters, it will be later overwritten for + * rule and state counters with value from proper state key. 
+ */ + if (action == PF_AFRT) { + MPASS(s != NULL); + if (s->direction == PF_OUT && dir_out) + af = pd->naf; + } + pf_counter_u64_add_protected( - &pd->kif->pfik_bytes[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS], + &pd->kif->pfik_bytes[af == AF_INET6][dir_out][!op_pass], pd->tot_len); pf_counter_u64_add_protected( - &pd->kif->pfik_packets[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS], + &pd->kif->pfik_packets[af == AF_INET6][dir_out][!op_pass], 1); - if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { - dirndx = (dir == PF_OUT); - pf_counter_u64_add_protected(&r->packets[dirndx], 1); - pf_counter_u64_add_protected(&r->bytes[dirndx], pd->tot_len); - pf_update_timestamp(r); + /* If the rule has failed to apply, don't increase its counters */ + if (!(op_pass || r->action == PF_DROP)) { + pf_counter_u64_critical_exit(); + return; + } + + if (s != NULL) { + PF_STATE_LOCK_ASSERT(s); + mr = &(s->match_rules); - if (a != NULL) { - pf_counter_u64_add_protected(&a->packets[dirndx], 1); - pf_counter_u64_add_protected(&a->bytes[dirndx], pd->tot_len); + /* + * For af-to on the inbound direction we can determine + * the direction of passing packet only by checking direction + * of AF translation. The af-to in "in" direction covers both + * the inbound and the outbound side of state tracking, + * so pd->dir is always PF_IN. We set dir_out and s_dir_rev + * in a way to count packets as if the state was outbound, + * because pfctl -ss shows the state with "->", as if it was + * oubound. 
+ */ + if (action == PF_AFRT && s->direction == PF_IN) { + dir_out = (pd->naf == s->rule->naf); + s_dir_in = 1; + s_dir_out = 0; + s_dir_rev = (pd->naf == s->rule->af); + } else { + dir_out = (pd->dir == PF_OUT); + s_dir_in = (s->direction == PF_IN); + s_dir_out = (s->direction == PF_OUT); + s_dir_rev = (pd->dir != s->direction); } - if (s != NULL) { - struct pf_krule_item *ri; - if (s->nat_rule != NULL) { - pf_counter_u64_add_protected(&s->nat_rule->packets[dirndx], + /* pd->tot_len is a problematic with af-to rules. Sure, we can + * agree that it's the post-af-to packet length that was + * forwarded through a state, but what about tables which match + * on pre-af-to addresses? We don't have access the the original + * packet length anymore. + */ + s->packets[s_dir_rev]++; + s->bytes[s_dir_rev] += pd->tot_len; + + /* + * Source nodes are accessed unlocked here. But since we are + * operating with stateful tracking and the state is locked, + * those SNs could not have been freed. + */ + for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) { + if (s->sns[sn_type] != NULL) { + counter_u64_add( + s->sns[sn_type]->packets[dir_out], 1); - pf_counter_u64_add_protected(&s->nat_rule->bytes[dirndx], + counter_u64_add( + s->sns[sn_type]->bytes[dir_out], pd->tot_len); } - /* - * Source nodes are accessed unlocked here. - * But since we are operating with stateful tracking - * and the state is locked, those SNs could not have - * been freed. - */ - for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) { - if (s->sns[sn_type] != NULL) { - counter_u64_add( - s->sns[sn_type]->packets[dirndx], - 1); - counter_u64_add( - s->sns[sn_type]->bytes[dirndx], - pd->tot_len); - } - } - dirndx = (dir == s->direction) ? 
0 : 1; - s->packets[dirndx]++; - s->bytes[dirndx] += pd->tot_len; - - SLIST_FOREACH(ri, &s->match_rules, entry) { - pf_counter_u64_add_protected(&ri->r->packets[dirndx], 1); - pf_counter_u64_add_protected(&ri->r->bytes[dirndx], pd->tot_len); + } - if (ri->r->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(ri->r->src.addr.p.tbl, - (s == NULL) ? pd->src : - &s->key[(s->direction == PF_IN)]-> - addr[(s->direction == PF_OUT)], - pd->af, pd->tot_len, dir == PF_OUT, - r->action == PF_PASS, ri->r->src.neg); - if (ri->r->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(ri->r->dst.addr.p.tbl, - (s == NULL) ? pd->dst : - &s->key[(s->direction == PF_IN)]-> - addr[(s->direction == PF_IN)], - pd->af, pd->tot_len, dir == PF_OUT, - r->action == PF_PASS, ri->r->dst.neg); + /* Start with pre-NAT addresses */ + key = s->key[(s->direction == PF_OUT)]; + src_host = &(key->addr[s_dir_out]); + dst_host = &(key->addr[s_dir_in]); + af = key->af; + if (s->nat_rule) { + /* Old-style NAT rules */ + if (s->nat_rule->action == PF_NAT || + s->nat_rule->action == PF_RDR || + s->nat_rule->action == PF_BINAT) { + nr = s->nat_rule; + pf_rule_counters_inc(pd, s->nat_rule, dir_out, + op_r_pass, af, src_host, dst_host); + /* Use post-NAT addresses from now on */ + key = s->key[s_dir_in]; + src_host = &(key->addr[s_dir_out]); + dst_host = &(key->addr[s_dir_in]); + af = key->af; } } + } - tr = r; - if (s != NULL && s->nat_rule != NULL && - r == &V_pf_default_rule) - tr = s->nat_rule; - - if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, - (s == NULL) ? pd->src : - &s->key[(s->direction == PF_IN)]-> - addr[(s->direction == PF_OUT)], - pd->af, pd->tot_len, dir == PF_OUT, - r->action == PF_PASS, tr->src.neg); - if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, - (s == NULL) ? 
pd->dst : - &s->key[(s->direction == PF_IN)]-> - addr[(s->direction == PF_IN)], - pd->af, pd->tot_len, dir == PF_OUT, - r->action == PF_PASS, tr->dst.neg); + SLIST_FOREACH(ri, mr, entry) { + pf_rule_counters_inc(pd, ri->r, dir_out, op_r_pass, af, + src_host, dst_host); + if (s && s->nat_rule == ri->r) { + /* Use post-NAT addresses after a match NAT rule */ + key = s->key[s_dir_in]; + src_host = &(key->addr[s_dir_out]); + dst_host = &(key->addr[s_dir_in]); + af = key->af; + } + } + + if (s == NULL) { + pf_free_match_rules(mr); + } + + if (a != NULL) { + pf_rule_counters_inc(pd, a, dir_out, op_r_pass, af, + src_host, dst_host); + } + + if (r != nr) { + pf_rule_counters_inc(pd, r, dir_out, op_r_pass, af, + src_host, dst_host); } + pf_counter_u64_critical_exit(); } + static void pf_log_matches(struct pf_pdesc *pd, struct pf_krule *rm, struct pf_krule *am, struct pf_kruleset *ruleset, - struct pf_krule_slist *matchrules) + struct pf_krule_slist *match_rules) { struct pf_krule_item *ri; @@ -10441,7 +10902,7 @@ pf_log_matches(struct pf_pdesc *pd, struct pf_krule *rm, if (rm->log & PF_LOG_MATCHES) return; - SLIST_FOREACH(ri, matchrules, entry) + SLIST_FOREACH(ri, match_rules, entry) if (ri->r->log & PF_LOG_MATCHES) PFLOG_PACKET(rm->action, PFRES_MATCH, rm, am, ruleset, pd, 1, ri->r); @@ -10458,6 +10919,8 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 struct pf_krule *a = NULL, *r = &V_pf_default_rule; struct pf_kstate *s = NULL; struct pf_kruleset *ruleset = NULL; + struct pf_krule_item *ri; + struct pf_krule_slist match_rules; struct pf_pdesc pd; int use_2nd_queue = 0; uint16_t tag; @@ -10465,40 +10928,36 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 PF_RULES_RLOCK_TRACKER; KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); M_ASSERTPKTHDR(*m0); + NET_EPOCH_ASSERT(); if (!V_pf_status.running) return (PF_PASS); - PF_RULES_RLOCK(); - kif = (struct pfi_kkif 
*)ifp->if_pf_kif; if (__predict_false(kif == NULL)) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: kif == NULL, if_xname %s\n", - __func__, ifp->if_xname)); - PF_RULES_RUNLOCK(); + "%s: kif == NULL, if_xname %s", + __func__, ifp->if_xname); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) { - PF_RULES_RUNLOCK(); return (PF_PASS); } if ((*m0)->m_flags & M_SKIP_FIREWALL) { - PF_RULES_RUNLOCK(); return (PF_PASS); } if (__predict_false(! M_WRITABLE(*m0))) { *m0 = m_unshare(*m0, M_NOWAIT); if (*m0 == NULL) { - PF_RULES_RUNLOCK(); return (PF_DROP); } } pf_init_pdesc(&pd, *m0); + SLIST_INIT(&match_rules); if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; @@ -10506,12 +10965,10 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 ifp = ifnet_byindexgen(pd.pf_mtag->if_index, pd.pf_mtag->if_idxgen); if (ifp == NULL || ifp->if_flags & IFF_DYING) { - PF_RULES_RUNLOCK(); m_freem(*m0); *m0 = NULL; return (PF_PASS); } - PF_RULES_RUNLOCK(); (ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL); *m0 = NULL; return (PF_PASS); @@ -10526,13 +10983,14 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 /* But only once. We may see the packet multiple times (e.g. * PFIL_IN/PFIL_OUT). 
*/ pf_dummynet_flag_remove(pd.m, pd.pf_mtag); - PF_RULES_RUNLOCK(); return (PF_PASS); } + PF_RULES_RLOCK(); + if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason, - kif, default_actions) == -1) { + kif, default_actions) != PF_PASS) { if (action != PF_PASS) pd.act.log |= PF_LOG_FORCE; goto done; @@ -10596,7 +11054,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 action = PF_DROP; else action = pf_test_rule(&r, &s, &pd, &a, - &ruleset, &reason, inp); + &ruleset, &reason, inp, &match_rules); if (action != PF_PASS) REASON_SET(&reason, PFRES_FRAG); break; @@ -10605,7 +11063,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 /* Respond to SYN with a syncookie. */ if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN && pd.dir == PF_IN && pf_synflood_check(&pd)) { - pf_syncookie_send(&pd); + pf_syncookie_send(&pd, &reason); action = PF_DROP; break; } @@ -10629,7 +11087,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 pd.dir == PF_IN) { struct mbuf *msyn; - msyn = pf_syncookie_recreate_syn(&pd); + msyn = pf_syncookie_recreate_syn(&pd, &reason); if (msyn == NULL) { action = PF_DROP; break; @@ -10654,7 +11112,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 break; } else { action = pf_test_rule(&r, &s, &pd, - &a, &ruleset, &reason, inp); + &a, &ruleset, &reason, inp, &match_rules); } } break; @@ -10675,7 +11133,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 a = s->anchor; } else if (s == NULL) { action = pf_test_rule(&r, &s, - &pd, &a, &ruleset, &reason, inp); + &pd, &a, &ruleset, &reason, inp, &match_rules); } break; @@ -10685,14 +11143,14 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 action = PF_DROP; REASON_SET(&reason, PFRES_NORM); DPFPRINTF(PF_DEBUG_MISC, - ("dropping IPv6 packet with ICMPv4 payload")); + "dropping IPv6 packet with 
ICMPv4 payload"); break; } if (pd.virtual_proto == IPPROTO_ICMPV6 && af != AF_INET6) { action = PF_DROP; REASON_SET(&reason, PFRES_NORM); DPFPRINTF(PF_DEBUG_MISC, - ("pf: dropping IPv4 packet with ICMPv6 payload\n")); + "pf: dropping IPv4 packet with ICMPv6 payload"); break; } action = pf_test_state_icmp(&s, &pd, &reason); @@ -10703,7 +11161,7 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 a = s->anchor; } else if (s == NULL) action = pf_test_rule(&r, &s, &pd, - &a, &ruleset, &reason, inp); + &a, &ruleset, &reason, inp, &match_rules); break; } @@ -10712,18 +11170,21 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 done: PF_RULES_RUNLOCK(); - if (pd.m == NULL) + /* if packet sits in reassembly queue, return without error */ + if (pd.m == NULL) { + pf_free_match_rules(&match_rules); goto eat_pkt; + } if (s) memcpy(&pd.act, &s->act, sizeof(s->act)); - if (action == PF_PASS && pd.badopts && !pd.act.allow_opts) { + if (action == PF_PASS && pd.badopts != 0 && !pd.act.allow_opts) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: dropping packet with dangerous headers\n")); + "pf: dropping packet with dangerous headers"); } if (pd.act.max_pkt_size && pd.act.max_pkt_size && @@ -10732,7 +11193,7 @@ done: REASON_SET(&reason, PFRES_NORM); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: dropping overly long packet\n")); + "pf: dropping overly long packet"); } if (s) { @@ -10764,7 +11225,7 @@ done: REASON_SET(&reason, PFRES_MEMORY); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate 802.1q mtag\n")); + "pf: failed to allocate 802.1q mtag"); } } @@ -10810,6 +11271,8 @@ done: (dir == PF_IN) ? 
PF_DIVERT_MTAG_DIR_IN : PF_DIVERT_MTAG_DIR_OUT; + pf_counters_inc(action, &pd, s, r, a, &match_rules); + if (s) PF_STATE_UNLOCK(s); @@ -10821,7 +11284,7 @@ done: REASON_SET(&reason, PFRES_MEMORY); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate tag\n")); + "pf: failed to allocate tag"); } else { pd.pf_mtag->flags |= PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; @@ -10838,7 +11301,7 @@ done: REASON_SET(&reason, PFRES_MEMORY); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate divert tag\n")); + "pf: failed to allocate divert tag"); } } /* XXX: Anybody working on it?! */ @@ -10851,7 +11314,6 @@ done: if (pd.act.log) { struct pf_krule *lr; - struct pf_krule_item *ri; if (s != NULL && s->nat_rule != NULL && s->nat_rule->log & PF_LOG_ALL) @@ -10870,7 +11332,7 @@ done: } } - pf_counters_inc(action, &pd, s, r, a); + pf_counters_inc(action, &pd, s, r, a, &match_rules); switch (action) { case PF_SYNPROXY_DROP: @@ -10890,15 +11352,18 @@ done: break; } #ifdef INET - if (pd.naf == AF_INET) - pf_route(r, kif->pfik_ifp, s, &pd, inp); + if (pd.naf == AF_INET) { + action = pf_route(r, kif->pfik_ifp, s, &pd, + inp); + } #endif /* INET */ #ifdef INET6 - if (pd.naf == AF_INET6) - pf_route6(r, kif->pfik_ifp, s, &pd, inp); + if (pd.naf == AF_INET6) { + action = pf_route6(r, kif->pfik_ifp, s, &pd, + inp); +} #endif /* INET6 */ *m0 = pd.m; - action = PF_PASS; goto out; break; default: @@ -10907,13 +11372,15 @@ done: #ifdef INET case AF_INET: /* pf_route() returns unlocked. */ - pf_route(r, kif->pfik_ifp, s, &pd, inp); + action = pf_route(r, kif->pfik_ifp, s, &pd, + inp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* pf_route6() returns unlocked. 
*/ - pf_route6(r, kif->pfik_ifp, s, &pd, inp); + action = pf_route6(r, kif->pfik_ifp, s, &pd, + inp); break; #endif /* INET6 */ } diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 2009d2907985..bcd66fd17d5d 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -120,7 +120,8 @@ enum { enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, - PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; + PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_ANCHORS, PF_LIMIT_ETH_ANCHORS, + PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; @@ -130,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_POOL_ENDPI 0x40 +#define PF_POOL_IPV6NH 0x80 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f @@ -140,7 +142,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_LOG 0x01 #define PF_LOG_ALL 0x02 -#define PF_LOG_SOCKET_LOOKUP 0x04 +#define PF_LOG_USER 0x04 #define PF_LOG_FORCE 0x08 #define PF_LOG_MATCHES 0x10 @@ -245,6 +247,12 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define SCNT_SRC_NODE_REMOVALS 2 #define SCNT_MAX 3 +/* fragment counters */ +#define NCNT_FRAG_SEARCH 0 +#define NCNT_FRAG_INSERT 1 +#define NCNT_FRAG_REMOVALS 2 +#define NCNT_MAX 3 + #define PF_TABLE_NAME_SIZE 32 #define PF_QNAME_SIZE 64 @@ -490,6 +498,8 @@ struct pf_osfp_ioctl { #define PF_ANCHOR_NAME_SIZE 64 #define PF_ANCHOR_MAXPATH (MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1) +#define PF_ANCHOR_HIWAT 512 +#define PF_OPTIMIZER_TABLE_PFX "__automatic_" struct pf_rule { struct pf_rule_addr src; @@ -627,6 +637,8 @@ struct pf_rule { #define PFRULE_PFLOW 0x00040000 #define PFRULE_ALLOW_RELATED 0x00080000 #define PFRULE_AFTO 0x00200000 /* af-to rule */ +#define PFRULE_ONCE 0x00400000 /* one shot rule */ +#define PFRULE_EXPIRED 0x00800000 /* 
one shot rule hit by pkt */ #ifdef _KERNEL #define PFRULE_REFS 0x0080 /* rule has references */ diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c index e2200c15c704..6f41d453a7d1 100644 --- a/sys/netpfil/pf/pf_if.c +++ b/sys/netpfil/pf/pf_if.c @@ -655,8 +655,10 @@ pfi_kkif_update(struct pfi_kkif *kif) /* again for all groups kif is member of */ if (kif->pfik_ifp != NULL) { CK_STAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) - pfi_kkif_update((struct pfi_kkif *) - ifgl->ifgl_group->ifg_pf_kif); + if (ifgl->ifgl_group->ifg_pf_kif) { + pfi_kkif_update((struct pfi_kkif *) + ifgl->ifgl_group->ifg_pf_kif); + } } } @@ -700,7 +702,7 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kkif *kif, uint8_t net, } if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) + NULL, NULL, NULL, PFR_FLAG_START | PFR_FLAG_DONE, PFR_TFLAG_ALLMASK))) printf("%s: cannot set %d new addresses into table %s: %d\n", __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e); } diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index c96741023db9..5ec67021068b 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -116,7 +116,6 @@ static int pf_rollback_altq(u_int32_t); static int pf_commit_altq(u_int32_t); static int pf_enable_altq(struct pf_altq *); static int pf_disable_altq(struct pf_altq *); -static uint16_t pf_qname2qid(const char *); static void pf_qid_unref(uint16_t); #endif /* ALTQ */ static int pf_begin_rules(u_int32_t *, int, const char *); @@ -187,6 +186,7 @@ VNET_DEFINE(uma_zone_t, pf_tag_z); #define V_pf_tag_z VNET(pf_tag_z) static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); +MALLOC_DEFINE(M_PF, "pf", "pf(4)"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE @@ -213,12 +213,9 @@ static void pf_init_tagset(struct pf_tagset *, unsigned int *, 
static void pf_cleanup_tagset(struct pf_tagset *); static uint16_t tagname2hashindex(const struct pf_tagset *, const char *); static uint16_t tag2hashindex(const struct pf_tagset *, uint16_t); -static u_int16_t tagname2tag(struct pf_tagset *, const char *); -static u_int16_t pf_tagname2tag(const char *); +static u_int16_t tagname2tag(struct pf_tagset *, const char *, bool); static void tag_unref(struct pf_tagset *, u_int16_t); -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - struct cdev *pf_dev; /* @@ -262,7 +259,7 @@ static void dehook_pf_eth(void); static void dehook_pf(void); static int shutdown_pf(void); static int pf_load(void); -static void pf_unload(void); +static void pf_unload(void *); static struct cdevsw pf_cdevsw = { .d_ioctl = pfioctl, @@ -287,6 +284,7 @@ int pf_end_threads; struct proc *pf_purge_proc; VNET_DEFINE(struct rmlock, pf_rules_lock); +VNET_DEFINE(struct rmlock, pf_tags_lock); VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock); #define V_pf_ioctl_lock VNET(pf_ioctl_lock) struct sx pf_end_lock; @@ -333,6 +331,8 @@ pfattach_vnet(void) V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; + V_pf_limits[PF_LIMIT_ANCHORS].limit = PF_ANCHOR_HIWAT; + V_pf_limits[PF_LIMIT_ETH_ANCHORS].limit = PF_ANCHOR_HIWAT; RB_INIT(&V_pf_anchors); pf_init_kruleset(&pf_main_ruleset); @@ -420,6 +420,8 @@ pfattach_vnet(void) pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK); for (int i = 0; i < SCNT_MAX; i++) V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK); + for (int i = 0; i < NCNT_MAX; i++) + V_pf_status.ncounters[i] = counter_u64_alloc(M_WAITOK); if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET, INTR_MPSAFE, &V_pf_swi_cookie) != 0) @@ -684,19 +686,50 @@ tag2hashindex(const struct pf_tagset *ts, uint16_t tag) } static u_int16_t -tagname2tag(struct pf_tagset *ts, const char *tagname) +tagname2tag(struct pf_tagset *ts, const char *tagname, bool add_new) { struct pf_tagname 
*tag; u_int32_t index; u_int16_t new_tagid; - PF_RULES_WASSERT(); + PF_TAGS_RLOCK_TRACKER; + + PF_TAGS_RLOCK(); index = tagname2hashindex(ts, tagname); TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; - return (tag->tag); + new_tagid = tag->tag; + PF_TAGS_RUNLOCK(); + return (new_tagid); + } + + /* + * When used for pfsync with queues we must not create new entries. + * Pf tags can be created just fine by this function, but queues + * require additional configuration. If they are missing on the target + * system we just ignore them + */ + if (add_new == false) { + printf("%s: Not creating a new tag\n", __func__); + PF_TAGS_RUNLOCK(); + return (0); + } + + /* + * If a new entry must be created do it under a write lock. + * But first search again, somebody could have created the tag + * between unlocking the read lock and locking the write lock. + */ + PF_TAGS_RUNLOCK(); + PF_TAGS_WLOCK(); + TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + new_tagid = tag->tag; + PF_TAGS_WUNLOCK(); + return (new_tagid); } /* @@ -713,16 +746,20 @@ tagname2tag(struct pf_tagset *ts, const char *tagname) * to rounding of the number of bits in the vector up to a multiple * of the vector word size at declaration/allocation time. */ - if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) + if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) { + PF_TAGS_WUNLOCK(); return (0); + } /* Mark the tag as in use. 
Bits are 0-based for BIT_CLR() */ BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail); /* allocate and fill new struct pf_tagname */ tag = uma_zalloc(V_pf_tag_z, M_NOWAIT); - if (tag == NULL) + if (tag == NULL) { + PF_TAGS_WUNLOCK(); return (0); + } strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref = 1; @@ -734,7 +771,29 @@ tagname2tag(struct pf_tagset *ts, const char *tagname) index = tag2hashindex(ts, new_tagid); TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries); - return (tag->tag); + PF_TAGS_WUNLOCK(); + return (new_tagid); +} + +static char * +tag2tagname(struct pf_tagset *ts, u_int16_t tag) +{ + struct pf_tagname *t; + uint16_t index; + + PF_TAGS_RLOCK_TRACKER; + + PF_TAGS_RLOCK(); + + index = tag2hashindex(ts, tag); + TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) + if (tag == t->tag) { + PF_TAGS_RUNLOCK(); + return (t->name); + } + + PF_TAGS_RUNLOCK(); + return (NULL); } static void @@ -743,7 +802,7 @@ tag_unref(struct pf_tagset *ts, u_int16_t tag) struct pf_tagname *t; uint16_t index; - PF_RULES_WASSERT(); + PF_TAGS_WLOCK(); index = tag2hashindex(ts, tag); TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) @@ -760,12 +819,20 @@ tag_unref(struct pf_tagset *ts, u_int16_t tag) } break; } + + PF_TAGS_WUNLOCK(); } -static uint16_t +uint16_t pf_tagname2tag(const char *tagname) { - return (tagname2tag(&V_pf_tags, tagname)); + return (tagname2tag(&V_pf_tags, tagname, true)); +} + +static const char * +pf_tag2tagname(uint16_t tag) +{ + return (tag2tagname(&V_pf_tags, tag)); } static int @@ -896,10 +963,16 @@ pf_commit_eth(uint32_t ticket, const char *anchor) } #ifdef ALTQ -static uint16_t -pf_qname2qid(const char *qname) +uint16_t +pf_qname2qid(const char *qname, bool add_new) +{ + return (tagname2tag(&V_pf_qids, qname, add_new)); +} + +static const char * +pf_qid2qname(uint16_t qid) { - return (tagname2tag(&V_pf_qids, qname)); + return (tag2tagname(&V_pf_qids, qid)); } static void @@ -1148,7 +1221,7 @@ 
pf_altq_ifnet_event(struct ifnet *ifp, int remove) } bcopy(a1, a2, sizeof(struct pf_altq)); - if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + if ((a2->qid = pf_qname2qid(a2->qname, true)) == 0) { error = EBUSY; free(a2, M_PFALTQ); break; @@ -1181,18 +1254,18 @@ pf_rule_tree_alloc(int flags) { struct pf_krule_global *tree; - tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags); + tree = malloc(sizeof(struct pf_krule_global), M_PF, flags); if (tree == NULL) return (NULL); RB_INIT(tree); return (tree); } -static void +void pf_rule_tree_free(struct pf_krule_global *tree) { - free(tree, M_TEMP); + free(tree, M_PF); } static int @@ -1211,7 +1284,7 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) return (ENOMEM); rs = pf_find_or_create_kruleset(anchor); if (rs == NULL) { - free(tree, M_TEMP); + pf_rule_tree_free(tree); return (EINVAL); } pf_rule_tree_free(rs->rules[rs_num].inactive.tree); @@ -1274,7 +1347,9 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, addr.iflags); break; case PF_ADDR_TABLE: - PF_MD5_UPD(pfr, addr.v.tblname); + if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX, + strlen(PF_OPTIMIZER_TABLE_PFX))) + PF_MD5_UPD(pfr, addr.v.tblname); break; case PF_ADDR_ADDRMASK: /* XXX ignore af? */ @@ -1357,7 +1432,7 @@ static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; - struct pf_krule *rule, **old_array, *old_rule; + struct pf_krule *rule, *old_rule; struct pf_krulequeue *old_rules; struct pf_krule_global *old_tree; int error; @@ -1382,13 +1457,10 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) /* Swap rules, keep the old. 
*/ old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; - old_array = rs->rules[rs_num].active.ptr_array; old_tree = rs->rules[rs_num].active.tree; rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; - rs->rules[rs_num].active.ptr_array = - rs->rules[rs_num].inactive.ptr_array; rs->rules[rs_num].active.tree = rs->rules[rs_num].inactive.tree; rs->rules[rs_num].active.rcount = @@ -1418,7 +1490,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) } rs->rules[rs_num].inactive.ptr = old_rules; - rs->rules[rs_num].inactive.ptr_array = old_array; rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */ rs->rules[rs_num].inactive.rcount = old_rcount; @@ -1431,13 +1502,10 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_unlink_rule_locked(old_rules, rule); PF_UNLNKDRULES_UNLOCK(); - if (rs->rules[rs_num].inactive.ptr_array) - free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); - rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); - free(old_tree, M_TEMP); + pf_rule_tree_free(old_tree); return (0); } @@ -1456,24 +1524,11 @@ pf_setup_pfsync_matching(struct pf_kruleset *rs) if (rs_cnt == PF_RULESET_SCRUB) continue; - if (rs->rules[rs_cnt].inactive.ptr_array) - free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); - rs->rules[rs_cnt].inactive.ptr_array = NULL; - if (rs->rules[rs_cnt].inactive.rcount) { - rs->rules[rs_cnt].inactive.ptr_array = - mallocarray(rs->rules[rs_cnt].inactive.rcount, - sizeof(struct pf_rule **), - M_TEMP, M_NOWAIT); - - if (!rs->rules[rs_cnt].inactive.ptr_array) - return (ENOMEM); - } - - TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, - entries) { - pf_hash_rule_rolling(&ctx, rule); - (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + 
pf_hash_rule_rolling(&ctx, rule); + } } } @@ -1621,7 +1676,7 @@ pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) #define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x #define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \ SATU32(q->pq_u.hfsc_opts.x) - + ASSIGN_OPT_SATU32(rtsc_m1); ASSIGN_OPT(rtsc_d); ASSIGN_OPT_SATU32(rtsc_m2); @@ -1635,7 +1690,7 @@ pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) ASSIGN_OPT_SATU32(ulsc_m2); ASSIGN_OPT(flags); - + #undef ASSIGN_OPT #undef ASSIGN_OPT_SATU32 } else @@ -1743,7 +1798,7 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) ASSIGN_OPT(ulsc_m2); ASSIGN_OPT(flags); - + #undef ASSIGN_OPT } else COPY(pq_u); @@ -1775,7 +1830,7 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) ASSIGN(qid); break; } - default: + default: panic("%s: unhandled struct pfioc_altq version", __func__); break; } @@ -2059,6 +2114,47 @@ pf_ioctl_getrules(struct pfioc_rule *pr) return (0); } +static int +pf_rule_checkaf(struct pf_krule *r) +{ + switch (r->af) { + case 0: + if (r->rule_flag & PFRULE_AFTO) + return (EPFNOSUPPORT); + break; + case AF_INET: + if ((r->rule_flag & PFRULE_AFTO) && r->naf != AF_INET6) + return (EPFNOSUPPORT); + break; +#ifdef INET6 + case AF_INET6: + if ((r->rule_flag & PFRULE_AFTO) && r->naf != AF_INET) + return (EPFNOSUPPORT); + break; +#endif /* INET6 */ + default: + return (EPFNOSUPPORT); + } + + if ((r->rule_flag & PFRULE_AFTO) == 0 && r->naf != 0) + return (EPFNOSUPPORT); + + return (0); +} + +static int +pf_validate_range(uint8_t op, uint16_t port[2]) +{ + uint16_t a = ntohs(port[0]); + uint16_t b = ntohs(port[1]); + + if ((op == PF_OP_RRG && a > b) || /* 34:12, i.e. none */ + (op == PF_OP_IRG && a >= b) || /* 34><12, i.e. none */ + (op == PF_OP_XRG && a > b)) /* 34<>22, i.e. 
all */ + return 1; + return 0; +} + int pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, uint32_t pool_ticket, const char *anchor, const char *anchor_call, @@ -2071,12 +2167,18 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, int rs_num; int error = 0; - if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) { - error = EINVAL; - goto errout_unlocked; - } +#define ERROUT(x) ERROUT_FUNCTION(errout, x) +#define ERROUT_UNLOCKED(x) ERROUT_FUNCTION(errout_unlocked, x) + + if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) + ERROUT_UNLOCKED(EINVAL); -#define ERROUT(x) ERROUT_FUNCTION(errout, x) + if ((error = pf_rule_checkaf(rule))) + ERROUT_UNLOCKED(error); + if (pf_validate_range(rule->src.port_op, rule->src.port)) + ERROUT_UNLOCKED(EINVAL); + if (pf_validate_range(rule->dst.port_op, rule->dst.port)) + ERROUT_UNLOCKED(EINVAL); if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); @@ -2113,14 +2215,14 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, ERROUT(EINVAL); if (ticket != ruleset->rules[rs_num].inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, - ("ticket: %d != [%d]%d\n", ticket, rs_num, - ruleset->rules[rs_num].inactive.ticket)); + "ticket: %d != [%d]%d", ticket, rs_num, + ruleset->rules[rs_num].inactive.ticket); ERROUT(EBUSY); } if (pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, - ("pool_ticket: %d != %d\n", pool_ticket, - V_ticket_pabuf)); + "pool_ticket: %d != %d", pool_ticket, + V_ticket_pabuf); ERROUT(EBUSY); } /* @@ -2159,11 +2261,11 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { - if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0) ERROUT(EBUSY); else if (rule->pqname[0] != 0) { if ((rule->pqid = - pf_qname2qid(rule->pqname)) == 0) + pf_qname2qid(rule->pqname, true)) == 0) ERROUT(EBUSY); } else rule->pqid = rule->qid; @@ -2247,6 +2349,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, rule->nat.cur = 
TAILQ_FIRST(&rule->nat.list); rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list); rule->route.cur = TAILQ_FIRST(&rule->route.list); + rule->route.ipv6_nexthop_af = AF_INET6; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; @@ -2266,6 +2369,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, return (0); #undef ERROUT +#undef ERROUT_UNLOCKED errout: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); @@ -2439,7 +2543,7 @@ pf_start(void) V_pf_status.since = time_uptime; new_unrhdr64(&V_pf_stateid, time_second); - DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); + DPFPRINTF(PF_DEBUG_MISC, "pf: started"); } sx_xunlock(&V_pf_ioctl_lock); @@ -2459,7 +2563,7 @@ pf_stop(void) dehook_pf(); dehook_pf_eth(); V_pf_status.since = time_uptime; - DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); + DPFPRINTF(PF_DEBUG_MISC, "pf: stopped"); } sx_xunlock(&V_pf_ioctl_lock); @@ -2476,6 +2580,8 @@ pf_ioctl_clear_status(void) pf_counter_u64_zero(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_zero(V_pf_status.scounters[i]); + for (int i = 0; i < NCNT_MAX; i++) + counter_u64_zero(V_pf_status.ncounters[i]); for (int i = 0; i < KLCNT_MAX; i++) counter_u64_zero(V_pf_status.lcounters[i]); V_pf_status.since = time_uptime; @@ -2633,6 +2739,7 @@ pf_ioctl_add_addr(struct pf_nl_pooladdr *pp) PF_RULES_WUNLOCK(); goto out; } + pa->af = pp->af; switch (pp->which) { case PF_NAT: TAILQ_INSERT_TAIL(&V_pf_pabuf[0], pa, entries); @@ -2714,6 +2821,7 @@ pf_ioctl_get_addr(struct pf_nl_pooladdr *pp) return (EBUSY); } pf_kpooladdr_to_pooladdr(pa, &pp->addr); + pp->af = pa->af; pf_addr_copyout(&pp->addr.addr); PF_RULES_RUNLOCK(); @@ -3234,9 +3342,9 @@ DIOCGETETHRULE_error: if (nvlist_get_number(nvl, "ticket") != ruleset->inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, - ("ticket: %d != %d\n", + "ticket: %d != %d", (u_int32_t)nvlist_get_number(nvl, "ticket"), - ruleset->inactive.ticket)); + ruleset->inactive.ticket); ERROUT(EBUSY); } @@ -3276,7 
+3384,7 @@ DIOCGETETHRULE_error: #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { - if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0) error = EBUSY; else rule->qid = rule->qid; @@ -3567,7 +3675,7 @@ DIOCADDRULENV_error: error = pf_rule_to_krule(&pr->rule, rule); if (error != 0) { pf_krule_free(rule); - break; + goto fail; } pr->anchor[sizeof(pr->anchor) - 1] = '\0'; @@ -3726,11 +3834,11 @@ DIOCGETRULENV_error: if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; - break; + goto fail; } if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; - break; + goto fail; } if (pcr->action != PF_CHANGE_REMOVE) { @@ -3738,9 +3846,13 @@ DIOCGETRULENV_error: error = pf_rule_to_krule(&pcr->rule, newrule); if (error != 0) { pf_krule_free(newrule); - break; + goto fail; } + if ((error = pf_rule_checkaf(newrule))) { + pf_krule_free(newrule); + goto fail; + } if (newrule->ifname[0]) kif = pf_kkif_create(M_WAITOK); pf_counter_u64_init(&newrule->evaluations, M_WAITOK); @@ -3823,11 +3935,11 @@ DIOCGETRULENV_error: /* set queue IDs */ if (newrule->qname[0] != 0) { if ((newrule->qid = - pf_qname2qid(newrule->qname)) == 0) + pf_qname2qid(newrule->qname, true)) == 0) error = EBUSY; else if (newrule->pqname[0] != 0) { if ((newrule->pqid = - pf_qname2qid(newrule->pqname)) == 0) + pf_qname2qid(newrule->pqname, true)) == 0) error = EBUSY; } else newrule->pqid = newrule->qid; @@ -3888,7 +4000,7 @@ DIOCGETRULENV_error: pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); - break; + goto fail; } newrule->nat.cur = TAILQ_FIRST(&newrule->nat.list); @@ -3915,7 +4027,7 @@ DIOCGETRULENV_error: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); error = EINVAL; - break; + goto fail; } } @@ -3933,7 +4045,7 @@ DIOCGETRULENV_error: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); error = EEXIST; - break; + goto fail; } if (oldrule == NULL) @@ -3989,7 +4101,7 @@ DIOCCHANGERULE_error: if (sp->timeout 
>= PFTM_MAX) { error = EINVAL; - break; + goto fail; } if (V_pfsync_state_import_ptr != NULL) { PF_RULES_RLOCK(); @@ -4009,7 +4121,7 @@ DIOCCHANGERULE_error: s = pf_find_state_byid(ps->state.id, ps->state.creatorid); if (s == NULL) { error = ENOENT; - break; + goto fail; } pfsync_state_export((union pfsync_state_union*)&ps->state, @@ -4040,7 +4152,7 @@ DIOCCHANGERULE_error: out = ps->ps_states; pstore = mallocarray(slice_count, - sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { @@ -4062,10 +4174,10 @@ DIOCGETSTATES_retry: if (count > slice_count) { PF_HASHROW_UNLOCK(ih); - free(pstore, M_TEMP); + free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, - sizeof(struct pfsync_state_1301), M_TEMP, + sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATES_retry; } @@ -4087,13 +4199,15 @@ DIOCGETSTATES_retry: PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pfsync_state_1301) * count); - if (error) - break; + if (error) { + free(pstore, M_PF); + goto fail; + } out = ps->ps_states + nr; } DIOCGETSTATES_full: ps->ps_len = sizeof(struct pfsync_state_1301) * nr; - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -4108,7 +4222,7 @@ DIOCGETSTATES_full: if (ps->ps_req_version > PF_STATE_VERSION) { error = ENOTSUP; - break; + goto fail; } if (ps->ps_len <= 0) { @@ -4119,7 +4233,7 @@ DIOCGETSTATES_full: out = ps->ps_states; pstore = mallocarray(slice_count, - sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { @@ -4141,10 +4255,10 @@ DIOCGETSTATESV2_retry: if (count > slice_count) { PF_HASHROW_UNLOCK(ih); - free(pstore, M_TEMP); + free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, - sizeof(struct pf_state_export), M_TEMP, + sizeof(struct 
pf_state_export), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATESV2_retry; } @@ -4165,13 +4279,15 @@ DIOCGETSTATESV2_retry: PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pf_state_export) * count); - if (error) - break; + if (error) { + free(pstore, M_PF); + goto fail; + } out = ps->ps_states + nr; } DIOCGETSTATESV2_full: ps->ps_len = nr * sizeof(struct pf_state_export); - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -4272,12 +4388,12 @@ DIOCGETSTATESV2_full: if (psp->ifname[0] == '\0') { error = EINVAL; - break; + goto fail; } error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ); if (error != 0) - break; + goto fail; ifp = ifunit(ps.ifname); if (ifp != NULL) { psp->baudrate32 = @@ -4306,7 +4422,7 @@ DIOCGETSTATESV2_full: if (error == 0) V_pf_altq_running = 1; PF_RULES_WUNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + DPFPRINTF(PF_DEBUG_MISC, "altq: started"); break; } @@ -4325,7 +4441,7 @@ DIOCGETSTATESV2_full: if (error == 0) V_pf_altq_running = 0; PF_RULES_WUNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + DPFPRINTF(PF_DEBUG_MISC, "altq: stopped"); break; } @@ -4338,7 +4454,7 @@ DIOCGETSTATESV2_full: altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO); error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd)); if (error) - break; + goto fail; altq->local_flags = 0; PF_RULES_WLOCK(); @@ -4346,7 +4462,7 @@ DIOCGETSTATESV2_full: PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); error = EBUSY; - break; + goto fail; } /* @@ -4354,11 +4470,11 @@ DIOCGETSTATESV2_full: * copy the necessary fields */ if (altq->qname[0] != 0) { - if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + if ((altq->qid = pf_qname2qid(altq->qname, true)) == 0) { PF_RULES_WUNLOCK(); error = EBUSY; free(altq, M_PFALTQ); - break; + goto fail; } altq->altq_disc = NULL; TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) { @@ -4378,7 +4494,7 @@ DIOCGETSTATESV2_full: if (error) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); - break; + goto fail; } if 
(altq->qname[0] != 0) @@ -4416,13 +4532,13 @@ DIOCGETSTATESV2_full: if (pa->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; - break; + goto fail; } altq = pf_altq_get_nth_active(pa->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; - break; + goto fail; } pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_RUNLOCK(); @@ -4446,20 +4562,20 @@ DIOCGETSTATESV2_full: if (pq->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; - break; + goto fail; } nbytes = pq->nbytes; altq = pf_altq_get_nth_active(pq->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; - break; + goto fail; } if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { PF_RULES_RUNLOCK(); error = ENXIO; - break; + goto fail; } PF_RULES_RUNLOCK(); if (cmd == DIOCGETQSTATSV0) @@ -4528,30 +4644,30 @@ DIOCGETSTATESV2_full: if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { error = EINVAL; - break; + goto fail; } if (pca->addr.addr.type != PF_ADDR_ADDRMASK && pca->addr.addr.type != PF_ADDR_DYNIFTL && pca->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; - break; + goto fail; } if (pca->addr.addr.p.dyn != NULL) { error = EINVAL; - break; + goto fail; } if (pca->action != PF_CHANGE_REMOVE) { #ifndef INET if (pca->af == AF_INET) { error = EAFNOSUPPORT; - break; + goto fail; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { error = EAFNOSUPPORT; - break; + goto fail; } #endif /* INET6 */ newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); @@ -4674,8 +4790,19 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != 0) { error = ENODEV; - break; + goto fail; + } + if (strnlen(io->pfrio_table.pfrt_anchor, MAXPATHLEN) + == MAXPATHLEN) { + error = EINVAL; + goto fail; } + if (strnlen(io->pfrio_table.pfrt_name, PF_TABLE_NAME_SIZE) + == PF_TABLE_NAME_SIZE) { + error = EINVAL; + goto fail; + } + PF_RULES_WLOCK(); error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); @@ -4690,28 +4817,28 @@ 
DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); - break; + free(pfrts, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4722,28 +4849,28 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); - break; + free(pfrts, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4755,32 +4882,32 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; - break; + goto fail; } PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); error = EINVAL; - break; + goto fail; } io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_NOWAIT | 
M_ZERO); + M_PF, M_NOWAIT | M_ZERO); if (pfrts == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); - break; + goto fail; } error = pfr_get_tables(&io->pfrio_table, pfrts, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrts, io->pfrio_buffer, totlen); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4792,7 +4919,7 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; - break; + goto fail; } PF_TABLE_STATS_LOCK(); PF_RULES_RLOCK(); @@ -4801,18 +4928,18 @@ DIOCCHANGEADDR_error: PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); error = EINVAL; - break; + goto fail; } io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_tstats); pfrtstats = mallocarray(io->pfrio_size, - sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); + sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); if (pfrtstats == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); - break; + goto fail; } error = pfr_get_tstats(&io->pfrio_table, pfrtstats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); @@ -4820,7 +4947,7 @@ DIOCCHANGEADDR_error: PF_TABLE_STATS_UNLOCK(); if (error == 0) error = copyout(pfrtstats, io->pfrio_buffer, totlen); - free(pfrtstats, M_TEMP); + free(pfrtstats, M_PF); break; } @@ -4831,7 +4958,7 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || @@ -4840,16 +4967,16 @@ DIOCCHANGEADDR_error: * size, so we didn't fail on overly large requests. * Keep doing so. 
*/ io->pfrio_size = pf_ioctl_maxcount; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); - break; + free(pfrts, M_PF); + goto fail; } PF_TABLE_STATS_LOCK(); @@ -4858,7 +4985,7 @@ DIOCCHANGEADDR_error: &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4870,7 +4997,7 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; - break; + goto fail; } PF_RULES_RLOCK(); @@ -4878,7 +5005,7 @@ DIOCCHANGEADDR_error: if (n < 0) { PF_RULES_RUNLOCK(); error = EINVAL; - break; + goto fail; } io->pfrio_size = min(io->pfrio_size, n); @@ -4886,18 +5013,18 @@ DIOCCHANGEADDR_error: totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { - free(pfrts, M_TEMP); - break; + free(pfrts, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfrts, M_TEMP); + free(pfrts, M_PF); break; } @@ -4906,7 +5033,7 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != 0) { error = ENODEV; - break; + goto fail; } PF_RULES_WLOCK(); error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, @@ -4922,30 +5049,31 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); 
pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); - break; + free(pfras, M_PF); + goto fail; } PF_RULES_WLOCK(); + io->pfrio_nadd = 0; error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -4956,21 +5084,21 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); - break; + free(pfras, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_del_addrs(&io->pfrio_table, pfras, @@ -4979,7 +5107,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -4990,35 +5118,35 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size2 < 0) { error = EINVAL; - break; + goto fail; } count = max(io->pfrio_size, io->pfrio_size2); if (count > pf_ioctl_maxcount || WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = count * sizeof(struct pfr_addr); - pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP, + pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF, 
M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); - break; + free(pfras, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_set_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL, 0); + PFR_FLAG_START | PFR_FLAG_DONE | PFR_FLAG_USERIOCTL, 0); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5029,24 +5157,24 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK | M_ZERO); + M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5057,24 +5185,24 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_astats); pfrastats = mallocarray(io->pfrio_size, - sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO); + sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = 
copyout(pfrastats, io->pfrio_buffer, totlen); - free(pfrastats, M_TEMP); + free(pfrastats, M_PF); break; } @@ -5085,21 +5213,21 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); - break; + free(pfras, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_clr_astats(&io->pfrio_table, pfras, @@ -5108,7 +5236,7 @@ DIOCCHANGEADDR_error: PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5119,21 +5247,21 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); - break; + free(pfras, M_PF); + goto fail; } PF_RULES_RLOCK(); error = pfr_tst_addrs(&io->pfrio_table, pfras, @@ -5142,7 +5270,7 @@ DIOCCHANGEADDR_error: PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5153,28 +5281,28 @@ DIOCCHANGEADDR_error: if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; - break; + goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > 
pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; - break; + goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { - free(pfras, M_TEMP); - break; + free(pfras, M_PF); + goto fail; } PF_RULES_WLOCK(); error = pfr_ina_define(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); - free(pfras, M_TEMP); + free(pfras, M_PF); break; } @@ -5202,21 +5330,21 @@ DIOCCHANGEADDR_error: if (io->esize != sizeof(*ioe)) { error = ENODEV; - break; + goto fail; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; - break; + goto fail; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); - break; + free(ioes, M_PF); + goto fail; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { @@ -5225,7 +5353,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ETH: if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5233,13 +5361,13 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5254,7 +5382,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5263,7 +5391,7 @@ 
DIOCCHANGEADDR_error: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; } break; @@ -5271,7 +5399,7 @@ DIOCCHANGEADDR_error: } PF_RULES_WUNLOCK(); error = copyout(ioes, io->array, totlen); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5283,21 +5411,21 @@ DIOCCHANGEADDR_error: if (io->esize != sizeof(*ioe)) { error = ENODEV; - break; + goto fail; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; - break; + goto fail; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); - break; + free(ioes, M_PF); + goto fail; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { @@ -5307,7 +5435,7 @@ DIOCCHANGEADDR_error: if ((error = pf_rollback_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5315,13 +5443,13 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5336,7 +5464,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5345,14 +5473,14 @@ DIOCCHANGEADDR_error: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5366,23 +5494,23 @@ 
DIOCCHANGEADDR_error: if (io->esize != sizeof(*ioe)) { error = ENODEV; - break; + goto fail; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; - break; + goto fail; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), - M_TEMP, M_WAITOK); + M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { - free(ioes, M_TEMP); - break; + free(ioes, M_PF); + goto fail; } PF_RULES_WLOCK(); /* First makes sure everything will succeed. */ @@ -5394,7 +5522,7 @@ DIOCCHANGEADDR_error: if (ers == NULL || ioe->ticket == 0 || ioe->ticket != ers->inactive.ticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } @@ -5403,14 +5531,14 @@ DIOCCHANGEADDR_error: case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5421,7 +5549,7 @@ DIOCCHANGEADDR_error: if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5430,7 +5558,7 @@ DIOCCHANGEADDR_error: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EINVAL; goto fail; } @@ -5440,7 +5568,7 @@ DIOCCHANGEADDR_error: rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); error = EBUSY; goto fail; } @@ -5453,7 +5581,7 @@ DIOCCHANGEADDR_error: case PF_RULESET_ETH: if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5461,7 +5589,7 @@ DIOCCHANGEADDR_error: case 
PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5476,7 +5604,7 @@ DIOCCHANGEADDR_error: if ((error = pfr_ina_commit(&table, ioe->ticket, NULL, NULL, 0))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5485,7 +5613,7 @@ DIOCCHANGEADDR_error: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); - free(ioes, M_TEMP); + free(ioes, M_PF); goto fail; /* really bad */ } break; @@ -5499,7 +5627,7 @@ DIOCCHANGEADDR_error: else dehook_pf_eth(); - free(ioes, M_TEMP); + free(ioes, M_PF); break; } @@ -5523,12 +5651,12 @@ DIOCCHANGEADDR_error: if (psn->psn_len == 0) { psn->psn_len = sizeof(struct pf_src_node) * nr; - break; + goto fail; } nr = 0; - p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO); + p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO); for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); @@ -5547,11 +5675,11 @@ DIOCCHANGEADDR_error: error = copyout(pstore, psn->psn_src_nodes, sizeof(struct pf_src_node) * nr); if (error) { - free(pstore, M_TEMP); - break; + free(pstore, M_PF); + goto fail; } psn->psn_len = sizeof(struct pf_src_node) * nr; - free(pstore, M_TEMP); + free(pstore, M_PF); break; } @@ -5604,27 +5732,27 @@ DIOCCHANGEADDR_error: if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; - break; + goto fail; } if (io->pfiio_size < 0 || io->pfiio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) { error = EINVAL; - break; + goto fail; } io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; bufsiz = io->pfiio_size * sizeof(struct pfi_kif); ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif), - M_TEMP, M_WAITOK | M_ZERO); + M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); PF_RULES_RUNLOCK(); error 
= copyout(ifstore, io->pfiio_buffer, bufsiz); - free(ifstore, M_TEMP); + free(ifstore, M_PF); break; } @@ -5676,6 +5804,7 @@ fail: void pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version) { + const char *tagname; bzero(sp, sizeof(union pfsync_state_union)); /* copy from state key */ @@ -5687,8 +5816,6 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_ sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; - sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname)); @@ -5700,16 +5827,31 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_ else sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime); - sp->pfs_1301.direction = st->direction; - sp->pfs_1301.log = st->act.log; - sp->pfs_1301.timeout = st->timeout; - switch (msg_version) { case PFSYNC_MSG_VERSION_1301: sp->pfs_1301.state_flags = st->state_flags; + sp->pfs_1301.direction = st->direction; + sp->pfs_1301.log = st->act.log; + sp->pfs_1301.timeout = st->timeout; + sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; + /* + * XXX Why do we bother pfsyncing source node information if source + * nodes are not synced? Showing users that there is source tracking + * when there is none seems useless. 
+ */ + if (st->sns[PF_SN_LIMIT] != NULL) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; break; case PFSYNC_MSG_VERSION_1400: sp->pfs_1400.state_flags = htons(st->state_flags); + sp->pfs_1400.direction = st->direction; + sp->pfs_1400.log = st->act.log; + sp->pfs_1400.timeout = st->timeout; + sp->pfs_1400.proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1400.af = st->key[PF_SK_WIRE]->af; sp->pfs_1400.qid = htons(st->act.qid); sp->pfs_1400.pqid = htons(st->act.pqid); sp->pfs_1400.dnpipe = htons(st->act.dnpipe); @@ -5725,22 +5867,53 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_ strlcpy(sp->pfs_1400.rt_ifname, st->act.rt_kif->pfik_name, sizeof(sp->pfs_1400.rt_ifname)); + /* + * XXX Why do we bother pfsyncing source node information if source + * nodes are not synced? Showing users that there is source tracking + * when there is none seems useless. 
+ */ + if (st->sns[PF_SN_LIMIT] != NULL) + sp->pfs_1400.sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) + sp->pfs_1400.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + break; + case PFSYNC_MSG_VERSION_1500: + sp->pfs_1500.state_flags = htons(st->state_flags); + sp->pfs_1500.direction = st->direction; + sp->pfs_1500.log = st->act.log; + sp->pfs_1500.timeout = st->timeout; + sp->pfs_1500.wire_proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1500.wire_af = st->key[PF_SK_WIRE]->af; + sp->pfs_1500.stack_proto = st->key[PF_SK_STACK]->proto; + sp->pfs_1500.stack_af = st->key[PF_SK_STACK]->af; + sp->pfs_1500.qid = htons(st->act.qid); + sp->pfs_1500.pqid = htons(st->act.pqid); + sp->pfs_1500.dnpipe = htons(st->act.dnpipe); + sp->pfs_1500.dnrpipe = htons(st->act.dnrpipe); + sp->pfs_1500.rtableid = htonl(st->act.rtableid); + sp->pfs_1500.min_ttl = st->act.min_ttl; + sp->pfs_1500.set_tos = st->act.set_tos; + sp->pfs_1500.max_mss = htons(st->act.max_mss); + sp->pfs_1500.set_prio[0] = st->act.set_prio[0]; + sp->pfs_1500.set_prio[1] = st->act.set_prio[1]; + sp->pfs_1500.rt = st->act.rt; + sp->pfs_1500.rt_af = st->act.rt_af; + if (st->act.rt_kif) + strlcpy(sp->pfs_1500.rt_ifname, + st->act.rt_kif->pfik_name, + sizeof(sp->pfs_1500.rt_ifname)); + strlcpy(sp->pfs_1500.orig_ifname, + st->orig_kif->pfik_name, + sizeof(sp->pfs_1500.orig_ifname)); + if ((tagname = pf_tag2tagname(st->tag)) != NULL) + strlcpy(sp->pfs_1500.tagname, tagname, + sizeof(sp->pfs_1500.tagname)); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } - /* - * XXX Why do we bother pfsyncing source node information if source - * nodes are not synced? Showing users that there is source tracking - * when there is none seems useless. 
- */ - if (st->sns[PF_SN_LIMIT] != NULL) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; - sp->pfs_1301.id = st->id; sp->pfs_1301.creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->pfs_1301.src); @@ -6407,25 +6580,20 @@ shutdown_pf(void) int error = 0; u_int32_t t[5]; char nn = '\0'; - struct pf_kanchor *anchor; - struct pf_keth_anchor *eth_anchor; + struct pf_kanchor *anchor, *tmp_anchor; + struct pf_keth_anchor *eth_anchor, *tmp_eth_anchor; int rs_num; do { /* Unlink rules of all user defined anchors */ - RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) { - /* Wildcard based anchors may not have a respective - * explicit anchor rule or they may be left empty - * without rules. It leads to anchor.refcnt=0, and the - * rest of the logic does not expect it. */ - if (anchor->refcnt == 0) - anchor->refcnt = 1; + RB_FOREACH_SAFE(anchor, pf_kanchor_global, &V_pf_anchors, + tmp_anchor) { for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) { if ((error = pf_begin_rules(&t[rs_num], rs_num, anchor->path)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: " - "anchor.path=%s rs_num=%d\n", - __func__, anchor->path, rs_num)); + DPFPRINTF(PF_DEBUG_MISC, "%s: " + "anchor.path=%s rs_num=%d", + __func__, anchor->path, rs_num); goto error; /* XXX: rollback? */ } } @@ -6437,19 +6605,13 @@ shutdown_pf(void) } /* Unlink rules of all user defined ether anchors */ - RB_FOREACH(eth_anchor, pf_keth_anchor_global, - &V_pf_keth_anchors) { - /* Wildcard based anchors may not have a respective - * explicit anchor rule or they may be left empty - * without rules. It leads to anchor.refcnt=0, and the - * rest of the logic does not expect it. 
*/ - if (eth_anchor->refcnt == 0) - eth_anchor->refcnt = 1; + RB_FOREACH_SAFE(eth_anchor, pf_keth_anchor_global, + &V_pf_keth_anchors, tmp_eth_anchor) { if ((error = pf_begin_eth(&t[0], eth_anchor->path)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: eth " - "anchor.path=%s\n", __func__, - eth_anchor->path)); + DPFPRINTF(PF_DEBUG_MISC, "%s: eth " + "anchor.path=%s", __func__, + eth_anchor->path); goto error; } error = pf_commit_eth(t[0], eth_anchor->path); @@ -6458,27 +6620,27 @@ shutdown_pf(void) if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: SCRUB\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: SCRUB", __func__); break; } if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: FILTER\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: FILTER", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: NAT\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: NAT", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: BINAT\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: BINAT", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: RDR\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: RDR", __func__); break; /* XXX: rollback? 
*/ } @@ -6497,7 +6659,7 @@ shutdown_pf(void) break; if ((error = pf_begin_eth(&t[0], &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: eth\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: eth", __func__); break; } error = pf_commit_eth(t[0], &nn); @@ -6505,7 +6667,7 @@ shutdown_pf(void) #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: ALTQ\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: ALTQ", __func__); break; } pf_commit_altq(t[0]); @@ -6515,6 +6677,11 @@ shutdown_pf(void) pf_kill_srcnodes(NULL); + for (int i = 0; i < PF_RULESET_MAX; i++) { + pf_rule_tree_free(pf_main_ruleset.rules[i].active.tree); + pf_rule_tree_free(pf_main_ruleset.rules[i].inactive.tree); + } + /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ } while(0); @@ -6801,6 +6968,7 @@ pf_load_vnet(void) NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE); + rm_init_flags(&V_pf_tags_lock, "pf tags and queues", RM_RECURSE); sx_init(&V_pf_ioctl_lock, "pf ioctl"); pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize, @@ -6917,13 +7085,15 @@ pf_unload_vnet(void) pf_counter_u64_deinit(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_free(V_pf_status.scounters[i]); + for (int i = 0; i < NCNT_MAX; i++) + counter_u64_free(V_pf_status.ncounters[i]); rm_destroy(&V_pf_rules_lock); sx_destroy(&V_pf_ioctl_lock); } static void -pf_unload(void) +pf_unload(void *dummy __unused) { sx_xlock(&pf_end_lock); @@ -6950,7 +7120,7 @@ vnet_pf_init(void *unused __unused) pf_load_vnet(); } -VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, +VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_init, NULL); static void @@ -6958,7 +7128,7 @@ vnet_pf_uninit(const void *unused __unused) { pf_unload_vnet(); -} +} SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL); VNET_SYSUNINIT(vnet_pf_uninit, 
SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_uninit, NULL); diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 308d76c46e5b..7aeb8266ca8c 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -71,16 +71,13 @@ #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); -struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); +static struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *, struct pf_krule *); static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, struct pf_addr *, uint16_t *, uint16_t, uint16_t, - struct pf_ksrc_node **, struct pf_srchash **, struct pf_kpool *, struct pf_udp_mapping **, pf_sn_types_t); static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); @@ -219,6 +216,7 @@ pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_krulese */ ctx->arsm = ctx->aruleset; } + break; } else { ctx->a = r; /* remember anchor */ ctx->aruleset = ruleset; /* and its ruleset */ @@ -276,7 +274,7 @@ pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_k return (rv); } -struct pf_krule * +static struct pf_krule * pf_match_translation(int rs_num, struct pf_test_ctx *ctx) { enum pf_test_status rv; @@ -291,10 +289,8 @@ pf_match_translation(int rs_num, struct pf_test_ctx *ctx) } static int -pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, - struct pf_addr *naddr, uint16_t *nport, uint16_t low, - uint16_t high, struct pf_ksrc_node **sn, - struct pf_srchash **sh, struct pf_kpool *rpool, +pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, + uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool 
*rpool, struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) { struct pf_state_key_cmp key; @@ -322,19 +318,24 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { + struct pf_ksrc_node *sn = NULL; + struct pf_srchash *sh = NULL; *udp_mapping = pf_udp_mapping_find(&udp_source); if (*udp_mapping) { pf_addrcpy(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); *nport = (*udp_mapping)->endpoints[1].port; - /* Try to find a src_node as per pf_map_addr(). */ - if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && + /* + * Try to find a src_node as per pf_map_addr(). + * XXX: Why? This code seems to do nothing. + */ + if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(&pd->nsaddr, r, - pd->af, sh, sn_type, false); - if (*sn != NULL) - PF_SRC_NODE_UNLOCK(*sn); + sn = pf_find_src_node(&pd->nsaddr, r, + pd->af, &sh, sn_type, false); + if (sn != NULL) + PF_SRC_NODE_UNLOCK(sn); return (0); } else { *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, @@ -345,8 +346,8 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, } } - if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr, - sn, sh, rpool, sn_type)) + if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL, + &init_addr, rpool, sn_type)) goto failed; if (pd->proto == IPPROTO_ICMP) { @@ -470,9 +471,8 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, * pick a different source address since we're out * of free port choices for the current one. 
*/ - (*sn) = NULL; - if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, - &init_addr, sn, sh, rpool, sn_type)) + if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, + &(pd->naf), NULL, &init_addr, rpool, sn_type)) return (1); break; case PF_POOL_NONE: @@ -502,9 +502,8 @@ pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) static int pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, - struct pf_addr *naddr, uint16_t *nport, - struct pf_ksrc_node **sn, struct pf_srchash **sh, - struct pf_udp_mapping **udp_mapping, struct pf_kpool *rpool) + struct pf_addr *naddr, uint16_t *nport, struct pf_udp_mapping **udp_mapping, + struct pf_kpool *rpool) { uint16_t psmask, low, highmask; uint16_t i, ahigh, cut; @@ -523,30 +522,100 @@ pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, for (i = cut; i <= ahigh; i++) { low = (i << ashift) | psmask; - if (!pf_get_sport(pd, r, - naddr, nport, low, low | highmask, sn, sh, rpool, - udp_mapping, PF_SN_NAT)) + if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, + rpool, udp_mapping, PF_SN_NAT)) return (0); } for (i = cut - 1; i > 0; i--) { low = (i << ashift) | psmask; - if (!pf_get_sport(pd, r, - naddr, nport, low, low | highmask, sn, sh, rpool, - udp_mapping, PF_SN_NAT)) + if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, + rpool, udp_mapping, PF_SN_NAT)) return (0); } return (1); } +static __inline u_short +pf_check_src_node_valid(struct pf_ksrc_node *sn, struct pf_kpool *rpool) +{ + struct pf_addr *raddr, *rmask; + struct pf_addr *caddr; /* cached redirection address */ + struct pf_kpooladdr *pa; + sa_family_t raf; + sa_family_t caf; /* cached redirection AF */ + u_short valid = 0; + + KASSERT(sn != NULL, ("sn is NULL")); + KASSERT(rpool != NULL, ("rpool is NULL")); + + /* check if the cached entry is still valid */ + + if (sn->type == PF_SN_LIMIT) { + /* Always valid as it does not store redirection address */ + return (1); + } + + mtx_lock(&rpool->mtx); + caddr = &(sn->raddr); + 
caf = sn->raf; + + TAILQ_FOREACH(pa, &rpool->list, entries) { + if (PF_AZERO(caddr, caf)) { + valid = 1; + goto done; + } else if (pa->addr.type == PF_ADDR_DYNIFTL) { + if (pfr_kentry_byaddr(pa->addr.p.dyn->pfid_kt, caddr, caf, 0)) { + valid = 1; + goto done; + } + } else if (pa->addr.type == PF_ADDR_TABLE) { + if (pfr_kentry_byaddr(pa->addr.p.tbl, caddr, caf, 0)) { + valid = 1; + goto done; + } + } else if (pa->addr.type != PF_ADDR_NOROUTE) { + /* PF_ADDR_URPFFAILED, PF_ADDR_RANGE, PF_ADDR_ADDRMASK */ + raddr = &(pa->addr.v.a.addr); + rmask = &(pa->addr.v.a.mask); + raf = pa->af; + if (raf == caf && pf_match_addr(0, raddr, rmask, caddr, caf)) { + valid = 1; + goto done; + } + } + /* else PF_ADDR_NOROUTE */ + } + +done: + mtx_unlock(&rpool->mtx); + + return (valid); +} + u_short -pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, - struct pf_kpool *rpool) +pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, + struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf, + struct pf_addr *init_addr, struct pf_kpool *rpool) { u_short reason = PFRES_MATCH; struct pf_addr *raddr = NULL, *rmask = NULL; + struct pfr_ktable *kt; uint64_t hashidx; int cnt; + sa_family_t wanted_af; + u_int8_t pool_type; + bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH; + + KASSERT(saf != 0, ("%s: saf == 0", __func__)); + KASSERT(naf != NULL, ("%s: naf = NULL", __func__)); + KASSERT((*naf) != 0, ("%s: *naf = 0", __func__)); + + /* + * Given (*naf) is a hint about AF of the forwarded packet. + * It might be changed if prefer_ipv6_nexthop is enabled and + * the combination of nexthop AF and packet AF allows for it. + */ + wanted_af = (*naf); mtx_lock(&rpool->mtx); /* Find the route using chosen algorithm. 
Store the found route @@ -556,7 +625,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done_pool_mtx; } if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - switch (af) { + switch (wanted_af) { #ifdef INET case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && @@ -580,7 +649,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, break; #endif /* INET6 */ default: - unhandled_af(af); + unhandled_af(wanted_af); } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!PF_POOL_DYNTYPE(rpool->opts)) { @@ -592,43 +661,81 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, rmask = &rpool->cur->addr.v.a.mask; } - switch (rpool->opts & PF_POOL_TYPEMASK) { + /* + * For pools with a single host with the prefer-ipv6-nexthop option + * we can return pool address of any AF, unless the forwarded packet + * is IPv6, then we can return only if pool address is IPv6. + * For non-prefer-ipv6-nexthop we can return pool address only + * of wanted AF, unless the pool address'es AF is unknown, which + * happens in case old ioctls have been used to set up the pool. + * + * Round-robin pools have their own logic for retrying next addresses. 
+ */ + pool_type = rpool->opts & PF_POOL_TYPEMASK; + if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK || + ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) && + rpool->cur->addr.type != PF_ADDR_TABLE && + rpool->cur->addr.type != PF_ADDR_DYNIFTL)) { + if (prefer_ipv6_nexthop) { + if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + wanted_af = rpool->cur->af; + } else { + if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + } + } + + switch (pool_type) { case PF_POOL_NONE: - pf_addrcpy(naddr, raddr, af); + pf_addrcpy(naddr, raddr, wanted_af); break; case PF_POOL_BITMASK: - pf_poolmask(naddr, raddr, rmask, saddr, af); + pf_poolmask(naddr, raddr, rmask, saddr, wanted_af); break; case PF_POOL_RANDOM: - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; - if (cnt == 0) - rpool->tblidx = 0; + if (rpool->cur->addr.type == PF_ADDR_TABLE || + rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) + kt = rpool->cur->addr.p.tbl; else - rpool->tblidx = (int)arc4random_uniform(cnt); - memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + kt = rpool->cur->addr.p.dyn->pfid_kt; + kt = pfr_ktable_select_active(kt); + if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, - pf_islinklocal)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ 
+ if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_random: + if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, + wanted_af, pf_islinklocal, false)) { + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_random; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { - switch (af) { + pf_addrcpy(naddr, &rpool->counter, wanted_af); + } else if (init_addr != NULL && PF_AZERO(init_addr, + wanted_af)) { + switch (wanted_af) { #ifdef INET case AF_INET: rpool->counter.addr32[0] = arc4random(); @@ -657,12 +764,14 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, break; #endif /* INET6 */ } - pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); - pf_addrcpy(init_addr, naddr, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, + wanted_af); + pf_addrcpy(init_addr, naddr, wanted_af); } else { - pf_addr_inc(&rpool->counter, af); - pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); + pf_addr_inc(&rpool->counter, wanted_af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, + wanted_af); } break; case PF_POOL_SRCHASH: @@ -670,37 +779,46 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, unsigned char hash[16]; hashidx = - pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; - if (cnt == 0) - rpool->tblidx = 0; + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, + wanted_af); + if (rpool->cur->addr.type == PF_ADDR_TABLE || + rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) + kt = rpool->cur->addr.p.tbl; else - rpool->tblidx = (int)(hashidx % cnt); - memset(&rpool->counter, 0, sizeof(rpool->counter)); - 
if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + kt = rpool->cur->addr.p.dyn->pfid_kt; + kt = pfr_ktable_select_active(kt); + if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, - pf_islinklocal)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_srchash: + if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, + wanted_af, pf_islinklocal, false)) { + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_srchash; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } - pf_addrcpy(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, wanted_af); } else { pf_poolmask(naddr, raddr, rmask, - (struct pf_addr *)&hash, af); + (struct pf_addr *)&hash, wanted_af); } break; } @@ -708,107 +826,171 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, { struct pf_kpooladdr *acur = rpool->cur; + retry_other_af_rr: + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) + &rpool->tblidx, &rpool->counter, wanted_af, + NULL, true)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) + 
&rpool->tblidx, &rpool->counter, wanted_af, + pf_islinklocal, true)) goto get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + } else if (rpool->cur->af == wanted_af && + pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af)) goto get_addr; - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + /* Reset table index when changing wanted AF. */ + rpool->tblidx = -1; + rpool->ipv6_nexthop_af = AF_INET; + goto retry_other_af_rr; + } try_next: + /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */ + rpool->ipv6_nexthop_af = AF_INET6; if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + try_next_ipv6_nexthop_rr: + /* Reset table index when iterating pools or changing wanted AF. */ + rpool->tblidx = -1; + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { - /* table contains no address of type 'af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, wanted_af, NULL, + true)) + goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - rpool->tblidx = -1; - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) { - /* table contains no address of type 'af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, + true)) + goto get_addr; } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - pf_addrcpy(&rpool->counter, raddr, af); + if (rpool->cur->af == wanted_af) { + raddr = 
&rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + pf_addrcpy(&rpool->counter, raddr, wanted_af); + goto get_addr; + } } - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + rpool->ipv6_nexthop_af = AF_INET; + goto try_next_ipv6_nexthop_rr; + } + if (rpool->cur != acur) + goto try_next; + reason = PFRES_MAPFAILED; + goto done_pool_mtx; get_addr: - pf_addrcpy(naddr, &rpool->counter, af); - if (init_addr != NULL && PF_AZERO(init_addr, af)) - pf_addrcpy(init_addr, naddr, af); - pf_addr_inc(&rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, wanted_af); + if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) + pf_addrcpy(init_addr, naddr, wanted_af); + pf_addr_inc(&rpool->counter, wanted_af); break; } } + if (wanted_af == 0) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + if (nkif) *nkif = rpool->cur->kif; + (*naf) = wanted_af; + done_pool_mtx: mtx_unlock(&rpool->mtx); - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } - return (reason); } u_short -pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, - struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool, - pf_sn_types_t sn_type) +pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, + struct pf_addr *naddr, sa_family_t *naf, struct pfi_kkif **nkif, + struct pf_addr *init_addr, struct pf_kpool *rpool, pf_sn_types_t sn_type) { + struct pf_ksrc_node *sn = NULL; + struct pf_srchash *sh = NULL; u_short reason = 0; - KASSERT(*sn == NULL, ("*sn not NULL")); - /* * If this is a sticky-address rule, try to find an existing src_node. - * Request the sh to be unlocked if sn was not found, as we never - * insert a new sn when parsing the ruleset. 
*/ if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false); + sn = pf_find_src_node(saddr, r, saf, &sh, sn_type, false); + + if (sn != NULL) { + PF_SRC_NODE_LOCK_ASSERT(sn); + /* + * Check if source node's redirection address still exists + * in pool from which the SN was created. If not, delete it. + * Similar to pf_kill_srcnodes(). Unlink the source node + * from tree, unlink it from states, then free it. Do not + * overlap source node and state locks to avoid LOR. + */ + if (!pf_check_src_node_valid(sn, rpool)) { + pf_unlink_src_node(sn); + PF_SRC_NODE_UNLOCK(sn); + if (V_pf_status.debug >= PF_DEBUG_NOISY) { + printf("%s: stale src tracking (%d) ", + __func__, sn_type); + pf_print_host(saddr, 0, saf); + printf(" to "); + pf_print_host(&(sn->raddr), 0, sn->raf); + if (nkif) + printf("@%s", sn->rkif->pfik_name); + printf("\n"); + } - if (*sn != NULL) { - PF_SRC_NODE_LOCK_ASSERT(*sn); + for (int i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_kstate *st; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(st, &ih->states, entry) { + if (st->sns[sn->type] == sn) { + st->sns[sn->type] = NULL; + } + } + PF_HASHROW_UNLOCK(ih); + } + pf_free_src_node(sn); + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); + sn = NULL; + goto map_addr; + } + + (*naf) = sn->raf; /* If the supplied address is the same as the current one we've * been asked before, so tell the caller that there's no other * address to be had. 
*/ - if (PF_AEQ(naddr, &(*sn)->raddr, af)) { + + if (PF_AEQ(naddr, &(sn->raddr), *naf)) { + printf("%s: no more addresses\n", __func__); reason = PFRES_MAPFAILED; goto done; } - pf_addrcpy(naddr, &(*sn)->raddr, af); + pf_addrcpy(naddr, &(sn->raddr), *naf); + if (nkif) - *nkif = (*sn)->rkif; + *nkif = sn->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { - printf("pf_map_addr: src tracking maps "); - pf_print_host(saddr, 0, af); + printf("%s: src tracking maps ", __func__); + pf_print_host(saddr, 0, saf); printf(" to "); - pf_print_host(naddr, 0, af); + pf_print_host(naddr, 0, *naf); if (nkif) printf("@%s", (*nkif)->pfik_name); printf("\n"); @@ -816,31 +998,30 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done; } +map_addr: /* - * Source node has not been found. Find a new address and store it - * in variables given by the caller. + * Source node has not been found or is invalid. Find a new address + * and store it in variables given by the caller. */ - if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) { - /* pf_map_addr() sets reason counters on its own */ + if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr, + rpool)) != 0) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: pf_map_addr has failed\n", __func__); goto done; } if (V_pf_status.debug >= PF_DEBUG_NOISY && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - printf("pf_map_addr: selected address "); - pf_print_host(naddr, 0, af); + printf("%s: selected address ", __func__); + pf_print_host(naddr, 0, *naf); if (nkif) printf("@%s", (*nkif)->pfik_name); printf("\n"); } done: - if ((*sn) != NULL) - PF_SRC_NODE_UNLOCK(*sn); - - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } + if (sn != NULL) + PF_SRC_NODE_UNLOCK(sn); return (reason); } @@ -890,8 +1071,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, { struct pf_pdesc *pd = ctx->pd; struct pf_addr *naddr; - struct pf_ksrc_node *sn = NULL; - 
struct pf_srchash *sh = NULL; + int idx; uint16_t *nportp; uint16_t low, high; u_short reason; @@ -906,8 +1086,19 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, return (PFRES_MEMORY); } - naddr = &ctx->nk->addr[1]; - nportp = &ctx->nk->port[1]; + switch (nat_action) { + case PF_NAT: + idx = pd->sidx; + break; + case PF_BINAT: + idx = 1; + break; + case PF_RDR: + idx = pd->didx; + break; + } + naddr = &ctx->nk->addr[idx]; + nportp = &ctx->nk->port[idx]; switch (nat_action) { case PF_NAT: @@ -919,22 +1110,22 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, high = rpool->proxy_port[1]; } if (rpool->mape.offset > 0) { - if (pf_get_mape_sport(pd, r, naddr, nportp, &sn, - &sh, &ctx->udp_mapping, rpool)) { + if (pf_get_mape_sport(pd, r, naddr, nportp, + &ctx->udp_mapping, rpool)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: MAP-E port allocation (%u/%u/%u)" - " failed\n", + "pf: MAP-E port allocation (%u/%u/%u)" + " failed", rpool->mape.offset, rpool->mape.psidlen, - rpool->mape.psid)); + rpool->mape.psid); reason = PFRES_MAPFAILED; goto notrans; } - } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn, - &sh, rpool, &ctx->udp_mapping, PF_SN_NAT)) { + } else if (pf_get_sport(pd, r, naddr, nportp, low, high, + rpool, &ctx->udp_mapping, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: NAT proxy port allocation (%u-%u) failed\n", - rpool->proxy_port[0], rpool->proxy_port[1])); + "pf: NAT proxy port allocation (%u-%u) failed", + rpool->proxy_port[0], rpool->proxy_port[1]); reason = PFRES_MAPFAILED; goto notrans; } @@ -1016,8 +1207,9 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, int tries; uint16_t cut, low, high, nport; - reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, - NULL, &sn, &sh, rpool, PF_SN_NAT); + reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, + &(pd->naf), NULL, NULL, rpool, PF_SN_NAT); + if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) @@ -1030,10 +1222,13 
@@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, if (rpool->proxy_port[1]) { uint32_t tmp_nport; + uint16_t div; + + div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1; + div = (div == 0) ? 1 : div; - tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % - (rpool->proxy_port[1] - rpool->proxy_port[0] + - 1)) + rpool->proxy_port[0]; + tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) + + rpool->proxy_port[0]; /* Wrap around if necessary. */ if (tmp_nport > 65535) @@ -1100,13 +1295,13 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, * the state may be reused if the TCP state is terminal. */ DPFPRINTF(PF_DEBUG_MISC, - ("pf: RDR source port allocation failed\n")); + "pf: RDR source port allocation failed"); break; out: DPFPRINTF(PF_DEBUG_MISC, - ("pf: RDR source port allocation %u->%u\n", - ntohs(pd->nsport), ntohs(ctx->nk->port[0]))); + "pf: RDR source port allocation %u->%u", + ntohs(pd->nsport), ntohs(ctx->nk->port[0])); break; } default: @@ -1134,8 +1329,6 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) struct pf_addr ndaddr, nsaddr, naddr; u_int16_t nport = 0; int prefixlen = 96; - struct pf_srchash *sh = NULL; - struct pf_ksrc_node *sns = NULL; bzero(&nsaddr, sizeof(nsaddr)); bzero(&ndaddr, sizeof(ndaddr)); @@ -1154,12 +1347,11 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) panic("pf_get_transaddr_af: no nat pool for source address"); /* get source address and port */ - if (pf_get_sport(pd, r, &nsaddr, &nport, - r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat, - NULL, PF_SN_NAT)) { + if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], + r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: af-to NAT proxy port allocation (%u-%u) failed", - r->nat.proxy_port[0], r->nat.proxy_port[1])); + "pf: af-to NAT proxy port allocation (%u-%u) failed", + r->nat.proxy_port[0], r->nat.proxy_port[1]); return (-1); } @@ -1181,8 +1373,8 @@ 
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) /* get the destination address and port */ if (! TAILQ_EMPTY(&r->rdr.list)) { - if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL, - &sns, NULL, &r->rdr, PF_SN_NAT)) + if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, &(pd->naf), + NULL, NULL, &r->rdr, PF_SN_NAT)) return (-1); if (r->rdr.proxy_port[0]) pd->ndport = htons(r->rdr.proxy_port[0]); diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 73933c022ca2..993981a9c0de 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -118,7 +118,7 @@ dump_state_peer(struct nl_writer *nw, int attr, const struct pf_state_peer *peer nlattr_add_u16(nw, PF_STP_PFSS_FLAGS, pfss_flags); nlattr_add_u32(nw, PF_STP_PFSS_TS_MOD, sc->pfss_ts_mod); nlattr_add_u8(nw, PF_STP_PFSS_TTL, sc->pfss_ttl); - nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PFSYNC_SCRUB_FLAG_VALID); + nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PF_SCRUB_FLAG_VALID); } nlattr_set_len(nw, off); @@ -178,7 +178,7 @@ dump_state(struct nlpcb *nlp, const struct nlmsghdr *hdr, struct pf_kstate *s, nlattr_add_string(nw, PF_ST_IFNAME, s->kif->pfik_name); nlattr_add_string(nw, PF_ST_ORIG_IFNAME, s->orig_kif->pfik_name); - dump_addr(nw, PF_ST_RT_ADDR, &s->act.rt_addr, af); + dump_addr(nw, PF_ST_RT_ADDR, &s->act.rt_addr, s->act.rt_af); nlattr_add_u32(nw, PF_ST_CREATION, time_uptime - (s->creation / 1000)); uint32_t expire = pf_state_expires(s); if (expire > time_uptime) @@ -224,6 +224,7 @@ dump_state(struct nlpcb *nlp, const struct nlmsghdr *hdr, struct pf_kstate *s, if (s->sns[PF_SN_ROUTE] != NULL) src_node_flags |= PFSTATE_SRC_NODE_ROUTE; nlattr_add_u8(nw, PF_ST_SRC_NODE_FLAGS, src_node_flags); + nlattr_add_u8(nw, PF_ST_RT_AF, s->act.rt_af); if (!dump_state_peer(nw, PF_ST_PEER_SRC, &s->src)) goto enomem; @@ -762,6 +763,8 @@ static const struct nlattr_parser nla_p_rule[] = { { .type = PF_RT_RCV_IFNOT, .off = _OUT(rcvifnot), .cb = nlattr_get_bool }, { .type = PF_RT_PKTRATE, .off = _OUT(pktrate), .arg 
= &threshold_parser, .cb = nlattr_get_nested }, { .type = PF_RT_MAX_PKT_SIZE, .off = _OUT(max_pkt_size), .cb = nlattr_get_uint16 }, + { .type = PF_RT_TYPE_2, .off = _OUT(type), .cb = nlattr_get_uint16 }, + { .type = PF_RT_CODE_2, .off = _OUT(code), .cb = nlattr_get_uint16 }, }; NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule); #undef _OUT @@ -983,8 +986,12 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u8(nw, PF_RT_AF, rule->af); nlattr_add_u8(nw, PF_RT_NAF, rule->naf); nlattr_add_u8(nw, PF_RT_PROTO, rule->proto); + nlattr_add_u8(nw, PF_RT_TYPE, rule->type); nlattr_add_u8(nw, PF_RT_CODE, rule->code); + nlattr_add_u16(nw, PF_RT_TYPE_2, rule->type); + nlattr_add_u16(nw, PF_RT_CODE_2, rule->code); + nlattr_add_u8(nw, PF_RT_FLAGS, rule->flags); nlattr_add_u8(nw, PF_RT_FLAGSET, rule->flagset); nlattr_add_u8(nw, PF_RT_MIN_TTL, rule->min_ttl); @@ -1018,6 +1025,7 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u64(nw, PF_RT_SRC_NODES_NAT, counter_u64_fetch(rule->src_nodes[PF_SN_NAT])); nlattr_add_u64(nw, PF_RT_SRC_NODES_ROUTE, counter_u64_fetch(rule->src_nodes[PF_SN_ROUTE])); nlattr_add_pf_threshold(nw, PF_RT_PKTRATE, &rule->pktrate); + nlattr_add_time_t(nw, PF_RT_EXPTIME, time_second - (time_uptime - rule->exptime)); error = pf_kanchor_copyout(ruleset, rule, anchor_call, sizeof(anchor_call)); MPASS(error == 0); @@ -1227,6 +1235,9 @@ pf_handle_get_status(struct nlmsghdr *hdr, struct nl_pstate *npt) V_pf_status.fcounters); nlattr_add_counters(nw, PF_GS_SCOUNTERS, SCNT_MAX, pf_fcounter, V_pf_status.scounters); + nlattr_add_counters(nw, PF_GS_NCOUNTERS, NCNT_MAX, pf_fcounter, + V_pf_status.ncounters); + nlattr_add_u64(nw, PF_GS_FRAGMENTS, pf_normalize_get_frag_count()); pfi_update_status(V_pf_status.ifname, &s); nlattr_add_u64_array(nw, PF_GS_BCOUNTERS, 2 * 2, (uint64_t *)s.bcounters); @@ -1761,7 +1772,7 @@ pf_handle_get_srcnodes(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u32(nw, PF_SN_STATES, n->states); 
nlattr_add_u32(nw, PF_SN_CONNECTIONS, n->conn); nlattr_add_u8(nw, PF_SN_AF, n->af); - nlattr_add_u8(nw, PF_SN_NAF, n->naf); + nlattr_add_u8(nw, PF_SN_RAF, n->raf); nlattr_add_u8(nw, PF_SN_RULE_TYPE, n->ruletype); nlattr_add_u64(nw, PF_SN_CREATION, secs - n->creation); @@ -1944,7 +1955,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt) n = pfr_table_count(&attrs.pfrio_table, attrs.pfrio_flags); pfrtstats = mallocarray(n, - sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); + sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); error = pfr_get_tstats(&attrs.pfrio_table, pfrtstats, &n, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); @@ -1996,7 +2007,7 @@ pf_handle_get_tstats(struct nlmsghdr *hdr, struct nl_pstate *npt) } } } - free(pfrtstats, M_TEMP); + free(pfrtstats, M_PF); if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); @@ -2081,6 +2092,241 @@ pf_handle_clear_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) return (error); } +TAILQ_HEAD(pfr_addrq, pfr_addr_item); +struct nl_parsed_table_addrs { + struct pfr_table table; + uint32_t flags; + struct pfr_addr addrs[256]; + size_t addr_count; + int nadd; + int ndel; + int nchange; +}; +#define _OUT(_field) offsetof(struct pfr_addr, _field) +static const struct nlattr_parser nla_p_pfr_addr[] = { + { .type = PFR_A_AF, .off = _OUT(pfra_af), .cb = nlattr_get_uint8 }, + { .type = PFR_A_NET, .off = _OUT(pfra_net), .cb = nlattr_get_uint8 }, + { .type = PFR_A_NOT, .off = _OUT(pfra_not), .cb = nlattr_get_bool }, + { .type = PFR_A_ADDR, .off = _OUT(pfra_u), .cb = nlattr_get_in6_addr }, +}; +#undef _OUT +NL_DECLARE_ATTR_PARSER(pfra_addr_parser, nla_p_pfr_addr); + +static int +nlattr_get_pfr_addr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, + void *target) +{ + struct nl_parsed_table_addrs *attrs = target; + struct pfr_addr addr = { 0 }; + int error; + + if (attrs->addr_count >= nitems(attrs->addrs)) + return (E2BIG); + + error = nlattr_get_nested(nla, 
npt, &pfra_addr_parser, &addr); + if (error != 0) + return (error); + + memcpy(&attrs->addrs[attrs->addr_count], &addr, sizeof(addr)); + attrs->addr_count++; + + return (0); +} + +NL_DECLARE_ATTR_PARSER(nested_table_parser, nla_p_table); + +#define _OUT(_field) offsetof(struct nl_parsed_table_addrs, _field) +static const struct nlattr_parser nla_p_table_addr[] = { + { .type = PF_TA_TABLE, .off = _OUT(table), .arg = &nested_table_parser, .cb = nlattr_get_nested }, + { .type = PF_TA_ADDR, .cb = nlattr_get_pfr_addr }, + { .type = PF_TA_FLAGS, .off = _OUT(flags), .cb = nlattr_get_uint32 }, +}; +NL_DECLARE_PARSER(table_addr_parser, struct genlmsghdr, nlf_p_empty, nla_p_table_addr); +#undef _OUT + +static int +pf_handle_table_add_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) +{ + struct nl_parsed_table_addrs attrs = { 0 }; + struct nl_writer *nw = npt->nw; + struct genlmsghdr *ghdr_new; + int error; + + error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs); + if (error != 0) + return (error); + + PF_RULES_WLOCK(); + error = pfr_add_addrs(&attrs.table, &attrs.addrs[0], + attrs.addr_count, &attrs.nadd, attrs.flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) + return (ENOMEM); + + ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); + ghdr_new->cmd = PFNL_CMD_TABLE_ADD_ADDR; + ghdr_new->version = 0; + ghdr_new->reserved = 0; + + nlattr_add_u32(nw, PF_TA_NBR_ADDED, attrs.nadd); + + if (!nlmsg_end(nw)) + return (ENOMEM); + + return (error); +} + +static int +pf_handle_table_del_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) +{ + struct nl_parsed_table_addrs attrs = { 0 }; + struct nl_writer *nw = npt->nw; + struct genlmsghdr *ghdr_new; + int error; + + error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs); + if (error != 0) + return (error); + + PF_RULES_WLOCK(); + error = pfr_del_addrs(&attrs.table, &attrs.addrs[0], + attrs.addr_count, &attrs.ndel, attrs.flags | PFR_FLAG_USERIOCTL); + 
PF_RULES_WUNLOCK(); + + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) + return (ENOMEM); + + ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); + ghdr_new->cmd = PFNL_CMD_TABLE_DEL_ADDR; + ghdr_new->version = 0; + ghdr_new->reserved = 0; + + nlattr_add_u32(nw, PF_TA_NBR_DELETED, attrs.ndel); + + if (!nlmsg_end(nw)) + return (ENOMEM); + + return (error); +} + +static int +pf_handle_table_set_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) +{ + struct nl_parsed_table_addrs attrs = { 0 }; + struct nl_writer *nw = npt->nw; + struct genlmsghdr *ghdr_new; + int error; + + error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs); + if (error != 0) + return (error); + + PF_RULES_WLOCK(); + error = pfr_set_addrs(&attrs.table, &attrs.addrs[0], + attrs.addr_count, NULL, &attrs.nadd, &attrs.ndel, &attrs.nchange, + attrs.flags | PFR_FLAG_USERIOCTL, 0); + PF_RULES_WUNLOCK(); + + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) + return (ENOMEM); + + ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); + ghdr_new->cmd = PFNL_CMD_TABLE_DEL_ADDR; + ghdr_new->version = 0; + ghdr_new->reserved = 0; + + nlattr_add_u32(nw, PF_TA_NBR_ADDED, attrs.nadd); + nlattr_add_u32(nw, PF_TA_NBR_DELETED, attrs.ndel); + nlattr_add_u32(nw, PF_TA_NBR_CHANGED, attrs.nchange); + + if (!nlmsg_end(nw)) + return (ENOMEM); + + return (error); +} + +static int +nlattr_add_pfr_addr(struct nl_writer *nw, int attr, const struct pfr_addr *a) +{ + int off = nlattr_add_nested(nw, attr); + if (off == 0) + return (false); + + nlattr_add_u32(nw, PFR_A_AF, a->pfra_af); + nlattr_add_u8(nw, PFR_A_NET, a->pfra_net); + nlattr_add_bool(nw, PFR_A_NOT, a->pfra_not); + nlattr_add_in6_addr(nw, PFR_A_ADDR, &a->pfra_u._pfra_ip6addr); + + nlattr_set_len(nw, off); + + return (true); +} + +static int +pf_handle_table_get_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) +{ + struct pfioc_table attrs = { 0 }; + struct pfr_addr *pfras; + struct nl_writer *nw = npt->nw; + struct genlmsghdr *ghdr_new; + int 
size = 0; + int error; + + PF_RULES_RLOCK_TRACKER; + + error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs); + if (error != 0) + return (error); + + PF_RULES_RLOCK(); + /* Get required size. */ + error = pfr_get_addrs(&attrs.pfrio_table, NULL, + &size, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); + if (error != 0) { + PF_RULES_RUNLOCK(); + return (error); + } + pfras = mallocarray(size, sizeof(struct pfr_addr), M_PF, + M_NOWAIT | M_ZERO); + if (pfras == NULL) { + PF_RULES_RUNLOCK(); + return (ENOMEM); + } + /* Now get the addresses. */ + error = pfr_get_addrs(&attrs.pfrio_table, pfras, + &size, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error != 0) + goto out; + + for (int i = 0; i < size; i++) { + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { + nlmsg_abort(nw); + error = ENOMEM; + goto out; + } + ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); + ghdr_new->cmd = PFNL_CMD_TABLE_GET_ADDR; + ghdr_new->version = 0; + ghdr_new->reserved = 0; + + if (i == 0) + nlattr_add_u32(nw, PF_TA_ADDR_COUNT, size); + + nlattr_add_pfr_addr(nw, PF_TA_ADDR, &pfras[i]); + if (!nlmsg_end(nw)) { + nlmsg_abort(nw); + error = ENOMEM; + goto out; + } + } + +out: + free(pfras, M_PF); + return (error); +} + static const struct nlhdr_parser *all_parsers[] = { &state_parser, &addrule_parser, @@ -2095,6 +2341,7 @@ static const struct nlhdr_parser *all_parsers[] = { &add_addr_parser, &ruleset_parser, &table_parser, + &table_addr_parser, }; static uint16_t family_id; @@ -2317,6 +2564,34 @@ static const struct genl_cmd pf_cmds[] = { .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, + { + .cmd_num = PFNL_CMD_TABLE_ADD_ADDR, + .cmd_name = "TABLE_ADD_ADDRS", + .cmd_cb = pf_handle_table_add_addrs, + .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, + .cmd_priv = PRIV_NETINET_PF, + }, + { + .cmd_num = PFNL_CMD_TABLE_DEL_ADDR, + .cmd_name = "TABLE_DEL_ADDRS", + .cmd_cb = pf_handle_table_del_addrs, + .cmd_flags = 
GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, + .cmd_priv = PRIV_NETINET_PF, + }, + { + .cmd_num = PFNL_CMD_TABLE_SET_ADDR, + .cmd_name = "TABLE_SET_ADDRS", + .cmd_cb = pf_handle_table_set_addrs, + .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, + .cmd_priv = PRIV_NETINET_PF, + }, + { + .cmd_num = PFNL_CMD_TABLE_GET_ADDR, + .cmd_name = "TABLE_GET_ADDRS", + .cmd_cb = pf_handle_table_get_addrs, + .cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, + .cmd_priv = PRIV_NETINET_PF, + }, }; void diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h index 929c20e4c582..e1eb3e628df5 100644 --- a/sys/netpfil/pf/pf_nl.h +++ b/sys/netpfil/pf/pf_nl.h @@ -67,6 +67,10 @@ enum { PFNL_CMD_GET_TSTATS = 29, PFNL_CMD_CLR_TSTATS = 30, PFNL_CMD_CLR_ADDRS = 31, + PFNL_CMD_TABLE_ADD_ADDR = 32, + PFNL_CMD_TABLE_DEL_ADDR = 33, + PFNL_CMD_TABLE_SET_ADDR = 34, + PFNL_CMD_TABLE_GET_ADDR = 35, __PFNL_CMD_MAX, }; #define PFNL_CMD_MAX (__PFNL_CMD_MAX -1) @@ -135,6 +139,7 @@ enum pfstate_type_t { PF_ST_RT = 36, /* u8 */ PF_ST_RT_IFNAME = 37, /* string */ PF_ST_SRC_NODE_FLAGS = 38, /* u8 */ + PF_ST_RT_AF = 39, /* u8 */ }; enum pf_addr_type_t { @@ -280,6 +285,9 @@ enum pf_rule_type_t { PF_RT_SRC_NODES_ROUTE = 81, /* u64 */ PF_RT_PKTRATE = 82, /* nested, pf_threshold_type_t */ PF_RT_MAX_PKT_SIZE = 83, /* u16 */ + PF_RT_TYPE_2 = 84, /* u16 */ + PF_RT_CODE_2 = 85, /* u16 */ + PF_RT_EXPTIME = 86, /* time_t */ }; enum pf_addrule_type_t { @@ -347,6 +355,8 @@ enum pf_get_status_types_t { PF_GS_CHKSUM = 14, /* byte array */ PF_GS_PCOUNTERS = 15, /* u64 array */ PF_GS_BCOUNTERS = 16, /* u64 array */ + PF_GS_NCOUNTERS = 17, /* nested, */ + PF_GS_FRAGMENTS = 18, /* u64, */ }; enum pf_natlook_types_t { @@ -433,7 +443,7 @@ enum pf_srcnodes_types_t { PF_SN_CREATION = 12, /* u64 */ PF_SN_EXPIRE = 13, /* u64 */ PF_SN_CONNECTION_RATE = 14, /* nested, pf_threshold */ - PF_SN_NAF = 15, /* u8 */ + PF_SN_RAF = 15, /* u8 */ PF_SN_NODE_TYPE = 16, /* u8 */ }; @@ -460,6 +470,25 @@ enum pf_tstats_t { PF_TS_NZERO = 9, 
/* u64 */ }; +enum pfr_addr_t { + PFR_A_UNSPEC, + PFR_A_AF = 1, /* uint8_t */ + PFR_A_NET = 2, /* uint8_t */ + PFR_A_NOT = 3, /* bool */ + PFR_A_ADDR = 4, /* in6_addr */ +}; + +enum pf_table_addrs_t { + PF_TA_UNSPEC, + PF_TA_TABLE = 1, /* nested, pf_table_t */ + PF_TA_ADDR = 2, /* nested, pfr_addr_t */ + PF_TA_FLAGS = 3, /* u32 */ + PF_TA_NBR_ADDED = 4, /* u32 */ + PF_TA_NBR_DELETED = 5, /* u32 */ + PF_TA_NBR_CHANGED = 6, /* u32 */ + PF_TA_ADDR_COUNT = 7, /* u32 */ +}; + #ifdef _KERNEL void pf_nl_register(void); diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index 369292ca365e..53010222dd07 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -118,6 +118,8 @@ VNET_DEFINE_STATIC(uma_zone_t, pf_frnode_z); #define V_pf_frnode_z VNET(pf_frnode_z) VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z); #define V_pf_frag_z VNET(pf_frag_z) +VNET_DEFINE(uma_zone_t, pf_anchor_z); +VNET_DEFINE(uma_zone_t, pf_eth_anchor_z); TAILQ_HEAD(pf_fragqueue, pf_fragment); TAILQ_HEAD(pf_cachequeue, pf_fragment); @@ -160,13 +162,6 @@ static int pf_reassemble6(struct mbuf **, struct ip6_frag *, uint16_t, uint16_t, u_short *); #endif /* INET6 */ -#define DPFPRINTF(x) do { \ - if (V_pf_status.debug >= PF_DEBUG_MISC) { \ - printf("%s: ", __func__); \ - printf x ; \ - } \ -} while(0) - #ifdef INET static void pf_ip2key(struct ip *ip, struct pf_frnode *key) @@ -216,6 +211,12 @@ pf_normalize_cleanup(void) mtx_destroy(&V_pf_frag_mtx); } +uint64_t +pf_normalize_get_frag_count(void) +{ + return (uma_zone_get_cur(V_pf_frent_z)); +} + static int pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b) { @@ -262,7 +263,8 @@ pf_purge_fragments(uint32_t expire) if (frag->fr_timeout > expire) break; - DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + DPFPRINTF(PF_DEBUG_MISC, "expiring %d(%p)", + frag->fr_id, frag); pf_free_fragment(frag); } @@ -281,7 +283,7 @@ pf_flush_fragments(void) PF_FRAG_ASSERT(); goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; - DPFPRINTF(("trying 
to free %d frag entriess\n", goal)); + DPFPRINTF(PF_DEBUG_MISC, "trying to free %d frag entriess", goal); while (goal < uma_zone_get_cur(V_pf_frent_z)) { frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); if (frag) @@ -318,6 +320,7 @@ pf_free_fragment(struct pf_fragment *frag) /* Free all fragment entries */ while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); m_freem(frent->fe_m); uma_zfree(V_pf_frent_z, frent); @@ -335,6 +338,7 @@ pf_find_fragment(struct pf_frnode *key, uint32_t id) PF_FRAG_ASSERT(); frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_SEARCH], 1); if (frnode == NULL) return (NULL); MPASS(frnode->fn_fragments >= 1); @@ -442,6 +446,7 @@ pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent, ("overlapping fragment")); TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); } + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_INSERT], 1); if (frag->fr_firstoff[index] == NULL) { KASSERT(prev == NULL || pf_frent_index(prev) < index, @@ -500,6 +505,7 @@ pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent) } TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); KASSERT(frag->fr_entries[index] > 0, ("No fragments remaining")); frag->fr_entries[index]--; @@ -573,26 +579,30 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, /* No empty fragments. */ if (frent->fe_len == 0) { - DPFPRINTF(("bad fragment: len 0\n")); + DPFPRINTF(PF_DEBUG_MISC, "bad fragment: len 0"); goto bad_fragment; } /* All fragments are 8 byte aligned. */ if (frent->fe_mff && (frent->fe_len & 0x7)) { - DPFPRINTF(("bad fragment: mff and len %d\n", frent->fe_len)); + DPFPRINTF(PF_DEBUG_MISC, "bad fragment: mff and len %d", + frent->fe_len); goto bad_fragment; } /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. 
*/ if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { - DPFPRINTF(("bad fragment: max packet %d\n", - frent->fe_off + frent->fe_len)); + DPFPRINTF(PF_DEBUG_MISC, "bad fragment: max packet %d", + frent->fe_off + frent->fe_len); goto bad_fragment; } - DPFPRINTF((key->fn_af == AF_INET ? - "reass frag %d @ %d-%d\n" : "reass frag %#08x @ %d-%d\n", - id, frent->fe_off, frent->fe_off + frent->fe_len)); + if (key->fn_af == AF_INET) + DPFPRINTF(PF_DEBUG_MISC, "reass frag %d @ %d-%d\n", + id, frent->fe_off, frent->fe_off + frent->fe_len); + else + DPFPRINTF(PF_DEBUG_MISC, "reass frag %#08x @ %d-%d", + id, frent->fe_off, frent->fe_off + frent->fe_len); /* Fully buffer all of the fragments in this fragment queue. */ frag = pf_find_fragment(key, id); @@ -690,10 +700,10 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, precut = prev->fe_off + prev->fe_len - frent->fe_off; if (precut >= frent->fe_len) { - DPFPRINTF(("new frag overlapped\n")); + DPFPRINTF(PF_DEBUG_MISC, "new frag overlapped"); goto drop_fragment; } - DPFPRINTF(("frag head overlap %d\n", precut)); + DPFPRINTF(PF_DEBUG_MISC, "frag head overlap %d", precut); m_adj(frent->fe_m, precut); frent->fe_off += precut; frent->fe_len -= precut; @@ -705,7 +715,8 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, aftercut = frent->fe_off + frent->fe_len - after->fe_off; if (aftercut < after->fe_len) { - DPFPRINTF(("frag tail overlap %d", aftercut)); + DPFPRINTF(PF_DEBUG_MISC, "frag tail overlap %d", + aftercut); m_adj(after->fe_m, aftercut); /* Fragment may switch queue as fe_off changes */ pf_frent_remove(frag, after); @@ -713,7 +724,8 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, after->fe_len -= aftercut; /* Insert into correct queue */ if (pf_frent_insert(frag, after, prev)) { - DPFPRINTF(("fragment requeue limit exceeded")); + DPFPRINTF(PF_DEBUG_MISC, + "fragment requeue limit exceeded"); m_freem(after->fe_m); uma_zfree(V_pf_frent_z, after); /* There is not way to recover */ @@ -723,7 +735,7 @@ 
pf_fillup_fragment(struct pf_frnode *key, uint32_t id, } /* This fragment is completely overlapped, lose it. */ - DPFPRINTF(("old frag overlapped\n")); + DPFPRINTF(PF_DEBUG_MISC, "old frag overlapped"); next = TAILQ_NEXT(after, fr_next); pf_frent_remove(frag, after); m_freem(after->fe_m); @@ -732,7 +744,7 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, /* If part of the queue gets too long, there is not way to recover. */ if (pf_frent_insert(frag, frent, prev)) { - DPFPRINTF(("fragment queue limit exceeded\n")); + DPFPRINTF(PF_DEBUG_MISC, "fragment queue limit exceeded"); goto bad_fragment; } @@ -748,7 +760,7 @@ free_fragment: * fragment, the entire datagram (and any constituent fragments) MUST * be silently discarded. */ - DPFPRINTF(("flush overlapping fragments\n")); + DPFPRINTF(PF_DEBUG_MISC, "flush overlapping fragments"); pf_free_fragment(frag); bad_fragment: @@ -766,6 +778,7 @@ pf_join_fragment(struct pf_fragment *frag) frent = TAILQ_FIRST(&frag->fr_queue); TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); m = frent->fe_m; if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len) @@ -773,6 +786,7 @@ pf_join_fragment(struct pf_fragment *frag) uma_zfree(V_pf_frent_z, frent); while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1); m2 = frent->fe_m; /* Strip off ip header. 
*/ @@ -826,7 +840,8 @@ pf_reassemble(struct mbuf **m0, u_short *reason) m = *m0 = NULL; if (frag->fr_holes) { - DPFPRINTF(("frag %d, holes %d\n", frag->fr_id, frag->fr_holes)); + DPFPRINTF(PF_DEBUG_MISC, "frag %d, holes %d", + frag->fr_id, frag->fr_holes); return (PF_PASS); /* drop because *m0 is NULL, no error */ } @@ -872,14 +887,14 @@ pf_reassemble(struct mbuf **m0, u_short *reason) ip->ip_off &= ~(IP_MF|IP_OFFMASK); if (hdrlen + total > IP_MAXPACKET) { - DPFPRINTF(("drop: too big: %d\n", total)); + DPFPRINTF(PF_DEBUG_MISC, "drop: too big: %d", total); ip->ip_len = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test() */ return (PF_DROP); } - DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + DPFPRINTF(PF_DEBUG_MISC, "complete: %p(%d)", m, ntohs(ip->ip_len)); return (PF_PASS); } #endif /* INET */ @@ -931,8 +946,8 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, m = *m0 = NULL; if (frag->fr_holes) { - DPFPRINTF(("frag %d, holes %d\n", frag->fr_id, - frag->fr_holes)); + DPFPRINTF(PF_DEBUG_MISC, "frag %d, holes %d", frag->fr_id, + frag->fr_holes); PF_FRAG_UNLOCK(); return (PF_PASS); /* Drop because *m0 is NULL, no error. */ } @@ -993,14 +1008,15 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, ip6->ip6_nxt = proto; if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { - DPFPRINTF(("drop: too big: %d\n", total)); + DPFPRINTF(PF_DEBUG_MISC, "drop: too big: %d", total); ip6->ip6_plen = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test6(). */ return (PF_DROP); } - DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip6->ip6_plen))); + DPFPRINTF(PF_DEBUG_MISC, "complete: %p(%d)", m, + ntohs(ip6->ip6_plen)); return (PF_PASS); fail: @@ -1090,7 +1106,7 @@ pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag, action = PF_PASS; } else { /* Drop expects an mbuf to free. 
*/ - DPFPRINTF(("refragment error %d\n", error)); + DPFPRINTF(PF_DEBUG_MISC, "refragment error %d", error); action = PF_DROP; } for (; m; m = t) { @@ -1230,7 +1246,7 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) * no-df above, fine. Otherwise drop it. */ if (h->ip_off & htons(IP_DF)) { - DPFPRINTF(("IP_DF\n")); + DPFPRINTF(PF_DEBUG_MISC, "IP_DF"); goto bad; } @@ -1238,13 +1254,13 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) /* All fragments are 8 byte aligned */ if (mff && (ip_len & 0x7)) { - DPFPRINTF(("mff and %d\n", ip_len)); + DPFPRINTF(PF_DEBUG_MISC, "mff and %d", ip_len); goto bad; } /* Respect maximum length */ if (fragoff + ip_len > IP_MAXPACKET) { - DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + DPFPRINTF(PF_DEBUG_MISC, "max packet %d", fragoff + ip_len); goto bad; } @@ -1256,7 +1272,8 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) /* Fully buffer all of the fragments * Might return a completely reassembled mbuf, or NULL */ PF_FRAG_LOCK(); - DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); + DPFPRINTF(PF_DEBUG_MISC, "reass frag %d @ %d-%d", + h->ip_id, fragoff, max); verdict = pf_reassemble(&pd->m, reason); PF_FRAG_UNLOCK(); @@ -1282,7 +1299,7 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) return (PF_PASS); bad: - DPFPRINTF(("dropping bad fragment\n")); + DPFPRINTF(PF_DEBUG_MISC, "dropping bad fragment"); REASON_SET(reason, PFRES_FRAG); drop: if (r != NULL && r->log) @@ -1349,7 +1366,7 @@ pf_normalize_ip6(int off, u_short *reason, pf_rule_to_actions(r, &pd->act); } - if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), NULL, reason, AF_INET6)) + if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), reason, AF_INET6)) return (PF_DROP); /* Offset now points to data portion. 
*/ @@ -1537,7 +1554,7 @@ pf_normalize_tcp_init(struct pf_pdesc *pd, struct tcphdr *th, olen = (th->th_off << 2) - sizeof(*th); if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m, - pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) + pd->off + sizeof(*th), opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -1640,7 +1657,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (olen >= TCPOLEN_TIMESTAMP && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && - pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af)) { + pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, pd->af)) { /* Modulate the timestamps. Can be used for NAT detection, OS * uptime determination or reboot detection. */ @@ -1711,7 +1728,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || time_uptime - (state->creation / 1000) > TS_MAX_CONN)) { if (V_pf_status.debug >= PF_DEBUG_MISC) { - DPFPRINTF(("src idled out of PAWS\n")); + DPFPRINTF(PF_DEBUG_MISC, "src idled out of PAWS"); pf_print_state(state); printf("\n"); } @@ -1721,7 +1738,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { if (V_pf_status.debug >= PF_DEBUG_MISC) { - DPFPRINTF(("dst idled out of PAWS\n")); + DPFPRINTF(PF_DEBUG_MISC, "dst idled out of PAWS"); pf_print_state(state); printf("\n"); } @@ -1826,22 +1843,22 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, * an old timestamp. */ - DPFPRINTF(("Timestamp failed %c%c%c%c\n", + DPFPRINTF(PF_DEBUG_MISC, "Timestamp failed %c%c%c%c", SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ? '1' : ' ', SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', - SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? 
'3' : ' ')); - DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " - "idle: %jus %lums\n", + SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '); + DPFPRINTF(PF_DEBUG_MISC, " tsval: %u tsecr: %u +ticks: " + "%u idle: %jus %lums", tsval, tsecr, tsval_from_last, (uintmax_t)delta_ts.tv_sec, - delta_ts.tv_usec / 1000)); - DPFPRINTF((" src->tsval: %u tsecr: %u\n", - src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); - DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" - "\n", dst->scrub->pfss_tsval, - dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); + delta_ts.tv_usec / 1000); + DPFPRINTF(PF_DEBUG_MISC, " src->tsval: %u tsecr: %u", + src->scrub->pfss_tsval, src->scrub->pfss_tsecr); + DPFPRINTF(PF_DEBUG_MISC, " dst->tsval: %u tsecr: %u " + "tsval0: %u", dst->scrub->pfss_tsval, + dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0); if (V_pf_status.debug >= PF_DEBUG_MISC) { pf_print_state(state); pf_print_flags(tcp_get_flags(th)); @@ -1891,8 +1908,8 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, * stack changed its RFC1323 behavior?!?! */ if (V_pf_status.debug >= PF_DEBUG_MISC) { - DPFPRINTF(("Did not receive expected RFC1323 " - "timestamp\n")); + DPFPRINTF(PF_DEBUG_MISC, "Did not receive expected " + "RFC1323 timestamp"); pf_print_state(state); pf_print_flags(tcp_get_flags(th)); printf("\n"); @@ -1919,9 +1936,9 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ - DPFPRINTF(("Broken RFC1323 stack did not " - "timestamp data packet. Disabled PAWS " - "security.\n")); + DPFPRINTF(PF_DEBUG_MISC, "Broken RFC1323 stack did " + "not timestamp data packet. 
Disabled PAWS " + "security."); pf_print_state(state); pf_print_flags(tcp_get_flags(th)); printf("\n"); @@ -1970,7 +1987,7 @@ pf_normalize_mss(struct pf_pdesc *pd) olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); optsoff = pd->off + sizeof(struct tcphdr); if (olen < TCPOLEN_MAXSEG || - !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) + !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) return (0); opt = opts; @@ -2004,7 +2021,7 @@ pf_scan_sctp(struct pf_pdesc *pd) int ret; while (pd->off + chunk_off < pd->tot_len) { - if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL, + if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch), NULL, pd->af)) return (PF_DROP); @@ -2021,7 +2038,7 @@ pf_scan_sctp(struct pf_pdesc *pd) struct sctp_init_chunk init; if (!pf_pull_hdr(pd->m, pd->off + chunk_start, &init, - sizeof(init), NULL, NULL, pd->af)) + sizeof(init), NULL, pd->af)) return (PF_DROP); /* diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c index 89486928e6e1..2f484e2dabc6 100644 --- a/sys/netpfil/pf/pf_nv.c +++ b/sys/netpfil/pf/pf_nv.c @@ -505,6 +505,7 @@ int pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule) { int error = 0; + uint8_t tmp; #define ERROUT(x) ERROUT_FUNCTION(errout, x) @@ -610,8 +611,10 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule) PFNV_CHK(pf_nvuint8(nvl, "keep_state", &rule->keep_state)); PFNV_CHK(pf_nvuint8(nvl, "af", &rule->af)); PFNV_CHK(pf_nvuint8(nvl, "proto", &rule->proto)); - PFNV_CHK(pf_nvuint8(nvl, "type", &rule->type)); - PFNV_CHK(pf_nvuint8(nvl, "code", &rule->code)); + PFNV_CHK(pf_nvuint8(nvl, "type", &tmp)); + rule->type = tmp; + PFNV_CHK(pf_nvuint8(nvl, "code", &tmp)); + rule->code = tmp; PFNV_CHK(pf_nvuint8(nvl, "flags", &rule->flags)); PFNV_CHK(pf_nvuint8(nvl, "flagset", &rule->flagset)); PFNV_CHK(pf_nvuint8(nvl, "min_ttl", &rule->min_ttl)); diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c index 3e00cc7c80a2..8c041d45eae8 100644 --- 
a/sys/netpfil/pf/pf_osfp.c +++ b/sys/netpfil/pf/pf_osfp.c @@ -40,9 +40,6 @@ #endif static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); -#define DPFPRINTF(format, x...) \ - if (V_pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); VNET_DEFINE_STATIC(struct pf_osfp_list, pf_osfp_list) = @@ -85,7 +82,7 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, const struct tcphdr *tcp) ip6 = mtod(pd->m, struct ip6_hdr *); break; } - if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, NULL, + if (!pf_pull_hdr(pd->m, pd->off, hdr, tcp->th_off << 2, NULL, pd->af)) return (NULL); return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); @@ -189,8 +186,8 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st optlen = MAX(optlen, 1); /* paranoia */ } - DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " - "(TS=%s,M=%s%d,W=%s%d)\n", + DPFPRINTF(PF_DEBUG_NOISY, "fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)", srcname, ntohs(tcp->th_sport), fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, @@ -219,7 +216,7 @@ pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) if (os == PF_OSFP_ANY) return (1); if (list == NULL) { - DPFPRINTF("osfp no match against %x\n", os); + DPFPRINTF(PF_DEBUG_NOISY, "osfp no match against %x", os); return (os == PF_OSFP_UNKNOWN); } PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); @@ -228,13 +225,13 @@ pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) if ((os_class == PF_OSFP_ANY || en_class == os_class) && (os_version == PF_OSFP_ANY || en_version == os_version) && (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { - DPFPRINTF("osfp matched %s %s %s %x==%x\n", + DPFPRINTF(PF_DEBUG_NOISY, "osfp matched %s %s %s %x==%x", entry->fp_class_nm, entry->fp_version_nm, entry->fp_subtype_nm, os, entry->fp_os); return (1); } } - 
DPFPRINTF("fingerprint 0x%x didn't match\n", os); + DPFPRINTF(PF_DEBUG_NOISY, "fingerprint 0x%x didn't match", os); return (0); } @@ -275,8 +272,8 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpadd.fp_ttl = fpioc->fp_ttl; #if 0 /* XXX RYAN wants to fix logging */ - DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " - "(TS=%s,M=%s%d,W=%s%d) %x\n", + DPFPRINTF(PF_DEBUG_NOISY, "adding osfp %s %s %s =" + " %s%d:%d:%d:%s%d:0x%llx %d (TS=%s,M=%s%d,W=%s%d) %x", fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, fpioc->fp_os.fp_subtype_nm, (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 2e5165a9900c..4e16eaa76f9d 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -59,11 +59,8 @@ #error "Kernel only file. Please use sbin/pfctl/pf_ruleset.c instead." #endif -#define DPFPRINTF(format, x...) \ - if (V_pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) -#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) -#define rs_free(x) free(x, M_TEMP) +#define rs_malloc(x) malloc(x, M_PF, M_NOWAIT|M_ZERO) +#define rs_free(x) free(x, M_PF) VNET_DEFINE(struct pf_kanchor_global, pf_anchors); VNET_DEFINE(struct pf_kanchor, pf_main_anchor); @@ -241,7 +238,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) ((parent != NULL) && (strlen(parent->path) >= PF_ANCHOR_MAXPATH))) return (NULL); - anchor = rs_malloc(sizeof(*anchor)); + anchor = uma_zalloc(V_pf_anchor_z, M_NOWAIT | M_ZERO); if (anchor == NULL) return (NULL); @@ -262,7 +259,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) printf("%s: RB_INSERT1 " "'%s' '%s' collides with '%s' '%s'\n", __func__, anchor->path, anchor->name, dup->path, dup->name); - rs_free(anchor); + uma_zfree(V_pf_anchor_z, anchor); return (NULL); } @@ -276,7 +273,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) anchor->name, dup->path, dup->name); RB_REMOVE(pf_kanchor_global, &V_pf_anchors, 
anchor); - rs_free(anchor); + uma_zfree(V_pf_anchor_z, anchor); return (NULL); } } @@ -349,11 +346,17 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset) !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || ruleset->rules[i].inactive.open) return; + for (int i = 0; i < PF_RULESET_MAX; i++) { + pf_rule_tree_free(ruleset->rules[i].active.tree); + ruleset->rules[i].active.tree = NULL; + pf_rule_tree_free(ruleset->rules[i].inactive.tree); + ruleset->rules[i].inactive.tree = NULL; + } RB_REMOVE(pf_kanchor_global, &V_pf_anchors, ruleset->anchor); if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_kanchor_node, &parent->children, ruleset->anchor); - rs_free(ruleset->anchor); + uma_zfree(V_pf_anchor_z, ruleset->anchor); if (parent == NULL) return; ruleset = &parent->ruleset; @@ -386,7 +389,8 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s, strlcpy(path, s->anchor->path, MAXPATHLEN); while (name[0] == '.' && name[1] == '.' && name[2] == '/') { if (!path[0]) { - DPFPRINTF("%s: .. beyond root\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: .. 
beyond root", + __func__); rs_free(path); return (1); } @@ -408,7 +412,7 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s, ruleset = pf_find_or_create_kruleset(path); rs_free(path); if (ruleset == NULL || ruleset == &pf_main_ruleset) { - DPFPRINTF("%s: ruleset\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: ruleset", __func__); return (1); } r->anchor = ruleset->anchor; @@ -615,7 +619,7 @@ pf_find_or_create_keth_ruleset(const char *path) rs_free(p); return (NULL); } - anchor = (struct pf_keth_anchor *)rs_malloc(sizeof(*anchor)); + anchor = uma_zalloc(V_pf_eth_anchor_z, M_NOWAIT | M_ZERO); if (anchor == NULL) { rs_free(p); return (NULL); @@ -633,7 +637,7 @@ pf_find_or_create_keth_ruleset(const char *path) printf("%s: RB_INSERT1 " "'%s' '%s' collides with '%s' '%s'\n", __func__, anchor->path, anchor->name, dup->path, dup->name); - rs_free(anchor); + uma_zfree(V_pf_eth_anchor_z, anchor); rs_free(p); return (NULL); } @@ -647,7 +651,7 @@ pf_find_or_create_keth_ruleset(const char *path) anchor->name, dup->path, dup->name); RB_REMOVE(pf_keth_anchor_global, &V_pf_keth_anchors, anchor); - rs_free(anchor); + uma_zfree(V_pf_eth_anchor_z, anchor); rs_free(p); return (NULL); } @@ -690,7 +694,8 @@ pf_keth_anchor_setup(struct pf_keth_rule *r, const struct pf_keth_ruleset *s, strlcpy(path, s->anchor->path, MAXPATHLEN); while (name[0] == '.' && name[1] == '.' && name[2] == '/') { if (!path[0]) { - DPFPRINTF("%s: .. beyond root\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: .. 
beyond root", + __func__); rs_free(path); return (1); } @@ -712,7 +717,7 @@ pf_keth_anchor_setup(struct pf_keth_rule *r, const struct pf_keth_ruleset *s, ruleset = pf_find_or_create_keth_ruleset(path); rs_free(path); if (ruleset == NULL || ruleset->anchor == NULL) { - DPFPRINTF("%s: ruleset\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: ruleset", __func__); return (1); } r->anchor = ruleset->anchor; @@ -755,7 +760,7 @@ pf_remove_if_empty_keth_ruleset(struct pf_keth_ruleset *ruleset) if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_keth_anchor_node, &parent->children, ruleset->anchor); - rs_free(ruleset->anchor); + uma_zfree(V_pf_eth_anchor_z, ruleset->anchor); if (parent == NULL) return; ruleset = &parent->ruleset; diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c index 66757fa4b756..d11551ffb6ae 100644 --- a/sys/netpfil/pf/pf_syncookies.c +++ b/sys/netpfil/pf/pf_syncookies.c @@ -88,8 +88,6 @@ #include <net/pfvar.h> #include <netpfil/pf/pf_nv.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - union pf_syncookie { uint8_t cookie; struct { @@ -281,7 +279,7 @@ pf_synflood_check(struct pf_pdesc *pd) pf_syncookie_rotate, curvnet); V_pf_status.syncookies_active = true; DPFPRINTF(LOG_WARNING, - ("synflood detected, enabling syncookies\n")); + "synflood detected, enabling syncookies"); // XXXTODO V_pf_status.lcounters[LCNT_SYNFLOODS]++; } @@ -289,7 +287,7 @@ pf_synflood_check(struct pf_pdesc *pd) } void -pf_syncookie_send(struct pf_pdesc *pd) +pf_syncookie_send(struct pf_pdesc *pd, u_short *reason) { uint16_t mss; uint32_t iss; @@ -299,7 +297,7 @@ pf_syncookie_send(struct pf_pdesc *pd) pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport, iss, ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss, 0, M_SKIP_FIREWALL | (pd->m->m_flags & M_LOOP), 0, 0, - pd->act.rtableid); + pd->act.rtableid, reason); counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_SENT], 1); /* XXX Maybe only in adaptive mode? 
*/ atomic_add_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven], @@ -367,7 +365,7 @@ pf_syncookie_rotate(void *arg) V_pf_status.syncookies_mode == PF_SYNCOOKIES_NEVER) ) { V_pf_status.syncookies_active = false; - DPFPRINTF(PF_DEBUG_MISC, ("syncookies disabled\n")); + DPFPRINTF(PF_DEBUG_MISC, "syncookies disabled"); } /* nothing in flight any more? delete keys and return */ @@ -497,7 +495,7 @@ pf_syncookie_generate(struct pf_pdesc *pd, uint16_t mss) } struct mbuf * -pf_syncookie_recreate_syn(struct pf_pdesc *pd) +pf_syncookie_recreate_syn(struct pf_pdesc *pd, u_short *reason) { uint8_t wscale; uint16_t mss; @@ -518,5 +516,5 @@ pf_syncookie_recreate_syn(struct pf_pdesc *pd) return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport, *pd->dport, seq, 0, TH_SYN, wscale, mss, pd->ttl, (pd->m->m_flags & M_LOOP), 0, PF_MTAG_FLAG_SYNCOOKIE_RECREATED, - cookie.flags.sack_ok, pd->act.rtableid)); + cookie.flags.sack_ok, pd->act.rtableid, reason)); } diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index 43e4366845a2..0e2b9fe1cac8 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -49,8 +49,6 @@ #include <net/vnet.h> #include <net/pfvar.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - #define ACCEPT_FLAGS(flags, oklist) \ do { \ if ((flags & ~(oklist)) & \ @@ -296,7 +294,7 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, else pfr_destroy_kentries(&workq); if (nadd != NULL) - *nadd = xadd; + *nadd += xadd; pfr_destroy_ktable(tmpkt, 0); return (0); _bad: @@ -401,7 +399,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, PF_RULES_WASSERT(); - ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_START | PFR_FLAG_DONE | + PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -413,7 +412,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct 
pfr_addr *addr, int size, tmpkt = pfr_create_ktable(&V_pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); - pfr_mark_addrs(kt); + if (flags & PFR_FLAG_START) + pfr_mark_addrs(kt); SLIST_INIT(&addq); SLIST_INIT(&delq); SLIST_INIT(&changeq); @@ -446,6 +446,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } p = pfr_create_kentry(&ad, (kt->pfrkt_flags & PFR_TFLAG_COUNTERS) != 0); if (p == NULL) senderr(ENOMEM); + p->pfrke_mark = PFR_FB_ADDED; if (pfr_route_kentry(tmpkt, p)) { @@ -461,7 +462,8 @@ _skip: if (flags & PFR_FLAG_FEEDBACK) bcopy(&ad, addr + i, sizeof(ad)); } - pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); + if (flags & PFR_FLAG_DONE) + pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); if ((flags & PFR_FLAG_FEEDBACK) && *size2) { if (*size2 < size+xdel) { *size2 = size+xdel; @@ -819,10 +821,10 @@ pfr_create_kentry(struct pfr_addr *ad, bool counters) static void pfr_destroy_kentries(struct pfr_kentryworkq *workq) { - struct pfr_kentry *p, *q; + struct pfr_kentry *p; - for (p = SLIST_FIRST(workq); p != NULL; p = q) { - q = SLIST_NEXT(p, pfrke_workq); + while ((p = SLIST_FIRST(workq)) != NULL) { + SLIST_REMOVE_HEAD(workq, pfrke_workq); pfr_destroy_kentry(p); } } @@ -1680,8 +1682,7 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, } if (!(flags & PFR_FLAG_DUMMY)) { - for (p = SLIST_FIRST(&workq); p != NULL; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + SLIST_FOREACH_SAFE(p, &workq, pfrkt_workq, q) { pfr_commit_ktable(p, tzero); } rs->topen = 0; @@ -1710,7 +1711,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero) } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { /* kt might contain addresses */ struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; - struct pfr_kentry *p, *q, *next; + struct pfr_kentry *p, *q; struct pfr_addr ad; pfr_enqueue_addrs(shadow, &addrq, NULL, 0); @@ -1720,7 +1721,8 @@ pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero) SLIST_INIT(&delq); 
SLIST_INIT(&garbageq); pfr_clean_node_mask(shadow, &addrq); - SLIST_FOREACH_SAFE(p, &addrq, pfrke_workq, next) { + while ((p = SLIST_FIRST(&addrq)) != NULL) { + SLIST_REMOVE_HEAD(&addrq, pfrke_workq); pfr_copyout_addr(&ad, p); q = pfr_lookup_addr(kt, &ad, 1); if (q != NULL) { @@ -1864,8 +1866,7 @@ pfr_setflags_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p, *q; - for (p = SLIST_FIRST(workq); p; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + SLIST_FOREACH_SAFE(p, workq, pfrkt_workq, q) { pfr_setflags_ktable(p, p->pfrkt_nflags); } } @@ -2015,10 +2016,10 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset) static void pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) { - struct pfr_ktable *p, *q; + struct pfr_ktable *p; - for (p = SLIST_FIRST(workq); p; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + while ((p = SLIST_FIRST(workq)) != NULL) { + SLIST_REMOVE_HEAD(workq, pfrkt_workq); pfr_destroy_ktable(p, flushaddr); } } @@ -2074,17 +2075,16 @@ pfr_lookup_table(struct pfr_table *tbl) (struct pfr_ktable *)tbl)); } -int -pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +struct pfr_kentry * +pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + int exact) { struct pfr_kentry *ke = NULL; - int match; PF_RULES_RASSERT(); - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return (0); switch (af) { @@ -2121,11 +2121,26 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) default: unhandled_af(af); } + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + + return (ke); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + ke = pfr_kentry_byaddr(kt, a, af, 0); + match = (ke && !ke->pfrke_not); if (match) 
pfr_kstate_counter_add(&kt->pfrkt_match, 1); else pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); + return (match); } @@ -2135,9 +2150,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, { struct pfr_kentry *ke = NULL; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return; switch (af) { @@ -2177,7 +2191,7 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, if ((ke == NULL || ke->pfrke_not) != notrule) { if (op_pass != PFR_OP_PASS) DPFPRINTF(PF_DEBUG_URGENT, - ("pfr_update_stats: assertion failed.\n")); + "pfr_update_stats: assertion failed."); op_pass = PFR_OP_XPASS; } pfr_kstate_counter_add(&kt->pfrkt_packets[dir_out][op_pass], 1); @@ -2281,7 +2295,7 @@ pfr_detach_table(struct pfr_ktable *kt) int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, - sa_family_t af, pf_addr_filter_func_t filter) + sa_family_t af, pf_addr_filter_func_t filter, bool loop_once) { struct pf_addr *addr, cur, mask, umask_addr; union sockaddr_union uaddr, umask; @@ -2306,9 +2320,8 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, unhandled_af(af); } - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return (-1); idx = *pidx; @@ -2327,7 +2340,7 @@ _next_block: ke = pfr_kentry_byidx(kt, idx, af); if (ke == NULL) { /* we don't have this idx, try looping */ - if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { + if ((loop || loop_once) || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); return (1); } @@ -2455,3 +2468,14 @@ pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) unhandled_af(dyn->pfid_af); } } + +struct pfr_ktable * 
+pfr_ktable_select_active(struct pfr_ktable *kt) +{ + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (NULL); + + return (kt); +} |
