aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKristof Provost <kp@FreeBSD.org>2025-12-30 19:06:48 +0000
committerKristof Provost <kp@FreeBSD.org>2026-01-14 06:44:38 +0000
commit4616481212302b5d875cfc7a00766af017318f7f (patch)
tree96d5023704b950ed22ce7dbe8648149cfc8be725
parentc498eaa2f9090d7bdc6456181d8bf74869288bbb (diff)
pf: introduce source and state limiters
both source and state limiters can provide constraints on the number of states that a set of rules can create, and optionally the rate at which they are created. state limiters have a single limit, but source limiters apply limits against a source address (or network). the source address entries are dynamically created and destroyed, and are also limited. this started out because i was struggling to understand the source and state tracking options in pf.conf, and looking at the code made it worse. it looked like some functionality was missing, and the code also did some things that surprised me. taking a step back from it, even if it did work, what is described doesn't work well outside very simple environments. the functionality i'm talking about is most of the stuff in the Stateful Tracking Options section of pf.conf(5). some of the problems are illustrated by one of the simplest options: the "max number" option that limits the number of states that a rule is allowed to create: - wiring limits up to rules is a problem because when you load a new ruleset the limit is reset, allowing more states to be created than you intended. - a single "rule" in pf.conf can expand to multiple rules in the kernel thanks to things like macro expansion for multiple ports. "max 1000" on a line in pf.conf could end up being many times that in effect. - when a state limit on a rule is reached, the packet is dropped. this makes it difficult to do other things with the packet, such as redirecting it to a tarpit or another server that replies with an outage notice or such. a state limiter solves these problems. the example from the pf.conf.5 change demonstrates this: An example use case for a state limiter is to restrict the number of connections allowed to a service that is accessible via multiple protocols, e.g. 
a DNS server that can be accessed by both TCP and UDP on port 53, DNS-over-TLS on TCP port 853, and DNS-over-HTTPS on TCP port 443 can be limited to 1000 concurrent connections: state limiter "dns-server" id 1 limit 1000 pass in proto { tcp udp } to port domain state limiter "dns-server" pass in proto tcp to port { 853 443 } state limiter "dns-server" a single limit across all these protocols can't be implemented with per rule state limits, and any limits that were applied are reset if the ruleset is reloaded. the existing source-track implementation appears to be incomplete, i could only see code for "source-track global", but not "source-track rule". source-track global is too heavy and unwieldy a hammer, and source-track rule would suffer the same issues around rule lifetimes and expansions that the "max number" state tracking config above has. a slightly expanded example from the pf.conf.5 change for source limiters: An example use for a source limiter is the mitigation of denial of service caused by the exhaustion of firewall resources by network or port scans from outside the network. The states created by any one scanner from any one source address can be limited to avoid impacting other sources. Below, up to 10000 IPv4 hosts and IPv6 /64 networks from the external network are each limited to a maximum of 1000 connections, and are rate limited to creating 100 states over a 10 second interval: source limiter "internet" id 1 entries 10000 \ limit 1000 rate 100/10 \ inet6 mask 64 block in on egress pass in quick on egress source limiter "internet" pass in on egress proto tcp probability 20% rdr-to $tarpit the extra bit is if the source limiter doesn't have "space" for the state, the rule doesn't match and you can fall through to tarpitting 20% of the tcp connections for fun. i've been using this in anger in production for over 3 years now. sashan@ has been poking me along (slowly) to get it in a good enough shape for the tree for a long time. 
it's been one of those years. bluhm@ says this doesn't break the regress tests. ok sashan@ Obtained from: OpenBSD, dlg <dlg@openbsd.org>, 8463cae72e Sponsored by: Rubicon Communications, LLC ("Netgate")
-rw-r--r--lib/libpfctl/libpfctl.c5
-rw-r--r--lib/libpfctl/libpfctl.h2
-rw-r--r--sbin/pfctl/parse.y497
-rw-r--r--sbin/pfctl/pfctl.822
-rw-r--r--sbin/pfctl/pfctl.c780
-rw-r--r--sbin/pfctl/pfctl_parser.c55
-rw-r--r--sbin/pfctl/pfctl_parser.h33
-rw-r--r--share/man/man5/pf.conf.5160
-rw-r--r--sys/net/pfvar.h414
-rw-r--r--sys/netpfil/pf/pf.c647
-rw-r--r--sys/netpfil/pf/pf_ioctl.c922
-rw-r--r--sys/netpfil/pf/pf_nl.c4
-rw-r--r--sys/netpfil/pf/pf_nl.h2
-rw-r--r--sys/netpfil/pf/pf_table.c20
14 files changed, 3458 insertions, 105 deletions
diff --git a/lib/libpfctl/libpfctl.c b/lib/libpfctl/libpfctl.c
index f8c92a5cd319..c3fdaf70ad0d 100644
--- a/lib/libpfctl/libpfctl.c
+++ b/lib/libpfctl/libpfctl.c
@@ -1313,6 +1313,9 @@ snl_add_msg_attr_pf_rule(struct snl_writer *nw, uint32_t type, const struct pfct
snl_add_msg_attr_ip6(nw, PF_RT_DIVERT_ADDRESS, &r->divert.addr.v6);
snl_add_msg_attr_u16(nw, PF_RT_DIVERT_PORT, r->divert.port);
+ snl_add_msg_attr_u8(nw, PF_RT_STATE_LIMIT, r->statelim);
+ snl_add_msg_attr_u8(nw, PF_RT_SOURCE_LIMIT, r->sourcelim);
+
snl_end_attr_nested(nw, off);
}
@@ -1704,6 +1707,8 @@ static struct snl_attr_parser ap_getrule[] = {
{ .type = PF_RT_TYPE_2, .off = _OUT(r.type), .cb = snl_attr_get_uint16 },
{ .type = PF_RT_CODE_2, .off = _OUT(r.code), .cb = snl_attr_get_uint16 },
{ .type = PF_RT_EXPTIME, .off = _OUT(r.exptime), .cb = snl_attr_get_time_t },
+ { .type = PF_RT_STATE_LIMIT, .off = _OUT(r.statelim), .cb = snl_attr_get_uint8 },
+ { .type = PF_RT_SOURCE_LIMIT, .off = _OUT(r.sourcelim), .cb = snl_attr_get_uint8 },
};
#undef _OUT
SNL_DECLARE_PARSER(getrule_parser, struct genlmsghdr, snl_f_p_empty, ap_getrule);
diff --git a/lib/libpfctl/libpfctl.h b/lib/libpfctl/libpfctl.h
index b885497ab0e8..785ac2bc7fd7 100644
--- a/lib/libpfctl/libpfctl.h
+++ b/lib/libpfctl/libpfctl.h
@@ -249,6 +249,8 @@ struct pfctl_rule {
struct pf_rule_gid gid;
char rcv_ifname[IFNAMSIZ];
bool rcvifnot;
+ uint8_t statelim;
+ uint8_t sourcelim;
uint32_t rule_flag;
uint8_t action;
diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
index 127e2c257d69..ded74a6391f1 100644
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -72,6 +72,8 @@
#include "pfctl_parser.h"
#include "pfctl.h"
+#define ISSET(_v, _m) ((_v) & (_m))
+
static struct pfctl *pf = NULL;
static int debug = 0;
static int rulestate = 0;
@@ -178,7 +180,8 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK,
PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES,
PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK,
PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY,
- PF_STATE_OPT_PFLOW, PF_STATE_OPT_ALLOW_RELATED };
+ PF_STATE_OPT_PFLOW, PF_STATE_OPT_ALLOW_RELATED,
+ PF_STATE_OPT_STATELIM, PF_STATE_OPT_SOURCELIM };
enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE };
@@ -284,6 +287,8 @@ static struct filter_opts {
u_int32_t tos;
u_int32_t prob;
u_int32_t ridentifier;
+ u_int32_t statelim;
+ u_int32_t sourcelim;
struct {
int action;
struct node_state_opt *options;
@@ -362,6 +367,51 @@ static struct table_opts {
struct node_tinithead init_nodes;
} table_opts;
+/*
+ * Scratch state filled in while a "state limiter" definition is parsed.
+ * The statelim_opt actions store each option here and record it in
+ * `marker`; the statelim action then copies the result into the ioctl
+ * structure.
+ */
+struct statelim_opts {
+ /* Bitmask of STATELIM_M_* flags: which options have been seen so far. */
+ unsigned int marker;
+#define STATELIM_M_ID 0x01
+#define STATELIM_M_LIMIT 0x02
+#define STATELIM_M_RATE 0x04
+
+ /* Limiter identifier given with "id". */
+ uint32_t id;
+ /* NOTE(review): not written by the grammar actions visible here. */
+ char name[PF_STATELIM_NAME_LEN];
+ /* Value given with "limit". */
+ unsigned int limit;
+ /* "rate <limit>/<seconds>"; both stay 0 when no rate is given. */
+ struct {
+ unsigned int limit;
+ unsigned int seconds;
+ } rate;
+};
+
+static struct statelim_opts statelim_opts;
+
+/*
+ * Scratch state filled in while a "source limiter" definition is parsed.
+ * The sourcelim_opt actions store each option here and record it in
+ * `marker`; the sourcelim action then copies the result into the ioctl
+ * structure.
+ */
+struct sourcelim_opts {
+ /* Bitmask of SOURCELIM_M_* flags: which options have been seen so far. */
+ unsigned int marker;
+#define SOURCELIM_M_ID 0x01
+#define SOURCELIM_M_ENTRIES 0x02
+#define SOURCELIM_M_LIMIT 0x04
+#define SOURCELIM_M_RATE 0x08
+#define SOURCELIM_M_TABLE 0x10
+#define SOURCELIM_M_INET_MASK 0x20
+#define SOURCELIM_M_INET6_MASK 0x40
+
+ /* Limiter identifier given with "id". */
+ uint32_t id;
+ /* Value given with "entries". */
+ unsigned int entries;
+ /* Value given with "limit". */
+ unsigned int limit;
+ /* "rate <limit>/<seconds>"; both stay 0 when no rate is given. */
+ struct {
+ unsigned int limit;
+ unsigned int seconds;
+ } rate;
+ /*
+ * "table <name> above N [below M]"; above/below are later copied to
+ * the ioctl's overload_hwm/overload_lwm fields.
+ */
+ struct {
+ char name[PF_TABLE_NAME_SIZE];
+ unsigned int above;
+ unsigned int below;
+ } table;
+ /* Prefix lengths; the sourcelim_opts rule presets them to 32 and 128. */
+ unsigned int inet_mask;
+ unsigned int inet6_mask;
+};
+
+static struct sourcelim_opts sourcelim_opts;
+
static struct codel_opts codel_opts;
static struct node_hfsc_opts hfsc_opts;
static struct node_fairq_opts fairq_opts;
@@ -513,6 +563,8 @@ typedef struct {
struct node_hfsc_opts hfsc_opts;
struct node_fairq_opts fairq_opts;
struct codel_opts codel_opts;
+ struct statelim_opts *statelim_opts;
+ struct sourcelim_opts *sourcelim_opts;
struct pfctl_watermarks *watermarks;
} v;
int lineno;
@@ -548,12 +600,13 @@ int parseport(char *, struct range *r, int);
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY BRIDGE_TO RECEIVEDON NE LE GE AFTO NATTO RDRTO
%token BINATTO MAXPKTRATE MAXPKTSIZE IPV6NH
+%token LIMITER ID RATE SOURCE ENTRIES ABOVE BELOW MASK
%token <v.string> STRING
%token <v.number> NUMBER
%token <v.i> PORTBINARY
%type <v.interface> interface if_list if_item_not if_item
%type <v.number> number icmptype icmp6type uid gid
-%type <v.number> tos not yesno optnodf
+%type <v.number> tos not yesno optnodf sourcelim_opt_below
%type <v.probability> probability
%type <v.i> no dir af fragcache optimizer syncookie_val
%type <v.i> sourcetrack flush unaryop statelock
@@ -610,12 +663,19 @@ int parseport(char *, struct range *r, int);
%type <v.etheraddr> etherfrom etherto
%type <v.bridge_to> bridge
%type <v.mac> xmac mac mac_list macspec
+%type <v.string> statelim_nm sourcelim_nm
+%type <v.number> statelim_id sourcelim_id
+%type <v.number> statelim_filter_opt sourcelim_filter_opt
+%type <v.statelim_opts> statelim_opts
+%type <v.sourcelim_opts> sourcelim_opts
%%
ruleset : /* empty */
| ruleset include '\n'
| ruleset '\n'
| ruleset option '\n'
+ | ruleset statelim '\n'
+ | ruleset sourcelim '\n'
| ruleset etherrule '\n'
| ruleset etheranchorrule '\n'
| ruleset scrubrule '\n'
@@ -2322,6 +2382,401 @@ qassign_item : STRING {
}
;
+/*
+ * Top-level "state limiter <name> <options>" definition.
+ *
+ * Requires both an id and a limit, copies the parsed options into a newly
+ * allocated pfctl_statelim and registers it with pfctl_add_statelim().
+ * The name string is owned by this action and freed on every path; the
+ * limiter itself is freed on every failure path.
+ */
+statelim	: statelim_nm statelim_opts {
+			struct pfctl_statelim *stlim;
+			size_t len;
+
+			if (!ISSET($2->marker, STATELIM_M_ID)) {
+				yyerror("id not specified");
+				free($1);
+				YYERROR;
+			}
+			if (!ISSET($2->marker, STATELIM_M_LIMIT)) {
+				yyerror("limit not specified");
+				free($1);
+				YYERROR;
+			}
+
+			stlim = calloc(1, sizeof(*stlim));
+			if (stlim == NULL)
+				err(1, "state limiter: malloc");
+
+			len = strlcpy(stlim->ioc.name, $1,
+			    sizeof(stlim->ioc.name));
+			free($1);
+			if (len >= sizeof(stlim->ioc.name)) {
+				/*
+				 * statelim_nm already bounds the length, so
+				 * this is a safety net; still report it and
+				 * release the limiter instead of leaking it.
+				 */
+				yyerror("state limiter name is too long");
+				free(stlim);
+				YYERROR;
+			}
+
+			stlim->ioc.id = $2->id;
+			stlim->ioc.limit = $2->limit;
+			stlim->ioc.rate.limit = $2->rate.limit;
+			stlim->ioc.rate.seconds = $2->rate.seconds;
+
+			if (pfctl_add_statelim(pf, stlim) != 0) {
+				yyerror("state limiter %s id %u"
+				    " already exists",
+				    stlim->ioc.name, stlim->ioc.id);
+				free(stlim);
+				YYERROR;
+			}
+		}
+		;
+
+/*
+ * "state limiter <name>": validates the name length and yields the
+ * (still heap-allocated) string; the string is freed here on rejection.
+ */
+statelim_nm	: STATE LIMITER string {
+			const char *problem = NULL;
+			size_t namelen = strlen($3);
+
+			if (namelen == 0)
+				problem = "state limiter name is too short";
+			else if (namelen >= PF_STATELIM_NAME_LEN)
+				problem = "state limiter name is too long";
+
+			if (problem != NULL) {
+				yyerror("%s", problem);
+				free($3);
+				YYERROR;
+			}
+			$$ = $3;
+		}
+		;
+
+/*
+ * "id <number>": yields the number once it is known to lie within
+ * PF_STATELIM_ID_MIN..PF_STATELIM_ID_MAX.
+ */
+statelim_id	: ID NUMBER {
+			const int in_range = ($2 >= PF_STATELIM_ID_MIN &&
+			    $2 <= PF_STATELIM_ID_MAX);
+
+			if (!in_range) {
+				yyerror("state limiter id %lld: "
+				    "invalid identifier", $2);
+				YYERROR;
+			}
+
+			$$ = $2;
+		}
+		;
+
+/*
+ * Option list of a state limiter definition.  An empty list is rejected.
+ * Otherwise the shared statelim_opts scratch structure is zeroed by the
+ * mid-rule action before any option is parsed, and a pointer to it is
+ * the value of the rule.
+ */
+statelim_opts : /* empty */ {
+ yyerror("state limiter missing options");
+ YYERROR;
+ }
+ | {
+ memset(&statelim_opts, 0, sizeof(statelim_opts));
+ } statelim_opts_l {
+ $$ = &statelim_opts;
+ }
+ ;
+
+/* One or more options, in any order. */
+statelim_opts_l : statelim_opts_l statelim_opt
+ | statelim_opt
+ ;
+
+/*
+ * A single state limiter option: "id N", "limit N" or "rate N/SECONDS".
+ * Each option may appear only once; the STATELIM_M_* bit in
+ * statelim_opts.marker is checked first and set after the value is stored.
+ */
+statelim_opt : statelim_id {
+ if (ISSET(statelim_opts.marker, STATELIM_M_ID)) {
+ yyerror("id cannot be respecified");
+ YYERROR;
+ }
+
+ statelim_opts.id = $1;
+
+ statelim_opts.marker |= STATELIM_M_ID;
+ }
+ | LIMIT NUMBER {
+ if (ISSET(statelim_opts.marker, STATELIM_M_LIMIT)) {
+ yyerror("limit cannot be respecified");
+ YYERROR;
+ }
+
+ /* The limit must lie within the PF_STATELIM_LIMIT_* bounds. */
+ if ($2 < PF_STATELIM_LIMIT_MIN ||
+ $2 > PF_STATELIM_LIMIT_MAX) {
+ yyerror("invalid state limiter limit");
+ YYERROR;
+ }
+
+ statelim_opts.limit = $2;
+
+ statelim_opts.marker |= STATELIM_M_LIMIT;
+ }
+ | RATE NUMBER '/' NUMBER {
+ if (ISSET(statelim_opts.marker, STATELIM_M_RATE)) {
+ yyerror("rate cannot be respecified");
+ YYERROR;
+ }
+ /* Both the count and the interval must be at least 1. */
+ if ($2 < 1) {
+ yyerror("invalid rate limit %lld", $2);
+ YYERROR;
+ }
+ if ($4 < 1) {
+ yyerror("invalid rate seconds %lld", $4);
+ YYERROR;
+ }
+
+ statelim_opts.rate.limit = $2;
+ statelim_opts.rate.seconds = $4;
+
+ statelim_opts.marker |= STATELIM_M_RATE;
+ }
+ ;
+
+/*
+ * Reference to a state limiter from a rule's filter options, either by
+ * name ("state limiter <name>") or by number ("state limiter id N").
+ * The value of the rule is the limiter id.
+ */
+statelim_filter_opt
+ : statelim_nm {
+ struct pfctl_statelim *stlim;
+
+ /* Look the limiter up by name; the name string is no longer needed. */
+ stlim = pfctl_get_statelim_nm(pf, $1);
+ free($1);
+ if (stlim == NULL) {
+ yyerror("state limiter not found");
+ YYERROR;
+ }
+
+ $$ = stlim->ioc.id;
+ }
+ | STATE LIMITER statelim_id {
+ /* A numeric id is passed through without a lookup. */
+ $$ = $3;
+ }
+ ;
+
+/*
+ * Top-level "source limiter <name> <options>" definition.
+ *
+ * Requires an id, an entries count and a state limit, copies the parsed
+ * options into a newly allocated pfctl_sourcelim and registers it with
+ * pfctl_add_sourcelim().  The name string is owned by this action and
+ * freed on every path; the limiter itself is freed on every failure path.
+ */
+sourcelim	: sourcelim_nm sourcelim_opts {
+			struct pfctl_sourcelim *srlim;
+			size_t len;
+
+			if (!ISSET($2->marker, SOURCELIM_M_ID)) {
+				yyerror("id not specified");
+				free($1);
+				YYERROR;
+			}
+			if (!ISSET($2->marker, SOURCELIM_M_ENTRIES)) {
+				yyerror("entries not specified");
+				free($1);
+				YYERROR;
+			}
+			if (!ISSET($2->marker, SOURCELIM_M_LIMIT)) {
+				yyerror("state limit not specified");
+				free($1);
+				YYERROR;
+			}
+
+			srlim = calloc(1, sizeof(*srlim));
+			if (srlim == NULL)
+				err(1, "source limiter: malloc");
+
+			len = strlcpy(srlim->ioc.name, $1,
+			    sizeof(srlim->ioc.name));
+			free($1);
+			if (len >= sizeof(srlim->ioc.name)) {
+				/*
+				 * sourcelim_nm already bounds the length, so
+				 * this is a safety net; still report it and
+				 * release the limiter instead of leaking it.
+				 */
+				yyerror("source limiter name is too long");
+				free(srlim);
+				YYERROR;
+			}
+
+			srlim->ioc.id = $2->id;
+			srlim->ioc.entries = $2->entries;
+			srlim->ioc.limit = $2->limit;
+			srlim->ioc.rate.limit = $2->rate.limit;
+			srlim->ioc.rate.seconds = $2->rate.seconds;
+
+			/* The overload table is optional. */
+			if (ISSET($2->marker, SOURCELIM_M_TABLE)) {
+				if (strlcpy(srlim->ioc.overload_tblname,
+				    $2->table.name,
+				    sizeof(srlim->ioc.overload_tblname)) >=
+				    sizeof(srlim->ioc.overload_tblname)) {
+					abort();
+				}
+				srlim->ioc.overload_hwm = $2->table.above;
+				srlim->ioc.overload_lwm = $2->table.below;
+			}
+
+			/* Always set: the option list presets 32 and 128. */
+			srlim->ioc.inet_prefix = $2->inet_mask;
+			srlim->ioc.inet6_prefix = $2->inet6_mask;
+
+			if (pfctl_add_sourcelim(pf, srlim) != 0) {
+				yyerror("source limiter %s id %u"
+				    " already exists",
+				    srlim->ioc.name, srlim->ioc.id);
+				free(srlim);
+				YYERROR;
+			}
+		}
+		;
+
+/*
+ * "source limiter <name>": validates the name length and yields the
+ * (still heap-allocated) string; the string is freed here on rejection.
+ */
+sourcelim_nm	: SOURCE LIMITER string {
+			const char *problem = NULL;
+			size_t namelen = strlen($3);
+
+			if (namelen == 0)
+				problem = "source limiter name is too short";
+			else if (namelen >= PF_SOURCELIM_NAME_LEN)
+				problem = "source limiter name is too long";
+
+			if (problem != NULL) {
+				yyerror("%s", problem);
+				free($3);
+				YYERROR;
+			}
+			$$ = $3;
+		}
+		;
+
+/*
+ * "id <number>": yields the number once it is known to lie within
+ * PF_SOURCELIM_ID_MIN..PF_SOURCELIM_ID_MAX.
+ */
+sourcelim_id	: ID NUMBER {
+			const int in_range = ($2 >= PF_SOURCELIM_ID_MIN &&
+			    $2 <= PF_SOURCELIM_ID_MAX);
+
+			if (!in_range) {
+				yyerror("source limiter id %lld: "
+				    "invalid identifier", $2);
+				YYERROR;
+			}
+
+			$$ = $2;
+		}
+		;
+
+/*
+ * Option list of a source limiter definition.  An empty list is rejected.
+ * Otherwise the shared sourcelim_opts scratch structure is reset by the
+ * mid-rule action before any option is parsed (prefix lengths default to
+ * 32 for inet and 128 for inet6), and a pointer to it is the value of the
+ * rule.
+ */
+sourcelim_opts : /* empty */ {
+ yyerror("source limiter missing options");
+ YYERROR;
+ }
+ | {
+ memset(&sourcelim_opts, 0, sizeof(sourcelim_opts));
+ sourcelim_opts.inet_mask = 32;
+ sourcelim_opts.inet6_mask = 128;
+ } sourcelim_opts_l {
+ $$ = &sourcelim_opts;
+ }
+ ;
+
+/* One or more options, in any order. */
+sourcelim_opts_l : sourcelim_opts_l sourcelim_opt
+ | sourcelim_opt
+ ;
+
+/*
+ * A single source limiter option.  Each option may appear only once: the
+ * matching SOURCELIM_M_* bit in sourcelim_opts.marker is checked first and
+ * set after the value has been stored.
+ *
+ *   id N | entries N | limit N | rate N/SECONDS |
+ *   table <name> above N [below M] | inet mask N | inet6 mask N
+ */
+sourcelim_opt	: sourcelim_id {
+			if (ISSET(sourcelim_opts.marker, SOURCELIM_M_ID)) {
+				yyerror("id cannot be respecified");
+				YYERROR;
+			}
+
+			sourcelim_opts.id = $1;
+
+			sourcelim_opts.marker |= SOURCELIM_M_ID;
+		}
+		| ENTRIES NUMBER {
+			if (ISSET(sourcelim_opts.marker, SOURCELIM_M_ENTRIES)) {
+				yyerror("entries cannot be respecified");
+				YYERROR;
+			}
+
+			sourcelim_opts.entries = $2;
+
+			sourcelim_opts.marker |= SOURCELIM_M_ENTRIES;
+		}
+		| LIMIT NUMBER {
+			if (ISSET(sourcelim_opts.marker, SOURCELIM_M_LIMIT)) {
+				yyerror("state limit cannot be respecified");
+				YYERROR;
+			}
+
+			sourcelim_opts.limit = $2;
+
+			sourcelim_opts.marker |= SOURCELIM_M_LIMIT;
+		}
+		| RATE NUMBER '/' NUMBER {
+			if (ISSET(sourcelim_opts.marker, SOURCELIM_M_RATE)) {
+				yyerror("rate cannot be respecified");
+				YYERROR;
+			}
+
+			sourcelim_opts.rate.limit = $2;
+			sourcelim_opts.rate.seconds = $4;
+
+			sourcelim_opts.marker |= SOURCELIM_M_RATE;
+		}
+		| TABLE '<' STRING '>' ABOVE NUMBER sourcelim_opt_below {
+			size_t stringlen;
+
+			if (ISSET(sourcelim_opts.marker, SOURCELIM_M_TABLE)) {
+				free($3);
+				yyerror("table cannot be respecified");
+				YYERROR;
+			}
+
+			/* The table name string is owned by this action. */
+			stringlen = strlcpy(sourcelim_opts.table.name,
+			    $3, sizeof(sourcelim_opts.table.name));
+			free($3);
+			if (stringlen == 0 ||
+			    stringlen >= PF_TABLE_NAME_SIZE) {
+				yyerror("invalid table name");
+				YYERROR;
+			}
+
+			if ($6 < 0) {
+				yyerror("above limit is invalid");
+				YYERROR;
+			}
+			/* "below" is 0 when omitted, so this also passes. */
+			if ($7 > $6) {
+				yyerror("below limit higher than above limit");
+				YYERROR;
+			}
+
+			sourcelim_opts.table.above = $6;
+			sourcelim_opts.table.below = $7;
+
+			sourcelim_opts.marker |= SOURCELIM_M_TABLE;
+		}
+		| INET MASK NUMBER {
+			if (ISSET(sourcelim_opts.marker,
+			    SOURCELIM_M_INET_MASK)) {
+				yyerror("inet mask cannot be respecified");
+				YYERROR;
+			}
+
+			if ($3 < 1 || $3 > 32) {
+				yyerror("inet mask length out of range");
+				YYERROR;
+			}
+
+			sourcelim_opts.inet_mask = $3;
+
+			sourcelim_opts.marker |= SOURCELIM_M_INET_MASK;
+		}
+		| INET6 MASK NUMBER {
+			if (ISSET(sourcelim_opts.marker,
+			    SOURCELIM_M_INET6_MASK)) {
+				yyerror("inet6 mask cannot be respecified");
+				YYERROR;
+			}
+
+			if ($3 < 1 || $3 > 128) {
+				yyerror("inet6 mask length out of range");
+				YYERROR;
+			}
+
+			sourcelim_opts.inet6_mask = $3;
+
+			sourcelim_opts.marker |= SOURCELIM_M_INET6_MASK;
+		}
+		;
+
+/*
+ * Optional "below <number>" suffix of the table option; yields 0 when
+ * the suffix is absent and rejects values smaller than 1 otherwise.
+ */
+sourcelim_opt_below
+		: /* empty */ {
+			$$ = 0;
+		}
+		| BELOW NUMBER {
+			if ($2 <= 0) {
+				yyerror("below limit is invalid");
+				YYERROR;
+			}
+			$$ = $2;
+		}
+		;
+
+/*
+ * Reference to a source limiter from a rule's filter options, either by
+ * name ("source limiter <name>") or by number ("source limiter id N").
+ * The value of the rule is the limiter id.
+ */
+sourcelim_filter_opt
+ : sourcelim_nm {
+ struct pfctl_sourcelim *srlim;
+
+ /* Look the limiter up by name; the name string is no longer needed. */
+ srlim = pfctl_get_sourcelim_nm(pf, $1);
+ free($1);
+ if (srlim == NULL) {
+ yyerror("source limiter not found");
+ YYERROR;
+ }
+
+ $$ = srlim->ioc.id;
+ }
+ | SOURCE LIMITER sourcelim_id {
+ /* A numeric id is passed through without a lookup. */
+ $$ = $3;
+ }
+ ;
+
pfrule : action dir logquick interface route af proto fromto
filter_opts
{
@@ -2562,6 +3017,7 @@ pfrule : action dir logquick interface route af proto fromto
}
r.timeout[o->data.timeout.number] =
o->data.timeout.seconds;
+ break;
}
o = o->next;
if (!defaults)
@@ -2713,12 +3169,16 @@ pfrule : action dir logquick interface route af proto fromto
filter_opts : {
bzero(&filter_opts, sizeof filter_opts);
+ filter_opts.statelim = PF_STATELIM_ID_NONE;
+ filter_opts.sourcelim = PF_SOURCELIM_ID_NONE;
filter_opts.rtableid = -1;
}
filter_opts_l
{ $$ = filter_opts; }
| /* empty */ {
bzero(&filter_opts, sizeof filter_opts);
+ filter_opts.statelim = PF_STATELIM_ID_NONE;
+ filter_opts.sourcelim = PF_SOURCELIM_ID_NONE;
filter_opts.rtableid = -1;
$$ = filter_opts;
}
@@ -2862,6 +3322,20 @@ filter_opt : USER uids {
if (filter_opts.prob == 0)
filter_opts.prob = 1;
}
+ | statelim_filter_opt {
+ if (filter_opts.statelim != PF_STATELIM_ID_NONE) {
+ yyerror("state limiter already specified");
+ YYERROR;
+ }
+ filter_opts.statelim = $1;
+ }
+ | sourcelim_filter_opt {
+ if (filter_opts.sourcelim != PF_SOURCELIM_ID_NONE) {
+ yyerror("source limiter already specified");
+ YYERROR;
+ }
+ filter_opts.sourcelim = $1;
+ }
| RTABLE NUMBER {
if ($2 < 0 || $2 > rt_tableid_max()) {
yyerror("invalid rtable id");
@@ -6615,6 +7089,7 @@ lookup(char *s)
{
/* this has to be sorted always */
static const struct keywords keywords[] = {
+ { "above", ABOVE},
{ "af-to", AFTO},
{ "all", ALL},
{ "allow-opts", ALLOWOPTS},
@@ -6624,6 +7099,7 @@ lookup(char *s)
{ "antispoof", ANTISPOOF},
{ "any", ANY},
{ "bandwidth", BANDWIDTH},
+ { "below", BELOW},
{ "binat", BINAT},
{ "binat-anchor", BINATANCHOR},
{ "binat-to", BINATTO},
@@ -6643,6 +7119,7 @@ lookup(char *s)
{ "drop", DROP},
{ "dup-to", DUPTO},
{ "endpoint-independent", ENDPI},
+ { "entries", ENTRIES},
{ "ether", ETHER},
{ "fail-policy", FAILPOLICY},
{ "fairq", FAIRQ},
@@ -6662,6 +7139,7 @@ lookup(char *s)
{ "hostid", HOSTID},
{ "icmp-type", ICMPTYPE},
{ "icmp6-type", ICMP6TYPE},
+ { "id", ID},
{ "if-bound", IFBOUND},
{ "in", IN},
{ "include", INCLUDE},
@@ -6673,11 +7151,13 @@ lookup(char *s)
{ "l3", L3},
{ "label", LABEL},
{ "limit", LIMIT},
+ { "limiter", LIMITER},
{ "linkshare", LINKSHARE},
{ "load", LOAD},
{ "log", LOG},
{ "loginterface", LOGINTERFACE},
{ "map-e-portset", MAPEPORTSET},
+ { "mask", MASK},
{ "match", MATCH},
{ "matches", MATCHES},
{ "max", MAXIMUM},
@@ -6717,6 +7197,7 @@ lookup(char *s)
{ "quick", QUICK},
{ "random", RANDOM},
{ "random-id", RANDOMID},
+ { "rate", RATE},
{ "rdr", RDR},
{ "rdr-anchor", RDRANCHOR},
{ "rdr-to", RDRTO},
@@ -6741,6 +7222,7 @@ lookup(char *s)
{ "set-tos", SETTOS},
{ "skip", SKIP},
{ "sloppy", SLOPPY},
+ { "source", SOURCE},
{ "source-hash", SOURCEHASH},
{ "source-track", SOURCETRACK},
{ "state", STATE},
@@ -7720,10 +8202,21 @@ filteropts_to_rule(struct pfctl_rule *r, struct filter_opts *opts)
r->rule_flag |= PFRULE_ONCE;
}
+ if (opts->statelim != PF_STATELIM_ID_NONE && r->action != PF_PASS) {
+ yyerror("state limiter only applies to pass rules");
+ return (1);
+ }
+ if (opts->sourcelim != PF_SOURCELIM_ID_NONE && r->action != PF_PASS) {
+ yyerror("source limiter only applies to pass rules");
+ return (1);
+ }
+
r->keep_state = opts->keep.action;
r->pktrate.limit = opts->pktrate.limit;
r->pktrate.seconds = opts->pktrate.seconds;
r->prob = opts->prob;
+ r->statelim = opts->statelim;
+ r->sourcelim = opts->sourcelim;
r->rtableid = opts->rtableid;
r->ridentifier = opts->ridentifier;
r->max_pkt_size = opts->max_pkt_size;
diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8
index 58de54cdf923..d3c8b1273b79 100644
--- a/sbin/pfctl/pfctl.8
+++ b/sbin/pfctl/pfctl.8
@@ -24,7 +24,7 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd August 28, 2025
+.Dd December 30, 2025
.Dt PFCTL 8
.Os
.Sh NAME
@@ -524,6 +524,26 @@ When used together with
interface statistics are also shown.
.Fl i
can be used to select an interface or a group of interfaces.
+.It Cm Stlimiter
+Show information about state limiters.
+If
+.Fl R Ar id
+is specified as well,
+only the state limiter identified by
+.Ar id
+is shown.
+.It Cm Srclimiter
+Show information about source limiters.
+If
+.Fl R Ar id
+is specified as well,
+only the source limiter identified by
+.Ar id
+is shown.
+If
+.Fl v
+is specified,
+the address entries for the source pools are shown too.
.It Cm all
Show all of the above, except for the lists of interfaces and operating
system fingerprints.
diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
index da27afb0a179..04deccf7e890 100644
--- a/sbin/pfctl/pfctl.c
+++ b/sbin/pfctl/pfctl.c
@@ -60,11 +60,14 @@
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
+#include <stddef.h>
#include <libgen.h>
#include "pfctl_parser.h"
#include "pfctl.h"
+struct pfctl_opt_id;
+
void usage(void);
int pfctl_enable(int, int);
int pfctl_disable(int, int);
@@ -87,6 +90,7 @@ void pfctl_gateway_kill_states(int, const char *, int);
void pfctl_label_kill_states(int, const char *, int);
void pfctl_id_kill_states(int, const char *, int);
void pfctl_key_kill_states(int, const char *, int);
+void pfctl_kill_source(int, const char *, const char *, int);
int pfctl_parse_host(char *, struct pf_rule_addr *);
void pfctl_init_options(struct pfctl *);
int pfctl_load_options(struct pfctl *);
@@ -101,6 +105,8 @@ int pfctl_get_pool(int, struct pfctl_pool *, u_int32_t, u_int32_t, int,
const char *, int);
void pfctl_print_eth_rule_counters(struct pfctl_eth_rule *, int);
void pfctl_print_rule_counters(struct pfctl_rule *, int);
+int pfctl_show_statelims(int, enum pfctl_show);
+int pfctl_show_sourcelims(int, enum pfctl_show, int, const char *);
int pfctl_show_eth_rules(int, char *, int, enum pfctl_show, char *, int, int);
int pfctl_show_rules(int, char *, int, enum pfctl_show, char *, int, int);
int pfctl_show_nat(int, const char *, int, char *, int, int);
@@ -117,6 +123,10 @@ int pfctl_test_altqsupport(int, int);
int pfctl_show_anchors(int, int, char *);
int pfctl_show_eth_anchors(int, int, char *);
int pfctl_ruleset_trans(struct pfctl *, char *, struct pfctl_anchor *, bool);
+void pfctl_load_statelims(struct pfctl *);
+void pfctl_load_statelim(struct pfctl *, struct pfctl_statelim *);
+void pfctl_load_sourcelims(struct pfctl *);
+void pfctl_load_sourcelim(struct pfctl *, struct pfctl_sourcelim *);
int pfctl_eth_ruleset_trans(struct pfctl *, char *,
struct pfctl_eth_anchor *);
int pfctl_load_eth_ruleset(struct pfctl *, char *,
@@ -127,6 +137,7 @@ int pfctl_load_ruleset(struct pfctl *, char *,
struct pfctl_ruleset *, int, int);
int pfctl_load_rule(struct pfctl *, char *, struct pfctl_rule *, int);
const char *pfctl_lookup_option(char *, const char * const *);
+int pfctl_lookup_id(const char *, const struct pfctl_opt_id *);
void pfctl_reset(int, int);
int pfctl_walk_show(int, struct pfioc_ruleset *, void *);
int pfctl_walk_get(int, struct pfioc_ruleset *, void *);
@@ -141,6 +152,38 @@ int pfctl_call_cleartables(int, int, struct pfr_anchoritem *);
int pfctl_call_clearanchors(int, int, struct pfr_anchoritem *);
int pfctl_call_showtables(int, int, struct pfr_anchoritem *);
+RB_PROTOTYPE(pfctl_statelim_ids, pfctl_statelim, entry,
+ pfctl_statelim_id_cmp);
+RB_PROTOTYPE(pfctl_statelim_nms, pfctl_statelim, entry,
+ pfctl_statelim_nm_cmp);
+RB_PROTOTYPE(pfctl_sourcelim_ids, pfctl_sourcelim, entry,
+ pfctl_sourcelim_id_cmp);
+RB_PROTOTYPE(pfctl_sourcelim_nms, pfctl_sourcelim, entry,
+ pfctl_sourcelim_nm_cmp);
+
+/*
+ * Numeric identifiers for the "show" modifiers.  Each name in
+ * showopt_list maps to one of these values; SHOWOPT_NONE (0) is the value
+ * carried by the list's terminating entry.
+ */
+enum showopt_id {
+ SHOWOPT_NONE = 0,
+ SHOWOPT_ETHER,
+ SHOWOPT_NAT,
+ SHOWOPT_QUEUE,
+ SHOWOPT_RULES,
+ SHOWOPT_ANCHORS,
+ SHOWOPT_SOURCES,
+ SHOWOPT_STATES,
+ SHOWOPT_INFO,
+ SHOWOPT_IFACES,
+ SHOWOPT_LABELS,
+ SHOWOPT_TIMEOUTS,
+ SHOWOPT_MEMORY,
+ SHOWOPT_TABLES,
+ SHOWOPT_OSFP,
+ SHOWOPT_RUNNING,
+ SHOWOPT_STATELIMS,
+ SHOWOPT_SOURCELIMS,
+ SHOWOPT_CREATORIDS,
+ SHOWOPT_ALL,
+};
+
static struct pfctl_anchor_global pf_anchors;
struct pfctl_anchor pf_main_anchor;
struct pfctl_eth_anchor pf_eth_main_anchor;
@@ -148,7 +191,7 @@ static struct pfr_buffer skip_b;
static const char *clearopt;
static char *rulesopt;
-static const char *showopt;
+static int showopt;
static const char *debugopt;
static char *anchoropt;
static const char *optiopt = NULL;
@@ -256,10 +299,33 @@ static const char * const clearopt_list[] = {
"ethernet", "Reset", NULL
};
-static const char * const showopt_list[] = {
- "ether", "nat", "queue", "rules", "Anchors", "Sources", "states",
- "info", "Interfaces", "labels", "timeouts", "memory", "Tables",
- "osfp", "Running", "all", "creatorids", NULL
+/* Pairs a command-line modifier name with its numeric identifier. */
+struct pfctl_opt_id {
+ const char *name;
+ int id;
+};
+
+/*
+ * Names accepted as "show" modifiers and the identifier each maps to.
+ * The table ends with an entry whose name is NULL.
+ * NOTE(review): presumably searched by pfctl_lookup_id(); its matching
+ * rules (exact vs. prefix) are not visible here.
+ */
+static const struct pfctl_opt_id showopt_list[] = {
+ { "ether", SHOWOPT_ETHER },
+ { "nat", SHOWOPT_NAT },
+ { "queue", SHOWOPT_QUEUE },
+ { "rules", SHOWOPT_RULES },
+ { "Anchors", SHOWOPT_ANCHORS },
+ { "Sources", SHOWOPT_SOURCES },
+ { "states", SHOWOPT_STATES },
+ { "info", SHOWOPT_INFO },
+ { "Interfaces", SHOWOPT_IFACES },
+ { "labels", SHOWOPT_LABELS },
+ { "timeouts", SHOWOPT_TIMEOUTS },
+ { "memory", SHOWOPT_MEMORY },
+ { "Tables", SHOWOPT_TABLES },
+ { "osfp", SHOWOPT_OSFP },
+ { "Running", SHOWOPT_RUNNING },
+ { "Stlimiters", SHOWOPT_STATELIMS },
+ { "Srclimiters", SHOWOPT_SOURCELIMS },
+ { "creatorids", SHOWOPT_CREATORIDS },
+ { "all", SHOWOPT_ALL },
+
+ { NULL, SHOWOPT_NONE },
+};
static const char * const tblcmdopt_list[] = {
@@ -1192,6 +1258,276 @@ pfctl_print_title(char *title)
}
+/*
+ * Print every state limiter known to the kernel.
+ *
+ * Limiters are fetched one at a time with DIOCGETNSTATELIM, starting at
+ * PF_STATELIM_ID_MIN and resuming from the returned id plus one, until the
+ * ioctl fails with ENOENT.  PFCTL_SHOW_RULES prints each limiter through
+ * print_statelim(); PFCTL_SHOW_LABELS prints a header and one row of
+ * counters per limiter.  Any other format is a fatal error.
+ *
+ * Returns 0 when the walk completes and -1 if the ioctl fails for any
+ * other reason.
+ */
int
+pfctl_show_statelims(int dev, enum pfctl_show format)
+{
+	struct pfioc_statelim lim;
+	uint32_t next;
+
+	if (format == PFCTL_SHOW_LABELS)
+		printf("%3s %8s/%-8s %5s/%-5s %8s %8s %8s\n", "ID", "USE",
+		    "LIMIT", "RATE", "SECS", "ADMIT", "HARDLIM", "RATELIM");
+
+	for (next = PF_STATELIM_ID_MIN; ; next = lim.id + 1) {
+		memset(&lim, 0, sizeof(lim));
+		lim.id = next;
+
+		if (ioctl(dev, DIOCGETNSTATELIM, &lim) == -1) {
+			if (errno != ENOENT) {
+				warn("DIOCGETNSTATELIM %u", lim.id);
+				return (-1);
+			}
+			/* ENOENT: no limiter left to report. */
+			return (0);
+		}
+
+		if (format == PFCTL_SHOW_RULES) {
+			print_statelim(&lim);
+		} else if (format == PFCTL_SHOW_LABELS) {
+			printf("%3u %8u/%-8u ", lim.id, lim.inuse,
+			    lim.limit);
+			if (lim.rate.limit == 0)
+				printf("%5s/%-5s ", "nil", "nil");
+			else
+				printf("%5u/%-5u ", lim.rate.limit,
+				    lim.rate.seconds);
+			printf("%8ju %8ju %8ju\n", lim.admitted,
+			    lim.hardlimited, lim.ratelimited);
+		} else {
+			errx(1, "%s: unexpected format %d", __func__, format);
+			/* NOTREACHED */
+		}
+	}
+}
+
+/*
+ * Add one to the address held in addr, treating its four 32-bit words
+ * (stored in network byte order) as a single big-endian number.
+ *
+ * Returns 0 normally and 1 if the increment wrapped every word around
+ * to zero.
+ */
+static inline int
+pf_addr_inc(struct pf_addr *addr)
+{
+	int word = 3;
+
+	while (word >= 0) {
+		uint32_t before = ntohl(addr->addr32[word]);
+		uint32_t after = before + 1;
+
+		addr->addr32[word] = htonl(after);
+		/* A non-zero result means there is no carry to propagate. */
+		if (after != 0)
+			return (0);
+		word--;
+	}
+
+	return (1);
+}
+
+/*
+ * Print the per-address entries of one source limiter.
+ *
+ * Entries are fetched in batches of at most 128 with DIOCGETNSOURCE.  The
+ * first request uses an all-zero key; each following request uses the last
+ * entry printed, advanced by one address, as its key.  The walk ends when
+ * the ioctl fails with ESRCH or ENOENT, or returns no entries.
+ *
+ * Only PFCTL_SHOW_LABELS is supported; any other format is a fatal error.
+ * Returns 0 on success and -1 if the ioctl fails for another reason.  The
+ * batch buffer is released on every return path.
+ */
+static int
+pfctl_show_sources(int dev, const struct pfioc_sourcelim *srlim,
+    enum pfctl_show format, int opts)
+{
+	struct pfioc_source sr = { .id = srlim->id };
+	struct pfioc_source_entry *entries, *e;
+	unsigned int nentries;
+	size_t len, used;
+	int ret = 0;
+
+	if (format != PFCTL_SHOW_LABELS)
+		errx(1, "%s format is not PFCTL_SHOW_LABELS", __func__);
+
+	nentries = srlim->nentries;
+	if (nentries == 0)
+		return (0);
+	if (nentries > 128) /* arbitrary */
+		nentries = 128;
+
+	entries = reallocarray(NULL, nentries, sizeof(*entries));
+	if (entries == NULL)
+		err(1, "alloc %u source limiter entries", nentries);
+
+	len = nentries * sizeof(*entries);
+
+	e = entries;
+
+	/* start from af 0 address 0 */
+	memset(e, 0, sizeof(*e));
+
+	sr.entry_size = sizeof(*e);
+	sr.key = e;
+
+	for (;;) {
+		sr.entries = entries;
+		sr.entrieslen = len;
+
+		if (ioctl(dev, DIOCGETNSOURCE, &sr) == -1) {
+			/*
+			 * ESRCH: can't find the sourcelim,
+			 * ENOENT: no more sources; both mean we're done.
+			 */
+			if (errno != ESRCH && errno != ENOENT) {
+				warn("DIOCGETNSOURCE %u", sr.id);
+				ret = -1;
+			}
+			break;
+		}
+
+		if (sr.entrieslen > len)
+			errx(1, "DIOCGETNSOURCE used too much buffer");
+		/* Nothing returned: don't print stale buffer contents. */
+		if (sr.entrieslen == 0)
+			break;
+
+		used = 0;
+		e = entries;
+		for (;;) {
+			if (used > sr.entrieslen)
+				errx(1, "DIOCGETNSOURCE weird entrieslen");
+
+			print_addr_str(e->af, &e->addr);
+			switch (e->af) {
+			case AF_INET:
+				printf("/%u ", sr.inet_prefix);
+				break;
+			case AF_INET6:
+				printf("/%u ", sr.inet6_prefix);
+				break;
+			default:
+				printf("/af? ");
+				break;
+			}
+			printf("rdomain %u ", e->rdomain);
+
+			printf("inuse %u/%u ", e->inuse, sr.limit);
+			printf("admit %ju hardlim %ju ratelim %ju\n",
+			    e->admitted, e->hardlimited, e->ratelimited);
+
+			used += sizeof(*e);
+			if (used == sr.entrieslen)
+				break;
+
+			e++;
+		}
+
+		/*
+		 * reuse the last entry as the next key; if the address
+		 * wraps around, move on to the next address family.
+		 */
+		e->af += pf_addr_inc(&e->addr);
+		sr.key = e;
+	}
+
+	free(entries);
+	return (ret);
+}
+
+/*
+ * Print source limiters.
+ *
+ * With idopt == NULL every limiter is shown: they are fetched one at a
+ * time with DIOCGETNSOURCELIM, starting at PF_SOURCELIM_ID_MIN and
+ * resuming from the returned id plus one, until the ioctl fails with
+ * ESRCH.  With idopt set, it is parsed as a limiter id and only that one
+ * limiter is fetched (DIOCGETSOURCELIM) and shown.
+ *
+ * PFCTL_SHOW_RULES prints each limiter through print_sourcelim();
+ * PFCTL_SHOW_LABELS prints a header and one row of counters per limiter,
+ * followed by its address entries when PF_OPT_VERBOSE is set in opts.
+ * Any other format is a fatal error.
+ *
+ * Returns 0 on success (including "not found") and -1 if an ioctl fails
+ * for another reason.
+ */
+int
+pfctl_show_sourcelims(int dev, enum pfctl_show format, int opts,
+    const char *idopt)
+{
+	struct pfioc_sourcelim srlim;
+	uint32_t id = PF_SOURCELIM_ID_MIN;
+	unsigned long cmd = DIOCGETNSOURCELIM;
+	const char *cmdname = "DIOCGETNSOURCELIM";
+
+	if (idopt != NULL) {
+		const char *errstr;
+
+		id = strtonum(idopt, PF_SOURCELIM_ID_MIN, PF_SOURCELIM_ID_MAX,
+		    &errstr);
+		if (errstr != NULL)
+			errx(1, "source limiter id: %s", errstr);
+
+		cmd = DIOCGETSOURCELIM;
+		cmdname = "DIOCGETSOURCELIM";
+	}
+
+	if (format == PFCTL_SHOW_LABELS) {
+		printf("%3s %8s/%-8s %5s %5s/%-5s %8s %8s %8s %8s\n", "ID",
+		    "USE", "ADDRS", "LIMIT", "RATE", "SECS", "ADMIT", "ADDRLIM",
+		    "HARDLIM", "RATELIM");
+	}
+
+	for (;;) {
+		memset(&srlim, 0, sizeof(srlim));
+		srlim.id = id;
+
+		if (ioctl(dev, cmd, &srlim) == -1) {
+			if (errno == ESRCH) {
+				/* we're done */
+				return (0);
+			}
+			warn("%s %u", cmdname, srlim.id);
+			return (-1);
+		}
+
+		switch (format) {
+		case PFCTL_SHOW_RULES:
+			print_sourcelim(&srlim);
+			break;
+
+		case PFCTL_SHOW_LABELS:
+			printf("%3u %8u/%-8u %5u ", srlim.id, srlim.nentries,
+			    srlim.entries, srlim.limit);
+			if (srlim.rate.limit != 0) {
+				printf("%5u/%-5u ", srlim.rate.limit,
+				    srlim.rate.seconds);
+			} else
+				printf("%5s/%-5s ", "nil", "nil");
+			printf("%8ju %8ju %8ju %8ju\n",
+			    srlim.admitted, srlim.addrlimited, srlim.hardlimited,
+			    srlim.ratelimited);
+
+			if (opts & PF_OPT_VERBOSE)
+				if (pfctl_show_sources(dev, &srlim, format,
+				    opts) != 0)
+					return (-1);
+			break;
+
+		default:
+			errx(1, "%s: unexpected format %d", __func__, format);
+			/* NOTREACHED */
+		}
+
+		/*
+		 * An explicit id selects exactly one limiter; do not go on
+		 * to look up id + 1.
+		 */
+		if (idopt != NULL)
+			return (0);
+
+		id = srlim.id + 1;
+	}
+}
+
+/*
+ * Remove one address entry from a source limiter.
+ *
+ * idopt is the limiter id as text and source is a numeric host address;
+ * both are required.  Any failure (missing or malformed argument, unknown
+ * limiter, unknown address, ioctl error) terminates the program with a
+ * diagnostic.  The request is issued with rmstates set to 0.
+ */
+void
+pfctl_kill_source(int dev, const char *idopt, const char *source, int opts)
+{
+	struct pfioc_source_kill ioc;
+	unsigned int id;
+	const char *errstr;
+	struct addrinfo hints, *res;
+	int error;
+
+	if (idopt == NULL)
+		errx(1, "source limiter id unspecified");
+	if (source == NULL)
+		errx(1, "source limiter address unspecified");
+
+	id = strtonum(idopt, PF_SOURCELIM_ID_MIN, PF_SOURCELIM_ID_MAX, &errstr);
+	if (errstr != NULL)
+		errx(1, "source limiter id: %s", errstr);
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_socktype = SOCK_DGRAM; /* dummy */
+	hints.ai_flags = AI_NUMERICHOST;
+
+	error = getaddrinfo(source, NULL, &hints, &res);
+	if (error != 0)
+		errx(1, "source limiter address: %s", gai_strerror(error));
+
+	/*
+	 * Zero the whole request first so that no field left unassigned
+	 * below, and no padding, reaches the kernel uninitialised.
+	 */
+	memset(&ioc, 0, sizeof(ioc));
+	ioc.id = id;
+	ioc.af = res->ai_family;
+	copy_satopfaddr(&ioc.addr, res->ai_addr);
+	ioc.rmstates = 0;
+
+	freeaddrinfo(res);
+
+	if (ioctl(dev, DIOCCLRSOURCE, &ioc) == -1) {
+		switch (errno) {
+		case ESRCH:
+			errx(1, "source limiter %u not found", id);
+		case ENOENT:
+			errx(1, "source limiter %u: %s not found", id, source);
+		default:
+			err(1, "kill source limiter %u entry %s", id, source);
+		}
+	}
+}
+
+int
pfctl_show_eth_rules(int dev, char *path, int opts, enum pfctl_show format,
char *anchorname, int depth, int wildcard)
{
@@ -1319,6 +1655,15 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format,
int len = strlen(path), ret = 0;
char *npath, *p;
+ if (anchorname[0] == '\0') {
+ ret = pfctl_show_statelims(dev, format);
+ if (ret != 0)
+ goto error;
+ ret = pfctl_show_sourcelims(dev, format, opts, NULL);
+ if (ret != 0)
+ goto error;
+ }
+
/*
* Truncate a trailing / and * on an anchorname before searching for
* the ruleset, this is syntactic sugar that doesn't actually make it
@@ -1977,6 +2322,72 @@ pfctl_ruleset_trans(struct pfctl *pf, char *path, struct pfctl_anchor *a, bool d
return (0);
}
+/*
+ * Load one state limiter: print it when PF_OPT_VERBOSE is set and,
+ * unless PF_OPT_NOACTION is set, submit it with DIOCADDSTATELIM.
+ * A failing ioctl is fatal.
+ */
+void
+pfctl_load_statelim(struct pfctl *pf, struct pfctl_statelim *stlim)
+{
+	struct pfioc_statelim *req = &stlim->ioc;
+
+	if (pf->opts & PF_OPT_VERBOSE)
+		print_statelim(req);
+
+	/* the ioctl is only issued when PF_OPT_NOACTION is clear */
+	if ((pf->opts & PF_OPT_NOACTION) == 0 &&
+	    ioctl(pf->dev, DIOCADDSTATELIM, req) == -1)
+		err(1, "DIOCADDSTATELIM %s id %u", req->name, req->id);
+}
+
+/*
+ * Load every state limiter held in pf->statelim_ids, walking the tree in
+ * id order.  Each request is stamped with the PF_RULESET_FILTER ticket
+ * of the current transaction, or with 0 when PF_OPT_NOACTION is set.
+ */
+void
+pfctl_load_statelims(struct pfctl *pf)
+{
+	struct pfctl_statelim *lim;
+	uint32_t ticket;
+
+	ticket = (pf->opts & PF_OPT_NOACTION) ? 0 :
+	    pfctl_get_ticket(pf->trans, PF_RULESET_FILTER, "");
+
+	RB_FOREACH(lim, pfctl_statelim_ids, &pf->statelim_ids) {
+		lim->ioc.ticket = ticket;
+		pfctl_load_statelim(pf, lim);
+	}
+
+	/* Don't free the statelims because we're about to exit anyway. */
+}
+
+/*
+ * Load one source limiter: print it when PF_OPT_VERBOSE is set and,
+ * unless PF_OPT_NOACTION is set, submit it with DIOCADDSOURCELIM.
+ * A failing ioctl is fatal.
+ */
+void
+pfctl_load_sourcelim(struct pfctl *pf, struct pfctl_sourcelim *srlim)
+{
+	struct pfioc_sourcelim *req = &srlim->ioc;
+
+	if (pf->opts & PF_OPT_VERBOSE)
+		print_sourcelim(req);
+
+	/* the ioctl is only issued when PF_OPT_NOACTION is clear */
+	if ((pf->opts & PF_OPT_NOACTION) == 0 &&
+	    ioctl(pf->dev, DIOCADDSOURCELIM, req) == -1)
+		err(1, "DIOCADDSOURCELIM %s id %u", req->name, req->id);
+}
+
+/*
+ * Load every source limiter held in pf->sourcelim_ids, walking the tree
+ * in id order.  Each request is stamped with the PF_RULESET_FILTER
+ * ticket of the current transaction, or with 0 when PF_OPT_NOACTION is
+ * set.
+ */
+void
+pfctl_load_sourcelims(struct pfctl *pf)
+{
+	struct pfctl_sourcelim *lim;
+	uint32_t ticket;
+
+	ticket = (pf->opts & PF_OPT_NOACTION) ? 0 :
+	    pfctl_get_ticket(pf->trans, PF_RULESET_FILTER, "");
+
+	RB_FOREACH(lim, pfctl_sourcelim_ids, &pf->sourcelim_ids) {
+		lim->ioc.ticket = ticket;
+		pfctl_load_sourcelim(pf, lim);
+	}
+
+	/* Don't free the sourcelims because we're about to exit anyway. */
+}
+
int
pfctl_load_eth_ruleset(struct pfctl *pf, char *path,
struct pfctl_eth_ruleset *rs, int depth)
@@ -2332,6 +2743,11 @@ pfctl_rules(int dev, char *filename, int opts, int optimize,
pf.optimize = optimize;
pf.loadopt = loadopt;
+ RB_INIT(&pf.statelim_ids);
+ RB_INIT(&pf.statelim_nms);
+ RB_INIT(&pf.sourcelim_ids);
+ RB_INIT(&pf.sourcelim_nms);
+
/* non-brace anchor, create without resolving the path */
if ((pf.anchor = calloc(1, sizeof(*pf.anchor))) == NULL)
ERRX("%s: calloc", __func__);
@@ -2398,6 +2814,11 @@ pfctl_rules(int dev, char *filename, int opts, int optimize,
if (loadopt & PFCTL_FLAG_OPTION)
pfctl_adjust_skip_ifaces(&pf);
+ if (anchorname[0] == '\0' && pf.loadopt & PFCTL_FLAG_FILTER) {
+ pfctl_load_statelims(&pf);
+ pfctl_load_sourcelims(&pf);
+ }
+
if ((pf.loadopt & PFCTL_FLAG_FILTER &&
(pfctl_load_ruleset(&pf, path, rs, PF_RULESET_SCRUB, 0))) ||
(pf.loadopt & PFCTL_FLAG_ETH &&
@@ -3209,6 +3630,25 @@ pfctl_lookup_option(char *cmd, const char * const *list)
return (NULL);
}
+/*
+ * Map a (possibly abbreviated) option name to its id.
+ *
+ * cmd     - name typed by the user; any non-empty prefix of an option
+ *           name is accepted.
+ * opt_ids - table of { name, id } entries terminated by an entry whose
+ *           id is 0.
+ *
+ * Returns the id of the matching entry, or 0 when nothing matches
+ * (including a NULL or empty cmd, which would otherwise prefix-match
+ * every entry).  A cmd that equals an option name in full always
+ * selects that option, even if other names start with the same text.
+ * A prefix shared by several entries and equal to none of them is
+ * fatal: errx(1, "... is ambiguous").
+ */
+int
+pfctl_lookup_id(const char *cmd, const struct pfctl_opt_id *opt_ids)
+{
+	const struct pfctl_opt_id *opt_id;
+	size_t cmdlen;
+	int id = 0;
+	int ambiguous = 0;
+
+	if (cmd == NULL || *cmd == '\0')
+		return (0);
+	cmdlen = strlen(cmd);
+
+	for (opt_id = opt_ids; opt_id->id != 0; opt_id++) {
+		if (strncmp(cmd, opt_id->name, cmdlen) != 0)
+			continue;
+
+		/* a full-length match beats any number of prefix matches */
+		if (opt_id->name[cmdlen] == '\0')
+			return (opt_id->id);
+
+		if (id != 0)
+			ambiguous = 1;
+		else
+			id = opt_id->id;
+	}
+
+	/* only report ambiguity once no exact match was found */
+	if (ambiguous)
+		errx(1, "%s is ambiguous", cmd);
+
+	return (id);
+}
+
void
pfctl_reset(int dev, int opts)
{
@@ -3258,12 +3698,13 @@ main(int argc, char *argv[])
int optimize = PF_OPTIMIZE_BASIC;
char anchorname[MAXPATHLEN];
char *path;
+ const char *idopt = NULL;
if (argc < 2)
usage();
while ((ch = getopt(argc, argv,
- "a:AdD:eqf:F:ghi:k:K:mMnNOo:Pp:rRs:St:T:vx:z")) != -1) {
+ "a:AdD:eqf:F:ghi:I:k:K:mMnNOo:Pp:rRs:St:T:vx:z")) != -1) {
switch (ch) {
case 'a':
anchoropt = optarg;
@@ -3295,6 +3736,9 @@ main(int argc, char *argv[])
case 'i':
ifaceopt = optarg;
break;
+ case 'I':
+ idopt = optarg;
+ break;
case 'k':
if (state_killers >= 2) {
warnx("can only specify -k twice");
@@ -3359,8 +3803,8 @@ main(int argc, char *argv[])
opts |= PF_OPT_NUMERIC;
break;
case 's':
- showopt = pfctl_lookup_option(optarg, showopt_list);
- if (showopt == NULL) {
+ showopt = pfctl_lookup_id(optarg, showopt_list);
+ if (showopt == 0) {
warnx("Unknown show modifier '%s'", optarg);
usage();
}
@@ -3433,7 +3877,7 @@ main(int argc, char *argv[])
if (anchoropt[0] == '\0')
errx(1, "anchor name must not be empty");
- if (mode == O_RDONLY && showopt == NULL && tblcmdopt == NULL) {
+ if (mode == O_RDONLY && showopt == 0 && tblcmdopt == NULL) {
warnx("anchors apply to -f, -F, -s, and -T only");
usage();
}
@@ -3467,7 +3911,8 @@ main(int argc, char *argv[])
opts |= PF_OPT_DUMMYACTION;
/* turn off options */
opts &= ~ (PF_OPT_DISABLE | PF_OPT_ENABLE);
- clearopt = showopt = debugopt = NULL;
+ clearopt = debugopt = NULL;
+ showopt = 0;
#if !defined(ENABLE_ALTQ)
altqsupport = 0;
#else
@@ -3489,97 +3934,100 @@ main(int argc, char *argv[])
if ((path = calloc(1, MAXPATHLEN)) == NULL)
errx(1, "%s: calloc", __func__);
- if (showopt != NULL) {
- switch (*showopt) {
- case 'A':
- pfctl_show_anchors(dev, opts, anchorname);
- if (opts & PF_OPT_VERBOSE2)
- printf("Ethernet:\n");
- pfctl_show_eth_anchors(dev, opts, anchorname);
- break;
- case 'r':
- pfctl_load_fingerprints(dev, opts);
- pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES,
- anchorname, 0, 0);
- break;
- case 'l':
- pfctl_load_fingerprints(dev, opts);
- pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS,
- anchorname, 0, 0);
- break;
- case 'n':
- pfctl_load_fingerprints(dev, opts);
- pfctl_show_nat(dev, path, opts, anchorname, 0, 0);
- break;
- case 'q':
- pfctl_show_altq(dev, ifaceopt, opts,
- opts & PF_OPT_VERBOSE2);
- break;
- case 's':
- pfctl_show_states(dev, ifaceopt, opts);
- break;
- case 'S':
- pfctl_show_src_nodes(dev, opts);
- break;
- case 'i':
- pfctl_show_status(dev, opts);
- break;
- case 'R':
- exit_val = pfctl_show_running(dev);
- break;
- case 't':
- pfctl_show_timeouts(dev, opts);
- break;
- case 'm':
- pfctl_show_limits(dev, opts);
- break;
- case 'e':
- pfctl_show_eth_rules(dev, path, opts, 0, anchorname, 0,
- 0);
- break;
- case 'a':
- opts |= PF_OPT_SHOWALL;
- pfctl_load_fingerprints(dev, opts);
-
- pfctl_show_eth_rules(dev, path, opts, 0, anchorname, 0,
- 0);
-
- pfctl_show_nat(dev, path, opts, anchorname, 0, 0);
- pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES,
- anchorname, 0, 0);
- pfctl_show_altq(dev, ifaceopt, opts, 0);
- pfctl_show_states(dev, ifaceopt, opts);
- pfctl_show_src_nodes(dev, opts);
- pfctl_show_status(dev, opts);
- pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS,
- anchorname, 0, 0);
- pfctl_show_timeouts(dev, opts);
- pfctl_show_limits(dev, opts);
+ switch (showopt) {
+ case SHOWOPT_NONE:
+ break;
+ case SHOWOPT_ANCHORS:
+ pfctl_show_anchors(dev, opts, anchorname);
+ if (opts & PF_OPT_VERBOSE2)
+ printf("Ethernet:\n");
+ pfctl_show_eth_anchors(dev, opts, anchorname);
+ break;
+ case SHOWOPT_RULES:
+ pfctl_load_fingerprints(dev, opts);
+ pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES, anchorname,
+ 0, 0);
+ break;
+ case SHOWOPT_LABELS:
+ pfctl_load_fingerprints(dev, opts);
+ pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS, anchorname,
+ 0, 0);
+ break;
+ case SHOWOPT_NAT:
+ pfctl_load_fingerprints(dev, opts);
+ pfctl_show_nat(dev, path, opts, anchorname, 0, 0);
+ break;
+ case SHOWOPT_QUEUE:
+ pfctl_show_altq(dev, ifaceopt, opts, opts & PF_OPT_VERBOSE2);
+ break;
+ case SHOWOPT_STATES:
+ pfctl_show_states(dev, ifaceopt, opts);
+ break;
+ case SHOWOPT_SOURCES:
+ pfctl_show_src_nodes(dev, opts);
+ break;
+ case SHOWOPT_INFO:
+ pfctl_show_status(dev, opts);
+ break;
+ case SHOWOPT_RUNNING:
+ exit_val = pfctl_show_running(dev);
+ break;
+ case SHOWOPT_TIMEOUTS:
+ pfctl_show_timeouts(dev, opts);
+ break;
+ case SHOWOPT_MEMORY:
+ pfctl_show_limits(dev, opts);
+ break;
+ case SHOWOPT_ETHER:
+ pfctl_show_eth_rules(dev, path, opts, 0, anchorname, 0, 0);
+ break;
+ case SHOWOPT_ALL:
+ opts |= PF_OPT_SHOWALL;
+ pfctl_load_fingerprints(dev, opts);
+
+ pfctl_show_eth_rules(dev, path, opts, 0, anchorname, 0, 0);
+
+ pfctl_show_nat(dev, path, opts, anchorname, 0, 0);
+ pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES, anchorname,
+ 0, 0);
+ pfctl_show_altq(dev, ifaceopt, opts, 0);
+ pfctl_show_states(dev, ifaceopt, opts);
+ pfctl_show_src_nodes(dev, opts);
+ pfctl_show_status(dev, opts);
+ pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS, anchorname,
+ 0, 0);
+ pfctl_show_timeouts(dev, opts);
+ pfctl_show_limits(dev, opts);
+ pfctl_show_tables(anchorname, opts);
+ pfctl_show_fingerprints(opts);
+ break;
+ case SHOWOPT_TABLES:
+ if (opts & PF_OPT_RECURSE) {
+ opts |= PF_OPT_CALLSHOW;
+ pfctl_recurse(dev, opts, anchorname,
+ pfctl_call_showtables);
+ } else
pfctl_show_tables(anchorname, opts);
- pfctl_show_fingerprints(opts);
- break;
- case 'T':
- if (opts & PF_OPT_RECURSE) {
- opts |= PF_OPT_CALLSHOW;
- pfctl_recurse(dev, opts, anchorname,
- pfctl_call_showtables);
- } else
- pfctl_show_tables(anchorname, opts);
- break;
- case 'o':
- pfctl_load_fingerprints(dev, opts);
- pfctl_show_fingerprints(opts);
- break;
- case 'I':
- pfctl_show_ifaces(ifaceopt, opts);
- break;
- case 'c':
- pfctl_show_creators(opts);
- break;
- }
+ break;
+ case SHOWOPT_OSFP:
+ pfctl_load_fingerprints(dev, opts);
+ pfctl_show_fingerprints(opts);
+ break;
+ case SHOWOPT_IFACES:
+ pfctl_show_ifaces(ifaceopt, opts);
+ break;
+ case SHOWOPT_CREATORIDS:
+ pfctl_show_creators(opts);
+ break;
+ case SHOWOPT_STATELIMS:
+ pfctl_show_statelims(dev, PFCTL_SHOW_LABELS);
+ break;
+ case SHOWOPT_SOURCELIMS:
+ pfctl_show_sourcelims(dev, PFCTL_SHOW_LABELS, opts, idopt);
+ break;
}
- if ((opts & PF_OPT_CLRRULECTRS) && showopt == NULL) {
+ if ((opts & PF_OPT_CLRRULECTRS) && showopt == 0) {
pfctl_show_eth_rules(dev, path, opts, PFCTL_SHOW_NOTHING,
anchorname, 0, 0);
pfctl_show_rules(dev, path, opts, PFCTL_SHOW_NOTHING,
@@ -3668,6 +4116,8 @@ main(int argc, char *argv[])
pfctl_gateway_kill_states(dev, ifaceopt, opts);
else if (!strcmp(state_kill[0], "key"))
pfctl_key_kill_states(dev, ifaceopt, opts);
+ else if (!strcmp(state_kill[0], "source"))
+ pfctl_kill_source(dev, idopt, state_kill[1], opts);
else
pfctl_net_kill_states(dev, ifaceopt, opts);
}
@@ -3758,3 +4208,137 @@ pf_strerror(int errnum)
return strerror(errnum);
}
}
+
+/*
+ * Ordering function for the pfctl_statelim_ids tree: compares two
+ * limiters by ioc.id and returns -1, 0 or 1.
+ */
+static inline int
+pfctl_statelim_id_cmp(const struct pfctl_statelim *a,
+    const struct pfctl_statelim *b)
+{
+	/* ids are unsigned, so compare instead of subtracting */
+	if (a->ioc.id < b->ioc.id)
+		return (-1);
+
+	return (a->ioc.id > b->ioc.id);
+}
+
+RB_GENERATE(pfctl_statelim_ids, pfctl_statelim, entry, pfctl_statelim_id_cmp);
+
+/*
+ * Ordering function for the pfctl_statelim_nms tree: compares two
+ * limiters by ioc.name using strcmp().
+ */
+static inline int
+pfctl_statelim_nm_cmp(const struct pfctl_statelim *a,
+    const struct pfctl_statelim *b)
+{
+	const char *lhs = a->ioc.name;
+	const char *rhs = b->ioc.name;
+
+	return (strcmp(lhs, rhs));
+}
+
+RB_GENERATE(pfctl_statelim_nms, pfctl_statelim, entry, pfctl_statelim_nm_cmp);
+
+/*
+ * Register a parsed state limiter under both its id and its name.
+ *
+ * Returns 0 on success, or -1 if either the id or the name is already
+ * present; on failure the limiter is left in neither tree.
+ *
+ * NOTE(review): struct pfctl_statelim has a single RB_ENTRY ("entry")
+ * and both trees are generated over that same field, so the second
+ * RB_INSERT rewrites the links set up by the first.  Looks like each
+ * tree needs its own entry field -- confirm.
+ */
+int
+pfctl_add_statelim(struct pfctl *pf, struct pfctl_statelim *stlim)
+{
+	if (RB_INSERT(pfctl_statelim_ids, &pf->statelim_ids, stlim) != NULL)
+		return (-1);
+
+	if (RB_INSERT(pfctl_statelim_nms, &pf->statelim_nms, stlim) == NULL)
+		return (0);
+
+	/* name already taken: undo the id-tree insertion */
+	RB_REMOVE(pfctl_statelim_ids, &pf->statelim_ids, stlim);
+	return (-1);
+}
+
+/*
+ * Find a parsed state limiter by numeric id.
+ * Returns NULL when no limiter with that id has been added.
+ */
+struct pfctl_statelim *
+pfctl_get_statelim_id(struct pfctl *pf, uint32_t id)
+{
+	struct pfctl_statelim key;
+
+	/* pfctl_statelim_id_cmp() only reads ioc.id from the key */
+	key.ioc.id = id;
+
+	/*
+	 * Search the id tree.  The name tree orders by ioc.name, which is
+	 * not initialised in this key.
+	 */
+	return (RB_FIND(pfctl_statelim_ids, &pf->statelim_ids, &key));
+}
+
+/*
+ * Find a parsed state limiter by name.
+ * Returns NULL when the name does not fit in ioc.name or when no
+ * limiter with that name has been added.
+ */
+struct pfctl_statelim *
+pfctl_get_statelim_nm(struct pfctl *pf, const char *name)
+{
+	struct pfctl_statelim key;
+	size_t len;
+
+	len = strlcpy(key.ioc.name, name, sizeof(key.ioc.name));
+	if (len >= sizeof(key.ioc.name)) {
+		/* would have been truncated; do not search with a cut-off key */
+		return (NULL);
+	}
+
+	return (RB_FIND(pfctl_statelim_nms, &pf->statelim_nms, &key));
+}
+
+/*
+ * Ordering function for the pfctl_sourcelim_ids tree: compares two
+ * limiters by ioc.id and returns -1, 0 or 1.
+ */
+static inline int
+pfctl_sourcelim_id_cmp(const struct pfctl_sourcelim *a,
+    const struct pfctl_sourcelim *b)
+{
+	/* ids are unsigned, so compare instead of subtracting */
+	if (a->ioc.id < b->ioc.id)
+		return (-1);
+
+	return (a->ioc.id > b->ioc.id);
+}
+
+RB_GENERATE(pfctl_sourcelim_ids, pfctl_sourcelim, entry,
+ pfctl_sourcelim_id_cmp);
+
+/*
+ * Ordering function for the pfctl_sourcelim_nms tree: compares two
+ * limiters by ioc.name using strcmp().
+ */
+static inline int
+pfctl_sourcelim_nm_cmp(const struct pfctl_sourcelim *a,
+    const struct pfctl_sourcelim *b)
+{
+	const char *lhs = a->ioc.name;
+	const char *rhs = b->ioc.name;
+
+	return (strcmp(lhs, rhs));
+}
+
+RB_GENERATE(pfctl_sourcelim_nms, pfctl_sourcelim, entry,
+ pfctl_sourcelim_nm_cmp);
+
+/*
+ * Register a parsed source limiter under both its id and its name.
+ *
+ * Returns 0 on success, or -1 if either the id or the name is already
+ * present; on failure the limiter is left in neither tree.
+ *
+ * NOTE(review): struct pfctl_sourcelim has a single RB_ENTRY ("entry")
+ * and both trees are generated over that same field, so the second
+ * RB_INSERT rewrites the links set up by the first.  Looks like each
+ * tree needs its own entry field -- confirm.
+ */
+int
+pfctl_add_sourcelim(struct pfctl *pf, struct pfctl_sourcelim *srlim)
+{
+	if (RB_INSERT(pfctl_sourcelim_ids, &pf->sourcelim_ids, srlim) != NULL)
+		return (-1);
+
+	if (RB_INSERT(pfctl_sourcelim_nms, &pf->sourcelim_nms, srlim) == NULL)
+		return (0);
+
+	/* name already taken: undo the id-tree insertion */
+	RB_REMOVE(pfctl_sourcelim_ids, &pf->sourcelim_ids, srlim);
+	return (-1);
+}
+
+/*
+ * Find a parsed source limiter by numeric id.
+ * Returns NULL when no limiter with that id has been added.
+ */
+struct pfctl_sourcelim *
+pfctl_get_sourcelim_id(struct pfctl *pf, uint32_t id)
+{
+	struct pfctl_sourcelim key;
+
+	/* pfctl_sourcelim_id_cmp() only reads ioc.id from the key */
+	key.ioc.id = id;
+
+	/*
+	 * Search the id tree.  The name tree orders by ioc.name, which is
+	 * not initialised in this key.
+	 */
+	return (RB_FIND(pfctl_sourcelim_ids, &pf->sourcelim_ids, &key));
+}
+
+/*
+ * Find a parsed source limiter by name.
+ * Returns NULL when the name does not fit in ioc.name or when no
+ * limiter with that name has been added.
+ */
+struct pfctl_sourcelim *
+pfctl_get_sourcelim_nm(struct pfctl *pf, const char *name)
+{
+	struct pfctl_sourcelim key;
+	size_t len;
+
+	len = strlcpy(key.ioc.name, name, sizeof(key.ioc.name));
+	if (len >= sizeof(key.ioc.name)) {
+		/* would have been truncated; do not search with a cut-off key */
+		return (NULL);
+	}
+
+	return (RB_FIND(pfctl_sourcelim_nms, &pf->sourcelim_nms, &key));
+}
diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c
index b8531067d3f6..617d3f8e0733 100644
--- a/sbin/pfctl/pfctl_parser.c
+++ b/sbin/pfctl/pfctl_parser.c
@@ -856,6 +856,38 @@ print_eth_rule(struct pfctl_eth_rule *r, const char *anchor_call,
}
void
+print_statelim(const struct pfioc_statelim *ioc)
+{
+	const unsigned int rate_limit = ioc->rate.limit;
+
+	/* One line on stdout: name, id and limit are always printed. */
+	printf("state limiter %s id %u limit %u", ioc->name, ioc->id,
+	    ioc->limit);
+
+	/* the rate clause is only printed for a non-zero rate limit */
+	if (rate_limit != 0) {
+		printf(" rate %u/%u", rate_limit, ioc->rate.seconds);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Print a source limiter definition as one line on stdout.
+ *
+ * The keywords follow the source limiter syntax documented in
+ * pf.conf(5): "entries" is the maximum number of address entries and
+ * "limit" is the number of states allowed per entry.  The rate, table
+ * and mask clauses are only printed when they differ from "unset"
+ * (rate limit 0, empty table name, prefix lengths of 32 / 128).
+ */
+void
+print_sourcelim(const struct pfioc_sourcelim *ioc)
+{
+	printf("source limiter %s id %u entries %u limit %u", ioc->name,
+	    ioc->id, ioc->entries, ioc->limit);
+	if (ioc->rate.limit != 0)
+		printf(" rate %u/%u", ioc->rate.limit, ioc->rate.seconds);
+	if (ioc->overload_tblname[0] != '\0') {
+		printf(" table <%s> above %u", ioc->overload_tblname,
+		    ioc->overload_hwm);
+		/* a low water mark of 0 is left implicit */
+		if (ioc->overload_lwm)
+			printf(" below %u", ioc->overload_lwm);
+	}
+	if (ioc->inet_prefix < 32)
+		printf(" inet mask %u", ioc->inet_prefix);
+	if (ioc->inet6_prefix < 128)
+		printf(" inet6 mask %u", ioc->inet6_prefix);
+
+	printf("\n");
+}
+
+void
print_rule(struct pfctl_rule *r, const char *anchor_call, int opts, int numeric)
{
static const char *actiontypes[] = { "pass", "block", "scrub",
@@ -1080,6 +1112,29 @@ print_rule(struct pfctl_rule *r, const char *anchor_call, int opts, int numeric)
}
printf(" probability %s%%", buf);
}
+ if (r->statelim != PF_STATELIM_ID_NONE) {
+#if 0 /* XXX need pf to find statelims */
+ struct pfctl_statelim *stlim =
+ pfctl_get_statelim_id(pf, r->statelim);
+
+ if (stlim != NULL)
+ printf(" state limiter %s", stlim->ioc.name);
+ else
+#endif
+ printf(" state limiter id %u", r->statelim);
+ }
+ if (r->sourcelim != PF_SOURCELIM_ID_NONE) {
+#if 0 /* XXX need pf to find sourcelims */
+ struct pfctl_sourcelim *srlim =
+ pfctl_get_sourcelim_id(pf, r->sourcelim);
+
+ if (srlim != NULL)
+ printf(" source limiter %s", srlim->ioc.name);
+ else
+#endif
+ printf(" source limiter id %u", r->sourcelim);
+ }
+
ropts = 0;
if (r->max_states || r->max_src_nodes || r->max_src_states)
ropts = 1;
diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h
index 44ddfb45fbe1..6d0417cde061 100644
--- a/sbin/pfctl/pfctl_parser.h
+++ b/sbin/pfctl/pfctl_parser.h
@@ -75,6 +75,21 @@
struct pfr_buffer; /* forward definition */
+/*
+ * A state limiter as held by pfctl.  ioc is the request that
+ * pfctl_load_statelim() passes to DIOCADDSTATELIM.
+ */
+struct pfctl_statelim {
+ struct pfioc_statelim ioc;
+ /*
+ * NOTE(review): this one link field is named by the RB_GENERATE() of
+ * both pfctl_statelim_ids and pfctl_statelim_nms, yet
+ * pfctl_add_statelim() inserts the same node into both trees.
+ * Looks like a second RB_ENTRY is needed -- confirm.
+ */
+ RB_ENTRY(pfctl_statelim) entry;
+};
+
+RB_HEAD(pfctl_statelim_ids, pfctl_statelim);
+RB_HEAD(pfctl_statelim_nms, pfctl_statelim);
+
+/*
+ * A source limiter as held by pfctl.  ioc is the request that
+ * pfctl_load_sourcelim() passes to DIOCADDSOURCELIM.
+ */
+struct pfctl_sourcelim {
+ struct pfioc_sourcelim ioc;
+ /*
+ * NOTE(review): this one link field is named by the RB_GENERATE() of
+ * both pfctl_sourcelim_ids and pfctl_sourcelim_nms, yet
+ * pfctl_add_sourcelim() inserts the same node into both trees.
+ * Looks like a second RB_ENTRY is needed -- confirm.
+ */
+ RB_ENTRY(pfctl_sourcelim) entry;
+};
+
+RB_HEAD(pfctl_sourcelim_ids, pfctl_sourcelim);
+RB_HEAD(pfctl_sourcelim_nms, pfctl_sourcelim);
struct pfctl {
int dev;
@@ -99,6 +114,11 @@ struct pfctl {
u_int32_t eth_ticket;
const char *ruleset;
+ struct pfctl_statelim_ids statelim_ids;
+ struct pfctl_statelim_nms statelim_nms;
+ struct pfctl_sourcelim_ids sourcelim_ids;
+ struct pfctl_sourcelim_nms sourcelim_nms;
+
/* 'set foo' options */
u_int32_t timeout[PFTM_MAX];
u_int32_t limit[PF_LIMIT_MAX];
@@ -296,6 +316,17 @@ int pfctl_add_pool(struct pfctl *, struct pfctl_pool *, int);
void pfctl_move_pool(struct pfctl_pool *, struct pfctl_pool *);
void pfctl_clear_pool(struct pfctl_pool *);
+int pfctl_add_statelim(struct pfctl *, struct pfctl_statelim *);
+struct pfctl_statelim *
+ pfctl_get_statelim_id(struct pfctl *, uint32_t);
+struct pfctl_statelim *
+ pfctl_get_statelim_nm(struct pfctl *, const char *);
+int pfctl_add_sourcelim(struct pfctl *, struct pfctl_sourcelim *);
+struct pfctl_sourcelim *
+ pfctl_get_sourcelim_id(struct pfctl *, uint32_t);
+struct pfctl_sourcelim *
+ pfctl_get_sourcelim_nm(struct pfctl *, const char *);
+
int pfctl_apply_timeout(struct pfctl *, const char *, int, int);
int pfctl_set_reassembly(struct pfctl *, int, int);
int pfctl_set_optimization(struct pfctl *, const char *);
@@ -312,6 +343,8 @@ int pfctl_load_anchors(int, struct pfctl *);
void print_pool(struct pfctl_pool *, u_int16_t, u_int16_t, int);
void print_src_node(struct pfctl_src_node *, int);
+void print_statelim(const struct pfioc_statelim *);
+void print_sourcelim(const struct pfioc_sourcelim *);
void print_eth_rule(struct pfctl_eth_rule *, const char *, int);
void print_rule(struct pfctl_rule *, const char *, int, int);
void print_tabledef(const char *, int, int, struct node_tinithead *);
diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5
index 33097000310d..d4bd61f970ff 100644
--- a/share/man/man5/pf.conf.5
+++ b/share/man/man5/pf.conf.5
@@ -27,7 +27,7 @@
.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd November 3, 2025
+.Dd December 30, 2025
.Dt PF.CONF 5
.Os
.Sh NAME
@@ -2365,6 +2365,24 @@ For example, the following rule will drop 20% of incoming ICMP packets:
.Bd -literal -offset indent
block in proto icmp probability 20%
.Ed
+.It Cm state limiter Ar name
+Use the specified state limiter to restrict the creation of states
+by this rule.
+If capacity is not available, the rule does not match and evaluation
+of the ruleset continues.
+See the
+.Sx State Limiters
+section for more information.
+.Pp
+.It Cm source limiter Ar name
+Use the specified source limiter to restrict the creation of states
+by this rule.
+If capacity is not available, the rule does not match and evaluation
+of the ruleset continues.
+See the
+.Sx Source Limiters
+section for more information.
+.Pp
.It Ar prio Aq Ar number
Only match packets which have the given queueing priority assigned.
.El
@@ -2609,6 +2627,145 @@ Example:
.Bd -literal -offset indent
pass in proto tcp from any to any port www synproxy state
.Ed
+.Ss State Limiters
+State limiters provide a mechanism to limit the number of states created,
+or the rate of state creation,
+by a set of rules.
+State limiters are configured and loaded with the main ruleset, but
+can be used by rules in any anchor.
+The overall number of states is still subject to the limit set with
+.Cm set limit states ,
+but the number of states created by a subset of rules can be limited
+with a state limiter.
+.Pp
+A state limiter is configured with the following statement:
+.Pp
+.Bl -tag -width xxxx -compact
+.It Cm state limiter Ar name
+Each state limiter is identified by a unique name.
+.El
+.Pp
+State limiters support the following configuration:
+.Pp
+.Bl -tag -width xxxx -compact
+.It Cm id Ar number
+A unique identifier between 1 and 255.
+This configuration is required.
+.It Cm limit Ar number
+Specify the maximum number of states.
+This configuration is required.
+.It Cm rate Ar number Ns / Ns Ar seconds
+Limit the rate at which states can be created over a time interval.
+The connection rate is an approximation calculated as a moving
+average.
+.El
+.Pp
+Pass rules can specify a state limiter using the
+.Cm state limiter Ar name
+option.
+If the number of states allowed has hit the limit, the pass rule
+does not match and ruleset evaluation continues past it.
+.Pp
+An example use case for a state limiter is to restrict the number of
+connections allowed to a service that is accessible via multiple
+protocols, e.g. a DNS server that can be accessed by both TCP and
+UDP on port 53, DNS-over-TLS on TCP port 853, and DNS-over-HTTPS
+on TCP port 443 can be limited to 1000 concurrent connections:
+.Pp
+.Bd -literal -offset indent -compact
+state limiter "dns-server" id 1 limit 1000
+
+pass in proto { tcp udp } to port domain state limiter "dns-server"
+pass in proto tcp to port { 853 443 } state limiter "dns-server"
+.Ed
+.Ss Source Limiters
+Source limiters apply limits on the number of states,
+or the rate of state creation,
+for connections coming from a source address or network for a set
+of rules.
+Source limiters are configured and loaded with the main ruleset, but
+can be used by rules in any anchor.
+The overall number of states is still subject to the limit set with
+.Cm set limit states ,
+but limits on states for a subset of source addresses and rules can
+be provided with source limiters.
+.Pp
+Source address entries in a source limiter are created on demand,
+and are used to account for the states created for each source
+address or network.
+A source limiter specifies the maximum number of source address
+entries it will track, and can be configured to mask bits in network
+prefixes to have source entries cover larger portions of the address
+space if needed.
+.Pp
+A source limiter is configured with the following statement:
+.Pp
+.Bl -tag -width xxxx -compact
+.It Cm source limiter Ar name
+Each source limiter is uniquely identified by the specified name.
+.El
+.Pp
+Source limiters support the following configuration:
+.Pp
+.Bl -tag -width xxxx -compact
+.It Cm id Ar number
+A unique identifier between 1 and 255.
+This configuration is required.
+.It Cm entries Ar number
+Specify the maximum number of source address entries.
+This configuration is required.
+.It Cm limit Ar number
+Specify the maximum number of states for each source address entry.
+This configuration is required.
+.It Cm rate Ar number Ns / Ns Ar seconds
+Limit the rate at which states can be created by each source address
+entry over a time interval.
+The connection rate is an approximation calculated as a moving
+average.
+.It Cm inet mask Ar prefixlen
+Mask IPv4 source addresses using the prefix length specified with
+.Ar prefixlen
+when creating an address entry.
+The default IPv4 prefix length is 32 bits.
+.It Cm inet6 mask Ar prefixlen
+Mask IPv6 source addresses using the prefix length specified with
+.Ar prefixlen
+when creating an address entry.
+The default IPv6 prefix length is 128 bits.
+.It Cm table < Ns Ar table Ns > Cm above Ar hwm Op Cm below Ar lwm
+Add the address to the specified
+.Ar table
+when the number of states goes above the
+.Ar hwm
+high water mark.
+The address will be removed from the table when the number of states
+drops below the
+.Ar lwm
+low water mark.
+The default low water mark is 0.
+.El
+.Pp
+Pass rules can specify a source limiter using the
+.Cm source limiter Ar name
+option.
+.Pp
+An example use for a source limiter is the mitigation of denial of
+service caused by the exhaustion of firewall resources by network
+or port scans from outside the network.
+The states created by any one scanner from any one source address
+can be limited to avoid impacting other sources.
+Below, up to 10000 IPv4 hosts and IPv6 /64 networks from the external
+network are each limited to a maximum of 1000 connections, and are
+rate limited to creating 100 states over a 10 second interval:
+.Pp
+.Bd -literal -offset indent -compact
+source limiter "internet" id 1 entries 10000 \e
+ limit 1000 rate 100/10 \e
+ inet6 mask 64
+
+block in on egress
+pass in on egress source limiter "internet"
+.Ed
.Sh STATEFUL TRACKING OPTIONS
A number of options related to stateful tracking can be applied on a
per-rule basis.
@@ -3457,6 +3614,7 @@ filteropt = user | group | flags | icmp-type | icmp6-type | "tos" tos |
"max-pkt-size" number |
"queue" ( string | "(" string [ [ "," ] string ] ")" ) |
"rtable" number | "probability" number"%" | "prio" number |
+ "state limiter" name | "source limiter" name |
"dnpipe" ( number | "(" number "," number ")" ) |
"dnqueue" ( number | "(" number "," number ")" ) |
"ridentifier" number |
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 6582250879ca..025a30378f1f 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -329,6 +329,9 @@ SYSCTL_DECL(_net_pf);
MALLOC_DECLARE(M_PF);
MALLOC_DECLARE(M_PFHASH);
MALLOC_DECLARE(M_PF_RULE_ITEM);
+MALLOC_DECLARE(M_PF_STATE_LINK);
+MALLOC_DECLARE(M_PF_SOURCE_LIM);
+MALLOC_DECLARE(M_PF_STATE_LIM);
SDT_PROVIDER_DECLARE(pf);
SDT_PROBE_DECLARE(pf, , test, reason_set);
@@ -893,6 +896,8 @@ struct pf_krule {
u_int8_t set_prio[2];
sa_family_t naf;
u_int8_t rcvifnot;
+ uint8_t statelim;
+ uint8_t sourcelim;
struct {
struct pf_addr addr;
@@ -1102,7 +1107,54 @@ struct pf_state_export {
};
_Static_assert(sizeof(struct pf_state_export) == 384, "size incorrect");
+#define PF_STATELIM_NAME_LEN 16 /* kstat istr */
+#define PF_STATELIM_DESCR_LEN 64
+
+#define PF_SOURCELIM_NAME_LEN 16 /* kstat istr */
+#define PF_SOURCELIM_DESCR_LEN 64
+
#ifdef _KERNEL
+struct kstat;
+
+/*
+ * PF state links
+ *
+ * This is used to augment a struct pf_state so it can be
+ * tracked/referenced by the state and source address limiter things.
+ * Each limiter maintains a list of the states they "own", and these
+ * state links are what the limiters use to wire a state into their
+ * lists.
+ *
+ * Without PF state links, the pf_state struct would have to grow
+ * a lot to support a feature that may not be used.
+ *
+ * pfl_link is used by the pools to add states to their list.
+ * pfl_state allows the pools to get from their list of states to
+ * the states themselves.
+ *
+ * pfl_linkage allows operations on states (well, delete) to be able
+ * to quickly locate the pf_state_link struct so they can be unwired
+ * from the pools.
+ */
+
+#define PF_STATE_LINK_TYPE_STATELIM 1
+#define PF_STATE_LINK_TYPE_SOURCELIM 2
+
+struct pf_kstate;
+/*
+ * One link between a state and a limiter that accounts for it.  Links are
+ * chained on a limiter's pf_state_link_list (TAILQ) and on the state's
+ * pf_state_linkage (SLIST).
+ */
+struct pf_state_link {
+ /* used by source/state pools to get to states */
+ TAILQ_ENTRY(pf_state_link) pfl_link;
+
+ /* used by pf_state to get to source/state pools */
+ SLIST_ENTRY(pf_state_link) pfl_linkage;
+
+ /* the state this link was created for */
+ struct pf_kstate *pfl_state;
+ /* presumably one of PF_STATE_LINK_TYPE_* -- confirm against users */
+ unsigned int pfl_type;
+};
+
+TAILQ_HEAD(pf_state_link_list, pf_state_link);
+SLIST_HEAD(pf_state_linkage, pf_state_link);
+
struct pf_kstate {
/*
* Area shared with pf_state_cmp
@@ -1144,13 +1196,227 @@ struct pf_kstate {
u_int16_t tag;
u_int16_t if_index_in;
u_int16_t if_index_out;
+ uint8_t statelim;
+ uint8_t sourcelim;
+ struct pf_state_linkage linkage;
};
/*
- * 6 cache lines per struct, 10 structs per page.
- * Try to not grow the struct beyond that.
+ * State limiter
*/
-_Static_assert(sizeof(struct pf_kstate) <= 384, "pf_kstate size crosses 384 bytes");
+
+/*
+ * Kernel representation of one state limiter: its identity, configured
+ * limits, and the run-time accounting guarded by pfstlim_lock.
+ */
+struct pf_statelim {
+ /* linkage for the id tree, the name tree and the limiter list */
+ RB_ENTRY(pf_statelim) pfstlim_id_tree;
+ RB_ENTRY(pf_statelim) pfstlim_nm_tree;
+ TAILQ_ENTRY(pf_statelim) pfstlim_list;
+ struct kstat *pfstlim_ks;
+
+ uint32_t pfstlim_id;
+ char pfstlim_nm[PF_STATELIM_NAME_LEN];
+
+ /* config */
+
+ unsigned int pfstlim_limit;
+ struct {
+ unsigned int limit;
+ unsigned int seconds;
+ } pfstlim_rate;
+
+ /* run state */
+ /* taken by pf_statelim_enter(), released by pf_statelim_leave() */
+ struct mtx pfstlim_lock;
+
+ /* rate limiter */
+ uint64_t pfstlim_rate_ts;
+ uint64_t pfstlim_rate_token;
+ uint64_t pfstlim_rate_bucket;
+
+ unsigned int pfstlim_inuse;
+ /* pf_state_link entries chained through pfl_link */
+ struct pf_state_link_list pfstlim_states;
+
+ /* counters */
+
+ struct {
+ uint64_t admitted;
+ uint64_t hardlimited;
+ uint64_t ratelimited;
+ } pfstlim_counters;
+
+ struct {
+ time_t created;
+ time_t updated;
+ time_t cleared;
+ } pfstlim_timestamps;
+};
+
+RB_HEAD(pf_statelim_id_tree, pf_statelim);
+RB_PROTOTYPE(pf_statelim_id_tree, pf_statelim, pfstlim_id_tree, cmp);
+
+RB_HEAD(pf_statelim_nm_tree, pf_statelim);
+RB_PROTOTYPE(pf_statelim_nm_tree, pf_statelim, pfstlim_nm_tree, cmp);
+
+TAILQ_HEAD(pf_statelim_list, pf_statelim);
+
+VNET_DECLARE(struct pf_statelim_id_tree, pf_statelim_id_tree_active);
+#define V_pf_statelim_id_tree_active VNET(pf_statelim_id_tree_active)
+VNET_DECLARE(struct pf_statelim_list, pf_statelim_list_active);
+#define V_pf_statelim_list_active VNET(pf_statelim_list_active)
+
+VNET_DECLARE(struct pf_statelim_id_tree, pf_statelim_id_tree_inactive);
+#define V_pf_statelim_id_tree_inactive VNET(pf_statelim_id_tree_inactive)
+VNET_DECLARE(struct pf_statelim_nm_tree, pf_statelim_nm_tree_inactive);
+#define V_pf_statelim_nm_tree_inactive VNET(pf_statelim_nm_tree_inactive)
+VNET_DECLARE(struct pf_statelim_list, pf_statelim_list_inactive);
+#define V_pf_statelim_list_inactive VNET(pf_statelim_list_inactive)
+
+/*
+ * Acquire the limiter's mutex.  The return value is always 0; callers
+ * hand it back to pf_statelim_leave() as its gen argument.
+ */
+static inline unsigned int
+pf_statelim_enter(struct pf_statelim *pfstlim)
+{
+ mtx_lock(&pfstlim->pfstlim_lock);
+
+ return (0);
+}
+
+/*
+ * Release the limiter's mutex taken by pf_statelim_enter().
+ * gen is accepted for symmetry with pf_statelim_enter() and is not used.
+ */
+static inline void
+pf_statelim_leave(struct pf_statelim *pfstlim, unsigned int gen)
+{
+	/*
+	 * Plain call, as in pf_sourcelim_leave(): a void function must not
+	 * return an expression.
+	 */
+	mtx_unlock(&pfstlim->pfstlim_lock);
+}
+
+/*
+ * Source address pools
+ */
+
+struct pf_sourcelim;
+
+/*
+ * One address entry of a source limiter, linked into its parent's
+ * pfsrlim_sources and pfsrlim_ioc_sources trees.
+ */
+struct pf_source {
+ RB_ENTRY(pf_source) pfsr_tree;
+ RB_ENTRY(pf_source) pfsr_ioc_tree;
+ /* the source limiter this entry belongs to */
+ struct pf_sourcelim *pfsr_parent;
+
+ sa_family_t pfsr_af;
+ u_int16_t pfsr_rdomain;
+ struct pf_addr pfsr_addr;
+
+ /* run state */
+
+ unsigned int pfsr_inuse;
+ unsigned int pfsr_intable;
+ /* pf_state_link entries chained through pfl_link */
+ struct pf_state_link_list pfsr_states;
+ time_t pfsr_empty_ts;
+ TAILQ_ENTRY(pf_source) pfsr_empty_gc;
+
+ /* rate limiter */
+ uint64_t pfsr_rate_ts;
+
+ struct {
+ uint64_t admitted;
+ uint64_t hardlimited;
+ uint64_t ratelimited;
+ } pfsr_counters;
+};
+
+RB_HEAD(pf_source_tree, pf_source);
+RB_PROTOTYPE(pf_source_tree, pf_source, pfsr_tree, cmp);
+
+RB_HEAD(pf_source_ioc_tree, pf_source);
+RB_PROTOTYPE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, cmp);
+
+TAILQ_HEAD(pf_source_list, pf_source);
+
+/*
+ * Kernel representation of one source limiter: its identity, configured
+ * limits, the pf_source entries it tracks, and aggregate counters.
+ */
+struct pf_sourcelim {
+ /* linkage for the id tree, the name tree and the limiter list */
+ RB_ENTRY(pf_sourcelim) pfsrlim_id_tree;
+ RB_ENTRY(pf_sourcelim) pfsrlim_nm_tree;
+ TAILQ_ENTRY(pf_sourcelim) pfsrlim_list;
+ struct kstat *pfsrlim_ks;
+
+ uint32_t pfsrlim_id;
+ char pfsrlim_nm[PF_SOURCELIM_NAME_LEN];
+ unsigned int pfsrlim_disabled;
+
+ /* config */
+
+ unsigned int pfsrlim_entries;
+ unsigned int pfsrlim_limit;
+ unsigned int pfsrlim_ipv4_prefix;
+ unsigned int pfsrlim_ipv6_prefix;
+
+ struct {
+ unsigned int limit;
+ unsigned int seconds;
+ } pfsrlim_rate;
+
+ struct {
+ char name[PF_TABLE_NAME_SIZE];
+ unsigned int hwm;
+ unsigned int lwm;
+ struct pfr_ktable *table;
+ } pfsrlim_overload;
+
+ /* run state */
+ /* taken by pf_sourcelim_enter(), released by pf_sourcelim_leave() */
+ struct mtx pfsrlim_lock;
+
+ struct pf_addr pfsrlim_ipv4_mask;
+ struct pf_addr pfsrlim_ipv6_mask;
+
+ uint64_t pfsrlim_rate_token;
+ uint64_t pfsrlim_rate_bucket;
+
+ /* number of pf_sources */
+ unsigned int pfsrlim_nsources;
+ struct pf_source_tree pfsrlim_sources;
+ struct pf_source_ioc_tree pfsrlim_ioc_sources;
+
+ struct {
+ /* number of times pf_source was allocated */
+ uint64_t addrallocs;
+ /* state was rejected because the address limit was hit */
+ uint64_t addrlimited;
+ /* no memory to create address thing */
+ uint64_t addrnomem;
+
+ /* sum of pf_source inuse gauges */
+ uint64_t inuse;
+ /* sum of pf_source admitted counters */
+ uint64_t admitted;
+ /* sum of pf_source hardlimited counters */
+ uint64_t hardlimited;
+ /* sum of pf_source ratelimited counters */
+ uint64_t ratelimited;
+ } pfsrlim_counters;
+};
+
+RB_HEAD(pf_sourcelim_id_tree, pf_sourcelim);
+RB_PROTOTYPE(pf_sourcelim_id_tree, pf_sourcelim, pfsrlim_id_tree, cmp);
+
+RB_HEAD(pf_sourcelim_nm_tree, pf_sourcelim);
+RB_PROTOTYPE(pf_sourcelim_nm_tree, pf_sourcelim, pfsrlim_nm_tree, cmp);
+
+TAILQ_HEAD(pf_sourcelim_list, pf_sourcelim);
+
+VNET_DECLARE(struct pf_sourcelim_id_tree, pf_sourcelim_id_tree_active);
+#define V_pf_sourcelim_id_tree_active VNET(pf_sourcelim_id_tree_active)
+VNET_DECLARE(struct pf_sourcelim_list, pf_sourcelim_list_active);
+#define V_pf_sourcelim_list_active VNET(pf_sourcelim_list_active)
+
+VNET_DECLARE(struct pf_sourcelim_id_tree, pf_sourcelim_id_tree_inactive);
+#define V_pf_sourcelim_id_tree_inactive VNET(pf_sourcelim_id_tree_inactive)
+VNET_DECLARE(struct pf_sourcelim_nm_tree, pf_sourcelim_nm_tree_inactive);
+#define V_pf_sourcelim_nm_tree_inactive VNET(pf_sourcelim_nm_tree_inactive)
+VNET_DECLARE(struct pf_sourcelim_list, pf_sourcelim_list_inactive);
+#define V_pf_sourcelim_list_inactive VNET(pf_sourcelim_list_inactive)
+
+/*
+ * Acquire the source limiter's mutex.  The return value is always 0;
+ * callers hand it back to pf_sourcelim_leave() as its gen argument.
+ */
+static inline unsigned int
+pf_sourcelim_enter(struct pf_sourcelim *pfsrlim)
+{
+ mtx_lock(&pfsrlim->pfsrlim_lock);
+ return (0);
+}
+
+/*
+ * Release the source limiter's mutex taken by pf_sourcelim_enter().
+ * gen is not used.
+ */
+static inline void
+pf_sourcelim_leave(struct pf_sourcelim *pfsrlim, unsigned int gen)
+{
+ mtx_unlock(&pfsrlim->pfsrlim_lock);
+}
enum pf_test_status {
PF_TEST_FAIL = -1,
@@ -1186,6 +1452,9 @@ struct pf_test_ctx {
uint16_t virtual_type;
uint16_t virtual_id;
int depth;
+ struct pf_statelim *statelim;
+ struct pf_sourcelim *sourcelim;
+ struct pf_source *source;
};
#define PF_ANCHOR_STACK_MAX 32
@@ -1805,6 +2074,127 @@ enum pf_syncookies_mode {
#define PF_SYNCOOKIES_HIWATPCT 25
#define PF_SYNCOOKIES_LOWATPCT (PF_SYNCOOKIES_HIWATPCT / 2)
+struct pfioc_statelim { /* argument of DIOCADDSTATELIM, DIOCGETSTATELIM and DIOCGETNSTATELIM */
+ uint32_t ticket; /* pf_statelim_add() returns EBUSY unless this equals the main filter ruleset's inactive ticket */
+
+ char name[PF_STATELIM_NAME_LEN]; /* pf_statelim_add() returns EINVAL when no NUL is found inside the array */
+ uint32_t id; /* pf_statelim_add() accepts PF_STATELIM_ID_MIN..PF_STATELIM_ID_MAX only */
+#define PF_STATELIM_ID_NONE 0 /* pf_match_rule() skips the state limiter check for rules carrying this id */
+#define PF_STATELIM_ID_MIN 1
+#define PF_STATELIM_ID_MAX 255 /* fits in pf_state uint8_t */
+
+ /* limit on the total number of states */
+ unsigned int limit; /* pf_statelim_add() accepts PF_STATELIM_LIMIT_MIN..PF_STATELIM_LIMIT_MAX only */
+#define PF_STATELIM_LIMIT_MIN 1
+#define PF_STATELIM_LIMIT_MAX (1 << 24) /* pf is pretty scalable */
+
+ /* rate limit on the creation of states */
+ struct {
+ unsigned int limit;
+ unsigned int seconds;
+ } rate; /* pf_statelim_add() requires limit and seconds to be both zero or both non-zero */
+
+ char description[PF_STATELIM_DESCR_LEN];
+
+ /* kernel state for GET ioctls */
+ unsigned int inuse; /* gauge */
+ uint64_t admitted; /* counter */
+ uint64_t hardlimited; /* counter */
+ uint64_t ratelimited; /* counter */
+};
+
+struct pfioc_sourcelim { /* argument of DIOCADDSOURCELIM, DIOCGETSOURCELIM and DIOCGETNSOURCELIM */
+ uint32_t ticket;
+
+ char name[PF_SOURCELIM_NAME_LEN];
+ uint32_t id;
+#define PF_SOURCELIM_ID_NONE 0 /* pf_match_rule() skips the source limiter check for rules carrying this id */
+#define PF_SOURCELIM_ID_MIN 1
+#define PF_SOURCELIM_ID_MAX 255 /* fits in pf_state uint8_t */
+
+ /* limit on the total number of address entries */
+ unsigned int entries;
+
+ /* limit on the number of states per address entry */
+ unsigned int limit;
+
+ /* rate limit on the creation of states by an address entry */
+ struct {
+ unsigned int limit;
+ unsigned int seconds;
+ } rate;
+
+ /*
+ * when the number of states on an entry exceeds hwm, add
+ * the address to the specified table. when the number of
+ * states goes below lwm, remove it from the table.
+ */
+ char overload_tblname[PF_TABLE_NAME_SIZE];
+ unsigned int overload_hwm;
+ unsigned int overload_lwm;
+
+ /*
+ * mask addresses before they're used for entries. /64s
+ * everywhere for inet6 makes it easy to use too much memory.
+ */
+ unsigned int inet_prefix;
+ unsigned int inet6_prefix;
+
+ char description[PF_SOURCELIM_DESCR_LEN];
+
+ /* kernel state for GET ioctls */
+ unsigned int nentries; /* gauge */
+ unsigned int inuse; /* gauge */
+
+ uint64_t addrallocs; /* counter; presumably mirrors pfsrlim_counters.addrallocs (source entries allocated) -- confirm in the ioctl handler */
+ uint64_t addrnomem; /* counter; presumably mirrors pfsrlim_counters.addrnomem (entry allocation failed) -- confirm */
+ uint64_t admitted; /* counter */
+ uint64_t addrlimited; /* counter; presumably mirrors pfsrlim_counters.addrlimited (entry limit was hit) -- confirm */
+ uint64_t hardlimited; /* counter */
+ uint64_t ratelimited; /* counter */
+};
+
+struct pfioc_source_entry { /* one source address entry; struct pfioc_source points at these through its key and entries members */
+ sa_family_t af;
+ unsigned int rdomain;
+ struct pf_addr addr;
+
+ /* stats */
+
+ unsigned int inuse; /* gauge */
+ uint64_t admitted; /* counter */
+ uint64_t hardlimited; /* counter */
+ uint64_t ratelimited; /* counter */
+};
+
+struct pfioc_source { /* argument of DIOCGETSOURCE and DIOCGETNSOURCE */
+ char name[PF_SOURCELIM_NAME_LEN];
+ uint32_t id;
+
+ /* copied from the parent source limiter */
+
+ unsigned int inet_prefix;
+ unsigned int inet6_prefix;
+ unsigned int limit;
+
+ /* source entries */
+ size_t entry_size; /* sizeof(struct pfioc_source_entry) */
+
+ struct pfioc_source_entry *key; /* NOTE(review): presumably the entry a lookup starts from -- confirm against the ioctl handler */
+ struct pfioc_source_entry *entries;
+ size_t entrieslen; /* bytes */
+};
+
+struct pfioc_source_kill { /* argument of DIOCCLRSOURCE */
+ char name[PF_SOURCELIM_NAME_LEN];
+ uint32_t id;
+ unsigned int rdomain;
+ sa_family_t af;
+ struct pf_addr addr;
+
+ unsigned int rmstates; /* kill the states too? */
+};
+
#ifdef _KERNEL
struct pf_kstatus {
counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */
@@ -2179,6 +2569,15 @@ struct pfioc_iface {
#define DIOCGETETHRULESETS _IOWR('D', 100, struct pfioc_nv)
#define DIOCGETETHRULESET _IOWR('D', 101, struct pfioc_nv)
#define DIOCSETREASS _IOWR('D', 102, u_int32_t)
+#define DIOCADDSTATELIM _IOW('D', 103, struct pfioc_statelim)
+#define DIOCADDSOURCELIM _IOW('D', 104, struct pfioc_sourcelim)
+#define DIOCGETSTATELIM _IOWR('D', 105, struct pfioc_statelim)
+#define DIOCGETSOURCELIM _IOWR('D', 106, struct pfioc_sourcelim)
+#define DIOCGETSOURCE _IOWR('D', 107, struct pfioc_source)
+#define DIOCGETNSTATELIM _IOWR('D', 108, struct pfioc_statelim)
+#define DIOCGETNSOURCELIM _IOWR('D', 109, struct pfioc_sourcelim)
+#define DIOCGETNSOURCE _IOWR('D', 110, struct pfioc_source)
+#define DIOCCLRSOURCE _IOWR('D', 111, struct pfioc_source_kill)
struct pf_ifspeed_v0 {
char ifname[IFNAMSIZ];
@@ -2420,6 +2819,14 @@ pf_get_time(void)
return ((t.tv_sec * 1000) + (t.tv_usec / 1000));
}
+static inline uint64_t
+SEC_TO_NSEC(uint64_t seconds)
+{
+	const uint64_t nsec_per_sec = 1000000000ULL;
+
+	/*
+	 * Convert seconds to nanoseconds, saturating at UINT64_MAX when the
+	 * product would not fit in 64 bits.
+	 */
+	return ((seconds <= UINT64_MAX / nsec_per_sec) ?
+	    seconds * nsec_per_sec : UINT64_MAX);
+}
+
extern struct pf_kstate *pf_find_state_byid(uint64_t, uint32_t);
extern struct pf_kstate *pf_find_state_all(
const struct pf_state_key_cmp *,
@@ -2554,6 +2961,7 @@ int pfr_clr_tstats(struct pfr_table *, int, int *, int);
int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
int pfr_clr_addrs(struct pfr_table *, int *, int);
int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, time_t);
+int pfr_remove_kentry(struct pfr_ktable *, struct pfr_addr *);
int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
int);
int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 60ca9039e9ce..88b110d744ee 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -360,6 +360,8 @@ static int pf_tcp_track_sloppy(struct pf_kstate *,
struct pf_pdesc *, u_short *,
struct pf_state_peer *, struct pf_state_peer *,
u_int8_t, u_int8_t);
+static __inline int pf_synproxy_ack(struct pf_krule *, struct pf_pdesc *,
+ struct pf_kstate **, struct pf_rule_actions *);
static int pf_test_state(struct pf_kstate **, struct pf_pdesc *,
u_short *);
int pf_icmp_state_lookup(struct pf_state_key_cmp *,
@@ -426,6 +428,269 @@ static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t);
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
+static inline int
+pf_statelim_id_cmp(const struct pf_statelim *a, const struct pf_statelim *b)
+{
+	/* Three-way comparison of the limiter ids: -1, 0 or 1. */
+	if (a->pfstlim_id < b->pfstlim_id)
+		return (-1);
+
+	return ((a->pfstlim_id > b->pfstlim_id) ? 1 : 0);
+}
+
+RB_GENERATE(pf_statelim_id_tree, pf_statelim, pfstlim_id_tree,
+ pf_statelim_id_cmp);
+
+static inline int
+pf_statelim_nm_cmp(const struct pf_statelim *a, const struct pf_statelim *b)
+{
+	/* String comparison of the names, bounded by the name field's size. */
+	const size_t nmlen = sizeof(a->pfstlim_nm);
+
+	return (strncmp(a->pfstlim_nm, b->pfstlim_nm, nmlen));
+}
+
+RB_GENERATE(pf_statelim_nm_tree, pf_statelim, pfstlim_nm_tree,
+ pf_statelim_nm_cmp);
+
+VNET_DEFINE(struct pf_statelim_id_tree, pf_statelim_id_tree_active);
+VNET_DEFINE(struct pf_statelim_list, pf_statelim_list_active);
+VNET_DEFINE(struct pf_statelim_id_tree, pf_statelim_id_tree_inactive);
+VNET_DEFINE(struct pf_statelim_nm_tree, pf_statelim_nm_tree_inactive);
+VNET_DEFINE(struct pf_statelim_list, pf_statelim_list_inactive);
+
+static inline int
+pf_sourcelim_id_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b)
+{
+	/* Three-way comparison of the limiter ids: -1, 0 or 1. */
+	if (a->pfsrlim_id < b->pfsrlim_id)
+		return (-1);
+
+	return ((a->pfsrlim_id > b->pfsrlim_id) ? 1 : 0);
+}
+
+RB_GENERATE(pf_sourcelim_id_tree, pf_sourcelim, pfsrlim_id_tree,
+ pf_sourcelim_id_cmp);
+
+static inline int
+pf_sourcelim_nm_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b)
+{
+	/* String comparison of the names, bounded by the name field's size. */
+	const size_t nmlen = sizeof(a->pfsrlim_nm);
+
+	return (strncmp(a->pfsrlim_nm, b->pfsrlim_nm, nmlen));
+}
+
+RB_GENERATE(pf_sourcelim_nm_tree, pf_sourcelim, pfsrlim_nm_tree,
+ pf_sourcelim_nm_cmp);
+
+static inline int
+pf_source_cmp(const struct pf_source *a, const struct pf_source *b)
+{
+	/* Sort key: address family first, then routing domain, then address. */
+	if (a->pfsr_af != b->pfsr_af)
+		return ((a->pfsr_af > b->pfsr_af) ? 1 : -1);
+
+	if (a->pfsr_rdomain != b->pfsr_rdomain)
+		return ((a->pfsr_rdomain > b->pfsr_rdomain) ? 1 : -1);
+
+	/* Same family and domain: the address comparison decides. */
+	return (pf_addr_cmp(&a->pfsr_addr, &b->pfsr_addr, a->pfsr_af));
+}
+
+RB_GENERATE(pf_source_tree, pf_source, pfsr_tree, pf_source_cmp);
+
+static inline int
+pf_source_ioc_cmp(const struct pf_source *a, const struct pf_source *b)
+{
+	size_t w;
+
+	/* Address family and routing domain take precedence over the address. */
+	if (a->pfsr_af != b->pfsr_af)
+		return ((a->pfsr_af > b->pfsr_af) ? 1 : -1);
+
+	if (a->pfsr_rdomain != b->pfsr_rdomain)
+		return ((a->pfsr_rdomain > b->pfsr_rdomain) ? 1 : -1);
+
+	/*
+	 * Walk the address one 32-bit word at a time.  Each word goes through
+	 * ntohl() before it is compared, so the ordering is decided on the
+	 * converted values rather than on the stored representation.
+	 */
+	for (w = 0; w < nitems(a->pfsr_addr.addr32); w++) {
+		uint32_t lhs = ntohl(a->pfsr_addr.addr32[w]);
+		uint32_t rhs = ntohl(b->pfsr_addr.addr32[w]);
+
+		if (lhs != rhs)
+			return ((lhs > rhs) ? 1 : -1);
+	}
+
+	return (0);
+}
+
+RB_GENERATE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, pf_source_ioc_cmp);
+
+VNET_DEFINE(struct pf_sourcelim_id_tree, pf_sourcelim_id_tree_active);
+VNET_DEFINE(struct pf_sourcelim_list, pf_sourcelim_list_active);
+
+VNET_DEFINE(struct pf_sourcelim_id_tree, pf_sourcelim_id_tree_inactive);
+VNET_DEFINE(struct pf_sourcelim_nm_tree, pf_sourcelim_nm_tree_inactive);
+VNET_DEFINE(struct pf_sourcelim_list, pf_sourcelim_list_inactive);
+
+static inline struct pf_statelim *
+pf_statelim_find(uint32_t id)
+{
+	struct pf_statelim needle;
+	struct pf_statelim *found;
+
+	/*
+	 * The id comparator reads pfstlim_id and nothing else, so the rest of
+	 * the on-stack lookup key is deliberately left uninitialised.
+	 */
+	needle.pfstlim_id = id;
+
+	found = RB_FIND(pf_statelim_id_tree, &V_pf_statelim_id_tree_active,
+	    &needle);
+
+	return (found);
+}
+
+static inline struct pf_sourcelim *
+pf_sourcelim_find(uint32_t id)
+{
+	struct pf_sourcelim needle;
+	struct pf_sourcelim *found;
+
+	/*
+	 * The id comparator reads pfsrlim_id and nothing else, so the rest of
+	 * the on-stack lookup key is deliberately left uninitialised.
+	 */
+	needle.pfsrlim_id = id;
+
+	found = RB_FIND(pf_sourcelim_id_tree, &V_pf_sourcelim_id_tree_active,
+	    &needle);
+
+	return (found);
+}
+
+struct pf_source_list pf_source_gc = TAILQ_HEAD_INITIALIZER(pf_source_gc); /* source entries with no states in use, queued for pf_source_purge(); NOTE(review): a plain global, unlike the VNET_DEFINE'd limiter trees -- confirm that sharing it across vnets is intended */
+
+static void
+pf_source_purge(void)
+{
+	struct pf_source *sr, *next;
+
+	/*
+	 * Walk the list of unused source entries and free the ones whose
+	 * hold-off time has passed.  The successor is fetched before the
+	 * current entry can be unlinked and freed.
+	 */
+	for (sr = TAILQ_FIRST(&pf_source_gc); sr != NULL; sr = next) {
+		struct pf_sourcelim *parent = sr->pfsr_parent;
+
+		next = TAILQ_NEXT(sr, pfsr_empty_gc);
+
+		if (time_uptime > sr->pfsr_empty_ts +
+		    parent->pfsrlim_rate.seconds + 1) {
+			TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc);
+
+			/* unhook the entry from both of the limiter's trees */
+			RB_REMOVE(pf_source_tree, &parent->pfsrlim_sources,
+			    sr);
+			RB_REMOVE(pf_source_ioc_tree,
+			    &parent->pfsrlim_ioc_sources, sr);
+			parent->pfsrlim_nsources--;
+
+			free(sr, M_PF_SOURCE_LIM);
+		}
+	}
+}
+
+static void
+pf_source_pfr_addr(struct pfr_addr *p, const struct pf_source *sr)
+{
+	struct pf_sourcelim *parent = sr->pfsr_parent;
+
+	/*
+	 * Describe a source entry as a table address: start from an all-zero
+	 * pfr_addr, then fill in the family, the limiter's prefix length for
+	 * that family and the entry's address.  Any other family leaves only
+	 * pfra_af set.
+	 */
+	memset(p, 0, sizeof(*p));
+	p->pfra_af = sr->pfsr_af;
+
+	if (sr->pfsr_af == AF_INET) {
+		p->pfra_net = parent->pfsrlim_ipv4_prefix;
+		p->pfra_ip4addr = sr->pfsr_addr.v4;
+	}
+#ifdef INET6
+	else if (sr->pfsr_af == AF_INET6) {
+		p->pfra_net = parent->pfsrlim_ipv6_prefix;
+		p->pfra_ip6addr = sr->pfsr_addr.v6;
+	}
+#endif /* INET6 */
+}
+
+static void
+pf_source_used(struct pf_source *sr)
+{
+	struct pf_sourcelim *parent = sr->pfsr_parent;
+	struct pfr_ktable *tbl;
+	struct pfr_addr p;
+	unsigned int before;
+
+	/* Account for one more state and charge one rate token. */
+	before = sr->pfsr_inuse++;
+	sr->pfsr_rate_ts += parent->pfsrlim_rate_token;
+
+	if (before == 0) {
+		/* The entry was unused until now: take it off the gc list. */
+		TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc);
+		return;
+	}
+
+	/*
+	 * With an overload table configured, add the address once the count
+	 * seen before this state reaches the high water mark, unless the
+	 * entry is already flagged as being in the table.
+	 */
+	tbl = parent->pfsrlim_overload.table;
+	if (tbl == NULL || before < parent->pfsrlim_overload.hwm ||
+	    sr->pfsr_intable)
+		return;
+
+	pf_source_pfr_addr(&p, sr);
+	pfr_insert_kentry(tbl, &p, time_second);
+	sr->pfsr_intable = 1;
+}
+
+static void
+pf_source_rele(struct pf_source *sr)
+{
+	struct pf_sourcelim *parent = sr->pfsr_parent;
+	struct pfr_ktable *tbl = parent->pfsrlim_overload.table;
+	unsigned int remaining;
+
+	/* One state fewer is using this source entry. */
+	remaining = --sr->pfsr_inuse;
+
+	/*
+	 * An entry that was put into the overload table is taken out again
+	 * once its state count falls below the low water mark.
+	 */
+	if (tbl != NULL && sr->pfsr_intable &&
+	    remaining < parent->pfsrlim_overload.lwm) {
+		struct pfr_addr p;
+
+		pf_source_pfr_addr(&p, sr);
+		pfr_remove_kentry(tbl, &p);
+		sr->pfsr_intable = 0;
+	}
+
+	if (remaining != 0)
+		return;
+
+	/* Last state gone: queue the entry for pf_source_purge(). */
+	TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc);
+	sr->pfsr_empty_ts = time_uptime + parent->pfsrlim_rate.seconds;
+}
+
+static inline void
+pf_source_key(struct pf_sourcelim *srlim, struct pf_source *key,
+    sa_family_t af, const struct pf_addr *addr)
+{
+	size_t w;
+
+	/*
+	 * Fill in the fields the source tree comparators look at: address
+	 * family, routing domain (always 0 here) and the address masked with
+	 * the limiter's per-family mask.
+	 */
+	key->pfsr_rdomain = 0;
+	key->pfsr_af = af;
+
+	switch (af) {
+	case AF_INET:
+		/* IPv4 uses the first word only; clear the remaining ones. */
+		for (w = nitems(key->pfsr_addr.addr32) - 1; w > 0; w--)
+			key->pfsr_addr.addr32[w] = htonl(0);
+
+		key->pfsr_addr.addr32[0] =
+		    addr->v4.s_addr & srlim->pfsrlim_ipv4_mask.v4.s_addr;
+		break;
+#ifdef INET6
+	case AF_INET6:
+		for (w = 0; w < nitems(key->pfsr_addr.addr32); w++) {
+			key->pfsr_addr.addr32[w] = addr->addr32[w] &
+			    srlim->pfsrlim_ipv6_mask.addr32[w];
+		}
+		break;
+#endif
+	default:
+		unhandled_af(af);
+		/* NOTREACHED */
+	}
+}
+
+static inline struct pf_source *
+pf_source_find(struct pf_sourcelim *srlim, struct pf_source *key)
+{
+	struct pf_source *found;
+
+	/* Look the key up in this limiter's source tree; NULL when absent. */
+	found = RB_FIND(pf_source_tree, &srlim->pfsrlim_sources, key);
+
+	return (found);
+}
+
extern int pf_end_threads;
extern struct proc *pf_purge_proc;
@@ -519,6 +784,8 @@ BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd)
MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
+MALLOC_DEFINE(M_PF_STATE_LINK, "pf_state_link", "pf(4) state links");
+MALLOC_DEFINE(M_PF_SOURCE_LIM, "pf_source_lim", "pf(4) source limiter");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
@@ -1295,6 +1562,22 @@ pf_initialize(void)
/* Unlinked, but may be referenced rules. */
TAILQ_INIT(&V_pf_unlinked_rules);
+
+ /* State limiters */
+ RB_INIT(&V_pf_statelim_id_tree_inactive);
+ RB_INIT(&V_pf_statelim_nm_tree_inactive);
+ TAILQ_INIT(&V_pf_statelim_list_inactive);
+
+ RB_INIT(&V_pf_statelim_id_tree_active);
+ TAILQ_INIT(&V_pf_statelim_list_active);
+
+ /* Source limiters */
+ RB_INIT(&V_pf_sourcelim_id_tree_active);
+ TAILQ_INIT(&V_pf_sourcelim_list_active);
+
+ RB_INIT(&V_pf_sourcelim_id_tree_inactive);
+ RB_INIT(&V_pf_sourcelim_nm_tree_inactive);
+ TAILQ_INIT(&V_pf_sourcelim_list_inactive);
}
void
@@ -2680,6 +2963,7 @@ pf_purge_thread(void *unused __unused)
pf_purge_expired_fragments();
pf_purge_expired_src_nodes();
pf_purge_unlinked_rules();
+ pf_source_purge();
pfi_kkif_purge();
}
CURVNET_RESTORE();
@@ -2712,6 +2996,7 @@ pf_unload_vnet_purge(void)
pf_purge_expired_states(0, V_pf_hashmask);
pf_purge_fragments(UINT_MAX);
pf_purge_expired_src_nodes();
+ pf_source_purge();
/*
* Now all kifs & rules should be unreferenced,
@@ -2817,6 +3102,7 @@ int
pf_remove_state(struct pf_kstate *s)
{
struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
+ struct pf_state_link *pfl;
NET_EPOCH_ASSERT();
PF_HASHROW_ASSERT(ih);
@@ -2858,6 +3144,63 @@ pf_remove_state(struct pf_kstate *s)
s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
+ while ((pfl = SLIST_FIRST(&s->linkage)) != NULL) {
+ struct pf_state_link_list *list;
+ unsigned int gen;
+
+ SLIST_REMOVE_HEAD(&s->linkage, pfl_linkage);
+
+ switch (pfl->pfl_type) {
+ case PF_STATE_LINK_TYPE_STATELIM: {
+ struct pf_statelim *stlim;
+
+ stlim = pf_statelim_find(s->statelim);
+ KASSERT(stlim != NULL,
+ ("pf_state %p pfl %p cannot find statelim %u", s,
+ pfl, s->statelim));
+
+ gen = pf_statelim_enter(stlim);
+ stlim->pfstlim_inuse--;
+ pf_statelim_leave(stlim, gen);
+
+ list = &stlim->pfstlim_states;
+ break;
+ }
+ case PF_STATE_LINK_TYPE_SOURCELIM: {
+ struct pf_sourcelim *srlim;
+ struct pf_source key, *sr;
+
+ srlim = pf_sourcelim_find(s->sourcelim);
+ KASSERT(srlim != NULL,
+ ("pf_state %p pfl %p cannot find sourcelim %u", s,
+ pfl, s->sourcelim));
+
+ pf_source_key(srlim, &key, s->key[PF_SK_WIRE]->af,
+ &s->key[PF_SK_WIRE]->addr[0 /* XXX or 1? */]);
+
+ sr = pf_source_find(srlim, &key);
+ KASSERT(sr != NULL,
+ ("pf_state %p pfl %p cannot find source in %u", s,
+ pfl, s->sourcelim));
+
+ gen = pf_sourcelim_enter(srlim);
+ srlim->pfsrlim_counters.inuse--;
+ pf_sourcelim_leave(srlim, gen);
+ pf_source_rele(sr);
+
+ list = &sr->pfsr_states;
+ break;
+ }
+ default:
+ panic("%s: unexpected link type on pfl %p", __func__,
+ pfl);
+ }
+
+ PF_STATE_LOCK_ASSERT(s);
+ TAILQ_REMOVE(list, pfl, pfl_link);
+ free(pfl, M_PF_STATE_LINK);
+ }
+
PF_HASHROW_UNLOCK(ih);
pf_detach_state(s);
@@ -5656,6 +5999,11 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset,
r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr);
while (r != NULL) {
+ struct pf_statelim *stlim = NULL;
+ struct pf_sourcelim *srlim = NULL;
+ struct pf_source *sr = NULL;
+ unsigned int gen;
+
if (ctx->pd->related_rule) {
*ctx->rm = ctx->pd->related_rule;
break;
@@ -5757,6 +6105,124 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset,
pf_osfp_fingerprint(pd, ctx->th),
r->os_fingerprint)),
TAILQ_NEXT(r, entries));
+ if (r->statelim != PF_STATELIM_ID_NONE) {
+ stlim = pf_statelim_find(r->statelim);
+
+ /*
+ * Treat a missing limiter like an exhausted limiter.
+ * There is no "backend" to get a resource out of
+ * so the rule can't create state.
+ */
+ PF_TEST_ATTRIB(stlim == NULL, TAILQ_NEXT(r, entries));
+
+ /*
+ * An overcommitted pool means this rule
+ * can't create state.
+ */
+ if (stlim->pfstlim_inuse >= stlim->pfstlim_limit) {
+ gen = pf_statelim_enter(stlim);
+ stlim->pfstlim_counters.hardlimited++;
+ pf_statelim_leave(stlim, gen);
+ r = TAILQ_NEXT(r, entries);
+ continue;
+ }
+
+ /*
+ * Is access to the pool rate limited?
+ */
+ if (stlim->pfstlim_rate.limit != 0) {
+ struct timespec ts;
+ getnanouptime(&ts);
+ uint64_t diff = SEC_TO_NSEC(ts.tv_sec) +
+ ts.tv_nsec - stlim->pfstlim_rate_ts;
+
+ if (diff < stlim->pfstlim_rate_token) {
+ gen = pf_statelim_enter(stlim);
+ stlim->pfstlim_counters.ratelimited++;
+ pf_statelim_leave(stlim, gen);
+ r = TAILQ_NEXT(r, entries);
+ continue;
+ }
+
+ if (diff > stlim->pfstlim_rate_bucket) {
+ stlim->pfstlim_rate_ts =
+ SEC_TO_NSEC(ts.tv_sec) + ts.tv_nsec -
+ stlim->pfstlim_rate_bucket;
+ }
+ }
+ }
+
+ if (r->sourcelim != PF_SOURCELIM_ID_NONE) {
+ struct pf_source key;
+
+ srlim = pf_sourcelim_find(r->sourcelim);
+
+ /*
+ * Treat a missing pool like an overcommitted pool.
+ * There is no "backend" to get a resource out of
+ * so the rule can't create state.
+ */
+ PF_TEST_ATTRIB(srlim == NULL, TAILQ_NEXT(r, entries));
+
+ pf_source_key(srlim, &key, ctx->pd->af,
+ ctx->pd->src);
+ sr = pf_source_find(srlim, &key);
+ if (sr != NULL) {
+ /*
+ * An overcommitted limiter means this rule
+ * can't create state.
+ */
+ if (sr->pfsr_inuse >= srlim->pfsrlim_limit) {
+ sr->pfsr_counters.hardlimited++;
+ gen = pf_sourcelim_enter(srlim);
+ srlim->pfsrlim_counters.hardlimited++;
+ pf_sourcelim_leave(srlim, gen);
+ r = TAILQ_NEXT(r, entries);
+ continue;
+ }
+
+ /*
+ * Is access to the pool rate limited?
+ */
+ if (srlim->pfsrlim_rate.limit != 0) {
+ struct timespec ts;
+ getnanouptime(&ts);
+ uint64_t diff = SEC_TO_NSEC(ts.tv_sec) +
+ ts.tv_nsec - sr->pfsr_rate_ts;
+
+ if (diff < srlim->pfsrlim_rate_token) {
+ sr->pfsr_counters.ratelimited++;
+ gen = pf_sourcelim_enter(srlim);
+ srlim->pfsrlim_counters
+ .ratelimited++;
+ pf_sourcelim_leave(srlim, gen);
+ r = TAILQ_NEXT(r, entries);
+ continue;
+ }
+
+ if (diff > srlim->pfsrlim_rate_bucket) {
+ sr->pfsr_rate_ts =
+ SEC_TO_NSEC(ts.tv_sec) + ts.tv_nsec -
+ srlim->pfsrlim_rate_bucket;
+ }
+ }
+ } else {
+ /*
+ * a new source entry will (should)
+ * admit a state.
+ */
+
+ if (srlim->pfsrlim_nsources >=
+ srlim->pfsrlim_entries) {
+ gen = pf_sourcelim_enter(srlim);
+ srlim->pfsrlim_counters.addrlimited++;
+ pf_sourcelim_leave(srlim, gen);
+ r = TAILQ_NEXT(r, entries);
+ continue;
+ }
+ }
+ }
+
/* must be last! */
if (r->pktrate.limit) {
PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
@@ -5833,6 +6299,13 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset,
* ruleset, where anchor belongs to.
*/
ctx->arsm = ctx->aruleset;
+ /*
+ * state/source pools
+ */
+
+ ctx->statelim = stlim;
+ ctx->sourcelim = srlim;
+ ctx->source = sr;
}
if (pd->act.log & PF_LOG_MATCHES)
pf_log_matches(pd, r, ctx->a, ruleset, match_rules);
@@ -6085,6 +6558,13 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
return (action);
}
+ if (pd->proto == IPPROTO_TCP &&
+ r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
+ action = pf_synproxy_ack(r, pd, sm, &ctx.act);
+ if (action != PF_PASS)
+ goto cleanup; /* PF_SYNPROXY_DROP */
+ }
+
nat64 = pd->af != pd->naf;
if (nat64) {
int ret;
@@ -6157,6 +6637,10 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx,
{
struct pf_pdesc *pd = ctx->pd;
struct pf_kstate *s = NULL;
+ struct pf_statelim *stlim = NULL;
+ struct pf_sourcelim *srlim = NULL;
+ struct pf_source *sr = NULL;
+ struct pf_state_link *pfl;
struct pf_ksrc_node *sns[PF_SN_MAX] = { NULL };
/*
* XXXKS: The hash for PF_SN_LIMIT and PF_SN_ROUTE should be the same
@@ -6219,6 +6703,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx,
s->nat_rule = ctx->nr;
s->anchor = ctx->a;
s->match_rules = *match_rules;
+ SLIST_INIT(&s->linkage);
memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions));
if (pd->act.allow_opts)
@@ -6334,6 +6819,98 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx,
KASSERT((ctx->sk != NULL && ctx->nk != NULL), ("%s: nr %p sk %p, nk %p",
__func__, ctx->nr, ctx->sk, ctx->nk));
+ stlim = ctx->statelim;
+ if (stlim != NULL) {
+ unsigned int gen;
+
+ pfl = malloc(sizeof(*pfl), M_PF_STATE_LINK, M_NOWAIT);
+ if (pfl == NULL) {
+ REASON_SET(&ctx->reason, PFRES_MEMORY);
+ goto csfailed;
+ }
+
+ gen = pf_statelim_enter(stlim);
+ stlim->pfstlim_counters.admitted++;
+ stlim->pfstlim_inuse++;
+ pf_statelim_leave(stlim, gen);
+
+ stlim->pfstlim_rate_ts += stlim->pfstlim_rate_token;
+
+ s->statelim = stlim->pfstlim_id;
+ pfl->pfl_state = s;
+ pfl->pfl_type = PF_STATE_LINK_TYPE_STATELIM;
+
+ TAILQ_INSERT_TAIL(&stlim->pfstlim_states, pfl, pfl_link);
+ SLIST_INSERT_HEAD(&s->linkage, pfl, pfl_linkage);
+ }
+
+	/*
+	 * Source limiter accounting.  ctx->sourcelim and ctx->source are the
+	 * limiter and the (possibly NULL) source entry recorded for the
+	 * matching rule.
+	 *
+	 * The function-scope sr is assigned here instead of being shadowed by
+	 * a block-local declaration: the unwind code at the end of this
+	 * function dereferences sr for PF_STATE_LINK_TYPE_SOURCELIM links, and
+	 * a shadowing variable would leave it NULL there.
+	 */
+	srlim = ctx->sourcelim;
+	if (srlim != NULL) {
+		unsigned int gen;
+
+		sr = ctx->source;
+		if (sr == NULL) {
+			/* No entry for this source yet: allocate and index one. */
+			sr = malloc(sizeof(*sr), M_PF_SOURCE_LIM,
+			    M_NOWAIT | M_ZERO);
+			if (sr == NULL) {
+				gen = pf_sourcelim_enter(srlim);
+				srlim->pfsrlim_counters.addrnomem++;
+				pf_sourcelim_leave(srlim, gen);
+				REASON_SET(&ctx->reason, PFRES_MEMORY);
+				goto csfailed;
+			}
+
+			sr->pfsr_parent = srlim;
+			pf_source_key(srlim, sr, ctx->pd->af, ctx->pd->src);
+			TAILQ_INIT(&sr->pfsr_states);
+
+			if (RB_INSERT(pf_source_tree, &srlim->pfsrlim_sources,
+			    sr) != NULL) {
+				panic("%s: source pool %u (%p) "
+				    "insert collision %p?!",
+				    __func__, srlim->pfsrlim_id, srlim, sr);
+			}
+
+			if (RB_INSERT(pf_source_ioc_tree,
+			    &srlim->pfsrlim_ioc_sources, sr) != NULL) {
+				panic("%s: source pool %u (%p) ioc "
+				    "insert collision (%p)?!",
+				    __func__, srlim->pfsrlim_id, srlim, sr);
+			}
+
+			/*
+			 * A fresh entry has no states yet, so it starts out on
+			 * the gc list; pf_source_used() below removes it again
+			 * when the first state is accounted.
+			 */
+			sr->pfsr_empty_ts = time_uptime;
+			TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc);
+
+			gen = pf_sourcelim_enter(srlim);
+			srlim->pfsrlim_nsources++;
+			srlim->pfsrlim_counters.addrallocs++;
+			pf_sourcelim_leave(srlim, gen);
+		} else {
+			MPASS(sr->pfsr_parent == srlim);
+		}
+
+		/*
+		 * Allocate the link before touching any counters so that an
+		 * allocation failure leaves the accounting untouched.
+		 */
+		pfl = malloc(sizeof(*pfl), M_PF_STATE_LINK, M_NOWAIT);
+		if (pfl == NULL) {
+			REASON_SET(&ctx->reason, PFRES_MEMORY);
+			goto csfailed;
+		}
+
+		pf_source_used(sr);
+
+		sr->pfsr_counters.admitted++;
+
+		gen = pf_sourcelim_enter(srlim);
+		srlim->pfsrlim_counters.inuse++;
+		srlim->pfsrlim_counters.admitted++;
+		pf_sourcelim_leave(srlim, gen);
+
+		/* Tie the state to the source entry, and the entry to the state. */
+		s->sourcelim = srlim->pfsrlim_id;
+		pfl->pfl_state = s;
+		pfl->pfl_type = PF_STATE_LINK_TYPE_SOURCELIM;
+
+		TAILQ_INSERT_TAIL(&sr->pfsr_states, pfl, pfl_link);
+		SLIST_INSERT_HEAD(&s->linkage, pfl, pfl_linkage);
+	}
+
/* Swap sk/nk for PF_OUT. */
if (pf_state_insert(BOUND_IFACE(s, pd), pd->kif,
(pd->dir == PF_IN) ? ctx->sk : ctx->nk,
@@ -6400,6 +6977,44 @@ csfailed:
drop:
if (s != NULL) {
+ struct pf_state_link *npfl;
+
+ SLIST_FOREACH_SAFE(pfl, &s->linkage, pfl_linkage, npfl) {
+ struct pf_state_link_list *list;
+ unsigned int gen;
+
+ /* who needs KASSERTS when we have NULL derefs */
+
+ switch (pfl->pfl_type) {
+ case PF_STATE_LINK_TYPE_STATELIM:
+ gen = pf_statelim_enter(stlim);
+ stlim->pfstlim_inuse--;
+ pf_statelim_leave(stlim, gen);
+
+ stlim->pfstlim_rate_ts -=
+ stlim->pfstlim_rate_token;
+ list = &stlim->pfstlim_states;
+ break;
+ case PF_STATE_LINK_TYPE_SOURCELIM:
+ gen = pf_sourcelim_enter(srlim);
+ srlim->pfsrlim_counters.inuse--;
+ pf_sourcelim_leave(srlim, gen);
+
+ sr->pfsr_rate_ts -= srlim->pfsrlim_rate_token;
+ pf_source_rele(sr);
+
+ list = &sr->pfsr_states;
+ break;
+ default:
+ panic("%s: unexpected link type on pfl %p",
+ __func__, pfl);
+ }
+
+ TAILQ_REMOVE(list, pfl, pfl_link);
+ PF_STATE_LOCK_ASSERT(s);
+ free(pfl, M_PF_STATE_LINK);
+ }
+
pf_src_tree_remove_state(s);
s->timeout = PFTM_UNLINKED;
pf_free_state(s);
@@ -7164,6 +7779,38 @@ pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason)
return (PF_PASS);
}
+static __inline int /* PF_PASS unless the segment has SYN set and ACK clear; PF_SYNPROXY_DROP after answering such a SYN */
+pf_synproxy_ack(struct pf_krule *r, struct pf_pdesc *pd, struct pf_kstate **sm,
+ struct pf_rule_actions *act)
+{
+ struct tcphdr *th = &pd->hdr.tcp;
+ struct pf_kstate *s;
+ u_int16_t mss;
+ int rtid;
+ u_short reason; /* only written through REASON_SET() below; never read here or handed back to the caller */
+
+ if ((th->th_flags & (TH_SYN | TH_ACK)) != TH_SYN) /* anything other than SYN-without-ACK is left untouched */
+ return (PF_PASS);
+
+ s = *sm; /* dereferenced unconditionally below, so the caller has to supply a state */
+ rtid = act->rtableid; /* used for the MSS calculation; pf_send_tcp() below is given r->rtableid instead */
+
+ pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
+ s->src.seqhi = arc4random(); /* random value, passed to pf_send_tcp() below */
+ /* Find mss option */
+ mss = pf_get_mss(pd);
+ mss = pf_calc_mss(pd->src, pd->af, rtid, mss); /* each pf_calc_mss() call takes the previous result as its input */
+ mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
+ s->src.mss = mss;
+
+ pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, /* SYN|ACK with addresses and ports swapped relative to the received segment */
+ th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
+ TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, 0, r->rtableid, NULL);
+
+ REASON_SET(&reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+}
+
static int
pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
{
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index ca1815984797..0bc8b30181d6 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -136,6 +136,12 @@ static int pf_import_kaltq(struct pfioc_altq_v1 *,
struct pf_altq *, size_t);
#endif /* ALTQ */
+static void pf_statelim_commit(void);
+static void pf_statelim_rollback(void);
+static int pf_sourcelim_check(void);
+static void pf_sourcelim_commit(void);
+static void pf_sourcelim_rollback(void);
+
VNET_DEFINE(struct pf_krule, pf_default_rule);
static __inline int pf_krule_compare(struct pf_krule *,
@@ -187,6 +193,7 @@ VNET_DEFINE(uma_zone_t, pf_tag_z);
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
MALLOC_DEFINE(M_PF, "pf", "pf(4)");
+MALLOC_DEFINE(M_PF_STATE_LIM, "pf_state_lim", "pf(4) state limiter");
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
@@ -1318,6 +1325,12 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
rs->rules[rs_num].inactive.rcount--;
}
rs->rules[rs_num].inactive.open = 0;
+
+ if (anchor[0])
+ return (0);
+
+ pf_statelim_rollback();
+ pf_sourcelim_rollback();
return (0);
}
@@ -1437,6 +1450,7 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
struct pf_krule_global *old_tree;
int error;
u_int32_t old_rcount;
+ bool is_main_ruleset = anchor[0] == '\0';
PF_RULES_WASSERT();
@@ -1449,6 +1463,9 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
/* Calculate checksum for the main ruleset */
if (rs == &pf_main_ruleset) {
+ error = pf_sourcelim_check();
+ if (error != 0)
+ return (error);
error = pf_setup_pfsync_matching(rs);
if (error != 0)
return (error);
@@ -1507,6 +1524,13 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
pf_remove_if_empty_kruleset(rs);
pf_rule_tree_free(old_tree);
+ /* statelim/sourcelim/queue defs only in the main ruleset */
+ if (! is_main_ruleset || rs_num != PF_RULESET_FILTER)
+ return (0);
+
+ pf_statelim_commit();
+ pf_sourcelim_commit();
+
return (0);
}
@@ -1589,6 +1613,844 @@ pf_addr_copyout(struct pf_addr_wrap *addr)
}
}
+/*
+ * Validate a state limiter definition supplied by userland and queue it on
+ * the inactive (pending) set that pf_statelim_commit() later merges into the
+ * active one.
+ *
+ * Returns 0 on success, EINVAL if a field is out of range or the name is not
+ * NUL terminated within its buffer, and EBUSY if the ticket does not match
+ * the inactive main filter ruleset or the limiter collides with an entry
+ * already in the pending id or name tree.
+ */
+static int
+pf_statelim_add(const struct pfioc_statelim *ioc)
+{
+	struct pf_statelim *pfstlim;
+	int error;
+	size_t namelen;
+
+	if (ioc->id < PF_STATELIM_ID_MIN ||
+	    ioc->id > PF_STATELIM_ID_MAX)
+		return (EINVAL);
+
+	if (ioc->limit < PF_STATELIM_LIMIT_MIN ||
+	    ioc->limit > PF_STATELIM_LIMIT_MAX)
+		return (EINVAL);
+
+	/* a rate needs both a count and an interval, or neither */
+	if ((ioc->rate.limit == 0) != (ioc->rate.seconds == 0))
+		return (EINVAL);
+
+	/* reject names that fill the buffer without a terminating NUL */
+	namelen = strnlen(ioc->name, sizeof(ioc->name));
+	if (namelen == sizeof(ioc->name))
+		return (EINVAL);
+
+	/* M_WAITOK sleeps instead of failing, so there is no NULL to check */
+	pfstlim = malloc(sizeof(*pfstlim), M_PF_STATE_LIM, M_WAITOK | M_ZERO);
+
+	pfstlim->pfstlim_id = ioc->id;
+	/* M_ZERO above provides the NUL terminator for the copied name */
+	memcpy(pfstlim->pfstlim_nm, ioc->name, namelen);
+	pfstlim->pfstlim_limit = ioc->limit;
+	pfstlim->pfstlim_rate.limit = ioc->rate.limit;
+	pfstlim->pfstlim_rate.seconds = ioc->rate.seconds;
+
+	if (pfstlim->pfstlim_rate.limit) {
+		uint64_t bucket = SEC_TO_NSEC(pfstlim->pfstlim_rate.seconds);
+		struct timespec ts;
+
+		getnanouptime(&ts);
+
+		/* the rate timestamp starts one whole bucket in the past */
+		pfstlim->pfstlim_rate_ts = SEC_TO_NSEC(ts.tv_sec) + ts.tv_nsec -
+		    bucket;
+		/* nanoseconds of bucket consumed per admitted state */
+		pfstlim->pfstlim_rate_token = bucket /
+		    pfstlim->pfstlim_rate.limit;
+		pfstlim->pfstlim_rate_bucket = bucket;
+	}
+
+	TAILQ_INIT(&pfstlim->pfstlim_states);
+	mtx_init(&pfstlim->pfstlim_lock, "pf state limit", NULL, MTX_DEF);
+
+	PF_RULES_WLOCK();
+	if (ioc->ticket !=
+	    pf_main_ruleset.rules[PF_RULESET_FILTER].inactive.ticket) {
+		error = EBUSY;
+		goto unlock;
+	}
+
+	if (RB_INSERT(pf_statelim_id_tree, &V_pf_statelim_id_tree_inactive,
+	    pfstlim) != NULL) {
+		error = EBUSY;
+		goto unlock;
+	}
+
+	if (RB_INSERT(pf_statelim_nm_tree, &V_pf_statelim_nm_tree_inactive,
+	    pfstlim) != NULL) {
+		/* undo the id tree insert so no freed entry stays linked */
+		RB_REMOVE(pf_statelim_id_tree, &V_pf_statelim_id_tree_inactive,
+		    pfstlim);
+		error = EBUSY;
+		goto unlock;
+	}
+
+	TAILQ_INSERT_HEAD(&V_pf_statelim_list_inactive, pfstlim, pfstlim_list);
+
+	PF_RULES_WUNLOCK();
+
+	return (0);
+
+unlock:
+	PF_RULES_WUNLOCK();
+
+	/* the mutex was initialised above; destroy it before the free */
+	mtx_destroy(&pfstlim->pfstlim_lock);
+	free(pfstlim, M_PF_STATE_LIM);
+
+	return (error);
+}
+
+/*
+ * Detach every state still linked to a state limiter.  Each state has its
+ * statelim id cleared and the link removed from its linkage list under the
+ * state lock; the link entries themselves are moved onto "garbage" for the
+ * caller to free.
+ */
+static void
+pf_statelim_unlink(struct pf_statelim *pfstlim,
+    struct pf_state_link_list *garbage)
+{
+	struct pf_state_link *link;
+
+	/* unwire the links */
+	TAILQ_FOREACH(link, &pfstlim->pfstlim_states, pfl_link) {
+		struct pf_kstate *st = link->pfl_state;
+
+		/* if !rmst */
+		PF_STATE_LOCK(st);
+		st->statelim = 0;
+		SLIST_REMOVE(&st->linkage, link, pf_state_link, pfl_linkage);
+		PF_STATE_UNLOCK(st);
+	}
+
+	/* nothing is accounted against this limiter any more */
+	pfstlim->pfstlim_inuse = 0;
+
+	/* take the list away */
+	TAILQ_CONCAT(garbage, &pfstlim->pfstlim_states, pfl_link);
+}
+
+/*
+ * Merge the pending (inactive) state limiters into the active set.  The
+ * rules write lock must be held.
+ *
+ * A pending limiter whose id already exists in the active tree only donates
+ * its configuration to the existing entry and is then freed.  Active limiters
+ * that have no pending counterpart are unlinked from their states and freed.
+ * The inactive list and trees are reset to empty on return.
+ */
+void
+pf_statelim_commit(void)
+{
+	struct pf_statelim *pfstlim, *npfstlim, *opfstlim;
+	struct pf_statelim_list l = TAILQ_HEAD_INITIALIZER(l);
+	struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage);
+	struct pf_state_link *pfl, *npfl;
+
+	PF_RULES_WASSERT();
+
+	/* merge the new statelims into the current set */
+
+	/* start with an empty active list; the old entries wait on l */
+	TAILQ_CONCAT(&l, &V_pf_statelim_list_active, pfstlim_list);
+
+	/*
+	 * beware, the inactive list and trees get messed up here: entries
+	 * are relinked or freed without being removed from them, which is
+	 * why the _SAFE iterator is used and why they are reset below.
+	 */
+
+	/* try putting pending statelims into the active tree */
+	TAILQ_FOREACH_SAFE(pfstlim, &V_pf_statelim_list_inactive, pfstlim_list,
+	    npfstlim) {
+		opfstlim = RB_INSERT(pf_statelim_id_tree,
+		    &V_pf_statelim_id_tree_active, pfstlim);
+		if (opfstlim != NULL) {
+			/* this statelim already exists, merge */
+			opfstlim->pfstlim_limit = pfstlim->pfstlim_limit;
+			opfstlim->pfstlim_rate.limit =
+			    pfstlim->pfstlim_rate.limit;
+			opfstlim->pfstlim_rate.seconds =
+			    pfstlim->pfstlim_rate.seconds;
+
+			opfstlim->pfstlim_rate_ts = pfstlim->pfstlim_rate_ts;
+			opfstlim->pfstlim_rate_token =
+			    pfstlim->pfstlim_rate_token;
+			opfstlim->pfstlim_rate_bucket =
+			    pfstlim->pfstlim_rate_bucket;
+
+			memcpy(opfstlim->pfstlim_nm, pfstlim->pfstlim_nm,
+			    sizeof(opfstlim->pfstlim_nm));
+
+			/* use the existing statelim instead */
+			mtx_destroy(&pfstlim->pfstlim_lock);
+			free(pfstlim, M_PF_STATE_LIM);
+			TAILQ_REMOVE(&l, opfstlim, pfstlim_list);
+			pfstlim = opfstlim;
+		}
+
+		TAILQ_INSERT_TAIL(&V_pf_statelim_list_active, pfstlim,
+		    pfstlim_list);
+	}
+
+	/* clean up the now unused statelims from the old set */
+	TAILQ_FOREACH_SAFE(pfstlim, &l, pfstlim_list, npfstlim) {
+		pf_statelim_unlink(pfstlim, &garbage);
+
+		RB_REMOVE(pf_statelim_id_tree, &V_pf_statelim_id_tree_active,
+		    pfstlim);
+
+		/* the mutex was set up by pf_statelim_add(); tear it down */
+		mtx_destroy(&pfstlim->pfstlim_lock);
+		free(pfstlim, M_PF_STATE_LIM);
+	}
+
+	/* fix up the inactive tree */
+	RB_INIT(&V_pf_statelim_id_tree_inactive);
+	RB_INIT(&V_pf_statelim_nm_tree_inactive);
+	TAILQ_INIT(&V_pf_statelim_list_inactive);
+
+	/* release the state links collected by pf_statelim_unlink() */
+	TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl)
+		free(pfl, M_PF_STATE_LINK);
+}
+
+/*
+ * Tear down every source entry tracked by a source limiter.  Each entry is
+ * removed from both of the limiter's source trees (and from the empty-entry
+ * gc list when it has no states in use), its states are unwired under the
+ * state lock, and the entry is freed.  The state links are moved onto
+ * "garbage" for the caller to free.  Requires the rules write lock.
+ */
+static void
+pf_sourcelim_unlink(struct pf_sourcelim *pfsrlim,
+    struct pf_state_link_list *garbage)
+{
+	extern struct pf_source_list pf_source_gc;
+	struct pf_source *src;
+	struct pf_state_link *link;
+
+	PF_RULES_WASSERT();
+
+	/* keep pulling the root out until the tree is empty */
+	for (;;) {
+		src = RB_ROOT(&pfsrlim->pfsrlim_sources);
+		if (src == NULL)
+			break;
+
+		RB_REMOVE(pf_source_tree, &pfsrlim->pfsrlim_sources, src);
+		RB_REMOVE(pf_source_ioc_tree, &pfsrlim->pfsrlim_ioc_sources,
+		    src);
+		if (src->pfsr_inuse == 0)
+			TAILQ_REMOVE(&pf_source_gc, src, pfsr_empty_gc);
+
+		/* unwire the links */
+		TAILQ_FOREACH(link, &src->pfsr_states, pfl_link) {
+			struct pf_kstate *st = link->pfl_state;
+
+			PF_STATE_LOCK(st);
+			/* if !rmst */
+			st->sourcelim = 0;
+			SLIST_REMOVE(&st->linkage, link, pf_state_link,
+			    pfl_linkage);
+			PF_STATE_UNLOCK(st);
+		}
+
+		/* take the list away */
+		TAILQ_CONCAT(garbage, &src->pfsr_states, pfl_link);
+
+		free(src, M_PF_SOURCE_LIM);
+	}
+}
+
+/*
+ * Check whether the pending source limiters can be merged into the active
+ * ones.  A pending limiter conflicts with an active limiter of the same id
+ * only when the active one currently tracks sources and either the overload
+ * table name or one of the prefix lengths differs.
+ *
+ * Returns 0 if the merge can go ahead, EBUSY on the first conflict.
+ * Requires the rules write lock.
+ */
+int
+pf_sourcelim_check(void)
+{
+	struct pf_sourcelim *pending, *active;
+
+	PF_RULES_WASSERT();
+
+	TAILQ_FOREACH(pending, &V_pf_sourcelim_list_inactive, pfsrlim_list) {
+		active = RB_FIND(pf_sourcelim_id_tree,
+		    &V_pf_sourcelim_id_tree_active, pending);
+
+		/*
+		 * no conflict if this is new config, or if nothing is
+		 * tracked at the moment
+		 */
+		if (active == NULL || RB_EMPTY(&active->pfsrlim_sources))
+			continue;
+
+		if (strcmp(active->pfsrlim_overload.name,
+		    pending->pfsrlim_overload.name) != 0)
+			return (EBUSY);
+
+		/*
+		 * we should allow the prefixlens to get shorter
+		 * and merge pf_source entries.
+		 */
+		if (active->pfsrlim_ipv4_prefix !=
+		    pending->pfsrlim_ipv4_prefix)
+			return (EBUSY);
+		if (active->pfsrlim_ipv6_prefix !=
+		    pending->pfsrlim_ipv6_prefix)
+			return (EBUSY);
+	}
+
+	return (0);
+}
+
+/*
+ * Merge the pending (inactive) source limiters into the active set.  The
+ * rules write lock must be held.
+ *
+ * A pending limiter whose id already exists in the active tree donates its
+ * configuration and its overload table reference to the existing entry and
+ * is then freed; the existing entry keeps its rate timestamp.  Active
+ * limiters that have no pending counterpart are unlinked from their sources
+ * and states, detached from their overload table and freed.  The inactive
+ * list and trees are reset to empty on return.
+ */
+void
+pf_sourcelim_commit(void)
+{
+	struct pf_sourcelim *pfsrlim, *npfsrlim, *opfsrlim;
+	struct pf_sourcelim_list l = TAILQ_HEAD_INITIALIZER(l);
+	struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage);
+	struct pf_state_link *pfl, *npfl;
+
+	PF_RULES_WASSERT();
+
+	/* merge the new sourcelims into the current set */
+
+	/* start with an empty active list; the old entries wait on l */
+	TAILQ_CONCAT(&l, &V_pf_sourcelim_list_active, pfsrlim_list);
+
+	/*
+	 * beware, the inactive list and trees get messed up here: entries
+	 * are relinked or freed without being removed from them, which is
+	 * why the _SAFE iterator is used and why they are reset below.
+	 */
+
+	/* try putting pending sourcelims into the active tree */
+	TAILQ_FOREACH_SAFE(pfsrlim, &V_pf_sourcelim_list_inactive, pfsrlim_list,
+	    npfsrlim) {
+		opfsrlim = RB_INSERT(pf_sourcelim_id_tree,
+		    &V_pf_sourcelim_id_tree_active, pfsrlim);
+		if (opfsrlim != NULL) {
+			/* this sourcelim already exists, merge */
+			opfsrlim->pfsrlim_entries = pfsrlim->pfsrlim_entries;
+			opfsrlim->pfsrlim_limit = pfsrlim->pfsrlim_limit;
+			opfsrlim->pfsrlim_ipv4_prefix =
+			    pfsrlim->pfsrlim_ipv4_prefix;
+			opfsrlim->pfsrlim_ipv6_prefix =
+			    pfsrlim->pfsrlim_ipv6_prefix;
+			opfsrlim->pfsrlim_rate.limit =
+			    pfsrlim->pfsrlim_rate.limit;
+			opfsrlim->pfsrlim_rate.seconds =
+			    pfsrlim->pfsrlim_rate.seconds;
+
+			opfsrlim->pfsrlim_ipv4_mask =
+			    pfsrlim->pfsrlim_ipv4_mask;
+			opfsrlim->pfsrlim_ipv6_mask =
+			    pfsrlim->pfsrlim_ipv6_mask;
+
+			/* keep the existing pfsrlim_rate_ts */
+
+			opfsrlim->pfsrlim_rate_token =
+			    pfsrlim->pfsrlim_rate_token;
+			opfsrlim->pfsrlim_rate_bucket =
+			    pfsrlim->pfsrlim_rate_bucket;
+
+			/* swap the old table reference for the pending one */
+			if (opfsrlim->pfsrlim_overload.table != NULL) {
+				pfr_detach_table(
+				    opfsrlim->pfsrlim_overload.table);
+			}
+
+			strlcpy(opfsrlim->pfsrlim_overload.name,
+			    pfsrlim->pfsrlim_overload.name,
+			    sizeof(opfsrlim->pfsrlim_overload.name));
+			opfsrlim->pfsrlim_overload.hwm =
+			    pfsrlim->pfsrlim_overload.hwm;
+			opfsrlim->pfsrlim_overload.lwm =
+			    pfsrlim->pfsrlim_overload.lwm;
+			opfsrlim->pfsrlim_overload.table =
+			    pfsrlim->pfsrlim_overload.table;
+
+			memcpy(opfsrlim->pfsrlim_nm, pfsrlim->pfsrlim_nm,
+			    sizeof(opfsrlim->pfsrlim_nm));
+
+			/* use the existing sourcelim instead */
+			mtx_destroy(&pfsrlim->pfsrlim_lock);
+			free(pfsrlim, M_PF_SOURCE_LIM);
+			TAILQ_REMOVE(&l, opfsrlim, pfsrlim_list);
+			pfsrlim = opfsrlim;
+		}
+
+		TAILQ_INSERT_TAIL(&V_pf_sourcelim_list_active, pfsrlim,
+		    pfsrlim_list);
+	}
+
+	/* clean up the now unused sourcelims from the old set */
+	TAILQ_FOREACH_SAFE(pfsrlim, &l, pfsrlim_list, npfsrlim) {
+		pf_sourcelim_unlink(pfsrlim, &garbage);
+
+		RB_REMOVE(pf_sourcelim_id_tree, &V_pf_sourcelim_id_tree_active,
+		    pfsrlim);
+
+		if (pfsrlim->pfsrlim_overload.table != NULL)
+			pfr_detach_table(pfsrlim->pfsrlim_overload.table);
+
+		/* the mutex was set up by pf_sourcelim_add(); tear it down */
+		mtx_destroy(&pfsrlim->pfsrlim_lock);
+		free(pfsrlim, M_PF_SOURCE_LIM);
+	}
+
+	/* fix up the inactive tree */
+	RB_INIT(&V_pf_sourcelim_id_tree_inactive);
+	RB_INIT(&V_pf_sourcelim_nm_tree_inactive);
+	TAILQ_INIT(&V_pf_sourcelim_list_inactive);
+
+	/* release the state links collected by pf_sourcelim_unlink() */
+	TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl)
+		free(pfl, M_PF_STATE_LINK);
+}
+
+/*
+ * Discard every pending (inactive) state limiter and reset the inactive
+ * list and trees to empty.  Requires the rules write lock.
+ */
+void
+pf_statelim_rollback(void)
+{
+	struct pf_statelim *pfstlim, *npfstlim;
+
+	PF_RULES_WASSERT();
+
+	TAILQ_FOREACH_SAFE(pfstlim, &V_pf_statelim_list_inactive, pfstlim_list,
+	    npfstlim) {
+		/* pf_statelim_add() initialised the mutex; destroy it first */
+		mtx_destroy(&pfstlim->pfstlim_lock);
+		free(pfstlim, M_PF_STATE_LIM);
+	}
+
+	/* the entries are gone, so just reinitialise the containers */
+	TAILQ_INIT(&V_pf_statelim_list_inactive);
+	RB_INIT(&V_pf_statelim_id_tree_inactive);
+	RB_INIT(&V_pf_statelim_nm_tree_inactive);
+}
+
+/*
+ * Lookup callback for pf_statelim_get(): return the state limiter in "tree"
+ * that compares equal to "key", or NULL if there is none.
+ */
+static struct pf_statelim *
+pf_statelim_rb_find(struct pf_statelim_id_tree *tree, struct pf_statelim *key)
+{
+	struct pf_statelim *match;
+
+	PF_RULES_ASSERT();
+
+	match = RB_FIND(pf_statelim_id_tree, tree, key);
+	return (match);
+}
+
+/*
+ * Lookup callback for pf_statelim_get(): return the first state limiter in
+ * "tree" that is greater than or equal to "key" (RB_NFIND), or NULL if there
+ * is none.
+ */
+static struct pf_statelim *
+pf_statelim_rb_nfind(struct pf_statelim_id_tree *tree, struct pf_statelim *key)
+{
+	struct pf_statelim *match;
+
+	PF_RULES_ASSERT();
+
+	match = RB_NFIND(pf_statelim_id_tree, tree, key);
+	return (match);
+}
+
+/*
+ * Copy the configuration and counters of an active state limiter into the
+ * ioctl structure.  "rbt_op" selects how ioc->id is looked up in the active
+ * id tree (exact match or next-greater-or-equal).
+ *
+ * Returns 0 on success or ENOENT if the lookup finds nothing.
+ */
+static int
+pf_statelim_get(struct pfioc_statelim *ioc,
+    struct pf_statelim *(*rbt_op)(struct pf_statelim_id_tree *,
+    struct pf_statelim *))
+{
+	struct pf_statelim needle = { .pfstlim_id = ioc->id };
+	struct pf_statelim *lim;
+	PF_RULES_RLOCK_TRACKER;
+
+	PF_RULES_RLOCK();
+
+	lim = (*rbt_op)(&V_pf_statelim_id_tree_active, &needle);
+	if (lim == NULL) {
+		PF_RULES_RUNLOCK();
+		return (ENOENT);
+	}
+
+	/* configuration; the id is written back for the nfind case */
+	ioc->id = lim->pfstlim_id;
+	CTASSERT(sizeof(ioc->name) == sizeof(lim->pfstlim_nm));
+	memcpy(ioc->name, lim->pfstlim_nm, sizeof(ioc->name));
+	ioc->limit = lim->pfstlim_limit;
+	ioc->rate.limit = lim->pfstlim_rate.limit;
+	ioc->rate.seconds = lim->pfstlim_rate.seconds;
+
+	/* usage and counters */
+	ioc->inuse = lim->pfstlim_inuse;
+	ioc->admitted = lim->pfstlim_counters.admitted;
+	ioc->hardlimited = lim->pfstlim_counters.hardlimited;
+	ioc->ratelimited = lim->pfstlim_counters.ratelimited;
+
+	PF_RULES_RUNLOCK();
+
+	return (0);
+}
+
+/*
+ * Validate a source limiter definition supplied by userland and queue it on
+ * the inactive (pending) set that pf_sourcelim_commit() later merges into
+ * the active one.
+ *
+ * Returns 0 on success.  Returns EINVAL if a field is out of range, a name
+ * is not NUL terminated within its buffer, the overload watermarks are
+ * inconsistent, or the overload table cannot be attached.  Returns EBUSY if
+ * the ticket does not match the inactive main filter ruleset or the limiter
+ * collides with an entry already in the pending id or name tree.
+ */
+static int
+pf_sourcelim_add(const struct pfioc_sourcelim *ioc)
+{
+	struct pf_sourcelim *pfsrlim;
+	int error;
+	size_t namelen, tablelen;
+	unsigned int prefix;
+	size_t i;
+
+	if (ioc->id < PF_SOURCELIM_ID_MIN ||
+	    ioc->id > PF_SOURCELIM_ID_MAX)
+		return (EINVAL);
+
+	if (ioc->entries < 1)
+		return (EINVAL);
+
+	if (ioc->limit < 1)
+		return (EINVAL);
+
+	/* a rate needs both a count and an interval, or neither */
+	if ((ioc->rate.limit == 0) != (ioc->rate.seconds == 0))
+		return (EINVAL);
+
+	if (ioc->inet_prefix > 32)
+		return (EINVAL);
+	if (ioc->inet6_prefix > 128)
+		return (EINVAL);
+
+	/* reject names that fill the buffer without a terminating NUL */
+	namelen = strnlen(ioc->name, sizeof(ioc->name));
+	if (namelen == sizeof(ioc->name))
+		return (EINVAL);
+
+	tablelen = strnlen(ioc->overload_tblname,
+	    sizeof(ioc->overload_tblname));
+	if (tablelen == sizeof(ioc->overload_tblname))
+		return (EINVAL);
+	if (tablelen != 0) {
+		/* an overload table needs a non-zero high water mark */
+		if (ioc->overload_hwm == 0)
+			return (EINVAL);
+
+		/*
+		 * this is stupid, but not harmful?
+		 *
+		 * if (ioc->states < ioc->overload_hwm)
+		 *	return (EINVAL);
+		 */
+
+		if (ioc->overload_hwm < ioc->overload_lwm)
+			return (EINVAL);
+	}
+
+	/* M_WAITOK sleeps instead of failing, so there is no NULL to check */
+	pfsrlim = malloc(sizeof(*pfsrlim), M_PF_SOURCE_LIM, M_WAITOK | M_ZERO);
+
+	pfsrlim->pfsrlim_id = ioc->id;
+	pfsrlim->pfsrlim_entries = ioc->entries;
+	pfsrlim->pfsrlim_limit = ioc->limit;
+	pfsrlim->pfsrlim_ipv4_prefix = ioc->inet_prefix;
+	pfsrlim->pfsrlim_ipv6_prefix = ioc->inet6_prefix;
+	pfsrlim->pfsrlim_rate.limit = ioc->rate.limit;
+	pfsrlim->pfsrlim_rate.seconds = ioc->rate.seconds;
+	/* M_ZERO above provides the NUL terminators for both names */
+	memcpy(pfsrlim->pfsrlim_overload.name, ioc->overload_tblname, tablelen);
+	pfsrlim->pfsrlim_overload.hwm = ioc->overload_hwm;
+	pfsrlim->pfsrlim_overload.lwm = ioc->overload_lwm;
+	memcpy(pfsrlim->pfsrlim_nm, ioc->name, namelen);
+
+	if (pfsrlim->pfsrlim_rate.limit) {
+		uint64_t bucket = pfsrlim->pfsrlim_rate.seconds * 1000000000ULL;
+
+		/* nanoseconds of bucket consumed per admitted state */
+		pfsrlim->pfsrlim_rate_token = bucket /
+		    pfsrlim->pfsrlim_rate.limit;
+		pfsrlim->pfsrlim_rate_bucket = bucket;
+	}
+
+	/*
+	 * A /0 prefix keeps the all-zero mask left by M_ZERO; shifting a
+	 * 32-bit value by 32 bits would be undefined behaviour.
+	 */
+	if (pfsrlim->pfsrlim_ipv4_prefix != 0) {
+		pfsrlim->pfsrlim_ipv4_mask.v4.s_addr = htonl(
+		    0xffffffff << (32 - pfsrlim->pfsrlim_ipv4_prefix));
+	}
+
+	/* build the IPv6 mask one 32-bit word at a time */
+	prefix = pfsrlim->pfsrlim_ipv6_prefix;
+	for (i = 0; i < nitems(pfsrlim->pfsrlim_ipv6_mask.addr32); i++) {
+		if (prefix == 0) {
+			/* the memory is already zeroed */
+			break;
+		}
+		if (prefix < 32) {
+			pfsrlim->pfsrlim_ipv6_mask.addr32[i] = htonl(
+			    0xffffffff << (32 - prefix));
+			break;
+		}
+
+		pfsrlim->pfsrlim_ipv6_mask.addr32[i] = htonl(0xffffffff);
+		prefix -= 32;
+	}
+
+	RB_INIT(&pfsrlim->pfsrlim_sources);
+	mtx_init(&pfsrlim->pfsrlim_lock, "pf source limit", NULL, MTX_DEF);
+
+	PF_RULES_WLOCK();
+	if (ioc->ticket !=
+	    pf_main_ruleset.rules[PF_RULESET_FILTER].inactive.ticket) {
+		error = EBUSY;
+		goto unlock;
+	}
+
+	if (pfsrlim->pfsrlim_overload.name[0] != '\0') {
+		pfsrlim->pfsrlim_overload.table = pfr_attach_table(
+		    &pf_main_ruleset, pfsrlim->pfsrlim_overload.name);
+		if (pfsrlim->pfsrlim_overload.table == NULL) {
+			error = EINVAL;
+			goto unlock;
+		}
+	}
+
+	if (RB_INSERT(pf_sourcelim_id_tree, &V_pf_sourcelim_id_tree_inactive,
+	    pfsrlim) != NULL) {
+		error = EBUSY;
+		goto unlock;
+	}
+
+	if (RB_INSERT(pf_sourcelim_nm_tree, &V_pf_sourcelim_nm_tree_inactive,
+	    pfsrlim) != NULL) {
+		/* undo the id tree insert so no freed entry stays linked */
+		RB_REMOVE(pf_sourcelim_id_tree,
+		    &V_pf_sourcelim_id_tree_inactive, pfsrlim);
+		error = EBUSY;
+		goto unlock;
+	}
+
+	TAILQ_INSERT_HEAD(&V_pf_sourcelim_list_inactive, pfsrlim, pfsrlim_list);
+
+	PF_RULES_WUNLOCK();
+
+	return (0);
+
+unlock:
+	/*
+	 * Give back the overload table reference, if one was taken, while
+	 * the rules write lock is still held, as the rollback and commit
+	 * paths do.
+	 */
+	if (pfsrlim->pfsrlim_overload.table != NULL)
+		pfr_detach_table(pfsrlim->pfsrlim_overload.table);
+	PF_RULES_WUNLOCK();
+
+	/* the mutex was initialised above; destroy it before the free */
+	mtx_destroy(&pfsrlim->pfsrlim_lock);
+	free(pfsrlim, M_PF_SOURCE_LIM);
+
+	return (error);
+}
+
+/*
+ * Discard every pending (inactive) source limiter, dropping its overload
+ * table reference if it holds one, and reset the inactive list and trees to
+ * empty.  Requires the rules write lock.
+ */
+void
+pf_sourcelim_rollback(void)
+{
+	struct pf_sourcelim *pfsrlim, *npfsrlim;
+
+	PF_RULES_WASSERT();
+
+	TAILQ_FOREACH_SAFE(pfsrlim, &V_pf_sourcelim_list_inactive, pfsrlim_list,
+	    npfsrlim) {
+		if (pfsrlim->pfsrlim_overload.table != NULL)
+			pfr_detach_table(pfsrlim->pfsrlim_overload.table);
+
+		/* pf_sourcelim_add() initialised the mutex; destroy it first */
+		mtx_destroy(&pfsrlim->pfsrlim_lock);
+		free(pfsrlim, M_PF_SOURCE_LIM);
+	}
+
+	/* the entries are gone, so just reinitialise the containers */
+	TAILQ_INIT(&V_pf_sourcelim_list_inactive);
+	RB_INIT(&V_pf_sourcelim_id_tree_inactive);
+	RB_INIT(&V_pf_sourcelim_nm_tree_inactive);
+}
+
+/*
+ * Lookup helper: return the source limiter in "tree" that compares equal to
+ * "key", or NULL if there is none.
+ */
+static struct pf_sourcelim *
+pf_sourcelim_rb_find(struct pf_sourcelim_id_tree *tree,
+    struct pf_sourcelim *key)
+{
+	struct pf_sourcelim *match;
+
+	PF_RULES_ASSERT();
+
+	match = RB_FIND(pf_sourcelim_id_tree, tree, key);
+	return (match);
+}
+
+/*
+ * Lookup helper: return the first source limiter in "tree" that is greater
+ * than or equal to "key" (RB_NFIND), or NULL if there is none.
+ */
+static struct pf_sourcelim *
+pf_sourcelim_rb_nfind(struct pf_sourcelim_id_tree *tree,
+    struct pf_sourcelim *key)
+{
+	struct pf_sourcelim *match;
+
+	PF_RULES_ASSERT();
+
+	match = RB_NFIND(pf_sourcelim_id_tree, tree, key);
+	return (match);
+}
+
+/*
+ * Copy the configuration and counters of an active source limiter into the
+ * ioctl structure.  "rbt_op" selects how ioc->id is looked up in the active
+ * id tree (exact match or next-greater-or-equal).
+ *
+ * Returns 0 on success or ESRCH if the lookup finds nothing.
+ */
+static int
+pf_sourcelim_get(struct pfioc_sourcelim *ioc,
+    struct pf_sourcelim *(*rbt_op)(struct pf_sourcelim_id_tree *,
+    struct pf_sourcelim *))
+{
+	struct pf_sourcelim needle = { .pfsrlim_id = ioc->id };
+	struct pf_sourcelim *lim;
+	int error = 0;
+	PF_RULES_RLOCK_TRACKER;
+
+	PF_RULES_RLOCK();
+#if 0
+	if (ioc->ticket != pf_main_ruleset.rules.active.ticket) {
+		error = EBUSY;
+		goto unlock;
+	}
+#endif
+
+	lim = (*rbt_op)(&V_pf_sourcelim_id_tree_active, &needle);
+	if (lim == NULL) {
+		error = ESRCH;
+		goto unlock;
+	}
+
+	/* identity; the id is written back for the nfind case */
+	ioc->id = lim->pfsrlim_id;
+	CTASSERT(sizeof(ioc->name) == sizeof(lim->pfsrlim_nm));
+	memcpy(ioc->name, lim->pfsrlim_nm, sizeof(ioc->name));
+
+	/* limits */
+	ioc->entries = lim->pfsrlim_entries;
+	ioc->limit = lim->pfsrlim_limit;
+	ioc->inet_prefix = lim->pfsrlim_ipv4_prefix;
+	ioc->inet6_prefix = lim->pfsrlim_ipv6_prefix;
+	ioc->rate.limit = lim->pfsrlim_rate.limit;
+	ioc->rate.seconds = lim->pfsrlim_rate.seconds;
+
+	/* overload table settings */
+	CTASSERT(sizeof(ioc->overload_tblname) ==
+	    sizeof(lim->pfsrlim_overload.name));
+	memcpy(ioc->overload_tblname, lim->pfsrlim_overload.name,
+	    sizeof(lim->pfsrlim_overload.name));
+	ioc->overload_hwm = lim->pfsrlim_overload.hwm;
+	ioc->overload_lwm = lim->pfsrlim_overload.lwm;
+	/* XXX overload table thing */
+
+	/* usage and counters */
+	ioc->nentries = lim->pfsrlim_nsources;
+	ioc->inuse = lim->pfsrlim_counters.inuse;
+	ioc->addrallocs = lim->pfsrlim_counters.addrallocs;
+	ioc->addrnomem = lim->pfsrlim_counters.addrnomem;
+	ioc->admitted = lim->pfsrlim_counters.admitted;
+	ioc->addrlimited = lim->pfsrlim_counters.addrlimited;
+	ioc->hardlimited = lim->pfsrlim_counters.hardlimited;
+	ioc->ratelimited = lim->pfsrlim_counters.ratelimited;
+
+unlock:
+	PF_RULES_RUNLOCK();
+
+	return (error);
+}
+
+/*
+ * Lookup helper: return the source entry in "tree" that compares equal to
+ * "key", or NULL if there is none.
+ */
+static struct pf_source *
+pf_source_rb_find(struct pf_source_ioc_tree *tree,
+    struct pf_source *key)
+{
+	struct pf_source *match;
+
+	PF_RULES_ASSERT();
+
+	match = RB_FIND(pf_source_ioc_tree, tree, key);
+	return (match);
+}
+
+/*
+ * Lookup helper: return the first source entry in "tree" that is greater
+ * than or equal to "key" (RB_NFIND), or NULL if there is none.
+ */
+static struct pf_source *
+pf_source_rb_nfind(struct pf_source_ioc_tree *tree,
+    struct pf_source *key)
+{
+	struct pf_source *match;
+
+	PF_RULES_ASSERT();
+
+	match = RB_NFIND(pf_source_ioc_tree, tree, key);
+	return (match);
+}
+
+/*
+ * Copy source entries of an active source limiter out to userland.
+ *
+ * The starting entry is located by applying "rbt_op" (exact match or
+ * next-greater-or-equal) to the key read from ioc->key; subsequent entries
+ * follow in tree order until the tree or the user buffer is exhausted.  On
+ * success ioc->entrieslen is set to the number of bytes written and the
+ * limiter's prefix lengths and limit are reported back.
+ *
+ * Returns 0 on success, EINVAL if ioc->entry_size does not match the kernel
+ * entry size, EMSGSIZE if the buffer cannot hold an entry (or the next
+ * one), ESRCH if the limiter does not exist, ENOENT if no source entry
+ * matches, or the error from copyin()/copyout().
+ */
+static int
+pf_source_get(struct pfioc_source *ioc,
+    struct pf_source *(*rbt_op)(struct pf_source_ioc_tree *,
+    struct pf_source *))
+{
+	struct pf_sourcelim plkey = { .pfsrlim_id = ioc->id };
+	struct pfioc_source_entry e, *uentry;
+	struct pf_source key;
+	struct pf_sourcelim *pfsrlim;
+	struct pf_source *pfsr;
+	size_t used = 0, len = ioc->entrieslen;
+	int error = 0;
+	PF_RULES_RLOCK_TRACKER;
+
+	if (ioc->entry_size != sizeof(e))
+		return (EINVAL);
+	if (len < sizeof(e))
+		return (EMSGSIZE);
+
+	/* fetch the lookup key before taking the rules lock */
+	error = copyin(ioc->key, &e, sizeof(e));
+	if (error != 0)
+		return (error);
+
+	PF_RULES_RLOCK();
+
+#if 0
+	if (ioc->ticket != pf_main_ruleset.rules.active.ticket) {
+		error = EBUSY;
+		goto unlock;
+	}
+#endif
+
+	pfsrlim = pf_sourcelim_rb_find(&V_pf_sourcelim_id_tree_active, &plkey);
+	if (pfsrlim == NULL) {
+		error = ESRCH;
+		goto unlock;
+	}
+
+	/* only the fields set here are initialised in the search key */
+	key.pfsr_af = e.af;
+	key.pfsr_rdomain = e.rdomain;
+	key.pfsr_addr = e.addr;
+	pfsr = (*rbt_op)(&pfsrlim->pfsrlim_ioc_sources, &key);
+	if (pfsr == NULL) {
+		error = ENOENT;
+		goto unlock;
+	}
+
+	/* clear the entry so no bytes of the user key are echoed back */
+	memset(&e, 0, sizeof(e));
+
+	uentry = ioc->entries;
+	for (;;) {
+		e.af = pfsr->pfsr_af;
+		e.rdomain = pfsr->pfsr_rdomain;
+		e.addr = pfsr->pfsr_addr;
+
+		e.inuse = pfsr->pfsr_inuse;
+		e.admitted = pfsr->pfsr_counters.admitted;
+		e.hardlimited = pfsr->pfsr_counters.hardlimited;
+		e.ratelimited = pfsr->pfsr_counters.ratelimited;
+
+		/*
+		 * NOTE(review): copyout() runs with the rules read lock
+		 * held; confirm that is acceptable for this lock.
+		 */
+		error = copyout(&e, uentry, sizeof(e));
+		if (error != 0)
+			goto unlock;
+
+		used += sizeof(e);
+		if (used == len)
+			break;
+
+		pfsr = RB_NEXT(pf_source_ioc_tree,
+		    &pfsrlim->pfsrlim_ioc_sources, pfsr);
+		if (pfsr == NULL)
+			break;
+
+		/* another entry exists but the remaining space is too small */
+		if ((len - used) < sizeof(e)) {
+			error = EMSGSIZE;
+			goto unlock;
+		}
+
+		uentry++;
+	}
+	MPASS(error == 0);
+
+	ioc->inet_prefix = pfsrlim->pfsrlim_ipv4_prefix;
+	ioc->inet6_prefix = pfsrlim->pfsrlim_ipv6_prefix;
+	ioc->limit = pfsrlim->pfsrlim_limit;
+
+	ioc->entrieslen = used;
+
+unlock:
+	PF_RULES_RUNLOCK();
+
+	return (error);
+}
+
+/*
+ * Remove a single source entry from an active source limiter.
+ *
+ * The entry is taken out of both source trees (and off the empty-entry gc
+ * list when it has no states in use), the limiter's source and in-use
+ * counts are adjusted, and every state linked to the entry is unwired from
+ * it.  The states themselves are left in place.
+ *
+ * Returns 0 on success, EOPNOTSUPP if userland also asked for the states to
+ * be removed, ESRCH if the limiter does not exist and ENOENT if the source
+ * entry does not exist.
+ */
+static int
+pf_source_clr(struct pfioc_source_kill *ioc)
+{
+	extern struct pf_source_list pf_source_gc;
+	struct pf_sourcelim plkey = {
+		.pfsrlim_id = ioc->id,
+	};
+	struct pf_source skey = {
+		.pfsr_af = ioc->af,
+		.pfsr_rdomain = ioc->rdomain,
+		.pfsr_addr = ioc->addr,
+	};
+	struct pf_sourcelim *pfsrlim;
+	struct pf_source *pfsr;
+	struct pf_state_link *pfl, *npfl;
+	int error = 0;
+	unsigned int gen;
+
+	if (ioc->rmstates) {
+		/* XXX userland wants the states removed too */
+		return (EOPNOTSUPP);
+	}
+
+	PF_RULES_WLOCK();
+
+#if 0
+	if (ioc->ticket != pf_main_ruleset.rules.active.ticket) {
+		error = EBUSY;
+		goto unlock;
+	}
+#endif
+
+	pfsrlim = pf_sourcelim_rb_find(&V_pf_sourcelim_id_tree_active, &plkey);
+	if (pfsrlim == NULL) {
+		error = ESRCH;
+		goto unlock;
+	}
+
+	pfsr = pf_source_rb_find(&pfsrlim->pfsrlim_ioc_sources, &skey);
+	if (pfsr == NULL) {
+		error = ENOENT;
+		goto unlock;
+	}
+
+	RB_REMOVE(pf_source_tree, &pfsrlim->pfsrlim_sources, pfsr);
+	RB_REMOVE(pf_source_ioc_tree, &pfsrlim->pfsrlim_ioc_sources, pfsr);
+	if (pfsr->pfsr_inuse == 0)
+		TAILQ_REMOVE(&pf_source_gc, pfsr, pfsr_empty_gc);
+
+	/* adjust the limiter's accounting inside its enter/leave section */
+	gen = pf_sourcelim_enter(pfsrlim);
+	pfsrlim->pfsrlim_nsources--;
+	pfsrlim->pfsrlim_counters.inuse -= pfsr->pfsr_inuse;
+	pf_sourcelim_leave(pfsrlim, gen);
+
+	/* unwire the links */
+	TAILQ_FOREACH(pfl, &pfsr->pfsr_states, pfl_link) {
+		struct pf_kstate *st = pfl->pfl_state;
+
+		/*
+		 * Take the state lock around the update, matching what
+		 * pf_sourcelim_unlink() does for the same operation.
+		 */
+		PF_STATE_LOCK(st);
+		/* if !rmst */
+		st->sourcelim = 0;
+		SLIST_REMOVE(&st->linkage, pfl, pf_state_link, pfl_linkage);
+		PF_STATE_UNLOCK(st);
+	}
+
+	PF_RULES_WUNLOCK();
+
+	/* the entry is out of both trees, so free it outside the lock */
+	TAILQ_FOREACH_SAFE(pfl, &pfsr->pfsr_states, pfl_link, npfl)
+		free(pfl, M_PF_STATE_LINK);
+
+	free(pfsr, M_PF_SOURCE_LIM);
+
+	return (0);
+
+unlock:
+	PF_RULES_WUNLOCK();
+
+	return (error);
+}
+
static void
pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out)
{
@@ -2181,6 +3043,18 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
if (pf_validate_range(rule->dst.port_op, rule->dst.port))
ERROUT_UNLOCKED(EINVAL);
+ if (rule->statelim != PF_STATELIM_ID_NONE) {
+ if (rule->statelim < PF_STATELIM_ID_MIN ||
+ rule->statelim > PF_STATELIM_ID_MAX)
+ ERROUT_UNLOCKED(EINVAL);
+ }
+
+ if (rule->sourcelim != PF_SOURCELIM_ID_NONE) {
+ if (rule->sourcelim < PF_SOURCELIM_ID_MIN ||
+ rule->sourcelim > PF_SOURCELIM_ID_MAX)
+ ERROUT_UNLOCKED(EINVAL);
+ }
+
if (rule->ifname[0])
kif = pf_kkif_create(M_WAITOK);
if (rule->rcv_ifname[0])
@@ -3002,6 +3876,12 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
case DIOCGETTIMEOUT:
case DIOCCLRRULECTRS:
case DIOCGETLIMIT:
+ case DIOCGETSTATELIM:
+ case DIOCGETNSTATELIM:
+ case DIOCGETSOURCELIM:
+ case DIOCGETNSOURCELIM:
+ case DIOCGETSOURCE:
+ case DIOCGETNSOURCE:
case DIOCGETALTQSV0:
case DIOCGETALTQSV1:
case DIOCGETALTQV0:
@@ -3061,6 +3941,12 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
#endif
case DIOCGETTIMEOUT:
case DIOCGETLIMIT:
+ case DIOCGETSTATELIM:
+ case DIOCGETNSTATELIM:
+ case DIOCGETSOURCELIM:
+ case DIOCGETNSOURCELIM:
+ case DIOCGETSOURCE:
+ case DIOCGETNSOURCE:
case DIOCGETALTQSV0:
case DIOCGETALTQSV1:
case DIOCGETALTQV0:
@@ -4349,6 +5235,42 @@ DIOCGETSTATESV2_full:
break;
}
+ case DIOCADDSTATELIM:
+ error = pf_statelim_add((struct pfioc_statelim *)addr);
+ break;
+ case DIOCGETSTATELIM:
+ error = pf_statelim_get((struct pfioc_statelim *)addr,
+ pf_statelim_rb_find);
+ break;
+ case DIOCGETNSTATELIM:
+ error = pf_statelim_get((struct pfioc_statelim *)addr,
+ pf_statelim_rb_nfind);
+ break;
+
+ case DIOCADDSOURCELIM:
+ error = pf_sourcelim_add((struct pfioc_sourcelim *)addr);
+ break;
+ case DIOCGETSOURCELIM:
+ error = pf_sourcelim_get((struct pfioc_sourcelim *)addr,
+ pf_sourcelim_rb_find);
+ break;
+ case DIOCGETNSOURCELIM:
+ error = pf_sourcelim_get((struct pfioc_sourcelim *)addr,
+ pf_sourcelim_rb_nfind);
+ break;
+
+ case DIOCGETSOURCE:
+ error = pf_source_get((struct pfioc_source *)addr,
+ pf_source_rb_find);
+ break;
+ case DIOCGETNSOURCE:
+ error = pf_source_get((struct pfioc_source *)addr,
+ pf_source_rb_nfind);
+ break;
+ case DIOCCLRSOURCE:
+ error = pf_source_clr((struct pfioc_source_kill *)addr);
+ break;
+
case DIOCCLRRULECTRS: {
/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
struct pf_kruleset *ruleset = &pf_main_ruleset;
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index 1c8a1f95b650..6e19beec5bfe 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -784,6 +784,8 @@ static const struct nlattr_parser nla_p_rule[] = {
{ .type = PF_RT_MAX_PKT_SIZE, .off = _OUT(max_pkt_size), .cb = nlattr_get_uint16 },
{ .type = PF_RT_TYPE_2, .off = _OUT(type), .cb = nlattr_get_uint16 },
{ .type = PF_RT_CODE_2, .off = _OUT(code), .cb = nlattr_get_uint16 },
+ { .type = PF_RT_STATE_LIMIT, .off = _OUT(statelim), .cb = nlattr_get_uint8 },
+ { .type = PF_RT_SOURCE_LIMIT, .off = _OUT(sourcelim), .cb = nlattr_get_uint8 },
};
NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule);
#undef _OUT
@@ -1041,6 +1043,8 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt)
nlattr_add_u64(nw, PF_RT_SRC_NODES_ROUTE, counter_u64_fetch(rule->src_nodes[PF_SN_ROUTE]));
nlattr_add_pf_threshold(nw, PF_RT_PKTRATE, &rule->pktrate);
nlattr_add_time_t(nw, PF_RT_EXPTIME, time_second - (time_uptime - rule->exptime));
+ nlattr_add_u8(nw, PF_RT_STATE_LIMIT, rule->statelim);
+ nlattr_add_u8(nw, PF_RT_SOURCE_LIMIT, rule->sourcelim);
error = pf_kanchor_copyout(ruleset, rule, anchor_call, sizeof(anchor_call));
MPASS(error == 0);
diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h
index 216f3d13db32..d45766b91a30 100644
--- a/sys/netpfil/pf/pf_nl.h
+++ b/sys/netpfil/pf/pf_nl.h
@@ -290,6 +290,8 @@ enum pf_rule_type_t {
PF_RT_TYPE_2 = 84, /* u16 */
PF_RT_CODE_2 = 85, /* u16 */
PF_RT_EXPTIME = 86, /* time_t */
+ PF_RT_STATE_LIMIT = 87, /* uint8_t */
+ PF_RT_SOURCE_LIMIT = 88, /* uint8_t */
};
enum pf_addrule_type_t {
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index 0e2b9fe1cac8..650334c45db3 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -882,6 +882,26 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, time_t tzero)
return (0);
}
+/*
+ * Remove the entry for address "ad" from table "kt".
+ *
+ * Returns ESRCH if pfr_lookup_addr() finds no entry or the entry is a
+ * negated one, and 0 otherwise.  An entry that is already marked is left
+ * alone; an unmarked one is marked and handed to pfr_remove_kentries() on a
+ * one-element work queue.
+ */
+int
+pfr_remove_kentry(struct pfr_ktable *kt, struct pfr_addr *ad)
+{
+	struct pfr_kentryworkq doomed = SLIST_HEAD_INITIALIZER(doomed);
+	struct pfr_kentry *ke;
+
+	ke = pfr_lookup_addr(kt, ad, 1);
+	if (ke == NULL)
+		return (ESRCH);
+	if (ke->pfrke_not)
+		return (ESRCH);
+
+	if (!ke->pfrke_mark) {
+		ke->pfrke_mark = 1;
+		SLIST_INSERT_HEAD(&doomed, ke, pfrke_workq);
+		pfr_remove_kentries(kt, &doomed);
+	}
+
+	return (0);
+}
+
static void
pfr_remove_kentries(struct pfr_ktable *kt,
struct pfr_kentryworkq *workq)