aboutsummaryrefslogtreecommitdiff
path: root/sys/netpfil/pf/pf_lb.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netpfil/pf/pf_lb.c')
-rw-r--r--sys/netpfil/pf/pf_lb.c273
1 files changed, 226 insertions, 47 deletions
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index bc9e1dc72902..7aeb8266ca8c 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -73,7 +73,7 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
static uint64_t pf_hash(struct pf_addr *, struct pf_addr *,
struct pf_poolhashkey *, sa_family_t);
-struct pf_krule *pf_match_translation(int, struct pf_test_ctx *);
+static struct pf_krule *pf_match_translation(int, struct pf_test_ctx *);
static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *,
struct pf_krule *);
static int pf_get_sport(struct pf_pdesc *, struct pf_krule *,
@@ -216,6 +216,7 @@ pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_krulese
*/
ctx->arsm = ctx->aruleset;
}
+ break;
} else {
ctx->a = r; /* remember anchor */
ctx->aruleset = ruleset; /* and its ruleset */
@@ -273,7 +274,7 @@ pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_k
return (rv);
}
-struct pf_krule *
+static struct pf_krule *
pf_match_translation(int rs_num, struct pf_test_ctx *ctx)
{
enum pf_test_status rv;
@@ -534,6 +535,63 @@ pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
return (1);
}
+static __inline u_short
+pf_check_src_node_valid(struct pf_ksrc_node *sn, struct pf_kpool *rpool)
+{
+ struct pf_addr *raddr, *rmask;
+ struct pf_addr *caddr; /* cached redirection address */
+ struct pf_kpooladdr *pa;
+ sa_family_t raf;
+ sa_family_t caf; /* cached redirection AF */
+ u_short valid = 0;
+
+ KASSERT(sn != NULL, ("sn is NULL"));
+ KASSERT(rpool != NULL, ("rpool is NULL"));
+
+ /* check if the cached entry is still valid */
+
+ if (sn->type == PF_SN_LIMIT) {
+ /* Always valid as it does not store redirection address */
+ return (1);
+ }
+
+ mtx_lock(&rpool->mtx);
+ caddr = &(sn->raddr);
+ caf = sn->raf;
+
+ TAILQ_FOREACH(pa, &rpool->list, entries) {
+ if (PF_AZERO(caddr, caf)) {
+ valid = 1;
+ goto done;
+ } else if (pa->addr.type == PF_ADDR_DYNIFTL) {
+ if (pfr_kentry_byaddr(pa->addr.p.dyn->pfid_kt, caddr, caf, 0)) {
+ valid = 1;
+ goto done;
+ }
+ } else if (pa->addr.type == PF_ADDR_TABLE) {
+ if (pfr_kentry_byaddr(pa->addr.p.tbl, caddr, caf, 0)) {
+ valid = 1;
+ goto done;
+ }
+ } else if (pa->addr.type != PF_ADDR_NOROUTE) {
+ /* PF_ADDR_URPFFAILED, PF_ADDR_RANGE, PF_ADDR_ADDRMASK */
+ raddr = &(pa->addr.v.a.addr);
+ rmask = &(pa->addr.v.a.mask);
+ raf = pa->af;
+ if (raf == caf && pf_match_addr(0, raddr, rmask, caddr, caf)) {
+ valid = 1;
+ goto done;
+ }
+ }
+ /* else PF_ADDR_NOROUTE */
+ }
+
+done:
+ mtx_unlock(&rpool->mtx);
+
+ return (valid);
+}
+
u_short
pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf,
@@ -545,11 +603,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
uint64_t hashidx;
int cnt;
sa_family_t wanted_af;
+ u_int8_t pool_type;
+ bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;
KASSERT(saf != 0, ("%s: saf == 0", __func__));
KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));
+ /*
+ * Given (*naf) is a hint about AF of the forwarded packet.
+ * It might be changed if prefer_ipv6_nexthop is enabled and
+ * the combination of nexthop AF and packet AF allows for it.
+ */
wanted_af = (*naf);
mtx_lock(&rpool->mtx);
@@ -594,19 +659,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
} else {
raddr = &rpool->cur->addr.v.a.addr;
rmask = &rpool->cur->addr.v.a.mask;
- /*
- * For single addresses check their address family. Unless they
- * have none, which happens when addresses are added with
- * the old ioctl mechanism. In such case trust that the address
- * has the proper AF.
- */
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ }
+
+ /*
+ * For pools with a single host with the prefer-ipv6-nexthop option
+ * we can return pool address of any AF, unless the forwarded packet
+ * is IPv6, then we can return only if pool address is IPv6.
+ * For non-prefer-ipv6-nexthop we can return pool address only
+ * of wanted AF, unless the pool address'es AF is unknown, which
+ * happens in case old ioctls have been used to set up the pool.
+ *
+ * Round-robin pools have their own logic for retrying next addresses.
+ */
+ pool_type = rpool->opts & PF_POOL_TYPEMASK;
+ if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
+ ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
+ rpool->cur->addr.type != PF_ADDR_TABLE &&
+ rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
+ if (prefer_ipv6_nexthop) {
+ if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+ wanted_af = rpool->cur->af;
+ } else {
+ if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
}
- switch (rpool->opts & PF_POOL_TYPEMASK) {
+ switch (pool_type) {
case PF_POOL_NONE:
pf_addrcpy(naddr, raddr, wanted_af);
break;
@@ -631,10 +715,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)arc4random_uniform(cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_random:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_random;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else if (init_addr != NULL && PF_AZERO(init_addr,
@@ -702,10 +798,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)(hashidx % cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_srchash:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_srchash;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else {
@@ -718,6 +826,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
{
struct pf_kpooladdr *acur = rpool->cur;
+ retry_other_af_rr:
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af,
@@ -728,46 +839,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
&rpool->tblidx, &rpool->counter, wanted_af,
pf_islinklocal, true))
goto get_addr;
- } else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
- wanted_af))
+ } else if (rpool->cur->af == wanted_af &&
+ pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
goto get_addr;
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ /* Reset table index when changing wanted AF. */
+ rpool->tblidx = -1;
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto retry_other_af_rr;
+ }
try_next:
+ /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
+ rpool->ipv6_nexthop_af = AF_INET6;
if (TAILQ_NEXT(rpool->cur, entries) == NULL)
rpool->cur = TAILQ_FIRST(&rpool->list);
else
rpool->cur = TAILQ_NEXT(rpool->cur, entries);
+ try_next_ipv6_nexthop_rr:
+ /* Reset table index when iterating pools or changing wanted AF. */
rpool->tblidx = -1;
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af, NULL,
- true)) {
- /* table contains no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ true))
+ goto get_addr;
} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
- &rpool->tblidx, &rpool->counter, wanted_af,
- pf_islinklocal, true)) {
- /* interface has no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
+ true))
+ goto get_addr;
} else {
- raddr = &rpool->cur->addr.v.a.addr;
- rmask = &rpool->cur->addr.v.a.mask;
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ if (rpool->cur->af == wanted_af) {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ pf_addrcpy(&rpool->counter, raddr, wanted_af);
+ goto get_addr;
}
- pf_addrcpy(&rpool->counter, raddr, wanted_af);
}
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto try_next_ipv6_nexthop_rr;
+ }
+ if (rpool->cur != acur)
+ goto try_next;
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
get_addr:
pf_addrcpy(naddr, &rpool->counter, wanted_af);
if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
@@ -777,9 +897,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
}
}
+ if (wanted_af == 0) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+
if (nkif)
*nkif = rpool->cur->kif;
+ (*naf) = wanted_af;
+
done_pool_mtx:
mtx_unlock(&rpool->mtx);
@@ -804,6 +931,45 @@ pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
if (sn != NULL) {
PF_SRC_NODE_LOCK_ASSERT(sn);
+ /*
+ * Check if source node's redirection address still exists
+ * in pool from which the SN was created. If not, delete it.
+ * Similar to pf_kill_srcnodes(). Unlink the source node
+ * from tree, unlink it from states, then free it. Do not
+ * overlap source node and state locks to avoid LOR.
+ */
+ if (!pf_check_src_node_valid(sn, rpool)) {
+ pf_unlink_src_node(sn);
+ PF_SRC_NODE_UNLOCK(sn);
+ if (V_pf_status.debug >= PF_DEBUG_NOISY) {
+ printf("%s: stale src tracking (%d) ",
+ __func__, sn_type);
+ pf_print_host(saddr, 0, saf);
+ printf(" to ");
+ pf_print_host(&(sn->raddr), 0, sn->raf);
+ if (nkif)
+ printf("@%s", sn->rkif->pfik_name);
+ printf("\n");
+ }
+
+ for (int i = 0; i <= V_pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+ struct pf_kstate *st;
+
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(st, &ih->states, entry) {
+ if (st->sns[sn->type] == sn) {
+ st->sns[sn->type] = NULL;
+ }
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+ pf_free_src_node(sn);
+ counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
+ sn = NULL;
+ goto map_addr;
+ }
+
(*naf) = sn->raf;
/* If the supplied address is the same as the current one we've
@@ -832,9 +998,10 @@ pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
goto done;
}
+map_addr:
/*
- * Source node has not been found. Find a new address and store it
- * in variables given by the caller.
+ * Source node has not been found or is invalid. Find a new address
+ * and store it in variables given by the caller.
*/
if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr,
rpool)) != 0) {
@@ -904,6 +1071,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
{
struct pf_pdesc *pd = ctx->pd;
struct pf_addr *naddr;
+ int idx;
uint16_t *nportp;
uint16_t low, high;
u_short reason;
@@ -918,8 +1086,19 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
return (PFRES_MEMORY);
}
- naddr = &ctx->nk->addr[1];
- nportp = &ctx->nk->port[1];
+ switch (nat_action) {
+ case PF_NAT:
+ idx = pd->sidx;
+ break;
+ case PF_BINAT:
+ idx = 1;
+ break;
+ case PF_RDR:
+ idx = pd->didx;
+ break;
+ }
+ naddr = &ctx->nk->addr[idx];
+ nportp = &ctx->nk->port[idx];
switch (nat_action) {
case PF_NAT: