aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrey V. Elsukov <ae@FreeBSD.org>2019-03-19 10:57:03 +0000
committerAndrey V. Elsukov <ae@FreeBSD.org>2019-03-19 10:57:03 +0000
commitd18c1f26a4bbd5c3871bb67660a5899d28f5efa5 (patch)
tree7b90ad4e40d767c5d50b9b23303d7b4648229ae1
parentc5be49da01dc36e7e681026bd1a9b271929d2bd7 (diff)
downloadsrc-d18c1f26a4bbd5c3871bb67660a5899d28f5efa5.tar.gz
src-d18c1f26a4bbd5c3871bb67660a5899d28f5efa5.zip
Reapply r345274 with build fixes for 32-bit architectures.
Update NAT64LSN implementation: o most of the data structures and their relations were modified to be able to support a large number of translation states. Now each supported protocol can use the full port range. Port groups now belong to IPv4 alias addresses, not hosts. Each port group can keep several states chunks. This is controlled with the new `states_chunks` config option. States chunks make it possible to have several translation states for a single alias address and port, but for different destination addresses. o by default all hash tables now use jenkins hash. o ConcurrencyKit and epoch(9) are used to make NAT64LSN lockless on the fast path. o one NAT64LSN instance can now be used to handle several IPv6 prefixes; the special prefix "::" value should be used for this purpose when the instance is created. o due to the modified internal data structure relations, the socket opcode that does states listing was changed. Obtained from: Yandex LLC MFC after: 1 month Sponsored by: Yandex LLC
Notes
Notes: svn path=/head/; revision=345293
-rw-r--r--sbin/ipfw/ipfw.830
-rw-r--r--sbin/ipfw/ipfw2.h1
-rw-r--r--sbin/ipfw/nat64lsn.c124
-rw-r--r--sys/conf/files4
-rw-r--r--sys/modules/ipfw_nat64/Makefile2
-rw-r--r--sys/netinet6/ip_fw_nat64.h45
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.c2520
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.h425
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn_control.c434
9 files changed, 1766 insertions, 1819 deletions
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 31448aff92bb..f02ec3e148cd 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -1,7 +1,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 18, 2019
+.Dd March 19, 2019
.Dt IPFW 8
.Os
.Sh NAME
@@ -3300,6 +3300,7 @@ See
.Sx SYSCTL VARIABLES
for more info.
.Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+.Ss Stateful translation
.Nm
supports in-kernel IPv6/IPv4 network address and protocol translation.
Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers
@@ -3317,7 +3318,8 @@ to be able use stateful NAT64 translator.
Stateful NAT64 uses a bunch of memory for several types of objects.
When IPv6 client initiates connection, NAT64 translator creates a host entry
in the states table.
-Each host entry has a number of ports group entries allocated on demand.
+Each host entry uses preallocated IPv4 alias entry.
+Each alias entry has a number of ports group entries allocated on demand.
Ports group entries contains connection state entries.
There are several options to control limits and lifetime for these objects.
.Pp
@@ -3337,6 +3339,11 @@ First time an original packet is handled and consumed by translator,
and then it is handled again as translated packet.
This behavior can be changed by sysctl variable
.Va net.inet.ip.fw.nat64_direct_output .
+Also translated packet can be tagged using
+.Cm tag
+rule action, and then matched by
+.Cm tagged
+opcode to avoid loops and extra overhead.
.Pp
The stateful NAT64 configuration command is the following:
.Bd -ragged -offset indent
@@ -3364,15 +3371,16 @@ to represent IPv4 addresses. This IPv6 prefix should be configured in DNS64.
The translator implementation follows RFC6052, that restricts the length of
prefixes to one of following: 32, 40, 48, 56, 64, or 96.
The Well-Known IPv6 Prefix 64:ff9b:: must be 96 bits long.
-.It Cm max_ports Ar number
-Maximum number of ports reserved for upper level protocols to one IPv6 client.
-All reserved ports are divided into chunks between supported protocols.
-The number of connections from one IPv6 client is limited by this option.
-Note that closed TCP connections still remain in the list of connections until
-.Cm tcp_close_age
-interval will not expire.
-Default value is
-.Ar 2048 .
+The special
+.Ar ::/length
+prefix can be used to handle several IPv6 prefixes with one NAT64 instance.
+The NAT64 instance will determine a destination IPv4 address from prefix
+.Ar length .
+.It Cm states_chunks Ar number
+The number of states chunks in single ports group.
+Each ports group by default can keep 64 state entries in single chunk.
+The above value affects the maximum number of states that can be associated with single IPv4 alias address and port.
+The value must be power of 2, and up to 128.
.It Cm host_del_age Ar seconds
The number of seconds until the host entry for a IPv6 client will be deleted
and all its resources will be released due to inactivity.
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index ff6990ae1c06..2b562734d15f 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -278,6 +278,7 @@ enum tokens {
TOK_AGG_LEN,
TOK_AGG_COUNT,
TOK_MAX_PORTS,
+ TOK_STATES_CHUNKS,
TOK_JMAXLEN,
TOK_PORT_RANGE,
TOK_HOST_DEL_AGE,
diff --git a/sbin/ipfw/nat64lsn.c b/sbin/ipfw/nat64lsn.c
index c6a892572818..4a6d7a7914c3 100644
--- a/sbin/ipfw/nat64lsn.c
+++ b/sbin/ipfw/nat64lsn.c
@@ -87,68 +87,70 @@ nat64lsn_print_states(void *buf)
char sflags[4], *sf, *proto;
ipfw_obj_header *oh;
ipfw_obj_data *od;
- ipfw_nat64lsn_stg *stg;
- ipfw_nat64lsn_state *ste;
+ ipfw_nat64lsn_stg_v1 *stg;
+ ipfw_nat64lsn_state_v1 *ste;
uint64_t next_idx;
int i, sz;
oh = (ipfw_obj_header *)buf;
od = (ipfw_obj_data *)(oh + 1);
- stg = (ipfw_nat64lsn_stg *)(od + 1);
+ stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
sz = od->head.length - sizeof(*od);
next_idx = 0;
while (sz > 0 && next_idx != 0xFF) {
- next_idx = stg->next_idx;
+ next_idx = stg->next.index;
sz -= sizeof(*stg);
if (stg->count == 0) {
stg++;
continue;
}
- switch (stg->proto) {
- case IPPROTO_TCP:
- proto = "TCP";
- break;
- case IPPROTO_UDP:
- proto = "UDP";
- break;
- case IPPROTO_ICMPV6:
- proto = "ICMPv6";
- break;
- }
- inet_ntop(AF_INET6, &stg->host6, s, sizeof(s));
+ /*
+ * NOTE: addresses are in network byte order,
+ * ports are in host byte order.
+ */
inet_ntop(AF_INET, &stg->alias4, a, sizeof(a));
- ste = (ipfw_nat64lsn_state *)(stg + 1);
+ ste = (ipfw_nat64lsn_state_v1 *)(stg + 1);
for (i = 0; i < stg->count && sz > 0; i++) {
sf = sflags;
+ inet_ntop(AF_INET6, &ste->host6, s, sizeof(s));
inet_ntop(AF_INET, &ste->daddr, f, sizeof(f));
- if (stg->proto == IPPROTO_TCP) {
+ switch (ste->proto) {
+ case IPPROTO_TCP:
+ proto = "TCP";
if (ste->flags & 0x02)
*sf++ = 'S';
if (ste->flags & 0x04)
*sf++ = 'E';
if (ste->flags & 0x01)
*sf++ = 'F';
+ break;
+ case IPPROTO_UDP:
+ proto = "UDP";
+ break;
+ case IPPROTO_ICMP:
+ proto = "ICMPv6";
+ break;
}
*sf = '\0';
- switch (stg->proto) {
+ switch (ste->proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n",
s, ste->sport, a, ste->aport, proto,
sflags, ste->idle, f, ste->dport);
break;
- case IPPROTO_ICMPV6:
+ case IPPROTO_ICMP:
printf("%s\t%s\t%s\t\t%d\t%s\n",
s, a, proto, ste->idle, f);
break;
default:
printf("%s\t%s\t%d\t\t%d\t%s\n",
- s, a, stg->proto, ste->idle, f);
+ s, a, ste->proto, ste->idle, f);
}
ste++;
sz -= sizeof(*ste);
}
- stg = (ipfw_nat64lsn_stg *)ste;
+ stg = (ipfw_nat64lsn_stg_v1 *)ste;
}
return (next_idx);
}
@@ -174,6 +176,7 @@ nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
err(EX_OSERR, NULL);
do {
oh = (ipfw_obj_header *)buf;
+ oh->opheader.version = 1; /* Force using ov new API */
od = (ipfw_obj_data *)(oh + 1);
nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set);
od->head.type = IPFW_TLV_OBJDATA;
@@ -363,12 +366,8 @@ nat64lsn_parse_int(const char *arg, const char *desc)
static struct _s_x nat64newcmds[] = {
{ "prefix6", TOK_PREFIX6 },
- { "agg_len", TOK_AGG_LEN }, /* not yet */
- { "agg_count", TOK_AGG_COUNT }, /* not yet */
- { "port_range", TOK_PORT_RANGE }, /* not yet */
{ "jmaxlen", TOK_JMAXLEN },
{ "prefix4", TOK_PREFIX4 },
- { "max_ports", TOK_MAX_PORTS },
{ "host_del_age", TOK_HOST_DEL_AGE },
{ "pg_del_age", TOK_PG_DEL_AGE },
{ "tcp_syn_age", TOK_TCP_SYN_AGE },
@@ -376,10 +375,13 @@ static struct _s_x nat64newcmds[] = {
{ "tcp_est_age", TOK_TCP_EST_AGE },
{ "udp_age", TOK_UDP_AGE },
{ "icmp_age", TOK_ICMP_AGE },
+ { "states_chunks",TOK_STATES_CHUNKS },
{ "log", TOK_LOG },
{ "-log", TOK_LOGOFF },
{ "allow_private", TOK_PRIVATE },
{ "-allow_private", TOK_PRIVATEOFF },
+ /* for compatibility with old configurations */
+ { "max_ports", TOK_MAX_PORTS }, /* unused */
{ NULL, 0 }
};
@@ -436,42 +438,17 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6,
&cfg->plen6);
if (ipfw_check_nat64prefix(&cfg->prefix6,
- cfg->plen6) != 0)
+ cfg->plen6) != 0 &&
+ !IN6_IS_ADDR_UNSPECIFIED(&cfg->prefix6))
errx(EX_USAGE, "Bad prefix6 %s", *av);
ac--; av++;
break;
-#if 0
- case TOK_AGG_LEN:
- NEED1("Aggregation prefix len required");
- cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt);
- ac--; av++;
- break;
- case TOK_AGG_COUNT:
- NEED1("Max per-prefix count required");
- cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt);
- ac--; av++;
- break;
- case TOK_PORT_RANGE:
- NEED1("port range x[:y] required");
- if ((p = strchr(*av, ':')) == NULL)
- cfg->min_port = (uint16_t)nat64lsn_parse_int(
- *av, opt);
- else {
- *p++ = '\0';
- cfg->min_port = (uint16_t)nat64lsn_parse_int(
- *av, opt);
- cfg->max_port = (uint16_t)nat64lsn_parse_int(
- p, opt);
- }
- ac--; av++;
- break;
case TOK_JMAXLEN:
NEED1("job queue length required");
cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
ac--; av++;
break;
-#endif
case TOK_MAX_PORTS:
NEED1("Max per-user ports required");
cfg->max_ports = nat64lsn_parse_int(*av, opt);
@@ -519,6 +496,12 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
*av, opt);
ac--; av++;
break;
+ case TOK_STATES_CHUNKS:
+ NEED1("number of chunks required");
+ cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
case TOK_LOG:
cfg->flags |= NAT64_LOG;
break;
@@ -630,6 +613,12 @@ nat64lsn_config(const char *name, uint8_t set, int ac, char **av)
*av, opt);
ac--; av++;
break;
+ case TOK_STATES_CHUNKS:
+ NEED1("number of chunks required");
+ cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
case TOK_LOG:
cfg->flags |= NAT64_LOG;
break;
@@ -789,31 +778,24 @@ nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
printf("nat64lsn %s prefix4 %s/%u", cfg->name, abuf, cfg->plen4);
inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf));
printf(" prefix6 %s/%u", abuf, cfg->plen6);
-#if 0
- printf("agg_len %u agg_count %u ", cfg->agg_prefix_len,
- cfg->agg_prefix_max);
- if (cfg->min_port != NAT64LSN_PORT_MIN ||
- cfg->max_port != NAT64LSN_PORT_MAX)
- printf(" port_range %u:%u", cfg->min_port, cfg->max_port);
- if (cfg->jmaxlen != NAT64LSN_JMAXLEN)
- printf(" jmaxlen %u ", cfg->jmaxlen);
-#endif
- if (cfg->max_ports != NAT64LSN_MAX_PORTS)
- printf(" max_ports %u", cfg->max_ports);
- if (cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
+ if (co.verbose || cfg->states_chunks > 1)
+ printf(" states_chunks %u", cfg->states_chunks);
+ if (co.verbose || cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
printf(" host_del_age %u", cfg->nh_delete_delay);
- if (cfg->pg_delete_delay != NAT64LSN_PG_AGE)
+ if (co.verbose || cfg->pg_delete_delay != NAT64LSN_PG_AGE)
printf(" pg_del_age %u ", cfg->pg_delete_delay);
- if (cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
+ if (co.verbose || cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
printf(" tcp_syn_age %u", cfg->st_syn_ttl);
- if (cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
+ if (co.verbose || cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
printf(" tcp_close_age %u", cfg->st_close_ttl);
- if (cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
+ if (co.verbose || cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
printf(" tcp_est_age %u", cfg->st_estab_ttl);
- if (cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
+ if (co.verbose || cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
printf(" udp_age %u", cfg->st_udp_ttl);
- if (cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
+ if (co.verbose || cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
printf(" icmp_age %u", cfg->st_icmp_ttl);
+ if (co.verbose || cfg->jmaxlen != NAT64LSN_JMAXLEN)
+ printf(" jmaxlen %u ", cfg->jmaxlen);
if (cfg->flags & NAT64_LOG)
printf(" log");
if (cfg->flags & NAT64_ALLOW_PRIVATE)
diff --git a/sys/conf/files b/sys/conf/files
index 45968c43852c..ed982409534f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4398,9 +4398,9 @@ netpfil/ipfw/nat64/nat64clat.c optional inet inet6 ipfirewall \
netpfil/ipfw/nat64/nat64clat_control.c optional inet inet6 ipfirewall \
ipfirewall_nat64
netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \
- ipfirewall_nat64
+ ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \
- ipfirewall_nat64
+ ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \
ipfirewall_nat64
netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \
diff --git a/sys/modules/ipfw_nat64/Makefile b/sys/modules/ipfw_nat64/Makefile
index ee2ad7da15af..037215a71481 100644
--- a/sys/modules/ipfw_nat64/Makefile
+++ b/sys/modules/ipfw_nat64/Makefile
@@ -8,4 +8,6 @@ SRCS+= nat64clat.c nat64clat_control.c
SRCS+= nat64lsn.c nat64lsn_control.c
SRCS+= nat64stl.c nat64stl_control.c
+CFLAGS+= -I${SRCTOP}/sys/contrib/ck/include
+
.include <bsd.kmod.mk>
diff --git a/sys/netinet6/ip_fw_nat64.h b/sys/netinet6/ip_fw_nat64.h
index 47c0a70d167f..40e3441132e1 100644
--- a/sys/netinet6/ip_fw_nat64.h
+++ b/sys/netinet6/ip_fw_nat64.h
@@ -122,7 +122,7 @@ typedef struct _ipfw_nat64clat_cfg {
/*
* NAT64LSN default configuration values
*/
-#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */
+#define NAT64LSN_MAX_PORTS 2048 /* Unused */
#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
@@ -135,16 +135,20 @@ typedef struct _ipfw_nat64clat_cfg {
typedef struct _ipfw_nat64lsn_cfg {
char name[64]; /* NAT name */
uint32_t flags;
- uint32_t max_ports; /* Max ports per client */
- uint32_t agg_prefix_len; /* Prefix length to count */
- uint32_t agg_prefix_max; /* Max hosts per agg prefix */
+
+ uint32_t max_ports; /* Unused */
+ uint32_t agg_prefix_len; /* Unused */
+ uint32_t agg_prefix_max; /* Unused */
+
struct in_addr prefix4;
uint16_t plen4; /* Prefix length */
uint16_t plen6; /* Prefix length */
struct in6_addr prefix6; /* NAT64 prefix */
uint32_t jmaxlen; /* Max jobqueue length */
- uint16_t min_port; /* Min port group # to use */
- uint16_t max_port; /* Max port group # to use */
+
+ uint16_t min_port; /* Unused */
+ uint16_t max_port; /* Unused */
+
uint16_t nh_delete_delay;/* Stale host delete delay */
uint16_t pg_delete_delay;/* Stale portgroup delete delay */
uint16_t st_syn_ttl; /* TCP syn expire */
@@ -153,7 +157,7 @@ typedef struct _ipfw_nat64lsn_cfg {
uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */
uint8_t set; /* Named instance set [0..31] */
- uint8_t spare;
+ uint8_t states_chunks; /* Number of states chunks per PG */
} ipfw_nat64lsn_cfg;
typedef struct _ipfw_nat64lsn_state {
@@ -177,5 +181,30 @@ typedef struct _ipfw_nat64lsn_stg {
uint32_t spare2;
} ipfw_nat64lsn_stg;
-#endif /* _NETINET6_IP_FW_NAT64_H_ */
+typedef struct _ipfw_nat64lsn_state_v1 {
+ struct in6_addr host6; /* Bound IPv6 host */
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint16_t spare;
+ uint16_t idle; /* Last used time */
+ uint8_t flags; /* State flags */
+ uint8_t proto; /* protocol */
+} ipfw_nat64lsn_state_v1;
+typedef struct _ipfw_nat64lsn_stg_v1 {
+ union nat64lsn_pgidx {
+ uint64_t index;
+ struct {
+ uint8_t chunk; /* states chunk */
+ uint8_t proto; /* protocol */
+ uint16_t port; /* base port */
+ in_addr_t addr; /* alias address */
+ };
+ } next; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint32_t count; /* Number of states */
+} ipfw_nat64lsn_stg_v1;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.c b/sys/netpfil/ipfw/nat64/nat64lsn.c
index 1ddeaafc7dce..af88fd1622c5 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn.c
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -33,16 +33,17 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
#include <sys/errno.h>
+#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
-#include <sys/rwlock.h>
#include <sys/socket.h>
-#include <sys/queue.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
@@ -71,17 +72,22 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
-static void nat64lsn_periodic(void *data);
-#define PERIODIC_DELAY 4
-static uint8_t nat64lsn_proto_map[256];
-uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+static epoch_t nat64lsn_epoch;
+#define NAT64LSN_EPOCH_ENTER(et) epoch_enter_preempt(nat64lsn_epoch, &(et))
+#define NAT64LSN_EPOCH_EXIT(et) epoch_exit_preempt(nat64lsn_epoch, &(et))
+#define NAT64LSN_EPOCH_WAIT() epoch_wait_preempt(nat64lsn_epoch)
+#define NAT64LSN_EPOCH_ASSERT() MPASS(in_epoch(nat64lsn_epoch))
+#define NAT64LSN_EPOCH_CALL(c, f) epoch_call(nat64lsn_epoch, (c), (f))
-#define NAT64_FLAG_FIN 0x01 /* FIN was seen */
-#define NAT64_FLAG_SYN 0x02 /* First syn in->out */
-#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */
-#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_pgchunk_zone;
+static uma_zone_t nat64lsn_pg_zone;
+static uma_zone_t nat64lsn_aliaslink_zone;
+static uma_zone_t nat64lsn_state_zone;
+static uma_zone_t nat64lsn_job_zone;
-#define NAT64_FLAG_RDR 0x80 /* Port redirect */
+static void nat64lsn_periodic(void *data);
+#define PERIODIC_DELAY 4
#define NAT64_LOOKUP(chain, cmd) \
(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
@@ -91,25 +97,33 @@ uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
enum nat64lsn_jtype {
JTYPE_NEWHOST = 1,
JTYPE_NEWPORTGROUP,
- JTYPE_DELPORTGROUP,
+ JTYPE_DESTROY,
};
struct nat64lsn_job_item {
- TAILQ_ENTRY(nat64lsn_job_item) next;
+ STAILQ_ENTRY(nat64lsn_job_item) entries;
enum nat64lsn_jtype jtype;
- struct nat64lsn_host *nh;
- struct nat64lsn_portgroup *pg;
- void *spare_idx;
- struct in6_addr haddr;
- uint8_t nat_proto;
- uint8_t done;
- int needs_idx;
- int delcount;
- unsigned int fhash; /* Flow hash */
- uint32_t aaddr; /* Last used address (net) */
- struct mbuf *m;
- struct ipfw_flow_id f_id;
- uint64_t delmask[NAT64LSN_PGPTRNMASK];
+
+ union {
+ struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
+ struct mbuf *m;
+ struct nat64lsn_host *host;
+ struct nat64lsn_state *state;
+ uint32_t src6_hval;
+ uint32_t state_hval;
+ struct ipfw_flow_id f_id;
+ in_addr_t faddr;
+ uint16_t port;
+ uint8_t proto;
+ uint8_t done;
+ };
+ struct { /* used by JTYPE_DESTROY */
+ struct nat64lsn_hosts_slist hosts;
+ struct nat64lsn_pg_slist portgroups;
+ struct nat64lsn_pgchunk *pgchunk;
+ struct epoch_context epoch_ctx;
+ };
+ };
};
static struct mtx jmtx;
@@ -118,143 +132,311 @@ static struct mtx jmtx;
#define JQUEUE_LOCK() mtx_lock(&jmtx)
#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static struct nat64lsn_job_item *nat64lsn_create_job(
+ struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
struct nat64lsn_job_item *ji);
-static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
- struct nat64lsn_job_head *jhead, int jlen);
-
-static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, int jtype);
-static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
- int needs_idx);
-static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static void nat64lsn_job_destroy(epoch_context_t ctx);
+static void nat64lsn_destroy_host(struct nat64lsn_host *host);
+static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
+
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm);
+ const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
- struct ipfw_flow_id *f_id, struct mbuf **pm);
-
-static int alloc_portgroup(struct nat64lsn_job_item *ji);
-static void destroy_portgroup(struct nat64lsn_portgroup *pg);
-static void destroy_host6(struct nat64lsn_host *nh);
-static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+ struct ipfw_flow_id *f_id, struct mbuf **mp);
+static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
+ struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
+
+#define NAT64_BIT_TCP_FIN 0 /* FIN was seen */
+#define NAT64_BIT_TCP_SYN 1 /* First syn in->out */
+#define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */
+#define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */
+#define NAT64_BIT_STALE 7 /* state is going to be expired */
+
+#define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
+#define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
+#define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
-static int attach_portgroup(struct nat64lsn_cfg *cfg,
- struct nat64lsn_job_item *ji);
-static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+#define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
+#define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+ uint8_t result;
-/* XXX tmp */
-static uma_zone_t nat64lsn_host_zone;
-static uma_zone_t nat64lsn_pg_zone;
-static uma_zone_t nat64lsn_pgidx_zone;
+ result = flags & (TH_FIN|TH_SYN);
+ result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
+ result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
-static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
- struct nat64lsn_host *nh);
+ return (result);
+}
-#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))
-#define I6_first(_ph, h) (_ph)[h]
-#define I6_next(x) (x)->next
-#define I6_val(x) (&(x)->addr)
-#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
-#define I6_lock(a, b)
-#define I6_unlock(a, b)
+static void
+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+ struct nat64lsn_state *state)
+{
-#define I6HASH_FIND(_cfg, _res, _a) \
- CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
-#define I6HASH_INSERT(_cfg, _i) \
- CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
-#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
- CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+ memset(plog, 0, sizeof(*plog));
+ plog->length = PFLOG_REAL_HDRLEN;
+ plog->af = family;
+ plog->action = PF_NAT;
+ plog->dir = PF_IN;
+ plog->rulenr = htonl(state->ip_src);
+ plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
+ (state->proto << 8) | (state->ip_dst & 0xff));
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
-#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
- CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+#define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
+#define HOST_HVAL(c, a) HVAL((a),\
+ sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
+#define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
+
+#define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
+ sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
+#define ALIAS_BYHASH(c, v) \
+ ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
+static struct nat64lsn_aliaslink*
+nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
+ struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
+{
-#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)
+ /*
+ * We can implement some different algorithms how
+ * select an alias address.
+ * XXX: for now we use first available.
+ */
+ return (CK_SLIST_FIRST(&host->aliases));
+}
-static unsigned
-djb_hash(const unsigned char *h, const int len)
+#define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
+#define STATE_HASH(h, v) \
+ ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
+#define STATES_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? (p)->states : \
+ ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
+
+#ifdef __LP64__
+#define FREEMASK_FFSLL(pg, faddr) \
+ ffsll(*FREEMASK_CHUNK((pg), (faddr)))
+#define FREEMASK_BTR(pg, faddr, bit) \
+ ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_BTS(pg, faddr, bit) \
+ ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_ISSET(pg, faddr, bit) \
+ ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
+#define FREEMASK_COPY(pg, n, out) \
+ (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
+#else
+static inline int
+freemask_ffsll(uint32_t *freemask)
{
- unsigned int result = 0;
int i;
- for (i = 0; i < len; i++)
- result = 33 * result ^ h[i];
-
- return (result);
+ if ((i = ffsl(freemask[0])) != 0)
+ return (i);
+ if ((i = ffsl(freemask[1])) != 0)
+ return (i + 32);
+ return (0);
}
-
-/*
-static size_t
-bitmask_size(size_t num, int *level)
+#define FREEMASK_FFSLL(pg, faddr) \
+ freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
+#define FREEMASK_BTR(pg, faddr, bit) \
+ ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
+#define FREEMASK_BTS(pg, faddr, bit) \
+ ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
+#define FREEMASK_ISSET(pg, faddr, bit) \
+ ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
+#define FREEMASK_COPY(pg, n, out) \
+ (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
+ ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
+#endif /* !__LP64__ */
+
+
+#define NAT64LSN_TRY_PGCNT 32
+static struct nat64lsn_pg*
+nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
+ struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
+ uint32_t *pgidx, in_addr_t faddr)
{
- size_t x;
- int c;
+ struct nat64lsn_pg *pg, *oldpg;
+ uint32_t idx, oldidx;
+ int cnt;
+
+ cnt = 0;
+ /* First try last used PG */
+ oldpg = pg = ck_pr_load_ptr(pgptr);
+ idx = oldidx = ck_pr_load_32(pgidx);
+ /* If pgidx is out of range, reset it to the first pgchunk */
+ if (!ISSET32(*chunkmask, idx / 32))
+ idx = 0;
+ do {
+ ck_pr_fence_load();
+ if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
+ /*
+ * If last used PG has not free states,
+ * try to update pointer.
+ * NOTE: it can be already updated by jobs handler,
+ * thus we use CAS operation.
+ */
+ if (cnt > 0)
+ ck_pr_cas_ptr(pgptr, oldpg, pg);
+ return (pg);
+ }
+ /* Stop if idx is out of range */
+ if (!ISSET32(*chunkmask, idx / 32))
+ break;
+
+ if (ISSET32(pgmask[idx / 32], idx % 32))
+ pg = ck_pr_load_ptr(
+ &chunks[idx / 32]->pgptr[idx % 32]);
+ else
+ pg = NULL;
- for (c = 0, x = num; num > 1; num /= 64, c++)
- ;
+ idx++;
+ } while (++cnt < NAT64LSN_TRY_PGCNT);
- return (x);
+ /* If pgidx is out of range, reset it to the first pgchunk */
+ if (!ISSET32(*chunkmask, idx / 32))
+ idx = 0;
+ ck_pr_cas_32(pgidx, oldidx, idx);
+ return (NULL);
}
-static void
-bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+static struct nat64lsn_state*
+nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
+ const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
+ uint16_t port, uint8_t proto)
{
- size_t x, z;
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_state *state;
+ struct nat64lsn_pg *pg;
+ int i, offset;
+
+ NAT64LSN_EPOCH_ASSERT();
+
+ /* Check that we already have state for given arguments */
+ CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
+ if (state->proto == proto && state->ip_dst == faddr &&
+ state->sport == port && state->dport == f_id->dst_port)
+ return (state);
+ }
- memset(pmask, 0xFF, bufsize);
- for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
- ;
- pmask[x] ~= 0x01;
-}
-*/
+ link = nat64lsn_get_aliaslink(cfg, host, f_id);
+ if (link == NULL)
+ return (NULL);
-static void
-nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
- uint32_t n, uint32_t sn)
-{
+ switch (proto) {
+ case IPPROTO_TCP:
+ pg = nat64lsn_get_pg(
+ &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
+ link->alias->tcp, &link->alias->tcp_pg,
+ &link->alias->tcp_pgidx, faddr);
+ break;
+ case IPPROTO_UDP:
+ pg = nat64lsn_get_pg(
+ &link->alias->udp_chunkmask, link->alias->udp_pgmask,
+ link->alias->udp, &link->alias->udp_pg,
+ &link->alias->udp_pgidx, faddr);
+ break;
+ case IPPROTO_ICMP:
+ pg = nat64lsn_get_pg(
+ &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
+ link->alias->icmp, &link->alias->icmp_pg,
+ &link->alias->icmp_pgidx, faddr);
+ break;
+ default:
+ panic("%s: wrong proto %d", __func__, proto);
+ }
+ if (pg == NULL)
+ return (NULL);
- memset(plog, 0, sizeof(*plog));
- plog->length = PFLOG_REAL_HDRLEN;
- plog->af = family;
- plog->action = PF_NAT;
- plog->dir = PF_IN;
- plog->rulenr = htonl(n);
- plog->subrulenr = htonl(sn);
- plog->ruleset[0] = '\0';
- strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
- ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+ /* Check that PG has some free states */
+ state = NULL;
+ i = FREEMASK_BITCOUNT(pg, faddr);
+ while (i-- > 0) {
+ offset = FREEMASK_FFSLL(pg, faddr);
+ if (offset == 0) {
+ /*
+ * We lost the race.
+ * No more free states in this PG.
+ */
+ break;
+ }
+
+ /* Lets try to atomically grab the state */
+ if (FREEMASK_BTR(pg, faddr, offset - 1)) {
+ state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
+ /* Initialize */
+ state->flags = proto != IPPROTO_TCP ? 0 :
+ convert_tcp_flags(f_id->_flags);
+ state->proto = proto;
+ state->aport = pg->base_port + offset - 1;
+ state->dport = f_id->dst_port;
+ state->sport = port;
+ state->ip6_dst = f_id->dst_ip6;
+ state->ip_dst = faddr;
+ state->ip_src = link->alias->addr;
+ state->hval = hval;
+ state->host = host;
+ SET_AGE(state->timestamp);
+
+ /* Insert new state into host's hash table */
+ HOST_LOCK(host);
+ CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
+ state, entries);
+ host->states_count++;
+ /*
+ * XXX: In case if host is going to be expired,
+ * reset NAT64LSN_DEADHOST flag.
+ */
+ host->flags &= ~NAT64LSN_DEADHOST;
+ HOST_UNLOCK(host);
+ NAT64STAT_INC(&cfg->base.stats, screated);
+ /* Mark the state as ready for translate4 */
+ ck_pr_fence_store();
+ ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
+ break;
+ }
+ }
+ return (state);
}
+
/*
* Inspects icmp packets to see if the message contains different
* packet header so we need to alter @addr and @port.
*/
static int
-inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
+inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
uint16_t *port)
{
+ struct icmp *icmp;
struct ip *ip;
- struct tcphdr *tcp;
- struct udphdr *udp;
- struct icmphdr *icmp;
int off;
- uint8_t proto;
+ uint8_t inner_proto;
- ip = mtod(*m, struct ip *); /* Outer IP header */
+ ip = mtod(*mp, struct ip *); /* Outer IP header */
off = (ip->ip_hl << 2) + ICMP_MINLEN;
- if ((*m)->m_len < off)
- *m = m_pullup(*m, off);
- if (*m == NULL)
+ if ((*mp)->m_len < off)
+ *mp = m_pullup(*mp, off);
+ if (*mp == NULL)
return (ENOMEM);
- ip = mtod(*m, struct ip *); /* Outer IP header */
- icmp = L3HDR(ip, struct icmphdr *);
+ ip = mtod(*mp, struct ip *); /* Outer IP header */
+ icmp = L3HDR(ip, struct icmp *);
switch (icmp->icmp_type) {
case ICMP_ECHO:
case ICMP_ECHOREPLY:
/* Use icmp ID as distinguisher */
- *port = ntohs(*((uint16_t *)(icmp + 1)));
+ *port = ntohs(icmp->icmp_id);
return (0);
case ICMP_UNREACH:
case ICMP_TIMXCEED:
@@ -266,90 +448,133 @@ inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
* ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
* of ULP header.
*/
- if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
+ if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
return (EINVAL);
- if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
- *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
- if (*m == NULL)
+ if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
+ *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
+ if (*mp == NULL)
return (ENOMEM);
- ip = mtodo(*m, off); /* Inner IP header */
- proto = ip->ip_p;
+ ip = mtodo(*mp, off); /* Inner IP header */
+ inner_proto = ip->ip_p;
off += ip->ip_hl << 2; /* Skip inner IP header */
*addr = ntohl(ip->ip_src.s_addr);
- if ((*m)->m_len < off + ICMP_MINLEN)
- *m = m_pullup(*m, off + ICMP_MINLEN);
- if (*m == NULL)
+ if ((*mp)->m_len < off + ICMP_MINLEN)
+ *mp = m_pullup(*mp, off + ICMP_MINLEN);
+ if (*mp == NULL)
return (ENOMEM);
- switch (proto) {
+ switch (inner_proto) {
case IPPROTO_TCP:
- tcp = mtodo(*m, off);
- *nat_proto = NAT_PROTO_TCP;
- *port = ntohs(tcp->th_sport);
- return (0);
case IPPROTO_UDP:
- udp = mtodo(*m, off);
- *nat_proto = NAT_PROTO_UDP;
- *port = ntohs(udp->uh_sport);
+ /* Copy source port from the header */
+ *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
+ *proto = inner_proto;
return (0);
case IPPROTO_ICMP:
/*
* We will translate only ICMP errors for our ICMP
* echo requests.
*/
- icmp = mtodo(*m, off);
+ icmp = mtodo(*mp, off);
if (icmp->icmp_type != ICMP_ECHO)
return (EOPNOTSUPP);
- *port = ntohs(*((uint16_t *)(icmp + 1)));
+ *port = ntohs(icmp->icmp_id);
return (0);
};
return (EOPNOTSUPP);
}
-static inline uint8_t
-convert_tcp_flags(uint8_t flags)
+static struct nat64lsn_state*
+nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{
- uint8_t result;
+ struct nat64lsn_state *state;
+ struct nat64lsn_pg *pg;
+ int chunk_idx, pg_idx, state_idx;
- result = flags & (TH_FIN|TH_SYN);
- result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
- result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
+ NAT64LSN_EPOCH_ASSERT();
- return (result);
+ if (port < NAT64_MIN_PORT)
+ return (NULL);
+ /*
+ * Alias keeps 32 pgchunks for each protocol.
+ * Each pgchunk has 32 pointers to portgroup.
+ * Each portgroup has 64 states for ports.
+ */
+ port -= NAT64_MIN_PORT;
+ chunk_idx = port / 2048;
+
+ port -= chunk_idx * 2048;
+ pg_idx = port / 64;
+ state_idx = port % 64;
+
+ /*
+ * First check in proto_chunkmask that we have allocated PG chunk.
+ * Then check in proto_pgmask that we have valid PG pointer.
+ */
+ pg = NULL;
+ switch (proto) {
+ case IPPROTO_TCP:
+ if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
+ ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ case IPPROTO_UDP:
+ if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
+ ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->udp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ case IPPROTO_ICMP:
+ if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
+ ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
+ pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
+ break;
+ }
+ return (NULL);
+ default:
+ panic("%s: wrong proto %d", __func__, proto);
+ }
+ if (pg == NULL)
+ return (NULL);
+
+ if (FREEMASK_ISSET(pg, faddr, state_idx))
+ return (NULL);
+
+ state = &STATES_CHUNK(pg, faddr)->state[state_idx];
+ ck_pr_fence_load();
+ if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
+ return (state);
+ return (NULL);
}
-static NAT64NOINLINE int
-nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
- struct mbuf **pm)
+static int
+nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
struct pfloghdr loghdr, *logdata;
struct in6_addr src6;
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_host *nh;
- struct nat64lsn_state *st;
- struct ip *ip;
- uint32_t addr;
- uint16_t state_flags, state_ts;
- uint16_t port, lport;
- uint8_t nat_proto;
+ struct nat64lsn_state *state;
+ struct nat64lsn_alias *alias;
+ uint32_t addr, flags;
+ uint16_t port, ts;
int ret;
+ uint8_t proto;
addr = f_id->dst_ip;
port = f_id->dst_port;
+ proto = f_id->proto;
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
}
- /* Check if protocol is supported and get its short id */
- nat_proto = nat64lsn_proto_map[f_id->proto];
- if (nat_proto == 0) {
- NAT64STAT_INC(&cfg->base.stats, noproto);
- return (cfg->nomatch_verdict);
- }
-
- /* We might need to handle icmp differently */
- if (nat_proto == NAT_PROTO_ICMP) {
- ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
+ /* Check if protocol is supported */
+ switch (proto) {
+ case IPPROTO_ICMP:
+ ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
if (ret != 0) {
if (ret == ENOMEM) {
NAT64STAT_INC(&cfg->base.stats, nomem);
@@ -358,804 +583,640 @@ nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
NAT64STAT_INC(&cfg->base.stats, noproto);
return (cfg->nomatch_verdict);
}
- /* XXX: Check addr for validity */
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
return (cfg->nomatch_verdict);
}
+ /* FALLTHROUGH */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ NAT64STAT_INC(&cfg->base.stats, noproto);
+ return (cfg->nomatch_verdict);
}
- /* Calc portgroup offset w.r.t protocol */
- pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
+ alias = &ALIAS_BYHASH(cfg, addr);
+ MPASS(addr == alias->addr);
- /* Check if this port is occupied by any portgroup */
- if (pg == NULL) {
+ /* Check that we have state for this port */
+ state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
+ port, proto);
+ if (state == NULL) {
NAT64STAT_INC(&cfg->base.stats, nomatch4);
-#if 0
- DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
- _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
-#endif
return (cfg->nomatch_verdict);
}
/* TODO: Check flags to see if we need to do some static mapping */
- nh = pg->host;
-
- /* Prepare some fields we might need to update */
- SET_AGE(state_ts);
- ip = mtod(*pm, struct ip *);
- if (ip->ip_p == IPPROTO_TCP)
- state_flags = convert_tcp_flags(
- L3HDR(ip, struct tcphdr *)->th_flags);
- else
- state_flags = 0;
-
- /* Lock host and get port mapping */
- NAT64_LOCK(nh);
- st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
- if (st->timestamp != state_ts)
- st->timestamp = state_ts;
- if ((st->flags & state_flags) != state_flags)
- st->flags |= state_flags;
- lport = htons(st->u.s.lport);
+ /* Update some state fields if need */
+ SET_AGE(ts);
+ if (f_id->proto == IPPROTO_TCP)
+ flags = convert_tcp_flags(f_id->_flags);
+ else
+ flags = 0;
+ if (state->timestamp != ts)
+ state->timestamp = ts;
+ if ((state->flags & flags) != flags)
+ state->flags |= flags;
- NAT64_UNLOCK(nh);
+ port = htons(state->sport);
+ src6 = state->ip6_dst;
if (cfg->base.flags & NAT64_LOG) {
logdata = &loghdr;
- nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
+ nat64lsn_log(logdata, *mp, AF_INET, state);
} else
logdata = NULL;
+ /*
+ * We already have src6 with embedded address, but it is possible,
+ * that src_ip is different than state->ip_dst, this is why we
+ * do embedding again.
+ */
nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
- ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
+ ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
&cfg->base, logdata);
-
if (ret == NAT64SKIP)
return (cfg->nomatch_verdict);
- if (ret == NAT64MFREE)
- m_freem(*pm);
- *pm = NULL;
-
+ if (ret == NAT64RETURN)
+ *mp = NULL;
return (IP_FW_DENY);
}
-void
-nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
- const char *px, int off)
-{
- char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
-
- if ((V_nat64_debug & DP_STATE) == 0)
- return;
- inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
- inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
- inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
-
- DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
- "%s:%d AGE %d", px, pg->idx, st, off,
- s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
- d, st->u.s.fport, GET_AGE(st->timestamp));
-}
-
/*
- * Check if particular TCP state is stale and should be deleted.
+ * Check if particular state is stale and should be deleted.
* Return 1 if true, 0 otherwise.
*/
static int
-nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_state *st, int age)
+nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
- int ttl;
-
- if (st->flags & NAT64_FLAG_FIN)
- ttl = cfg->st_close_ttl;
- else if (st->flags & NAT64_FLAG_ESTAB)
- ttl = cfg->st_estab_ttl;
- else if (st->flags & NAT64_FLAG_SYN)
- ttl = cfg->st_syn_ttl;
- else
- ttl = cfg->st_syn_ttl;
+ int age, ttl;
- if (age > ttl)
+ /* State was marked as stale in previous pass. */
+ if (ISSET32(state->flags, NAT64_BIT_STALE))
return (1);
- return (0);
-}
-
-/*
- * Check if nat state @st is stale and should be deleted.
- * Return 1 if true, 0 otherwise.
- */
-static NAT64NOINLINE int
-nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
-{
- int age, delete;
-
- age = GET_AGE(st->timestamp);
- delete = 0;
- /* Skip immutable records */
- if (st->flags & NAT64_FLAG_RDR)
+ /* State is not yet initialized, it is going to be READY */
+ if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
return (0);
- switch (pg->nat_proto) {
- case NAT_PROTO_TCP:
- delete = nat64lsn_periodic_check_tcp(cfg, st, age);
- break;
- case NAT_PROTO_UDP:
- if (age > cfg->st_udp_ttl)
- delete = 1;
- break;
- case NAT_PROTO_ICMP:
- if (age > cfg->st_icmp_ttl)
- delete = 1;
- break;
+ age = GET_AGE(state->timestamp);
+ switch (state->proto) {
+ case IPPROTO_TCP:
+ if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
+ ttl = cfg->st_close_ttl;
+ else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
+ ttl = cfg->st_estab_ttl;
+ else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
+ ttl = cfg->st_syn_ttl;
+ else
+ ttl = cfg->st_syn_ttl;
+ if (age > ttl)
+ return (1);
+ break;
+ case IPPROTO_UDP:
+ if (age > cfg->st_udp_ttl)
+ return (1);
+ break;
+ case IPPROTO_ICMP:
+ if (age > cfg->st_icmp_ttl)
+ return (1);
+ break;
}
-
- return (delete);
+ return (0);
}
-
-/*
- * The following structures and functions
- * are used to perform SLIST_FOREACH_SAFE()
- * analog for states identified by struct st_ptr.
- */
-
-struct st_idx {
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
- struct st_ptr sidx_next;
-};
-
-static struct st_idx *
-st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
- struct st_ptr *sidx, struct st_idx *si)
+static int
+nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
-
- if (sidx->idx == 0) {
- memset(si, 0, sizeof(*si));
- return (si);
+ struct nat64lsn_state *state;
+ struct nat64lsn_host *host;
+ uint64_t freemask;
+ int c, i, update_age;
+
+ update_age = 0;
+ for (c = 0; c < pg->chunks_count; c++) {
+ FREEMASK_COPY(pg, c, freemask);
+ for (i = 0; i < 64; i++) {
+ if (ISSET64(freemask, i))
+ continue;
+ state = &STATES_CHUNK(pg, c)->state[i];
+ if (nat64lsn_check_state(cfg, state) == 0) {
+ update_age = 1;
+ continue;
+ }
+ /*
+ * Expire state:
+ * 1. Mark as STALE and unlink from host's hash.
+ * 2. Set bit in freemask.
+ */
+ if (ISSET32(state->flags, NAT64_BIT_STALE)) {
+ /*
+ * State was marked as STALE in previous
+ * pass. Now it is safe to release it.
+ */
+ state->flags = 0;
+ ck_pr_fence_store();
+ FREEMASK_BTS(pg, c, i);
+ NAT64STAT_INC(&cfg->base.stats, sdeleted);
+ continue;
+ }
+ MPASS(state->flags & NAT64_FLAG_READY);
+
+ host = state->host;
+ HOST_LOCK(host);
+ CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
+ state, nat64lsn_state, entries);
+ host->states_count--;
+ HOST_UNLOCK(host);
+
+ /* Reset READY flag */
+ ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
+ /* And set STALE flag */
+ ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
+ ck_pr_fence_store();
+ /*
+ * Now translate6 will not use this state, wait
+ * until it become safe for translate4, then mark
+ * state as free.
+ */
+ }
}
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
- st = &pg->states[sidx->off];
+ /*
+	 * If we still have some live states, update the timestamp.
+ */
+ if (update_age)
+ SET_AGE(pg->timestamp);
- si->pg = pg;
- si->st = st;
- si->sidx_next = st->next;
+ if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
+ return (0);
- return (si);
+ return (1);
}
-static struct st_idx *
-st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
- struct st_idx *si)
+static void
+nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_pg_slist *portgroups)
{
- struct st_ptr sidx;
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
-
- sidx = si->sidx_next;
- if (sidx.idx == 0) {
- memset(si, 0, sizeof(*si));
- si->st = NULL;
- si->pg = NULL;
- return (si);
+ struct nat64lsn_alias *alias;
+ struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
+ uint32_t *pgmask, *pgidx;
+ int i, idx;
+
+ for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
+ alias = &cfg->aliases[i];
+ CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
+ if (nat64lsn_maintain_pg(cfg, pg) == 0)
+ continue;
+ /* Always keep first PG */
+ if (pg->base_port == NAT64_MIN_PORT)
+ continue;
+ /*
+ * PG is expired, unlink it and schedule for
+ * deferred destroying.
+ */
+ idx = (pg->base_port - NAT64_MIN_PORT) / 64;
+ switch (pg->proto) {
+ case IPPROTO_TCP:
+ pgmask = alias->tcp_pgmask;
+ pgptr = &alias->tcp_pg;
+ pgidx = &alias->tcp_pgidx;
+ firstpg = alias->tcp[0]->pgptr[0];
+ break;
+ case IPPROTO_UDP:
+ pgmask = alias->udp_pgmask;
+ pgptr = &alias->udp_pg;
+ pgidx = &alias->udp_pgidx;
+ firstpg = alias->udp[0]->pgptr[0];
+ break;
+ case IPPROTO_ICMP:
+ pgmask = alias->icmp_pgmask;
+ pgptr = &alias->icmp_pg;
+ pgidx = &alias->icmp_pgidx;
+ firstpg = alias->icmp[0]->pgptr[0];
+ break;
+ }
+ /* Reset the corresponding bit in pgmask array. */
+ ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
+ ck_pr_fence_store();
+ /* If last used PG points to this PG, reset it. */
+ ck_pr_cas_ptr(pgptr, pg, firstpg);
+ ck_pr_cas_32(pgidx, idx, 0);
+ /* Unlink PG from alias's chain */
+ ALIAS_LOCK(alias);
+ CK_SLIST_REMOVE(&alias->portgroups, pg,
+ nat64lsn_pg, entries);
+ alias->portgroups_count--;
+ ALIAS_UNLOCK(alias);
+ /* And link to job's chain for deferred destroying */
+ NAT64STAT_INC(&cfg->base.stats, spgdeleted);
+ CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
+ }
}
-
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- st = &pg->states[sidx.off];
-
- si->pg = pg;
- si->st = st;
- si->sidx_next = st->next;
-
- return (si);
-}
-
-static struct st_idx *
-st_save_cond(struct st_idx *si_dst, struct st_idx *si)
-{
- if (si->st != NULL)
- *si_dst = *si;
-
- return (si_dst);
}
-unsigned int
-nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
+static void
+nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_hosts_slist *hosts)
{
- struct st_idx si, si_prev;
+ struct nat64lsn_host *host, *tmp;
int i;
- unsigned int delcount;
-
- delcount = 0;
- for (i = 0; i < nh->hsize; i++) {
- memset(&si_prev, 0, sizeof(si_prev));
- for (st_first(cfg, nh, &nh->phash[i], &si);
- si.st != NULL;
- st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
- if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
+ entries, tmp) {
+			/* Was the host marked in a previous call? */
+ if (host->flags & NAT64LSN_DEADHOST) {
+ if (host->states_count > 0) {
+ host->flags &= ~NAT64LSN_DEADHOST;
+ continue;
+ }
+ /*
+ * Unlink host from hash table and schedule
+ * it for deferred destroying.
+ */
+ CFG_LOCK(cfg);
+ CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
+ nat64lsn_host, entries);
+ cfg->hosts_count--;
+ CFG_UNLOCK(cfg);
+ CK_SLIST_INSERT_HEAD(hosts, host, entries);
+ continue;
+ }
+ if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
continue;
- nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
- si.st->cur.off);
- /* Unlink from hash */
- if (si_prev.st != NULL)
- si_prev.st->next = si.st->next;
- else
- nh->phash[i] = si.st->next;
- /* Delete state and free its data */
- PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
- memset(si.st, 0, sizeof(struct nat64lsn_state));
- si.st = NULL;
- delcount++;
-
- /* Update portgroup timestamp */
- SET_AGE(si.pg->timestamp);
+ if (host->states_count > 0)
+ continue;
+ /* Mark host as going to be expired in next pass */
+ host->flags |= NAT64LSN_DEADHOST;
+ ck_pr_fence_store();
}
}
- NAT64STAT_ADD(&cfg->base.stats, sdeleted, delcount);
- return (delcount);
-}
-
-/*
- * Checks if portgroup is not used and can be deleted,
- * Returns 1 if stale, 0 otherwise
- */
-static int
-stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
-{
-
- if (!PG_IS_EMPTY(pg))
- return (0);
- if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
- return (0);
- return (1);
}
-/*
- * Checks if host record is not used and can be deleted,
- * Returns 1 if stale, 0 otherwise
- */
-static int
-stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
+static struct nat64lsn_pgchunk*
+nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
-
- if (nh->pg_used != 0)
- return (0);
- if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay)
- return (0);
- return (1);
-}
-
-struct nat64lsn_periodic_data {
- struct nat64lsn_cfg *cfg;
- struct nat64lsn_job_head jhead;
- int jlen;
-};
-
-static NAT64NOINLINE int
-nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
- struct nat64lsn_periodic_data *d)
-{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_job_item *ji;
- uint64_t delmask[NAT64LSN_PGPTRNMASK];
- int delcount, i;
-
- delcount = 0;
- memset(delmask, 0, sizeof(delmask));
-
- if (V_nat64_debug & DP_JQUEUE) {
- char a[INET6_ADDRSTRLEN];
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
- stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
- }
- if (!stale_nh(d->cfg, nh)) {
- /* Non-stale host. Inspect internals */
- NAT64_LOCK(nh);
-
- /* Stage 1: Check&expire states */
- if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
- SET_AGE(nh->timestamp);
-
- /* Stage 2: Check if we need to expire */
- for (i = 0; i < nh->pg_used; i++) {
- pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
- if (pg == NULL)
+#if 0
+ struct nat64lsn_alias *alias;
+ struct nat64lsn_pgchunk *chunk;
+ uint32_t pgmask;
+ int i, c;
+
+ for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
+ alias = &cfg->aliases[i];
+ if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
+ continue;
+ /* Always keep single chunk allocated */
+ for (c = 1; c < 32; c++) {
+ if ((alias->tcp_chunkmask & (1 << c)) == 0)
+ break;
+ chunk = ck_pr_load_ptr(&alias->tcp[c]);
+ if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue;
-
- /* Check if we can delete portgroup */
- if (stale_pg(d->cfg, pg) == 0)
+ ck_pr_btr_32(&alias->tcp_chunkmask, c);
+ ck_pr_fence_load();
+ if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
continue;
-
- DPRINTF(DP_JQUEUE, "Check PG %d", i);
- delmask[i / 64] |= ((uint64_t)1 << (i % 64));
- delcount++;
}
-
- NAT64_UNLOCK(nh);
- if (delcount == 0)
- return (0);
}
+#endif
+ return (NULL);
+}
- DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
- /* We have something to delete - add it to queue */
- ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
- if (ji == NULL)
- return (0);
-
- ji->haddr = nh->addr;
- ji->delcount = delcount;
- memcpy(ji->delmask, delmask, sizeof(ji->delmask));
-
- TAILQ_INSERT_TAIL(&d->jhead, ji, next);
- d->jlen++;
- return (0);
+#if 0
+static void
+nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
+{
+ struct nat64lsn_host *h;
+ struct nat64lsn_states_slist *hash;
+ int i, j, hsize;
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
+ if (h->states_count / 2 < h->states_hashsize ||
+ h->states_hashsize >= NAT64LSN_MAX_HSIZE)
+ continue;
+ hsize = h->states_hashsize * 2;
+ hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
+ if (hash == NULL)
+ continue;
+ for (j = 0; j < hsize; j++)
+ CK_SLIST_INIT(&hash[i]);
+
+ ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
+ }
+ }
}
+#endif
/*
* This procedure is used to perform various maintance
- * on dynamic hash list. Currently it is called every second.
+ * on dynamic hash list. Currently it is called every 4 seconds.
*/
static void
nat64lsn_periodic(void *data)
{
- struct ip_fw_chain *ch;
- IPFW_RLOCK_TRACKER;
+ struct nat64lsn_job_item *ji;
struct nat64lsn_cfg *cfg;
- struct nat64lsn_periodic_data d;
- struct nat64lsn_host *nh, *tmp;
cfg = (struct nat64lsn_cfg *) data;
- ch = cfg->ch;
CURVNET_SET(cfg->vp);
-
- memset(&d, 0, sizeof(d));
- d.cfg = cfg;
- TAILQ_INIT(&d.jhead);
-
- IPFW_RLOCK(ch);
-
- /* Stage 1: foreach host, check all its portgroups */
- I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
-
- /* Enqueue everything we have requested */
- nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
-
+ if (cfg->hosts_count > 0) {
+ ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
+ if (ji != NULL) {
+ ji->jtype = JTYPE_DESTROY;
+ CK_SLIST_INIT(&ji->hosts);
+ CK_SLIST_INIT(&ji->portgroups);
+ nat64lsn_expire_hosts(cfg, &ji->hosts);
+ nat64lsn_expire_portgroups(cfg, &ji->portgroups);
+ ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
+ NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
+ nat64lsn_job_destroy);
+ } else
+ NAT64STAT_INC(&cfg->base.stats, jnomem);
+ }
callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
-
- IPFW_RUNLOCK(ch);
-
CURVNET_RESTORE();
}
-static NAT64NOINLINE void
-reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
-
- if (ji->m == NULL)
- return;
-
- /* Request has failed or packet type is wrong */
- if (ji->f_id.addr_type != 6 || ji->done == 0) {
- m_freem(ji->m);
- ji->m = NULL;
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
- ji->jtype, ji->done);
- return;
- }
-
- /*
- * XXX: Limit recursion level
- */
-
- NAT64STAT_INC(&cfg->base.stats, jreinjected);
- DPRINTF(DP_JQUEUE, "Reinject mbuf");
- nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
-}
-
-static void
-destroy_portgroup(struct nat64lsn_portgroup *pg)
-{
-
- DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
- uma_zfree(nat64lsn_pg_zone, pg);
-}
-
-static NAT64NOINLINE int
-alloc_portgroup(struct nat64lsn_job_item *ji)
-{
- struct nat64lsn_portgroup *pg;
-
- pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
- if (pg == NULL)
- return (1);
-
- if (ji->needs_idx != 0) {
- ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
- /* Failed alloc isn't always fatal, so don't check */
- }
- memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
- pg->nat_proto = ji->nat_proto;
- ji->pg = pg;
- return (0);
-
-}
-
-static void
-destroy_host6(struct nat64lsn_host *nh)
+#define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage): 0)
+#define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
+#define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
+static int
+nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_host *host;
+ struct nat64lsn_state *state;
+ uint32_t hval, data[2];
int i;
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
- nh->pg_used);
- NAT64_LOCK_DESTROY(nh);
- for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
- uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
- uma_zfree(nat64lsn_host_zone, nh);
-}
-
-static NAT64NOINLINE int
-alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
- struct nat64lsn_host *nh;
- char a[INET6_ADDRSTRLEN];
-
- nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
- if (nh == NULL)
- return (1);
- PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
- if (PORTGROUP_CHUNK(nh, 0) == NULL) {
- uma_zfree(nat64lsn_host_zone, nh);
- return (2);
- }
- if (alloc_portgroup(ji) != 0) {
- NAT64STAT_INC(&cfg->base.stats, jportfails);
- uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
- uma_zfree(nat64lsn_host_zone, nh);
- return (3);
+ /* Check that host was not yet added. */
+ NAT64LSN_EPOCH_ASSERT();
+ CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
+ if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
+ /* The host was allocated in previous call. */
+ ji->host = host;
+ goto get_state;
+ }
}
- NAT64_LOCK_INIT(nh);
- nh->addr = ji->haddr;
- nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
- nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
- nh->pg_used = 0;
- ji->nh = nh;
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
- return (0);
-}
-
-/*
- * Finds free @pg index inside @nh
- */
-static NAT64NOINLINE int
-find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
-{
- int i;
+ host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
+ if (ji->host == NULL)
+ return (HOST_ERROR(1));
- for (i = 0; i < nh->pg_allocated; i++) {
- if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) {
- *idx = i;
- return (0);
- }
+ host->states_hashsize = NAT64LSN_HSIZE;
+ host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
+ host->states_hashsize, M_NAT64LSN, M_NOWAIT);
+ if (host->states_hash == NULL) {
+ uma_zfree(nat64lsn_host_zone, host);
+ return (HOST_ERROR(2));
}
- return (1);
-}
-static NAT64NOINLINE int
-attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
-{
- char a[INET6_ADDRSTRLEN];
- struct nat64lsn_host *nh;
-
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL) {
- /* Add new host to list */
- nh = ji->nh;
- I6HASH_INSERT(cfg, nh);
- cfg->ihcount++;
- ji->nh = NULL;
-
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
- /*
- * Try to add portgroup.
- * Note it will automatically set
- * 'done' on ji if successful.
- */
- if (attach_portgroup(cfg, ji) != 0) {
- DPRINTF(DP_DROPS, "%s %p failed to attach PG",
- a, nh);
- NAT64STAT_INC(&cfg->base.stats, jportfails);
- return (1);
- }
- return (0);
+ link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
+ if (link == NULL) {
+ free(host->states_hash, M_NAT64LSN);
+ uma_zfree(nat64lsn_host_zone, host);
+ return (HOST_ERROR(3));
}
+ /* Initialize */
+ HOST_LOCK_INIT(host);
+ SET_AGE(host->timestamp);
+ host->addr = ji->f_id.src_ip6;
+ host->hval = ji->src6_hval;
+ host->flags = 0;
+ host->states_count = 0;
+ host->states_hashsize = NAT64LSN_HSIZE;
+ CK_SLIST_INIT(&host->aliases);
+ for (i = 0; i < host->states_hashsize; i++)
+ CK_SLIST_INIT(&host->states_hash[i]);
+
+ /* Determine alias from flow hash. */
+ hval = ALIASLINK_HVAL(cfg, &ji->f_id);
+ link->alias = &ALIAS_BYHASH(cfg, hval);
+ CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
+
+ ALIAS_LOCK(link->alias);
+ CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
+ link->alias->hosts_count++;
+ ALIAS_UNLOCK(link->alias);
+
+ CFG_LOCK(cfg);
+ CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
+ cfg->hosts_count++;
+ CFG_UNLOCK(cfg);
+
+get_state:
+ data[0] = ji->faddr;
+ data[1] = (ji->f_id.dst_port << 16) | ji->port;
+ ji->state_hval = hval = STATE_HVAL(cfg, data);
+ state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
+ ji->faddr, ji->port, ji->proto);
/*
- * nh isn't NULL. This probably means we had several simultaneous
- * host requests. The previous one request has already attached
- * this host. Requeue attached mbuf and mark job as done, but
- * leave nh and pg pointers not changed, so nat64lsn_do_request()
- * will release all allocated resources.
+	 * If we failed to obtain a new state, the alias in use needs a new PG.
+ * XXX: or another alias should be used.
*/
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ, "%s %p is already attached as %p",
- a, ji->nh, nh);
+ if (state == NULL) {
+ /* Try to allocate new PG */
+ if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
+ return (HOST_ERROR(4));
+ /* We assume that nat64lsn_alloc_pg() got state */
+ } else
+ ji->state = state;
+
ji->done = 1;
- return (0);
+ DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
+ inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
+ return (HOST_ERROR(0));
}
-static NAT64NOINLINE int
-find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
- int nat_proto, uint16_t *aport, int *ppg_idx)
+static int
+nat64lsn_find_pg_place(uint32_t *data)
{
- int j, pg_idx;
-
- pg_idx = addr_off * _ADDR_PG_COUNT +
- (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
+ int i;
- for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) {
- if (cfg->pg[pg_idx + j] != NULL)
+ for (i = 0; i < 32; i++) {
+ if (~data[i] == 0)
continue;
-
- *aport = j * NAT64_CHUNK_SIZE;
- *ppg_idx = pg_idx + j;
- return (1);
+ return (i * 32 + ffs(~data[i]) - 1);
}
-
- return (0);
+ return (-1);
}
-/*
- * XXX: This function needs to be rewritten to
- * use free bitmask for faster pg finding,
- * additionally, it should take into consideration
- * a) randomization and
- * b) previous addresses allocated to given nat instance
- *
- */
-static NAT64NOINLINE int
-find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
- uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
+static int
+nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_alias *alias, uint32_t *chunkmask,
+ uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
+ struct nat64lsn_pg **pgptr, uint8_t proto)
{
- int i, nat_proto;
-
- /*
- * XXX: Use bitmask index to be able to find/check if IP address
- * has some spare pg's
- */
- nat_proto = ji->nat_proto;
-
- /* First, try to use same address */
- if (ji->aaddr != 0) {
- i = ntohl(ji->aaddr) - cfg->prefix4;
- if (find_pg_place_addr(cfg, i, nat_proto, aport,
- ppg_idx) != 0){
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
- }
- }
-
- /* Next, try to use random address based on flow hash */
- i = ji->fhash % (1 << (32 - cfg->plen4));
- if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) {
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
+ struct nat64lsn_pg *pg;
+ int i, pg_idx, chunk_idx;
+
+ /* Find place in pgchunk where PG can be added */
+ pg_idx = nat64lsn_find_pg_place(pgmask);
+ if (pg_idx < 0) /* no more PGs */
+ return (PG_ERROR(1));
+ /* Check that we have allocated pgchunk for given PG index */
+ chunk_idx = pg_idx / 32;
+ if (!ISSET32(*chunkmask, chunk_idx)) {
+ chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
+ M_NOWAIT);
+ if (chunks[chunk_idx] == NULL)
+ return (PG_ERROR(2));
+ ck_pr_bts_32(chunkmask, chunk_idx);
+ ck_pr_fence_store();
}
-
-
- /* Last one: simply find ANY available */
- for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
- if (find_pg_place_addr(cfg, i, nat_proto, aport,
- ppg_idx) != 0){
- /* Found! */
- *aaddr = htonl(cfg->prefix4 + i);
- return (0);
+ /* Allocate PG and states chunks */
+ pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
+ if (pg == NULL)
+ return (PG_ERROR(3));
+ pg->chunks_count = cfg->states_chunks;
+ if (pg->chunks_count > 1) {
+ pg->freemask_chunk = malloc(pg->chunks_count *
+ sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
+ if (pg->freemask_chunk == NULL) {
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(4));
+ }
+ pg->states_chunk = malloc(pg->chunks_count *
+ sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
+ M_NOWAIT | M_ZERO);
+ if (pg->states_chunk == NULL) {
+ free(pg->freemask_chunk, M_NAT64LSN);
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(5));
}
+ for (i = 0; i < pg->chunks_count; i++) {
+ pg->states_chunk[i] = uma_zalloc(
+ nat64lsn_state_zone, M_NOWAIT);
+ if (pg->states_chunk[i] == NULL)
+ goto states_failed;
+ }
+ memset(pg->freemask_chunk, 0xff,
+ sizeof(uint64_t) * pg->chunks_count);
+ } else {
+ pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
+ if (pg->states == NULL) {
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(6));
+ }
+ memset(&pg->freemask64, 0xff, sizeof(uint64_t));
}
- return (1);
+ /* Initialize PG and hook it to pgchunk */
+ SET_AGE(pg->timestamp);
+ pg->proto = proto;
+ pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
+ ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
+ ck_pr_fence_store();
+ ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
+ ck_pr_store_ptr(pgptr, pg);
+
+ ALIAS_LOCK(alias);
+ CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
+ SET_AGE(alias->timestamp);
+ alias->portgroups_count++;
+ ALIAS_UNLOCK(alias);
+ NAT64STAT_INC(&cfg->base.stats, spgcreated);
+ return (PG_ERROR(0));
+
+states_failed:
+ for (i = 0; i < pg->chunks_count; i++)
+ uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
+ free(pg->freemask_chunk, M_NAT64LSN);
+ free(pg->states_chunk, M_NAT64LSN);
+ uma_zfree(nat64lsn_pg_zone, pg);
+ return (PG_ERROR(7));
}
-static NAT64NOINLINE int
-attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+static int
+nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
- char a[INET6_ADDRSTRLEN];
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_host *nh;
- uint32_t aaddr;
- uint16_t aport;
- int nh_pg_idx, pg_idx;
+ struct nat64lsn_aliaslink *link;
+ struct nat64lsn_alias *alias;
+ int ret;
- pg = ji->pg;
+ link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
+ if (link == NULL)
+ return (PG_ERROR(1));
/*
- * Find source host and bind: we can't rely on
- * pg->host
+	 * TODO: check that we did not already allocate a PG in
+	 * a previous call.
*/
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL)
- return (1);
- /* Find spare port chunk */
- if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
- return (2);
+ ret = 0;
+ alias = link->alias;
+ /* Find place in pgchunk where PG can be added */
+ switch (ji->proto) {
+ case IPPROTO_TCP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->tcp_chunkmask, alias->tcp_pgmask,
+ alias->tcp, &alias->tcp_pg, ji->proto);
+ break;
+ case IPPROTO_UDP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->udp_chunkmask, alias->udp_pgmask,
+ alias->udp, &alias->udp_pg, ji->proto);
+ break;
+ case IPPROTO_ICMP:
+ ret = nat64lsn_alloc_proto_pg(cfg, alias,
+ &alias->icmp_chunkmask, alias->icmp_pgmask,
+ alias->icmp, &alias->icmp_pg, ji->proto);
+ break;
+ default:
+ panic("%s: wrong proto %d", __func__, ji->proto);
}
-
- /* Expand PG indexes if needed */
- if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
- PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
- ji->spare_idx;
- nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
- ji->spare_idx = NULL;
+ if (ret == PG_ERROR(1)) {
+ /*
+ * PG_ERROR(1) means that alias lacks free PGs
+ * XXX: try next alias.
+ */
+ printf("NAT64LSN: %s: failed to obtain PG\n",
+ __func__);
+ return (ret);
}
-
- /* Find empty index to store PG in the @nh */
- if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
- DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
- a);
- return (3);
+ if (ret == PG_ERROR(0)) {
+ ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
+ ji->state_hval, ji->faddr, ji->port, ji->proto);
+ if (ji->state == NULL)
+ ret = PG_ERROR(8);
+ else
+ ji->done = 1;
}
-
- cfg->pg[pg_idx] = pg;
- cfg->protochunks[pg->nat_proto]++;
- NAT64STAT_INC(&cfg->base.stats, spgcreated);
-
- pg->aaddr = aaddr;
- pg->aport = aport;
- pg->host = nh;
- pg->idx = pg_idx;
- SET_AGE(pg->timestamp);
-
- PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
- if (nh->pg_used == nh_pg_idx)
- nh->pg_used++;
- SET_AGE(nh->timestamp);
-
- ji->pg = NULL;
- ji->done = 1;
-
- return (0);
+ return (ret);
}
-static NAT64NOINLINE void
-consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+static void
+nat64lsn_do_request(void *data)
{
- struct nat64lsn_host *nh, *nh_tmp;
- struct nat64lsn_portgroup *pg, *pg_list[256];
- int i, pg_lidx, idx;
+ struct epoch_tracker et;
+ struct nat64lsn_job_head jhead;
+ struct nat64lsn_job_item *ji, *ji2;
+ struct nat64lsn_cfg *cfg;
+ int jcount;
+ uint8_t flags;
- /* Find source host */
- I6HASH_FIND(cfg, nh, &ji->haddr);
- if (nh == NULL || nh->pg_used == 0)
+ cfg = (struct nat64lsn_cfg *)data;
+ if (cfg->jlen == 0)
return;
- memset(pg_list, 0, sizeof(pg_list));
- pg_lidx = 0;
-
- NAT64_LOCK(nh);
-
- for (i = nh->pg_used - 1; i >= 0; i--) {
- if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
- continue;
- pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
-
- /* Check that PG isn't busy. */
- if (stale_pg(cfg, pg) == 0)
- continue;
-
- /* DO delete */
- pg_list[pg_lidx++] = pg;
- PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
-
- idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
- pg->aport);
- KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
- cfg->pg[idx] = NULL;
- cfg->protochunks[pg->nat_proto]--;
- NAT64STAT_INC(&cfg->base.stats, spgdeleted);
-
- /* Decrease pg_used */
- while (nh->pg_used > 0 &&
- PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
- nh->pg_used--;
-
- /* Check if on-stack buffer has ended */
- if (pg_lidx == nitems(pg_list))
- break;
- }
-
- NAT64_UNLOCK(nh);
-
- if (stale_nh(cfg, nh)) {
- I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
- KASSERT(nh != NULL, ("Unable to find address"));
- cfg->ihcount--;
- ji->nh = nh;
- I6HASH_FIND(cfg, nh, &ji->haddr);
- KASSERT(nh == NULL, ("Failed to delete address"));
- }
-
- /* TODO: Delay freeing portgroups */
- while (pg_lidx > 0) {
- pg_lidx--;
- NAT64STAT_INC(&cfg->base.stats, spgdeleted);
- destroy_portgroup(pg_list[pg_lidx]);
- }
-}
-
-/*
- * Main request handler.
- * Responsible for handling jqueue, e.g.
- * creating new hosts, addind/deleting portgroups.
- */
-static NAT64NOINLINE void
-nat64lsn_do_request(void *data)
-{
- IPFW_RLOCK_TRACKER;
- struct nat64lsn_job_head jhead;
- struct nat64lsn_job_item *ji;
- int jcount, nhsize;
- struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
- struct ip_fw_chain *ch;
- int delcount;
-
CURVNET_SET(cfg->vp);
-
- TAILQ_INIT(&jhead);
-
- /* XXX: We're running unlocked here */
-
- ch = cfg->ch;
- delcount = 0;
- IPFW_RLOCK(ch);
+ STAILQ_INIT(&jhead);
/* Grab queue */
JQUEUE_LOCK();
- TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+ STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
jcount = cfg->jlen;
cfg->jlen = 0;
JQUEUE_UNLOCK();
- /* check if we need to resize hash */
- nhsize = 0;
- if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
- nhsize = cfg->ihsize;
- for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
- ;
- } else if (cfg->ihcount < cfg->ihsize * 4) {
- nhsize = cfg->ihsize;
- for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
- ;
- }
-
- IPFW_RUNLOCK(ch);
-
- if (TAILQ_EMPTY(&jhead)) {
- CURVNET_RESTORE();
- return;
- }
+ /* TODO: check if we need to resize hash */
NAT64STAT_INC(&cfg->base.stats, jcalls);
DPRINTF(DP_JQUEUE, "count=%d", jcount);
@@ -1169,442 +1230,283 @@ nat64lsn_do_request(void *data)
* TODO: Limit per-call number of items
*/
- /* Pre-allocate everything for entire chain */
- TAILQ_FOREACH(ji, &jhead, next) {
+ NAT64LSN_EPOCH_ENTER(et);
+ STAILQ_FOREACH(ji, &jhead, entries) {
switch (ji->jtype) {
- case JTYPE_NEWHOST:
- if (alloc_host6(cfg, ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jhostfails);
- break;
- case JTYPE_NEWPORTGROUP:
- if (alloc_portgroup(ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jportfails);
- break;
- case JTYPE_DELPORTGROUP:
- delcount += ji->delcount;
- break;
- default:
- break;
+ case JTYPE_NEWHOST:
+ if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
+ NAT64STAT_INC(&cfg->base.stats, jhostfails);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
+ NAT64STAT_INC(&cfg->base.stats, jportfails);
+ break;
+ default:
+ continue;
}
- }
-
- /*
- * TODO: Alloc hew hash
- */
- nhsize = 0;
- if (nhsize > 0) {
- /* XXX: */
- }
-
- /* Apply all changes in batch */
- IPFW_UH_WLOCK(ch);
- IPFW_WLOCK(ch);
-
- TAILQ_FOREACH(ji, &jhead, next) {
- switch (ji->jtype) {
- case JTYPE_NEWHOST:
- if (ji->nh != NULL)
- attach_host6(cfg, ji);
- break;
- case JTYPE_NEWPORTGROUP:
- if (ji->pg != NULL &&
- attach_portgroup(cfg, ji) != 0)
- NAT64STAT_INC(&cfg->base.stats,
- jportfails);
- break;
- case JTYPE_DELPORTGROUP:
- consider_del_portgroup(cfg, ji);
- break;
+ if (ji->done != 0) {
+ flags = ji->proto != IPPROTO_TCP ? 0 :
+ convert_tcp_flags(ji->f_id._flags);
+ nat64lsn_translate6_internal(cfg, &ji->m,
+ ji->state, flags);
+ NAT64STAT_INC(&cfg->base.stats, jreinjected);
}
}
+ NAT64LSN_EPOCH_EXIT(et);
- if (nhsize > 0) {
- /* XXX: Move everything to new hash */
- }
-
- IPFW_WUNLOCK(ch);
- IPFW_UH_WUNLOCK(ch);
-
- /* Flush unused entries */
- while (!TAILQ_EMPTY(&jhead)) {
- ji = TAILQ_FIRST(&jhead);
- TAILQ_REMOVE(&jhead, ji, next);
- if (ji->nh != NULL)
- destroy_host6(ji->nh);
- if (ji->pg != NULL)
- destroy_portgroup(ji->pg);
- if (ji->m != NULL)
- reinject_mbuf(cfg, ji);
- if (ji->spare_idx != NULL)
- uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
- free(ji, M_IPFW);
+ ji = STAILQ_FIRST(&jhead);
+ while (ji != NULL) {
+ ji2 = STAILQ_NEXT(ji, entries);
+		/*
+		 * In any case we must free the mbuf if the
+		 * translator did not consume it.
+		 */
+ m_freem(ji->m);
+ uma_zfree(nat64lsn_job_zone, ji);
+ ji = ji2;
}
CURVNET_RESTORE();
}
-static NAT64NOINLINE struct nat64lsn_job_item *
-nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
- int jtype)
+static struct nat64lsn_job_item *
+nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
struct nat64lsn_job_item *ji;
- struct in6_addr haddr;
- uint8_t nat_proto;
/*
- * Do not try to lock possibly contested mutex if we're near the limit.
- * Drop packet instead.
+ * Do not try to lock possibly contested mutex if we're near the
+ * limit. Drop packet instead.
*/
- if (cfg->jlen >= cfg->jmaxlen) {
+ ji = NULL;
+ if (cfg->jlen >= cfg->jmaxlen)
NAT64STAT_INC(&cfg->base.stats, jmaxlen);
- return (NULL);
- }
-
- memset(&haddr, 0, sizeof(haddr));
- nat_proto = 0;
- if (f_id != NULL) {
- haddr = f_id->src_ip6;
- nat_proto = nat64lsn_proto_map[f_id->proto];
-
- DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
- nat_proto, f_id->proto);
-
- if (nat_proto == 0)
- return (NULL);
+ else {
+ ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
+ if (ji == NULL)
+ NAT64STAT_INC(&cfg->base.stats, jnomem);
}
-
- ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
- M_NOWAIT | M_ZERO);
-
if (ji == NULL) {
- NAT64STAT_INC(&cfg->base.stats, jnomem);
- return (NULL);
- }
-
- ji->jtype = jtype;
-
- if (f_id != NULL) {
- ji->f_id = *f_id;
- ji->haddr = haddr;
- ji->nat_proto = nat_proto;
+ NAT64STAT_INC(&cfg->base.stats, dropped);
+ DPRINTF(DP_DROPS, "failed to create job");
+ } else {
+ ji->jtype = jtype;
+ ji->done = 0;
}
-
return (ji);
}
-static NAT64NOINLINE void
+static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
- if (ji == NULL)
- return;
-
JQUEUE_LOCK();
- TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
- cfg->jlen++;
+ STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
NAT64STAT_INC(&cfg->base.stats, jrequests);
+ cfg->jlen++;
if (callout_pending(&cfg->jcallout) == 0)
callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
JQUEUE_UNLOCK();
}
-static NAT64NOINLINE void
-nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
- struct nat64lsn_job_head *jhead, int jlen)
-{
-
- if (TAILQ_EMPTY(jhead))
- return;
-
- /* Attach current queue to execution one */
- JQUEUE_LOCK();
- TAILQ_CONCAT(&cfg->jhead, jhead, next);
- cfg->jlen += jlen;
- NAT64STAT_ADD(&cfg->base.stats, jrequests, jlen);
-
- if (callout_pending(&cfg->jcallout) == 0)
- callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
- JQUEUE_UNLOCK();
-}
-
-static unsigned int
-flow6_hash(const struct ipfw_flow_id *f_id)
+static void
+nat64lsn_job_destroy(epoch_context_t ctx)
{
- unsigned char hbuf[36];
-
- memcpy(hbuf, &f_id->dst_ip6, 16);
- memcpy(&hbuf[16], &f_id->src_ip6, 16);
- memcpy(&hbuf[32], &f_id->dst_port, 2);
- memcpy(&hbuf[32], &f_id->src_port, 2);
+ struct nat64lsn_job_item *ji;
+ struct nat64lsn_host *host;
+ struct nat64lsn_pg *pg;
+ int i;
- return (djb_hash(hbuf, sizeof(hbuf)));
+ ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
+ MPASS(ji->jtype == JTYPE_DESTROY);
+ while (!CK_SLIST_EMPTY(&ji->hosts)) {
+ host = CK_SLIST_FIRST(&ji->hosts);
+ CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
+ if (host->states_count > 0) {
+ /*
+ * XXX: The state has been created
+ * during host deletion.
+ */
+ printf("NAT64LSN: %s: destroying host with %d "
+ "states\n", __func__, host->states_count);
+ }
+ nat64lsn_destroy_host(host);
+ }
+ while (!CK_SLIST_EMPTY(&ji->portgroups)) {
+ pg = CK_SLIST_FIRST(&ji->portgroups);
+ CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
+ for (i = 0; i < pg->chunks_count; i++) {
+ if (FREEMASK_BITCOUNT(pg, i) != 64) {
+ /*
+ * XXX: The state has been created during
+ * PG deletion.
+ */
+ printf("NAT64LSN: %s: destroying PG %p "
+ "with non-empty chunk %d\n", __func__,
+ pg, i);
+ }
+ }
+ nat64lsn_destroy_pg(pg);
+ }
+ uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
+ uma_zfree(nat64lsn_job_zone, ji);
}
-static NAT64NOINLINE int
+static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm)
+ const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_job_item *ji;
- struct mbuf *m;
- m = *pm;
- *pm = NULL;
+ ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
+ if (ji != NULL) {
+ ji->m = *mp;
+ ji->f_id = *f_id;
+ ji->faddr = faddr;
+ ji->port = port;
+ ji->proto = proto;
+ ji->src6_hval = hval;
- ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
- if (ji == NULL) {
- m_freem(m);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "failed to create job");
- } else {
- ji->m = m;
- /* Provide pseudo-random value based on flow */
- ji->fhash = flow6_hash(f_id);
nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jhostsreq);
+ *mp = NULL;
}
-
return (IP_FW_DENY);
}
-static NAT64NOINLINE int
-nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
- const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
- int needs_idx)
+static int
+nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
+ const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
+ in_addr_t faddr, uint16_t port, uint8_t proto)
{
struct nat64lsn_job_item *ji;
- struct mbuf *m;
- m = *pm;
- *pm = NULL;
+ ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
+ if (ji != NULL) {
+ ji->m = *mp;
+ ji->f_id = *f_id;
+ ji->faddr = faddr;
+ ji->port = port;
+ ji->proto = proto;
+ ji->state_hval = hval;
+ ji->host = host;
- ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
- if (ji == NULL) {
- m_freem(m);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "failed to create job");
- } else {
- ji->m = m;
- /* Provide pseudo-random value based on flow */
- ji->fhash = flow6_hash(f_id);
- ji->aaddr = aaddr;
- ji->needs_idx = needs_idx;
nat64lsn_enqueue_job(cfg, ji);
NAT64STAT_INC(&cfg->base.stats, jportreq);
+ *mp = NULL;
}
-
return (IP_FW_DENY);
}
-static NAT64NOINLINE struct nat64lsn_state *
-nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
- int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+static int
+nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
+ struct nat64lsn_state *state, uint8_t flags)
{
- struct nat64lsn_portgroup *pg;
- struct nat64lsn_state *st;
- int i, hval, off;
-
- /* XXX: create additional bitmask for selecting proper portgroup */
- for (i = 0; i < nh->pg_used; i++) {
- pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
- if (pg == NULL)
- continue;
- if (*aaddr == 0)
- *aaddr = pg->aaddr;
- if (pg->nat_proto != nat_proto)
- continue;
-
- off = PG_GET_FREE_IDX(pg);
- if (off != 0) {
- /* We have found spare state. Use it */
- off--;
- PG_MARK_BUSY_IDX(pg, off);
- st = &pg->states[off];
-
- /*
- * Fill in new info. Assume state was zeroed.
- * Timestamp and flags will be filled by caller.
- */
- st->u.s = kst->u.s;
- st->cur.idx = i + 1;
- st->cur.off = off;
-
- /* Insert into host hash table */
- hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
- st->next = nh->phash[hval];
- nh->phash[hval] = st->cur;
-
- nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
+ struct pfloghdr loghdr, *logdata;
+ int ret;
+ uint16_t ts;
- NAT64STAT_INC(&cfg->base.stats, screated);
+ /* Update timestamp and flags if needed */
+ SET_AGE(ts);
+ if (state->timestamp != ts)
+ state->timestamp = ts;
+ if ((state->flags & flags) != 0)
+ state->flags |= flags;
- return (st);
- }
- /* Saev last used alias affress */
- *aaddr = pg->aaddr;
- }
+ if (cfg->base.flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64lsn_log(logdata, *mp, AF_INET6, state);
+ } else
+ logdata = NULL;
- return (NULL);
+ ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
+ htons(state->aport), &cfg->base, logdata);
+ if (ret == NAT64SKIP)
+ return (cfg->nomatch_verdict);
+ if (ret == NAT64RETURN)
+ *mp = NULL;
+ return (IP_FW_DENY);
}
-static NAT64NOINLINE int
+static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
- struct mbuf **pm)
+ struct mbuf **mp)
{
- struct pfloghdr loghdr, *logdata;
- char a[INET6_ADDRSTRLEN];
- struct nat64lsn_host *nh;
- struct st_ptr sidx;
- struct nat64lsn_state *st, kst;
- struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *state;
+ struct nat64lsn_host *host;
struct icmp6_hdr *icmp6;
- uint32_t aaddr;
- int action, hval, nat_proto, proto;
- uint16_t aport, state_ts, state_flags;
-
- /* Check if af/protocol is supported and get it short id */
- nat_proto = nat64lsn_proto_map[f_id->proto];
- if (nat_proto == 0) {
+ uint32_t addr, hval, data[2];
+ int offset, proto;
+ uint16_t port;
+ uint8_t flags;
+
+ /* Check if protocol is supported */
+ port = f_id->src_port;
+ proto = f_id->proto;
+ switch (f_id->proto) {
+ case IPPROTO_ICMPV6:
/*
- * Since we can be called from jobs handler, we need
- * to free mbuf by self, do not leave this task to
- * ipfw_check_packet().
+ * For ICMPv6 echo reply/request we use icmp6_id as
+ * local port.
*/
+ offset = 0;
+ proto = nat64_getlasthdr(*mp, &offset);
+ if (proto < 0) {
+ NAT64STAT_INC(&cfg->base.stats, dropped);
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ return (IP_FW_DENY);
+ }
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(*mp, offset);
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+ icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+ port = ntohs(icmp6->icmp6_id);
+ }
+ proto = IPPROTO_ICMP;
+ /* FALLTHROUGH */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
NAT64STAT_INC(&cfg->base.stats, noproto);
- goto drop;
+ return (cfg->nomatch_verdict);
}
- /* Try to find host first */
- I6HASH_FIND(cfg, nh, &f_id->src_ip6);
-
- if (nh == NULL)
- return (nat64lsn_request_host(cfg, f_id, pm));
-
- /* Fill-in on-stack state structure */
- kst.u.s.faddr = nat64_extract_ip4(&f_id->dst_ip6,
- cfg->base.plat_plen);
- if (kst.u.s.faddr == 0 ||
- nat64_check_private_ip4(&cfg->base, kst.u.s.faddr) != 0) {
- NAT64STAT_INC(&cfg->base.stats, dropped);
- goto drop;
- }
- kst.u.s.fport = f_id->dst_port;
- kst.u.s.lport = f_id->src_port;
+ /* Extract IPv4 from destination IPv6 address */
+ addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
+ if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
+ char a[INET_ADDRSTRLEN];
- /* Prepare some fields we might need to update */
- hval = 0;
- proto = nat64_getlasthdr(*pm, &hval);
- if (proto < 0) {
NAT64STAT_INC(&cfg->base.stats, dropped);
- DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
- goto drop;
+ DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
+ inet_ntop(AF_INET, &addr, a, sizeof(a)));
+ return (IP_FW_DENY); /* XXX: add extra stats? */
}
- SET_AGE(state_ts);
- if (proto == IPPROTO_TCP)
- state_flags = convert_tcp_flags(
- TCP(mtodo(*pm, hval))->th_flags);
- else
- state_flags = 0;
- if (proto == IPPROTO_ICMPV6) {
- /* Alter local port data */
- icmp6 = mtodo(*pm, hval);
- if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
- icmp6->icmp6_type == ICMP6_ECHO_REPLY)
- kst.u.s.lport = ntohs(icmp6->icmp6_id);
- }
-
- hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
- pg = NULL;
- st = NULL;
-
- /* OK, let's find state in host hash */
- NAT64_LOCK(nh);
- sidx = nh->phash[hval];
- int k = 0;
- while (sidx.idx != 0) {
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- st = &pg->states[sidx.off];
- //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off,
- //st->next.idx, st->next.off);
- if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
+ /* Try to find host */
+ hval = HOST_HVAL(cfg, &f_id->src_ip6);
+ CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
+ if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
break;
- if (k++ > 1000) {
- DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
- sidx.idx, sidx.off, st->next.idx, st->next.off);
- DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
- inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)),
- nh, curcpu);
- k = 0;
- }
- sidx = st->next;
}
-
- if (sidx.idx == 0) {
- aaddr = 0;
- st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
- if (st == NULL) {
- /* No free states. Request more if we can */
- if (nh->pg_used >= cfg->max_chunks) {
- /* Limit reached */
- DPRINTF(DP_DROPS, "PG limit reached "
- " for host %s (used %u, allocated %u, "
- "limit %u)", inet_ntop(AF_INET6,
- &nh->addr, a, sizeof(a)),
- nh->pg_used * NAT64_CHUNK_SIZE,
- nh->pg_allocated * NAT64_CHUNK_SIZE,
- cfg->max_chunks * NAT64_CHUNK_SIZE);
- NAT64_UNLOCK(nh);
- NAT64STAT_INC(&cfg->base.stats, dropped);
- goto drop;
- }
- if ((nh->pg_allocated <=
- nh->pg_used + NAT64LSN_REMAININGPG) &&
- nh->pg_allocated < cfg->max_chunks)
- action = 1; /* Request new indexes */
- else
- action = 0;
- NAT64_UNLOCK(nh);
- //DPRINTF("No state, unlock for %p", nh);
- return (nat64lsn_request_portgroup(cfg, f_id,
- pm, aaddr, action));
- }
-
- /* We've got new state. */
- sidx = st->cur;
- pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
- }
-
- /* Okay, state found */
-
- /* Update necessary fileds */
- if (st->timestamp != state_ts)
- st->timestamp = state_ts;
- if ((st->flags & state_flags) != 0)
- st->flags |= state_flags;
-
- /* Copy needed state data */
- aaddr = pg->aaddr;
- aport = htons(pg->aport + sidx.off);
-
- NAT64_UNLOCK(nh);
-
- if (cfg->base.flags & NAT64_LOG) {
- logdata = &loghdr;
- nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
- } else
- logdata = NULL;
-
- action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->base, logdata);
- if (action == NAT64SKIP)
- return (cfg->nomatch_verdict);
- if (action == NAT64MFREE) {
-drop:
- m_freem(*pm);
- }
- *pm = NULL; /* mark mbuf as consumed */
- return (IP_FW_DENY);
+ /* We use IPv4 address in host byte order */
+ addr = ntohl(addr);
+ if (host == NULL)
+ return (nat64lsn_request_host(cfg, f_id, mp,
+ hval, addr, port, proto));
+
+ flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
+
+ data[0] = addr;
+ data[1] = (f_id->dst_port << 16) | port;
+ hval = STATE_HVAL(cfg, data);
+ state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
+ port, proto);
+ if (state == NULL)
+ return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
+ port, proto));
+ return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}
/*
@@ -1614,49 +1516,61 @@ int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done)
{
- ipfw_insn *icmd;
+ struct epoch_tracker et;
struct nat64lsn_cfg *cfg;
+ ipfw_insn *icmd;
int ret;
IPFW_RLOCK_ASSERT(ch);
- *done = 1; /* terminate the search */
+ *done = 0; /* continue the search in case of failure */
icmd = cmd + 1;
if (cmd->opcode != O_EXTERNAL_ACTION ||
cmd->arg1 != V_nat64lsn_eid ||
icmd->opcode != O_EXTERNAL_INSTANCE ||
(cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
- return (0);
+ return (IP_FW_DENY);
+
+ *done = 1; /* terminate the search */
+ NAT64LSN_EPOCH_ENTER(et);
switch (args->f_id.addr_type) {
case 4:
ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
break;
case 6:
+ /*
+ * Check that destination IPv6 address matches our prefix6.
+ */
+ if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
+ memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
+ cfg->base.plat_plen / 8) != 0) {
+ ret = cfg->nomatch_verdict;
+ break;
+ }
ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
break;
default:
- return (cfg->nomatch_verdict);
+ ret = cfg->nomatch_verdict;
}
- return (ret);
-}
-
-static int
-nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
-{
- struct nat64lsn_host *nh;
+ NAT64LSN_EPOCH_EXIT(et);
- nh = (struct nat64lsn_host *)mem;
- memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
- memset(nh->phash, 0, sizeof(nh->phash));
- return (0);
+ if (ret != IP_FW_PASS && args->m != NULL) {
+ m_freem(args->m);
+ args->m = NULL;
+ }
+ return (ret);
}
static int
-nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
+ struct nat64lsn_states_chunk *chunk;
+ int i;
- memset(mem, 0, size);
+ chunk = (struct nat64lsn_states_chunk *)mem;
+ for (i = 0; i < 64; i++)
+ chunk->state[i].flags = 0;
return (0);
}
@@ -1664,109 +1578,185 @@ void
nat64lsn_init_internal(void)
{
- memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
- /* Set up supported protocol map */
- nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
- nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
- nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
- nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
- /* Fill in reverse proto map */
- memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
- nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
- nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
- nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+ nat64lsn_epoch = epoch_alloc(EPOCH_PREEMPT);
- JQUEUE_LOCK_INIT();
- nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
- sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
- nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
- sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+ nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
+ sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
+ sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
+ sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
+ sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
- nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
- sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
- nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
+ sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
+ NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
+ sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ JQUEUE_LOCK_INIT();
}
void
nat64lsn_uninit_internal(void)
{
+ /* XXX: epoch_task drain */
+ epoch_free(nat64lsn_epoch);
+
JQUEUE_LOCK_DESTROY();
uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_pgchunk_zone);
uma_zdestroy(nat64lsn_pg_zone);
- uma_zdestroy(nat64lsn_pgidx_zone);
+ uma_zdestroy(nat64lsn_aliaslink_zone);
+ uma_zdestroy(nat64lsn_state_zone);
+ uma_zdestroy(nat64lsn_job_zone);
}
void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{
+ CALLOUT_LOCK(cfg);
callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
nat64lsn_periodic, cfg);
+ CALLOUT_UNLOCK(cfg);
}
struct nat64lsn_cfg *
-nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
+nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
struct nat64lsn_cfg *cfg;
+ struct nat64lsn_alias *alias;
+ int i, naddr;
+
+ cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
+ M_WAITOK | M_ZERO);
- cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
- TAILQ_INIT(&cfg->jhead);
+ CFG_LOCK_INIT(cfg);
+ CALLOUT_LOCK_INIT(cfg);
+ STAILQ_INIT(&cfg->jhead);
cfg->vp = curvnet;
- cfg->ch = ch;
COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
- cfg->ihsize = NAT64LSN_HSIZE;
- cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
- M_WAITOK | M_ZERO);
-
- cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
- M_WAITOK | M_ZERO);
+ cfg->hash_seed = arc4random();
+ cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
+ cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
+ cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
+ for (i = 0; i < cfg->hosts_hashsize; i++)
+ CK_SLIST_INIT(&cfg->hosts_hash[i]);
+
+ naddr = 1 << (32 - plen);
+ cfg->prefix4 = prefix;
+ cfg->pmask4 = prefix | (naddr - 1);
+ cfg->plen4 = plen;
+ cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
+ M_NAT64LSN, M_WAITOK | M_ZERO);
+ for (i = 0; i < naddr; i++) {
+ alias = &cfg->aliases[i];
+ alias->addr = prefix + i; /* host byte order */
+ CK_SLIST_INIT(&alias->hosts);
+ ALIAS_LOCK_INIT(alias);
+ }
- callout_init(&cfg->periodic, CALLOUT_MPSAFE);
+ callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
return (cfg);
}
-/*
- * Destroy all hosts callback.
- * Called on module unload when all activity already finished, so
- * can work without any locks.
- */
-static NAT64NOINLINE int
-nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
+static void
+nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
- struct nat64lsn_portgroup *pg;
int i;
- for (i = nh->pg_used; i > 0; i--) {
- pg = PORTGROUP_BYSIDX(cfg, nh, i);
- if (pg == NULL)
- continue;
- cfg->pg[pg->idx] = NULL;
- destroy_portgroup(pg);
- nh->pg_used--;
+ if (pg->chunks_count == 1) {
+ uma_zfree(nat64lsn_state_zone, pg->states);
+ } else {
+ for (i = 0; i < pg->chunks_count; i++)
+ uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
+ free(pg->states_chunk, M_NAT64LSN);
+ free(pg->freemask_chunk, M_NAT64LSN);
}
- destroy_host6(nh);
- cfg->ihcount--;
- return (0);
+ uma_zfree(nat64lsn_pg_zone, pg);
+}
+
+static void
+nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_alias *alias)
+{
+ struct nat64lsn_pg *pg;
+ int i;
+
+ while (!CK_SLIST_EMPTY(&alias->portgroups)) {
+ pg = CK_SLIST_FIRST(&alias->portgroups);
+ CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
+ nat64lsn_destroy_pg(pg);
+ }
+ for (i = 0; i < 32; i++) {
+ if (ISSET32(alias->tcp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
+ if (ISSET32(alias->udp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
+ if (ISSET32(alias->icmp_chunkmask, i))
+ uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
+ }
+ ALIAS_LOCK_DESTROY(alias);
+}
+
+static void
+nat64lsn_destroy_host(struct nat64lsn_host *host)
+{
+ struct nat64lsn_aliaslink *link;
+
+ while (!CK_SLIST_EMPTY(&host->aliases)) {
+ link = CK_SLIST_FIRST(&host->aliases);
+ CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
+
+ ALIAS_LOCK(link->alias);
+ CK_SLIST_REMOVE(&link->alias->hosts, link,
+ nat64lsn_aliaslink, alias_entries);
+ link->alias->hosts_count--;
+ ALIAS_UNLOCK(link->alias);
+
+ uma_zfree(nat64lsn_aliaslink_zone, link);
+ }
+ HOST_LOCK_DESTROY(host);
+ free(host->states_hash, M_NAT64LSN);
+ uma_zfree(nat64lsn_host_zone, host);
}
void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
- struct nat64lsn_host *nh, *tmp;
+ struct nat64lsn_host *host;
+ int i;
- callout_drain(&cfg->jcallout);
+ CALLOUT_LOCK(cfg);
callout_drain(&cfg->periodic);
- I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
- DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);
+ CALLOUT_UNLOCK(cfg);
+ callout_drain(&cfg->jcallout);
+
+ for (i = 0; i < cfg->hosts_hashsize; i++) {
+ while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
+ host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
+ CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
+ nat64lsn_destroy_host(host);
+ }
+ }
+
+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
+ nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
+ CALLOUT_LOCK_DESTROY(cfg);
+ CFG_LOCK_DESTROY(cfg);
COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
- free(cfg->ih, M_IPFW);
- free(cfg->pg, M_IPFW);
- free(cfg, M_IPFW);
+ free(cfg->hosts_hash, M_NAT64LSN);
+ free(cfg->aliases, M_NAT64LSN);
+ free(cfg, M_NAT64LSN);
}
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.h b/sys/netpfil/ipfw/nat64/nat64lsn.h
index 44036cb3efcb..797876b229c2 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn.h
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -35,75 +35,149 @@
#include "ip_fw_nat64.h"
#include "nat64_translate.h"
-#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
-#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
-
#define NAT64_MIN_PORT 1024
-#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
+struct nat64lsn_host;
+struct nat64lsn_alias;
-struct st_ptr {
- uint8_t idx; /* index in nh->pg_ptr array.
- * NOTE: it starts from 1.
- */
- uint8_t off;
+struct nat64lsn_state {
+ /* IPv6 host entry keeps hash table to speedup state lookup */
+ CK_SLIST_ENTRY(nat64lsn_state) entries;
+ struct nat64lsn_host *host;
+
+ struct in6_addr ip6_dst; /* Destination IPv6 address */
+
+ in_addr_t ip_src; /* Alias IPv4 address */
+ in_addr_t ip_dst; /* Destination IPv4 address */
+ uint16_t dport; /* Destination port */
+ uint16_t sport; /* Source port */
+
+ uint32_t hval;
+ uint32_t flags; /* Internal flags */
+ uint16_t aport;
+ uint16_t timestamp; /* last used */
+ uint8_t proto;
+ uint8_t _spare[7];
};
-#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1)
-#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY)
-#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
- NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)
-struct nat64lsn_portgroup;
-/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
-struct nat64lsn_host {
- struct rwlock h_lock; /* Host states lock */
-
- struct in6_addr addr;
- struct nat64lsn_host *next;
- uint16_t timestamp; /* Last altered */
- uint16_t hsize; /* ports hash size */
- uint16_t pg_used; /* Number of portgroups used */
-#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
- * requesting of new chunk of indexes.
- */
- uint16_t pg_allocated; /* Number of portgroups indexes
- * allocated.
- */
-#define NAT64LSN_HSIZE 64
- struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
- /*
- * PG indexes are stored in chunks with 32 elements.
- * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
- */
-#define NAT64LSN_PGIDX_CHUNK 32
-#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
- NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
- struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
+struct nat64lsn_states_chunk {
+ struct nat64lsn_state state[64];
+};
+
+#define ISSET64(mask, bit) ((mask) & ((uint64_t)1 << (bit)))
+#define ISSET32(mask, bit) ((mask) & ((uint32_t)1 << (bit)))
+struct nat64lsn_pg {
+ CK_SLIST_ENTRY(nat64lsn_pg) entries;
+
+ uint16_t base_port;
+ uint16_t timestamp;
+ uint8_t proto;
+ uint8_t chunks_count;
+ uint8_t spare[2];
+
+ union {
+ uint64_t freemask64;
+ uint32_t freemask32[2];
+ uint64_t *freemask64_chunk;
+ uint32_t *freemask32_chunk;
+ void *freemask_chunk;
+ };
+ union {
+ struct nat64lsn_states_chunk *states;
+ struct nat64lsn_states_chunk **states_chunk;
+ };
+};
+
+#define CHUNK_BY_FADDR(p, a) ((a) & ((p)->chunks_count - 1))
+
+#ifdef __LP64__
+#define FREEMASK_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? &(p)->freemask64 : \
+ &(p)->freemask64_chunk[CHUNK_BY_FADDR(p, v)])
+#define FREEMASK_BITCOUNT(pg, faddr) \
+ bitcount64(*FREEMASK_CHUNK((pg), (faddr)))
+#else
+#define FREEMASK_CHUNK(p, v) \
+ ((p)->chunks_count == 1 ? &(p)->freemask32[0] : \
+ &(p)->freemask32_chunk[CHUNK_BY_FADDR(p, v) * 2])
+#define FREEMASK_BITCOUNT(pg, faddr) \
+ bitcount64(*(uint64_t *)FREEMASK_CHUNK((pg), (faddr)))
+#endif /* !__LP64__ */
+
+struct nat64lsn_pgchunk {
+ struct nat64lsn_pg *pgptr[32];
};
-#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)
-#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)
+struct nat64lsn_aliaslink {
+ CK_SLIST_ENTRY(nat64lsn_aliaslink) alias_entries;
+ CK_SLIST_ENTRY(nat64lsn_aliaslink) host_entries;
+ struct nat64lsn_alias *alias;
+};
-#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)
-#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)
-#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)
-#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)
-#define NAT64_LOCK(h) NAT64_WLOCK(h)
-#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)
-#define NAT64_LOCK_INIT(h) do { \
- rw_init(&(h)->h_lock, "NAT64 host lock"); \
- } while (0)
+CK_SLIST_HEAD(nat64lsn_aliaslink_slist, nat64lsn_aliaslink);
+CK_SLIST_HEAD(nat64lsn_states_slist, nat64lsn_state);
+CK_SLIST_HEAD(nat64lsn_hosts_slist, nat64lsn_host);
+CK_SLIST_HEAD(nat64lsn_pg_slist, nat64lsn_pg);
+
+struct nat64lsn_alias {
+ struct nat64lsn_aliaslink_slist hosts;
+ struct nat64lsn_pg_slist portgroups;
+
+ struct mtx lock;
+ in_addr_t addr; /* host byte order */
+ uint32_t hosts_count;
+ uint32_t portgroups_count;
+ uint32_t tcp_chunkmask;
+ uint32_t udp_chunkmask;
+ uint32_t icmp_chunkmask;
+
+ uint32_t tcp_pgidx;
+ uint32_t udp_pgidx;
+ uint32_t icmp_pgidx;
+ uint16_t timestamp;
+ uint16_t spare;
+
+ uint32_t tcp_pgmask[32];
+ uint32_t udp_pgmask[32];
+ uint32_t icmp_pgmask[32];
+ struct nat64lsn_pgchunk *tcp[32];
+ struct nat64lsn_pgchunk *udp[32];
+ struct nat64lsn_pgchunk *icmp[32];
+
+ /* pointer to PG that can be used for faster state allocation */
+ struct nat64lsn_pg *tcp_pg;
+ struct nat64lsn_pg *udp_pg;
+ struct nat64lsn_pg *icmp_pg;
+};
+#define ALIAS_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "alias_lock", NULL, MTX_DEF)
+#define ALIAS_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define ALIAS_LOCK(p) mtx_lock(&(p)->lock)
+#define ALIAS_UNLOCK(p) mtx_unlock(&(p)->lock)
-#define NAT64_LOCK_DESTROY(h) do { \
- rw_destroy(&(h)->h_lock); \
- } while (0)
+#define NAT64LSN_HSIZE 256
+#define NAT64LSN_MAX_HSIZE 4096
+#define NAT64LSN_HOSTS_HSIZE 1024
-/* Internal proto index */
-#define NAT_PROTO_TCP 1
-#define NAT_PROTO_UDP 2
-#define NAT_PROTO_ICMP 3
+struct nat64lsn_host {
+ struct in6_addr addr;
+ struct nat64lsn_aliaslink_slist aliases;
+ struct nat64lsn_states_slist *states_hash;
+ CK_SLIST_ENTRY(nat64lsn_host) entries;
+ uint32_t states_count;
+ uint32_t hval;
+ uint32_t flags;
+#define NAT64LSN_DEADHOST 1
+#define NAT64LSN_GROWHASH 2
+ uint16_t states_hashsize;
+ uint16_t timestamp;
+ struct mtx lock;
+};
-#define NAT_MAX_PROTO 4
-extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+#define HOST_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "host_lock", NULL, MTX_DEF|MTX_NEW)
+#define HOST_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define HOST_LOCK(p) mtx_lock(&(p)->lock)
+#define HOST_UNLOCK(p) mtx_unlock(&(p)->lock)
VNET_DECLARE(uint16_t, nat64lsn_eid);
#define V_nat64lsn_eid VNET(nat64lsn_eid)
@@ -112,124 +186,65 @@ VNET_DECLARE(uint16_t, nat64lsn_eid);
/* Timestamp macro */
#define _CT ((int)time_uptime % 65536)
#define SET_AGE(x) (x) = _CT
-#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x) : \
- (int)65536 + _CT - (x))
+#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x): (int)65536 + _CT - (x))
-#ifdef __LP64__
-/* ffsl() is capable of checking 64-bit ints */
-#define _FFS64
-#endif
-
-/* 16 bytes */
-struct nat64lsn_state {
- union {
- struct {
- in_addr_t faddr; /* Remote IPv4 address */
- uint16_t fport; /* Remote IPv4 port */
- uint16_t lport; /* Local IPv6 port */
- }s;
- uint64_t hkey;
- } u;
- uint8_t nat_proto;
- uint8_t flags;
- uint16_t timestamp;
- struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
- struct st_ptr next; /* Next entry index */
-};
-
-/*
- * 1024+32 bytes per 64 states, used to store state
- * AND for outside-in state lookup
- */
-struct nat64lsn_portgroup {
- struct nat64lsn_host *host; /* IPv6 source host info */
- in_addr_t aaddr; /* Alias addr, network format */
- uint16_t aport; /* Base port */
- uint16_t timestamp;
- uint8_t nat_proto;
- uint8_t spare[3];
- uint32_t idx;
-#ifdef _FFS64
- uint64_t freemask; /* Mask of free entries */
-#else
- uint32_t freemask[2]; /* Mask of free entries */
-#endif
- struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
-};
-#ifdef _FFS64
-#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
-#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
-#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
-#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
-#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
-#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
-#else
-#define PG_MARK_BUSY_IDX(_pg, _idx) \
- (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
-#define PG_MARK_FREE_IDX(_pg, _idx) \
- (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
-#define PG_IS_FREE_IDX(_pg, _idx) \
- ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
-#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
-#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
-#define PG_IS_EMPTY(_pg) \
- ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
-
-static inline int
-_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
-{
- int i;
-
- if ((i = ffsl(pg->freemask[0])) != 0)
- return (i);
- if ((i = ffsl(pg->freemask[1])) != 0)
- return (i + 32);
- return (0);
-}
-
-#endif
-
-TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
+STAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
struct nat64lsn_cfg {
struct named_object no;
- struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
- struct nat64lsn_host **ih; /* Host hash */
+
+ struct nat64lsn_hosts_slist *hosts_hash;
+ struct nat64lsn_alias *aliases; /* array of aliases */
+
+ struct mtx lock;
+ uint32_t hosts_hashsize;
+ uint32_t hash_seed;
+
uint32_t prefix4; /* IPv4 prefix */
uint32_t pmask4; /* IPv4 prefix mask */
- uint32_t ihsize; /* IPv6 host hash size */
uint8_t plen4;
- uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
+ uint8_t nomatch_verdict;/* Return value on no-match */
- uint32_t ihcount; /* Number of items in host hash */
- int max_chunks; /* Max chunks per client */
- int agg_prefix_len; /* Prefix length to count */
- int agg_prefix_max; /* Max hosts per agg prefix */
+ uint32_t hosts_count; /* Number of items in host hash */
+ uint32_t states_chunks; /* Number of states chunks per PG */
uint32_t jmaxlen; /* Max jobqueue length */
- uint16_t min_chunk; /* Min port group # to use */
- uint16_t max_chunk; /* Max port group # to use */
- uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t host_delete_delay; /* Stale host delete delay */
+ uint16_t pgchunk_delete_delay;
uint16_t pg_delete_delay; /* Stale portgroup del delay */
uint16_t st_syn_ttl; /* TCP syn expire */
uint16_t st_close_ttl; /* TCP fin expire */
uint16_t st_estab_ttl; /* TCP established expire */
uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */
- uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
+
struct nat64_config base;
#define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE)
+#define NAT64LSN_ANYPREFIX 0x00000100
+ struct mtx periodic_lock;
struct callout periodic;
struct callout jcallout;
- struct ip_fw_chain *ch;
struct vnet *vp;
struct nat64lsn_job_head jhead;
int jlen;
char name[64]; /* Nat instance name */
};
+/* CFG_LOCK protects cfg->hosts_hash from modification */
+#define CFG_LOCK_INIT(p) \
+ mtx_init(&(p)->lock, "cfg_lock", NULL, MTX_DEF)
+#define CFG_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
+#define CFG_LOCK(p) mtx_lock(&(p)->lock)
+#define CFG_UNLOCK(p) mtx_unlock(&(p)->lock)
+
+#define CALLOUT_LOCK_INIT(p) \
+ mtx_init(&(p)->periodic_lock, "periodic_lock", NULL, MTX_DEF)
+#define CALLOUT_LOCK_DESTROY(p) mtx_destroy(&(p)->periodic_lock)
+#define CALLOUT_LOCK(p) mtx_lock(&(p)->periodic_lock)
+#define CALLOUT_UNLOCK(p) mtx_unlock(&(p)->periodic_lock)
+
struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
- size_t numaddr);
+ in_addr_t prefix, int plen);
void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_init_internal(void);
@@ -237,114 +252,4 @@ void nat64lsn_uninit_internal(void);
int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done);
-void
-nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
- const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
- const char *px, int off);
-/*
- * Portgroup layout
- * addr x nat_proto x port_off
- *
- */
-
-#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS)
-#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
-
-#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4))
-#define __GET_PORTGROUP_IDX(_proto, _port) \
- ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \
- ((_port) >> NAT64_CHUNK_SIZE_BITS))
-
-#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \
- GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \
- __GET_PORTGROUP_IDX(_proto, _port)
-#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \
- ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
-
-#define PORTGROUP_CHUNK(_nh, _idx) \
- ((_nh)->pg_ptr[(_idx)])
-#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \
- (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \
- [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
-
-
-/* Chained hash table */
-#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _x = _PX##first(_ph, _buck); \
- for ( ; _x != NULL; _x = _PX##next(_x)) { \
- if (_PX##cmp(_key, _PX##val(_x))) \
- break; \
- } \
- if (_x == NULL) \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
- _PX##unlock(_ph, _buck);
-
-#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
- unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _PX##next(_i) = _PX##first(_ph, _buck); \
- _PX##first(_ph, _buck) = _i; \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
- unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
- _PX##lock(_ph, _buck); \
- _x = _PX##first(_ph, _buck); \
- _tmp = NULL; \
- for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
- if (_PX##cmp(_key, _PX##val(_x))) \
- break; \
- } \
- if (_x != NULL) { \
- if (_tmp == NULL) \
- _PX##first(_ph, _buck) = _PX##next(_x); \
- else \
- _PX##next(_tmp) = _PX##next(_x); \
- } \
- _PX##unlock(_ph, _buck); \
-} while(0)
-
-#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \
- for (unsigned int _i = 0; _i < _hsize; _i++) { \
- _PX##lock(_ph, _i); \
- _x = _PX##first(_ph, _i); \
- _tmp = NULL; \
- for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
- if (_cb(_x, _arg) == 0) \
- continue; \
- if (_tmp == NULL) \
- _PX##first(_ph, _i) = _PX##next(_x); \
- else \
- _tmp = _PX##next(_x); \
- } \
- _PX##unlock(_ph, _i); \
- } \
-} while(0)
-
-#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \
- unsigned int _buck; \
- for (unsigned int _i = 0; _i < _hsize; _i++) { \
- _x = _PX##first(_ph, _i); \
- _y = _x; \
- while (_y != NULL) { \
- _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
- _y = _PX##next(_x); \
- _PX##next(_x) = _PX##first(_nph, _buck); \
- _PX##first(_nph, _buck) = _x; \
- } \
- } \
-} while(0)
-
#endif /* _IP_FW_NAT64LSN_H_ */
-
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
index 6bb48d29e382..65481a88d64e 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn_control.c
+++ b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@@ -43,10 +45,8 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sockopt.h>
-#include <sys/queue.h>
#include <net/if.h>
-#include <net/pfil.h>
#include <netinet/in.h>
#include <netinet/ip.h>
@@ -75,12 +75,6 @@ static void
nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
{
- if (uc->max_ports == 0)
- uc->max_ports = NAT64LSN_MAX_PORTS;
- else
- uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
- if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
- uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
if (uc->jmaxlen == 0)
uc->jmaxlen = NAT64LSN_JMAXLEN;
if (uc->jmaxlen > 65536)
@@ -99,6 +93,13 @@ nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
uc->st_udp_ttl = NAT64LSN_UDP_AGE;
if (uc->st_icmp_ttl == 0)
uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+
+ if (uc->states_chunks == 0)
+ uc->states_chunks = 1;
+ else if (uc->states_chunks >= 128)
+ uc->states_chunks = 128;
+ else if (!powerof2(uc->states_chunks))
+ uc->states_chunks = 1 << fls(uc->states_chunks);
}
/*
@@ -127,12 +128,20 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
if (ipfw_check_object_name_generic(uc->name) != 0)
return (EINVAL);
- if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
+ if (uc->set >= IPFW_MAX_SETS)
return (EINVAL);
if (uc->plen4 > 32)
return (EINVAL);
- if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0)
+
+ /*
+ * Unspecified address has special meaning. But it must
+ * have valid prefix length. This length will be used to
+ * correctly extract and embedd IPv4 address into IPv6.
+ */
+ if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
+ IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
+ nat64_check_prefixlen(uc->plen6) != 0)
return (EINVAL);
/* XXX: Check prefix4 to be global */
@@ -140,14 +149,6 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
mask4 = ~((1 << (32 - uc->plen4)) - 1);
if ((addr4 & mask4) != addr4)
return (EINVAL);
- if (uc->min_port == 0)
- uc->min_port = NAT64_MIN_PORT;
- if (uc->max_port == 0)
- uc->max_port = 65535;
- if (uc->min_port > uc->max_port)
- return (EINVAL);
- uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
- uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
nat64lsn_default_config(uc);
@@ -159,7 +160,7 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
IPFW_UH_RUNLOCK(ch);
- cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4));
+ cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
strlcpy(cfg->name, uc->name, sizeof(cfg->name));
cfg->no.name = cfg->name;
cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
@@ -170,20 +171,12 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
cfg->base.flags |= NAT64_WKPFX;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
+ cfg->base.flags |= NAT64LSN_ANYPREFIX;
- cfg->prefix4 = addr4;
- cfg->pmask4 = addr4 | ~mask4;
- cfg->plen4 = uc->plen4;
-
- cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
- cfg->agg_prefix_len = uc->agg_prefix_len;
- cfg->agg_prefix_max = uc->agg_prefix_max;
-
- cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
- cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
-
+ cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
- cfg->nh_delete_delay = uc->nh_delete_delay;
+ cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl;
@@ -249,7 +242,7 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
if (cfg->no.refcnt > 0) {
@@ -272,6 +265,8 @@ static void
export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
struct ipfw_nat64lsn_stats *stats)
{
+ struct nat64lsn_alias *alias;
+ int i, j;
__COPY_STAT_FIELD(cfg, stats, opcnt64);
__COPY_STAT_FIELD(cfg, stats, opcnt46);
@@ -299,10 +294,16 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
__COPY_STAT_FIELD(cfg, stats, spgcreated);
__COPY_STAT_FIELD(cfg, stats, spgdeleted);
- stats->hostcount = cfg->ihcount;
- stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
- stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
- stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
+ stats->hostcount = cfg->hosts_count;
+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
+ alias = &cfg->aliases[i];
+ for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
+ stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
+ for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
+ stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
+ for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
+ stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
+ }
}
#undef __COPY_STAT_FIELD
@@ -312,12 +313,9 @@ nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
{
uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
- uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
- uc->agg_prefix_len = cfg->agg_prefix_len;
- uc->agg_prefix_max = cfg->agg_prefix_max;
-
+ uc->states_chunks = cfg->states_chunks;
uc->jmaxlen = cfg->jmaxlen;
- uc->nh_delete_delay = cfg->nh_delete_delay;
+ uc->nh_delete_delay = cfg->host_delete_delay;
uc->pg_delete_delay = cfg->pg_delete_delay;
uc->st_syn_ttl = cfg->st_syn_ttl;
uc->st_close_ttl = cfg->st_close_ttl;
@@ -425,7 +423,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
- return (EEXIST);
+ return (ENOENT);
}
nat64lsn_export_config(ch, cfg, uc);
IPFW_UH_RUNLOCK(ch);
@@ -438,18 +436,18 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
- return (EEXIST);
+ return (ENOENT);
}
/*
* For now allow to change only following values:
* jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
- * tcp_est_age, udp_age, icmp_age, flags, max_ports.
+ * tcp_est_age, udp_age, icmp_age, flags, states_chunks.
*/
- cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+ cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
- cfg->nh_delete_delay = uc->nh_delete_delay;
+ cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl;
@@ -496,7 +494,7 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
export_stats(ch, cfg, &stats);
@@ -538,163 +536,176 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
IPFW_UH_WUNLOCK(ch);
return (0);
}
+#ifdef __LP64__
+#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n))
+#else
+#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \
+ ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
+#endif
/*
* Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
* ipfw_nat64lsn_state x count, ... ] ]
*/
static int
-export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
- ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
+nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
+ struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
{
- ipfw_nat64lsn_state *ste;
- struct nat64lsn_state *st;
- int i, count;
+ ipfw_nat64lsn_state_v1 *s;
+ struct nat64lsn_state *state;
+ uint64_t freemask;
+ uint32_t i, count;
- NAT64_LOCK(pg->host);
- count = 0;
- for (i = 0; i < 64; i++) {
- if (PG_IS_BUSY_IDX(pg, i))
- count++;
- }
- DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count);
+ /* validate user input */
+ if (idx->chunk > pg->chunks_count - 1)
+ return (EINVAL);
- if (count == 0) {
- stg->count = 0;
- NAT64_UNLOCK(pg->host);
- return (0);
- }
- ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
- count * sizeof(ipfw_nat64lsn_state));
- if (ste == NULL) {
- NAT64_UNLOCK(pg->host);
- return (1);
- }
+ FREEMASK_COPY(pg, idx->chunk, freemask);
+ count = 64 - bitcount64(freemask);
+ if (count == 0)
+ return (0); /* Try next PG/chunk */
+
+ DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
+ (uintmax_t)idx->index, count);
+
+ s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
+ count * sizeof(ipfw_nat64lsn_state_v1));
+ if (s == NULL)
+ return (ENOMEM);
- stg->alias4.s_addr = pg->aaddr;
- stg->proto = nat64lsn_rproto_map[pg->nat_proto];
- stg->flags = 0;
- stg->host6 = pg->host->addr;
- stg->count = count;
for (i = 0; i < 64; i++) {
- if (PG_IS_FREE_IDX(pg, i))
+ if (ISSET64(freemask, i))
continue;
- st = &pg->states[i];
- ste->daddr.s_addr = st->u.s.faddr;
- ste->dport = st->u.s.fport;
- ste->aport = pg->aport + i;
- ste->sport = st->u.s.lport;
- ste->flags = st->flags; /* XXX filter flags */
- ste->idle = GET_AGE(st->timestamp);
- ste++;
+ state = pg->chunks_count == 1 ? &pg->states->state[i] :
+ &pg->states_chunk[idx->chunk]->state[i];
+
+ s->host6 = state->host->addr;
+ s->daddr.s_addr = htonl(state->ip_dst);
+ s->dport = state->dport;
+ s->sport = state->sport;
+ s->aport = state->aport;
+ s->flags = (uint8_t)(state->flags & 7);
+ s->proto = state->proto;
+ s->idle = GET_AGE(state->timestamp);
+ s++;
}
- NAT64_UNLOCK(pg->host);
-
+ *ret_count = count;
return (0);
}
+#define LAST_IDX 0xFF
static int
-get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
+nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
+ union nat64lsn_pgidx *idx)
{
- if (*port < 65536 - NAT64_CHUNK_SIZE) {
- *port += NAT64_CHUNK_SIZE;
- return (0);
+ /* First iterate over chunks */
+ if (pg != NULL) {
+ if (idx->chunk < pg->chunks_count - 1) {
+ idx->chunk++;
+ return (0);
+ }
}
- *port = 0;
-
- if (*nat_proto < NAT_MAX_PROTO - 1) {
- *nat_proto += 1;
+ idx->chunk = 0;
+ /* Then over PGs */
+ if (idx->port < UINT16_MAX - 64) {
+ idx->port += 64;
return (0);
}
- *nat_proto = 1;
-
- if (*addr < cfg->pmask4) {
- *addr += 1;
+ idx->port = NAT64_MIN_PORT;
+ /* Then over supported protocols */
+ switch (idx->proto) {
+ case IPPROTO_ICMP:
+ idx->proto = IPPROTO_TCP;
return (0);
+ case IPPROTO_TCP:
+ idx->proto = IPPROTO_UDP;
+ return (0);
+ default:
+ idx->proto = IPPROTO_ICMP;
}
-
- /* End of space. */
- return (1);
+ /* And then over IPv4 alias addresses */
+ if (idx->addr < cfg->pmask4) {
+ idx->addr++;
+ return (1); /* New states group is needed */
+ }
+ idx->index = LAST_IDX;
+ return (-1); /* No more states */
}
-#define PACK_IDX(addr, proto, port) \
- ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
-#define UNPACK_IDX(idx, addr, proto, port) \
- (addr) = (uint32_t)((idx) >> 32); \
- (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
- (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
-
-static struct nat64lsn_portgroup *
-get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
+static struct nat64lsn_pg*
+nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
{
- struct nat64lsn_portgroup *pg;
- uint64_t pre_pack, post_pack;
-
- pg = NULL;
- pre_pack = PACK_IDX(*addr, *nat_proto, *port);
- for (;;) {
- if (get_next_idx(cfg, addr, nat_proto, port) != 0) {
- /* End of states */
- return (pg);
- }
-
- pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
- if (pg != NULL)
- break;
+ struct nat64lsn_alias *alias;
+ int pg_idx;
+
+ alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
+ MPASS(alias->addr == idx->addr);
+
+ pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
+ switch (idx->proto) {
+ case IPPROTO_ICMP:
+ if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
+ case IPPROTO_TCP:
+ if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
+ case IPPROTO_UDP:
+ if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
+ return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
+ break;
}
-
- post_pack = PACK_IDX(*addr, *nat_proto, *port);
- if (pre_pack == post_pack)
- DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
- *addr, *nat_proto, *port);
- return (pg);
+ return (NULL);
}
-static NAT64NOINLINE struct nat64lsn_portgroup *
-get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
- uint16_t *port)
+/*
+ * Lists nat64lsn states.
+ * Data layout (v0):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- struct nat64lsn_portgroup *pg;
- pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
- if (pg == NULL)
- pg = get_next_pg(cfg, addr, nat_proto, port);
-
- return (pg);
+ /* TODO: implement states listing for old ipfw(8) binaries */
+ return (EOPNOTSUPP);
}
/*
* Lists nat64lsn states.
- * Data layout (v0)(current):
+ * Data layout (v1)(current):
* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
* Reply: [ ipfw_obj_header ipfw_obj_data [
- * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ * ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
*
* Returns 0 on success
*/
static int
-nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
ipfw_obj_header *oh;
ipfw_obj_data *od;
- ipfw_nat64lsn_stg *stg;
+ ipfw_nat64lsn_stg_v1 *stg;
struct nat64lsn_cfg *cfg;
- struct nat64lsn_portgroup *pg, *pg_next;
- uint64_t next_idx;
+ struct nat64lsn_pg *pg;
+ union nat64lsn_pgidx idx;
size_t sz;
- uint32_t addr, states;
- uint16_t port;
- uint8_t nat_proto;
+ uint32_t count, total;
+ int ret;
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
sizeof(uint64_t);
@@ -708,78 +719,96 @@ nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
od->head.length != sz - sizeof(ipfw_obj_header))
return (EINVAL);
- next_idx = *(uint64_t *)(od + 1);
- /* Translate index to the request position to start from */
- UNPACK_IDX(next_idx, addr, nat_proto, port);
- if (nat_proto >= NAT_MAX_PROTO)
+ idx.index = *(uint64_t *)(od + 1);
+ if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
+ idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
return (EINVAL);
- if (nat_proto == 0 && addr != 0)
+ if (idx.index == LAST_IDX)
return (EINVAL);
IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
- return (ESRCH);
+ return (ENOENT);
}
- /* Fill in starting point */
- if (addr == 0) {
- addr = cfg->prefix4;
- nat_proto = 1;
- port = 0;
+ if (idx.index == 0) { /* Fill in starting point */
+ idx.addr = cfg->prefix4;
+ idx.proto = IPPROTO_ICMP;
+ idx.port = NAT64_MIN_PORT;
}
- if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
+ idx.port < NAT64_MIN_PORT) {
IPFW_UH_RUNLOCK(ch);
- DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
- (uintmax_t)next_idx, addr, cfg->pmask4);
return (EINVAL);
}
-
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
- sizeof(ipfw_nat64lsn_stg);
- if (sd->valsize < sz)
+ sizeof(ipfw_nat64lsn_stg_v1);
+ if (sd->valsize < sz) {
+ IPFW_UH_RUNLOCK(ch);
return (ENOMEM);
+ }
oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
od = (ipfw_obj_data *)(oh + 1);
od->head.type = IPFW_TLV_OBJDATA;
od->head.length = sz - sizeof(ipfw_obj_header);
- stg = (ipfw_nat64lsn_stg *)(od + 1);
-
- pg = get_first_pg(cfg, &addr, &nat_proto, &port);
- if (pg == NULL) {
- /* No states */
- stg->next_idx = 0xFF;
- stg->count = 0;
- IPFW_UH_RUNLOCK(ch);
- return (0);
- }
- states = 0;
- pg_next = NULL;
- while (pg != NULL) {
- pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
- if (pg_next == NULL)
- stg->next_idx = 0xFF;
- else
- stg->next_idx = PACK_IDX(addr, nat_proto, port);
-
- if (export_pg_states(cfg, pg, stg, sd) != 0) {
- IPFW_UH_RUNLOCK(ch);
- return (states == 0 ? ENOMEM: 0);
+ stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
+ stg->count = total = 0;
+ stg->next.index = idx.index;
+ /*
+ * Acquire CALLOUT_LOCK to avoid races with expiration code.
+ * Thus states, hosts and PGs will not expire while we hold it.
+ */
+ CALLOUT_LOCK(cfg);
+ ret = 0;
+ do {
+ pg = nat64lsn_get_pg_byidx(cfg, &idx);
+ if (pg != NULL) {
+ count = 0;
+ ret = nat64lsn_export_states_v1(cfg, &idx, pg,
+ sd, &count);
+ if (ret != 0)
+ break;
+ if (count > 0) {
+ stg->count += count;
+ total += count;
+ /* Update total size of reply */
+ od->head.length +=
+ count * sizeof(ipfw_nat64lsn_state_v1);
+ sz += count * sizeof(ipfw_nat64lsn_state_v1);
+ }
+ stg->alias4.s_addr = htonl(idx.addr);
}
- states += stg->count;
- od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
- sz += stg->count * sizeof(ipfw_nat64lsn_state);
- if (pg_next != NULL) {
- sz += sizeof(ipfw_nat64lsn_stg);
- if (sd->valsize < sz)
+ /* Determine new index */
+ switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
+ case -1:
+ ret = ENOENT; /* End of search */
+ break;
+ case 1: /*
+ * Next alias address, new group may be needed.
+ * If states count is zero, use this group.
+ */
+ if (stg->count == 0)
+ continue;
+ /* Otherwise try to create new group */
+ sz += sizeof(ipfw_nat64lsn_stg_v1);
+ if (sd->valsize < sz) {
+ ret = ENOMEM;
break;
- stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
- sizeof(ipfw_nat64lsn_stg));
+ }
+ /* Save next index in current group */
+ stg->next.index = idx.index;
+ stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
+ sizeof(ipfw_nat64lsn_stg_v1));
+ od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
+ stg->count = 0;
+ break;
}
- pg = pg_next;
- }
+ stg->next.index = idx.index;
+ } while (ret == 0);
+ CALLOUT_UNLOCK(cfg);
IPFW_UH_RUNLOCK(ch);
- return (0);
+ return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
}
static struct ipfw_sopt_handler scodes[] = {
@@ -789,7 +818,8 @@ static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
{ IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
{ IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
- { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states_v0 },
+ { IP_FW_NAT64LSN_LIST_STATES,1, HDIR_GET, nat64lsn_states_v1 },
};
static int