diff options
| author | Konstantin Belousov <kib@FreeBSD.org> | 2021-08-22 19:38:04 +0000 |
|---|---|---|
| committer | Konstantin Belousov <kib@FreeBSD.org> | 2024-07-12 04:27:58 +0000 |
| commit | ef2a572bf6bdcac97ef29ce631d2f50f938e1ec8 (patch) | |
| tree | 3931f4c72c66cf062083244d766266b58bd77a80 | |
| parent | b6919741b7479fab6886ae76ec151f4103bcf350 (diff) | |
ipsec_offload: kernel infrastructure
Inline IPSEC offload moves almost the whole IPSEC processing from the
CPU/MCU and possibly crypto accelerator, to the network card.
The transmitted packet content is not touched by the CPU during TX
operations; the kernel only does the required policy and security
association lookups to find out that a given flow is offloaded, and then
the packet is transmitted as plain text to the card. For driver convenience,
metadata is attached to the packet identifying the SA which must process
the packet. Card does encryption of the payload, padding, calculates
authentication, and does the reformat according to the policy.
Similarly, on receive, card does the decapsulation, decryption, and
authentication. The kernel receives the identifier of the SA that was
used to process the packet, together with the plain-text packet.
Overall, payload octets are only read or written by card DMA engine,
removing a lot of memory subsystem overhead, and saving CPU time because
IPSEC algos calculations are avoided.
If driver declares support for inline IPSEC offload (with the
IFCAP2_IPSEC_OFFLOAD capability set and registering method table struct
if_ipsec_accel_methods), kernel offers the SPD and SAD to driver.
Driver decides which policies and SAs can be offloaded based on
hardware capacity, and acks/nacks each SA for given interface to
kernel. Kernel needs to keep this information to make a decision to
skip software processing on TX, and to assume processing already done
on RX. This shadow SPD/SAD database of offloads is rooted from
policies (struct secpolicy accel_ifps, struct ifp_handle_sp) and SAs
(struct secasvar accel_ifps, struct ifp_handle_sav).
Some extensions to the PF_KEY socket allow limiting the interfaces for
which a given SP/SA could be offloaded (proposed for offload). Also,
additional statistics extensions allow to observe allocation/octet/use
counters for specific SA.
Since SPs and SAs are typically instantiated in non-sleepable context,
while offloading them into card is expected to require costly async
manipulations of the card state, calls to the driver for offload and
termination are executed in the threaded taskqueue. It also solves
the issue of allocating resources needed for the offload database.
Neither ifp_handle_sp nor ifp_handle_sav adds a reference to the
owning SP/SA; the offload must be terminated before the last reference is
dropped. ipsec_accel only adds transient references to ensure safe
pointer ownership by taskqueue.
Maintaining the SA counters for hardware-accelerated packets is the
duty of the driver. The helper ipsec_accel_drv_sa_lifetime_update()
is provided to hide accel infrastructure from drivers which would use
expected callout to query hardware periodically for updates.
Reviewed by: rscheff (transport, stack integration), np
Sponsored by: NVIDIA networking
Differential revision: https://reviews.freebsd.org/D44219
| -rw-r--r-- | sys/conf/files | 2 | ||||
| -rw-r--r-- | sys/conf/options | 1 | ||||
| -rw-r--r-- | sys/modules/ipsec/Makefile | 5 | ||||
| -rw-r--r-- | sys/netipsec/ipsec.c | 17 | ||||
| -rw-r--r-- | sys/netipsec/ipsec.h | 11 | ||||
| -rw-r--r-- | sys/netipsec/ipsec_input.c | 11 | ||||
| -rw-r--r-- | sys/netipsec/ipsec_offload.c | 1061 | ||||
| -rw-r--r-- | sys/netipsec/ipsec_offload.h | 191 | ||||
| -rw-r--r-- | sys/netipsec/ipsec_output.c | 15 | ||||
| -rw-r--r-- | sys/netipsec/ipsec_pcb.c | 38 | ||||
| -rw-r--r-- | sys/netipsec/key.c | 270 | ||||
| -rw-r--r-- | sys/netipsec/key.h | 6 | ||||
| -rw-r--r-- | sys/netipsec/key_debug.c | 5 | ||||
| -rw-r--r-- | sys/netipsec/keydb.h | 14 |
14 files changed, 1628 insertions, 19 deletions
diff --git a/sys/conf/files b/sys/conf/files index 609ac407d400..1f99c3586b86 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4464,6 +4464,8 @@ netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mod.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_offload.c optional ipsec ipsec_offload inet | \ + ipsec ipsec_offload inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/ipsec_pcb.c optional ipsec inet | ipsec inet6 | \ ipsec_support inet | ipsec_support inet6 diff --git a/sys/conf/options b/sys/conf/options index f50d009987bc..928927fe99df 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -466,6 +466,7 @@ IPFIREWALL_PMOD opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h +IPSEC_OFFLOAD opt_ipsec.h IPSTEALTH KERN_TLS KRPC diff --git a/sys/modules/ipsec/Makefile b/sys/modules/ipsec/Makefile index 08a2e88d5794..8979508375a4 100644 --- a/sys/modules/ipsec/Makefile +++ b/sys/modules/ipsec/Makefile @@ -2,8 +2,9 @@ .PATH: ${SRCTOP}/sys/net ${SRCTOP}/sys/netipsec KMOD= ipsec -SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ - ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ +SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c \ + ipsec_mod.c ipsec_offload.c ipsec_output.c \ + xform_ah.c xform_esp.c xform_ipcomp.c \ opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_sctp.h .if "${MK_INET}" != "no" || "${MK_INET6}" != "no" SRCS+= udpencap.c diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 0ca33424bca8..e22a3872d48d 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -85,6 +85,7 @@ #ifdef INET6 #include <netipsec/ipsec6.h> #endif +#include <netipsec/ipsec_offload.h> #include <netipsec/ah_var.h> #include <netipsec/esp_var.h> #include <netipsec/ipcomp.h> /*XXX*/ @@ -636,8 +637,16 @@ int ipsec4_in_reject(const struct mbuf *m, struct inpcb 
*inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); @@ -802,8 +811,16 @@ int ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 2a1dcb8bb77b..55cc0839eab9 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -71,6 +71,12 @@ struct ipsecrequest { u_int level; /* IPsec level defined below. */ }; +struct ipsec_accel_adddel_sp_tq { + struct vnet *adddel_vnet; + struct task adddel_task; + int adddel_scheduled; +}; + /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; @@ -102,6 +108,11 @@ struct secpolicy { time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ + CK_LIST_HEAD(, ifp_handle_sp) accel_ifps; + struct ipsec_accel_adddel_sp_tq accel_add_tq; + struct ipsec_accel_adddel_sp_tq accel_del_tq; + struct inpcb *ipsec_accel_add_sp_inp; + const char *accel_ifname; }; /* diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 1150f3f470d3..dbb20748cf45 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -90,6 +90,7 @@ #include <netipsec/esp.h> #include <netipsec/esp_var.h> #include <netipsec/ipcomp_var.h> +#include <netipsec/ipsec_offload.h> #include <netipsec/key.h> #include <netipsec/keydb.h> @@ -237,6 +238,11 @@ 
ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) int ipsec4_input(struct mbuf *m, int offset, int proto) { + int error; + + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); switch (proto) { case IPPROTO_AH: @@ -536,7 +542,12 @@ ipsec6_lasthdr(int proto) int ipsec6_input(struct mbuf *m, int offset, int proto) { + int error; + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); + switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c new file mode 100644 index 000000000000..851bacaf4ea1 --- /dev/null +++ b/sys/netipsec/ipsec_offload.c @@ -0,0 +1,1061 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/ck.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/pctrie.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/taskqueue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet/in_pcb.h> + +#include <netipsec/key.h> +#include <netipsec/keydb.h> +#include <netipsec/key_debug.h> +#include <netipsec/xform.h> +#include <netipsec/ipsec.h> +#include <netipsec/ipsec_offload.h> +#include <netipsec/ah_var.h> +#include <netipsec/esp.h> +#include <netipsec/esp_var.h> +#include <netipsec/ipcomp_var.h> + +#ifdef IPSEC_OFFLOAD + +static struct mtx ipsec_accel_sav_tmp; +static struct unrhdr *drv_spi_unr; +static struct mtx ipsec_accel_cnt_lock; + +struct ipsec_accel_install_newkey_tq { + struct secasvar *sav; + struct vnet *install_vnet; + struct task install_task; +}; + +struct ipsec_accel_forget_tq { + struct vnet *forget_vnet; + struct task forget_task; + struct secasvar *sav; +}; + +struct ifp_handle_sav { + CK_LIST_ENTRY(ifp_handle_sav) sav_link; + CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; + struct secasvar *sav; + struct ifnet *ifp; + void *ifdata; + uint64_t drv_spi; + uint32_t flags; 
+ size_t hdr_ext_size; + uint64_t cnt_octets; + uint64_t cnt_allocs; +}; + +#define IFP_HS_HANDLED 0x00000001 +#define IFP_HS_REJECTED 0x00000002 +#define IFP_HS_INPUT 0x00000004 +#define IFP_HS_OUTPUT 0x00000008 +#define IFP_HS_MARKER 0x00000010 + +static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles; + +struct ifp_handle_sp { + CK_LIST_ENTRY(ifp_handle_sp) sp_link; + CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; + struct secpolicy *sp; + struct ifnet *ifp; + void *ifdata; + uint32_t flags; +}; + +#define IFP_HP_HANDLED 0x00000001 +#define IFP_HP_REJECTED 0x00000002 +#define IFP_HP_MARKER 0x00000004 + +static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles; + +static void * +drvspi_sa_trie_alloc(struct pctrie *ptree) +{ + void *res; + + res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT); + if (res != NULL) + pctrie_zone_init(res, 0, 0); + return (res); +} + +static void +drvspi_sa_trie_free(struct pctrie *ptree, void *node) +{ + free(node, M_IPSEC_MISC); +} + +PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi, + drvspi_sa_trie_alloc, drvspi_sa_trie_free); +static struct pctrie drv_spi_pctrie; + +static void ipsec_accel_sa_newkey_impl(struct secasvar *sav); +static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires); +static void ipsec_accel_forget_sav_clear(struct secasvar *sav); +static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, + struct ifnet *ifp); +static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m); +static void ipsec_accel_sync_imp(void); +static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav); +static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav); + +static void 
+ipsec_accel_init(void *arg) +{ + mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0); + mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0); + drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN, + IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp); + ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl; + ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl; + ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl; + ipsec_accel_spddel_p = ipsec_accel_spddel_impl; + ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl; + ipsec_accel_sync_p = ipsec_accel_sync_imp; + ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl; + ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl; + pctrie_init(&drv_spi_pctrie); +} +SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_init, NULL); + +static void +ipsec_accel_fini(void *arg) +{ + ipsec_accel_sa_newkey_p = NULL; + ipsec_accel_forget_sav_p = NULL; + ipsec_accel_spdadd_p = NULL; + ipsec_accel_spddel_p = NULL; + ipsec_accel_sa_lifetime_op_p = NULL; + ipsec_accel_sync_p = NULL; + ipsec_accel_is_accel_sav_p = NULL; + ipsec_accel_key_setaccelif_p = NULL; + ipsec_accel_sync_imp(); + clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ + clear_unrhdr(drv_spi_unr); + delete_unrhdr(drv_spi_unr); + mtx_destroy(&ipsec_accel_sav_tmp); + mtx_destroy(&ipsec_accel_cnt_lock); +} +SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_fini, NULL); + +static void +ipsec_accel_alloc_forget_tq(struct secasvar *sav) +{ + void *ftq; + + if (sav->accel_forget_tq != 0) + return; + + ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK); + if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq)) + free(ftq, M_TEMP); +} + +static bool +ipsec_accel_sa_install_match(if_t ifp, void *arg) +{ + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) + return (false); + if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) { + printf("driver bug ifp %s if_sa_newkey 
NULL\n", + if_name(ifp)); + return (false); + } + return (true); +} + +static int +ipsec_accel_sa_newkey_cb(if_t ifp, void *arg) +{ + struct ipsec_accel_install_newkey_tq *tq; + void *priv; + u_int drv_spi; + int error; + + tq = arg; + + printf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x " + "flags %#x seq %d\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey, + be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq); + priv = NULL; + drv_spi = alloc_unr(drv_spi_unr); + if (tq->sav->accel_ifname != NULL && + strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + goto out; + } + if (drv_spi == -1) { + /* XXXKIB */ + printf("ipsec_accel_sa_install_newkey: cannot alloc " + "drv_spi if %s spi %#x\n", if_name(ifp), + be32toh(tq->sav->spi)); + return (ENOMEM); + } + error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav, + drv_spi, &priv); + if (error != 0) { + if (error == EOPNOTSUPP) { + printf("ipsec_accel_sa_newkey: driver " + "refused sa if %s spi %#x\n", + if_name(ifp), be32toh(tq->sav->spi)); + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + /* XXXKIB */ + } else { + printf("ipsec_accel_sa_newkey: driver " + "error %d if %s spi %#x\n", + error, if_name(ifp), be32toh(tq->sav->spi)); + /* XXXKIB */ + } + } else { + error = ipsec_accel_handle_sav(tq->sav, ifp, + drv_spi, priv, IFP_HS_HANDLED, NULL); + if (error != 0) { + /* XXXKIB */ + printf("ipsec_accel_sa_newkey: handle_sav " + "err %d if %s spi %#x\n", error, + if_name(ifp), be32toh(tq->sav->spi)); + } + } +out: + return (error); +} + +static void +ipsec_accel_sa_newkey_act(void *context, int pending) +{ + struct ipsec_accel_install_newkey_tq *tq; + void *tqf; + struct secasvar *sav; + + tq = context; + tqf = NULL; + sav = tq->sav; + CURVNET_SET(tq->install_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) == 0 && + 
sav->state == SADB_SASTATE_MATURE) { + sav->accel_flags |= SADB_KEY_ACCEL_INST; + mtx_unlock(&ipsec_accel_sav_tmp); + if_foreach_sleep(ipsec_accel_sa_install_match, context, + ipsec_accel_sa_newkey_cb, context); + ipsec_accel_alloc_forget_tq(sav); + mtx_lock(&ipsec_accel_sav_tmp); + + /* + * If ipsec_accel_forget_sav() raced with us and set + * the flag, do its work. Its task cannot execute in + * parallel since taskqueue_thread is single-threaded. + */ + if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) { + tqf = (void *)sav->accel_forget_tq; + sav->accel_forget_tq = 0; + ipsec_accel_forget_sav_clear(sav); + } + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&tq->sav); + CURVNET_RESTORE(); + free(tq, M_TEMP); + free(tqf, M_TEMP); +} + +static void +ipsec_accel_sa_newkey_impl(struct secasvar *sav) +{ + struct ipsec_accel_install_newkey_tq *tq; + + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) != 0) + return; + + printf( + "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n", + be32toh(sav->spi), sav->flags, sav->seq); + + tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT); + if (tq == NULL) { + printf("ipsec_accel_sa_install_newkey: no memory for tq, " + "spi %#x\n", be32toh(sav->spi)); + /* XXXKIB */ + return; + } + + refcount_acquire(&sav->refcnt); + + TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq); + tq->sav = sav; + tq->install_vnet = curthread->td_vnet; /* XXXKIB liveness */ + taskqueue_enqueue(taskqueue_thread, &tq->install_task); +} + +static int +ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires) +{ + struct ifp_handle_sav *ihs, *i; + int error; + + MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1); + + ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO); + ihs->ifp = ifp; + ihs->sav = sav; + ihs->drv_spi = drv_spi; + ihs->ifdata = priv; + ihs->flags = flags; + if ((flags & IFP_HS_OUTPUT) != 0) + 
ihs->hdr_ext_size = esp_hdrsiz(sav); + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) { + error = EALREADY; + goto errout; + } + } + error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs); + if (error != 0) + goto errout; + if_ref(ihs->ifp); + CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + if (ires != NULL) + *ires = ihs; + return (0); +errout: + mtx_unlock(&ipsec_accel_sav_tmp); + free(ihs, M_IPSEC_MISC); + if (ires != NULL) + *ires = NULL; + return (error); +} + +static void +ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav) +{ + struct ifnet *ifp; + struct secasvar *sav; + + mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED); + + CK_LIST_REMOVE(i, sav_link); + CK_LIST_REMOVE(i, sav_allh_link); + DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + ifp = i->ifp; + sav = i->sav; + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + printf("sa deinstall %s %p spi %#x ifl %#x\n", + if_name(ifp), sav, be32toh(sav->spi), i->flags); + ifp->if_ipsec_accel_m->if_sa_deinstall(ifp, + i->drv_spi, i->ifdata); + } + if_rele(ifp); + free_unr(drv_spi_unr, i->drv_spi); + free(i, M_IPSEC_MISC); + if (freesav) + key_freesav(&sav); + mtx_lock(&ipsec_accel_sav_tmp); +} + +static void +ipsec_accel_forget_sav_clear(struct secasvar *sav) +{ + struct ifp_handle_sav *i; + + for (;;) { + i = CK_LIST_FIRST(&sav->accel_ifps); + if (i == NULL) + break; + ipsec_accel_forget_handle_sav(i, false); + } +} + +static void +ipsec_accel_forget_sav_act(void *arg, int pending) +{ + struct ipsec_accel_forget_tq *tq; + struct secasvar *sav; + + tq = arg; + sav = tq->sav; + CURVNET_SET(tq->forget_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + ipsec_accel_forget_sav_clear(sav); + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&sav); + 
CURVNET_RESTORE(); + free(tq, M_TEMP); +} + +void +ipsec_accel_forget_sav_impl(struct secasvar *sav) +{ + struct ipsec_accel_forget_tq *tq; + + mtx_lock(&ipsec_accel_sav_tmp); + sav->accel_flags |= SADB_KEY_ACCEL_DEINST; + tq = (void *)atomic_load_ptr(&sav->accel_forget_tq); + if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq, + (uintptr_t)tq, 0)) { + mtx_unlock(&ipsec_accel_sav_tmp); + return; + } + mtx_unlock(&ipsec_accel_sav_tmp); + + refcount_acquire(&sav->refcnt); + TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq); + tq->forget_vnet = curthread->td_vnet; + tq->sav = sav; + taskqueue_enqueue(taskqueue_thread, &tq->forget_task); +} + +static void +ipsec_accel_on_ifdown_sav(struct ifnet *ifp) +{ + struct ifp_handle_sav *i, *marker; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker, + sav_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sav_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sav_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sav_allh_link); + if (i->ifp == ifp) { + refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? 
*/ + ipsec_accel_forget_handle_sav(i, true); + } + } + CK_LIST_REMOVE(marker, sav_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp) +{ + struct ifp_handle_sav *i; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) + return (NULL); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp) +{ + NET_EPOCH_ASSERT(); + return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp)); +} + +static bool +ipsec_accel_is_accel_sav_impl(struct secasvar *sav) +{ + return (!CK_LIST_EMPTY(&sav->accel_ifps)); +} + +static struct secasvar * +ipsec_accel_drvspi_to_sa(u_int drv_spi) +{ + struct ifp_handle_sav *i; + + i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi); + if (i == NULL) + return (NULL); + return (i->sav); +} + +static struct ifp_handle_sp * +ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp) +{ + struct ifp_handle_sp *i; + + CK_LIST_FOREACH(i, &sp->accel_ifps, sp_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static bool +ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp) +{ + return (ipsec_accel_find_accel_sp(sp, ifp) != NULL); +} + +static int +ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp, + struct ifp_handle_sp **ip) +{ + struct ifp_handle_sp *i; + + i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO); + i->sp = sp; + i->ifp = ifp; + if_ref(ifp); + i->flags = IFP_HP_HANDLED; + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + *ip = i; + return (0); +} + +static bool +ipsec_accel_spdadd_match(if_t ifp, void *arg) +{ + struct secpolicy *sp; + + if ((ifp->if_capenable2 & 
IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 || + ifp->if_ipsec_accel_m->if_spdadd == NULL) + return (false); + sp = arg; + if (sp->accel_ifname != NULL && + strcmp(sp->accel_ifname, if_name(ifp)) != 0) + return (false); + if (ipsec_accel_is_accel_sp(sp, ifp)) + return (false); + return (true); +} + +static int +ipsec_accel_spdadd_cb(if_t ifp, void *arg) +{ + struct secpolicy *sp; + struct inpcb *inp; + struct ifp_handle_sp *i; + int error; + + sp = arg; + inp = sp->ipsec_accel_add_sp_inp; + printf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp); + error = ipsec_accel_remember_sp(sp, ifp, &i); + if (error != 0) { + printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n", + if_name(ifp), sp, error); + return (error); + } + error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata); + if (error != 0) { + i->flags |= IFP_HP_REJECTED; + printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n", + if_name(ifp), sp, error); + } + return (error); +} + +static void +ipsec_accel_spdadd_act(void *arg, int pending) +{ + struct secpolicy *sp; + struct inpcb *inp; + + sp = arg; + CURVNET_SET(sp->accel_add_tq.adddel_vnet); + if_foreach_sleep(ipsec_accel_spdadd_match, arg, + ipsec_accel_spdadd_cb, arg); + inp = sp->ipsec_accel_add_sp_inp; + if (inp != NULL) { + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); + sp->ipsec_accel_add_sp_inp = NULL; + } + CURVNET_RESTORE(); + key_freesp(&sp); +} + +void +ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + if (sp->tcount == 0 && inp == NULL) + return; + tq = &sp->accel_add_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + return; + tq->adddel_vnet = curthread->td_vnet; + sp->ipsec_accel_add_sp_inp = inp; + if (inp != NULL) + in_pcbref(inp); + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp); + key_addref(sp); + 
taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_spddel_act(void *arg, int pending) +{ + struct ifp_handle_sp *i; + struct secpolicy *sp; + int error; + + sp = arg; + CURVNET_SET(sp->accel_del_tq.adddel_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + for (;;) { + i = CK_LIST_FIRST(&sp->accel_ifps); + if (i == NULL) + break; + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(i->ifp), sp); + error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp, + sp, i->ifdata); + if (error != 0) { + printf( + "ipsec_accel_spddel: %s if_spddel %p res %d\n", + if_name(i->ifp), sp, error); + } + } + if_rele(i->ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesp(&sp); + CURVNET_RESTORE(); +} + +void +ipsec_accel_spddel_impl(struct secpolicy *sp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + + tq = &sp->accel_del_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + return; + tq->adddel_vnet = curthread->td_vnet; + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_on_ifdown_sp(struct ifnet *ifp) +{ + struct ifp_handle_sp *i, *marker; + struct secpolicy *sp; + int error; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker, + sp_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sp_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sp_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sp_allh_link); + if (i->ifp != ifp) + continue; + + sp = i->sp; + key_addref(sp); + CK_LIST_REMOVE(i, sp_link); + 
CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(ifp), sp); + error = ifp->if_ipsec_accel_m->if_spddel(ifp, + sp, i->ifdata); + } + if (error != 0) { + printf( + "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n", + if_name(ifp), sp, error); + } + key_freesp(&sp); + if_rele(ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + CK_LIST_REMOVE(marker, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +void +ipsec_accel_on_ifdown(struct ifnet *ifp) +{ + ipsec_accel_on_ifdown_sp(ifp); + ipsec_accel_on_ifdown_sav(ifp); +} + +static bool +ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu) +{ + int alen, blks, hlen, padding, rlen; + + rlen = m->m_pkthdr.len - skip; + hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) : + sizeof(struct newesp)) + sav->ivlen; + blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ? 
+ sav->tdb_encalgxform->native_blocksize : + sav->tdb_encalgxform->blocksize); + padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; + alen = xform_ah_authsize(sav->tdb_authalgxform); + + return (skip + hlen + rlen + padding + alen <= mtu); +} + +static bool +ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi) +{ + struct ipsec_accel_out_tag *tag; + + tag = (struct ipsec_accel_out_tag *)m_tag_get( + PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT); + if (tag == NULL) + return (false); + tag->drv_spi = drv_spi; + m_tag_prepend(m, &tag->tag); + return (true); +} + +bool +ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, + struct secpolicy *sp, struct secasvar *sav, int af, int mtu) +{ + struct ifp_handle_sav *i; + struct ip *ip; + u_long ip_len, skip; + + if (ifp == NULL) + return (false); + + M_ASSERTPKTHDR(m); + NET_EPOCH_ASSERT(); + + if (sav == NULL) + return (ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS)); + + i = ipsec_accel_is_accel_sav_ptr(sav, ifp); + if (i == NULL) + return (false); + + if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { + ip_len = m->m_pkthdr.len; + if (ip_len + i->hdr_ext_size > mtu) + return (false); + switch (af) { + case AF_INET: + ip = mtod(m, struct ip *); + skip = ip->ip_hl << 2; + break; + case AF_INET6: + skip = sizeof(struct ip6_hdr); + break; + default: + __unreachable(); + } + if (!ipsec_accel_output_pad(m, sav, skip, mtu)) + return (false); + } + + if (!ipsec_accel_output_tag(m, i->drv_spi)) + return (false); + + ipsec_accel_sa_recordxfer(sav, m); + key_freesav(&sav); + if (sp != NULL) + key_freesp(&sp); + + return (true); +} + +struct ipsec_accel_in_tag * +ipsec_accel_input_tag_lookup(const struct mbuf *m) +{ + struct ipsec_accel_in_tag *tag; + struct m_tag *xtag; + + xtag = m_tag_find(__DECONST(struct mbuf *, m), + PACKET_TAG_IPSEC_ACCEL_IN, NULL); + if (xtag == NULL) + return (NULL); + tag = __containerof(xtag, struct ipsec_accel_in_tag, tag); + return (tag); +} + +int 
+ipsec_accel_input(struct mbuf *m, int offset, int proto) +{ + struct secasvar *sav; + struct ipsec_accel_in_tag *tag; + + tag = ipsec_accel_input_tag_lookup(m); + if (tag == NULL) + return (ENXIO); + + if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN || + tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) { + printf("if %s mbuf %p drv_spi %d invalid, packet dropped\n", + (m->m_flags & M_PKTHDR) != 0 ? if_name(m->m_pkthdr.rcvif) : + "<unknwn>", m, tag->drv_spi); + m_freem(m); + return (EINPROGRESS); + } + + sav = ipsec_accel_drvspi_to_sa(tag->drv_spi); + if (sav != NULL) + ipsec_accel_sa_recordxfer(sav, m); + return (0); +} + +static void +ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m) +{ + counter_u64_add(sav->accel_lft_sw, 1); + counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len); + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; +} + +static void +ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c, + const struct seclifetime *lft_l) +{ + lft_c->allocations += lft_l->allocations; + lft_c->bytes += lft_l->bytes; + lft_c->usetime = min(lft_c->usetime, lft_l->usetime); +} + +void +ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t octets, uint64_t allocs) +{ + struct epoch_tracker et; + struct ifp_handle_sav *i; + uint64_t odiff, adiff; + + NET_EPOCH_ENTER(et); + mtx_lock(&ipsec_accel_cnt_lock); + + if (allocs != 0) { + if (sav->firstused == 0) + sav->firstused = time_second; + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; + } + + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp && i->drv_spi == drv_spi) + break; + } + if (i == NULL) + goto out; + + odiff = octets - i->cnt_octets; + adiff = allocs - i->cnt_allocs; + + if (sav->lft_c != NULL) { + counter_u64_add(sav->lft_c_bytes, odiff); + counter_u64_add(sav->lft_c_allocations, adiff); + } + + i->cnt_octets = octets; + i->cnt_allocs = allocs; + sav->accel_hw_octets += odiff; + sav->accel_hw_allocs += 
adiff; + +out: + mtx_unlock(&ipsec_accel_cnt_lock); + NET_EPOCH_EXIT(et); +} + +static void +ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp, + struct seclifetime *lft) +{ + struct ifp_handle_sav *i; + if_sa_cnt_fn_t p; + + IFNET_RLOCK_ASSERT(); + + i = ipsec_accel_is_accel_sav_ptr(sav, ifp); + if (i != NULL && (i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + p = ifp->if_ipsec_accel_m->if_sa_cnt; + if (p != NULL) + p(ifp, sav, i->drv_spi, i->ifdata, lft); + } +} + +static int +ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + struct seclifetime lft_l, lft_s; + struct ifp_handle_sav *i; + if_t ifp1; + if_sa_cnt_fn_t p; + int error; + + error = 0; + memset(&lft_l, 0, sizeof(lft_l)); + memset(&lft_s, 0, sizeof(lft_s)); + + switch (op & ~IF_SA_CNT_UPD) { + case IF_SA_CNT_IFP_HW_VAL: + ipsec_accel_sa_lifetime_hw(sav, ifp, &lft_l); + ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + break; + + case IF_SA_CNT_TOTAL_SW_VAL: + lft_l.allocations = (uint32_t)counter_u64_fetch( + sav->accel_lft_sw); + lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1); + lft_l.usetime = sav->accel_firstused; + break; + + case IF_SA_CNT_TOTAL_HW_VAL: + IFNET_RLOCK_ASSERT(); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) != + IFP_HS_HANDLED) + continue; + ifp1 = i->ifp; + p = ifp1->if_ipsec_accel_m->if_sa_cnt; + if (p == NULL) + continue; + memset(&lft_s, 0, sizeof(lft_s)); + if (sahtree_trackerp != NULL) + ipsec_sahtree_runlock(sahtree_trackerp); + error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s); + if (sahtree_trackerp != NULL) + ipsec_sahtree_rlock(sahtree_trackerp); + if (error == 0) + ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + } + break; + } + + if (error == 0) { + if ((op & IF_SA_CNT_UPD) == 0) + memset(lft_c, 0, sizeof(*lft_c)); + ipsec_accel_sa_lifetime_update(lft_c, 
&lft_l); + } + + return (error); +} + +static void +ipsec_accel_sync_imp(void) +{ + taskqueue_drain_all(taskqueue_thread); +} + +static struct mbuf * +ipsec_accel_key_setaccelif_impl(struct secasvar *sav) +{ + struct mbuf *m, *m1; + struct ifp_handle_sav *i; + struct epoch_tracker et; + + if (sav->accel_ifname != NULL) + return (key_setaccelif(sav->accel_ifname)); + + m = m1 = NULL; + + NET_EPOCH_ENTER(et); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + m1 = key_setaccelif(if_name(i->ifp)); + if (m == NULL) + m = m1; + else if (m1 != NULL) + m_cat(m, m1); + } + } + NET_EPOCH_EXIT(et); + return (m); +} + +#endif /* IPSEC_OFFLOAD */ diff --git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h new file mode 100644 index 000000000000..87e2a33288be --- /dev/null +++ b/sys/netipsec/ipsec_offload.h @@ -0,0 +1,191 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _NETIPSEC_IPSEC_OFFLOAD_H_ +#define _NETIPSEC_IPSEC_OFFLOAD_H_ + +#ifdef _KERNEL +#include <sys/errno.h> +#include <net/if.h> +#include <net/if_var.h> + +struct secpolicy; +struct secasvar; +struct inpcb; + +struct ipsec_accel_out_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +struct ipsec_accel_in_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +#define IPSEC_ACCEL_DRV_SPI_BYPASS 2 +#define IPSEC_ACCEL_DRV_SPI_MIN 3 +#define IPSEC_ACCEL_DRV_SPI_MAX 0xffff + +extern void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); +extern void (*ipsec_accel_sa_install_input_p)(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi); +extern void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +extern void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +extern void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +extern int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +extern void (*ipsec_accel_sync_p)(void); +extern bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +extern struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); + +#ifdef IPSEC_OFFLOAD +/* + * Have to use ipsec_accel_sa_install_input_p indirection because + * key.c is unconditionally included into the static kernel. 
+ */ +static inline void +ipsec_accel_sa_newkey(struct secasvar *sav) +{ + void (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_sa_newkey_p); + if (p != NULL) + p(sav); +} + +static inline void +ipsec_accel_forget_sav(struct secasvar *sav) +{ + void (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_forget_sav_p); + if (p != NULL) + p(sav); +} + +static inline void +ipsec_accel_spdadd(struct secpolicy *sp, struct inpcb *inp) +{ + void (*p)(struct secpolicy *sp, struct inpcb *inp); + + p = atomic_load_ptr(&ipsec_accel_spdadd_p); + if (p != NULL) + p(sp, inp); +} + +static inline void +ipsec_accel_spddel(struct secpolicy *sp) +{ + void (*p)(struct secpolicy *sp); + + p = atomic_load_ptr(&ipsec_accel_spddel_p); + if (p != NULL) + p(sp); +} + +static inline int +ipsec_accel_sa_lifetime_op(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + int (*p)(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, + enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp); + + p = atomic_load_ptr(&ipsec_accel_sa_lifetime_op_p); + if (p != NULL) + return (p(sav, lft_c, ifp, op, sahtree_trackerp)); + return (ENOTSUP); +} + +static inline void +ipsec_accel_sync(void) +{ + void (*p)(void); + + p = atomic_load_ptr(&ipsec_accel_sync_p); + if (p != NULL) + p(); +} + +static inline bool +ipsec_accel_is_accel_sav(struct secasvar *sav) +{ + bool (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_is_accel_sav_p); + if (p != NULL) + return (p(sav)); + return (false); +} + +static inline struct mbuf * +ipsec_accel_key_setaccelif(struct secasvar *sav) +{ + struct mbuf *(*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_key_setaccelif_p); + if (p != NULL) + return (p(sav)); + return (NULL); +} + + +#else +#define ipsec_accel_sa_newkey(a) +#define ipsec_accel_forget_sav(a) +#define ipsec_accel_spdadd(a, b) +#define ipsec_accel_spddel(a) +#define 
ipsec_accel_sa_lifetime_op(a, b, c, d, e) +#define ipsec_accel_sync() +#define ipsec_accel_is_accel_sav(a) +#define ipsec_accel_key_setaccelif(a) +#endif + +void ipsec_accel_forget_sav_impl(struct secasvar *sav); +void ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp); +void ipsec_accel_spddel_impl(struct secpolicy *sp); + +#ifdef IPSEC_OFFLOAD +int ipsec_accel_input(struct mbuf *m, int offset, int proto); +bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, + struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, + int mtu); +void ipsec_accel_forget_sav(struct secasvar *sav); +#else +#define ipsec_accel_input(a, b, c) (ENXIO) +#define ipsec_accel_output(a, b, c, d, e, f, g) (false) +#define ipsec_accel_forget_sav(a) +#endif + +struct ipsec_accel_in_tag *ipsec_accel_input_tag_lookup(const struct mbuf *); +void ipsec_accel_on_ifdown(struct ifnet *ifp); +void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t octets, uint64_t allocs); + +#endif /* _KERNEL */ + +#endif /* _NETIPSEC_IPSEC_OFFLOAD_H_ */ diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index be996f257b64..8f49bc8fce24 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -84,6 +84,7 @@ #include <netipsec/ipsec6.h> #endif #include <netipsec/ipsec_support.h> +#include <netipsec/ipsec_offload.h> #include <netipsec/ah_var.h> #include <netipsec/esp_var.h> #include <netipsec/ipcomp_var.h> @@ -210,6 +211,8 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET, mtu); key_freesp(&sp); return (error); } @@ -222,6 +225,9 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + if 
(ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu)) + return (EJUSTRETURN); + ip = mtod(m, struct ip *); dst = &sav->sah->saidx.dst; /* Do the appropriate encapsulation, if necessary */ @@ -597,6 +603,8 @@ ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET6, mtu); key_freesp(&sp); return (error); } @@ -611,6 +619,9 @@ ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu)) + return (EJUSTRETURN); + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ dst = &sav->sah->saidx.dst; @@ -859,6 +870,10 @@ ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, struct m_tag *mtag; int error; + if (sav->state >= SADB_SASTATE_DEAD) { + error = ESRCH; + goto bad; + } saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET diff --git a/sys/netipsec/ipsec_pcb.c b/sys/netipsec/ipsec_pcb.c index 38a94907cc48..497bc5e3b2f3 100644 --- a/sys/netipsec/ipsec_pcb.c +++ b/sys/netipsec/ipsec_pcb.c @@ -49,6 +49,7 @@ #include <netipsec/ipsec_support.h> #include <netipsec/key.h> #include <netipsec/key_debug.h> +#include <netipsec/ipsec_offload.h> MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); @@ -166,18 +167,26 @@ ipsec_init_pcbpolicy(struct inpcb *inp) int ipsec_delete_pcbpolicy(struct inpcb *inp) { + struct inpcbpolicy *inp_sp; - if (inp->inp_sp == NULL) + inp_sp = inp->inp_sp; + if (inp_sp == NULL) return (0); + inp->inp_sp = NULL; - if (inp->inp_sp->sp_in != NULL) - key_freesp(&inp->inp_sp->sp_in); + if (inp_sp->sp_in != NULL) { + if ((inp_sp->flags & INP_INBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_in); + key_freesp(&inp_sp->sp_in); + 
} - if (inp->inp_sp->sp_out != NULL) - key_freesp(&inp->inp_sp->sp_out); + if (inp_sp->sp_out != NULL) { + if ((inp_sp->flags & INP_OUTBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_out); + key_freesp(&inp_sp->sp_out); + } - free(inp->inp_sp, M_IPSEC_INPCB); - inp->inp_sp = NULL; + free(inp_sp, M_IPSEC_INPCB); return (0); } @@ -248,20 +257,26 @@ ipsec_copy_pcbpolicy(struct inpcb *old, struct inpcb *new) if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); - if (new->inp_sp->sp_in != NULL) + if (new->inp_sp->sp_in != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_in); key_freesp(&new->inp_sp->sp_in); + } new->inp_sp->sp_in = sp; new->inp_sp->flags |= INP_INBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); - if (new->inp_sp->sp_out != NULL) + if (new->inp_sp->sp_out != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_out); key_freesp(&new->inp_sp->sp_out); + } new->inp_sp->sp_out = sp; new->inp_sp->flags |= INP_OUTBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } return (0); } @@ -339,8 +354,10 @@ ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, flags = INP_OUTBOUND_POLICY; } /* Clear old SP and set new SP. 
*/ - if (*spp != NULL) + if (*spp != NULL) { + ipsec_accel_spddel(*spp); key_freesp(spp); + } *spp = newsp; KEYDBG(IPSEC_DUMP, printf("%s: new SP(%p)\n", __func__, newsp)); @@ -348,6 +365,7 @@ ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, inp->inp_sp->flags &= ~flags; else { inp->inp_sp->flags |= flags; + ipsec_accel_spdadd(newsp, inp); KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); } INP_WUNLOCK(inp); diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index 501f5c0a7339..38dd2bc5c1a6 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -83,6 +83,7 @@ #include <netipsec/key.h> #include <netipsec/keysock.h> #include <netipsec/key_debug.h> +#include <netipsec/ipsec_offload.h> #include <netipsec/ipsec.h> #ifdef INET6 @@ -90,12 +91,26 @@ #endif #include <netipsec/xform.h> +#include <netipsec/ipsec_offload.h> #include <machine/in_cksum.h> #include <machine/stdarg.h> /* randomness */ #include <sys/random.h> +#ifdef IPSEC_OFFLOAD +void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); +void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +void (*ipsec_accel_sync_p)(void); +bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); +#endif + #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) @@ -391,6 +406,9 @@ static const int minsize[] = { [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = sizeof(struct sadb_address), [SADB_X_EXT_NEW_ADDRESS_DST] = sizeof(struct sadb_address), + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct 
sadb_x_if_hw_offl), }; _Static_assert(nitems(minsize) == SADB_EXT_MAX + 1, "minsize size mismatch"); @@ -424,6 +442,9 @@ static const int maxsize[] = { [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = 0, [SADB_X_EXT_NEW_ADDRESS_DST] = 0, + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(maxsize) == SADB_EXT_MAX + 1, "maxsize size mismatch"); @@ -661,7 +682,7 @@ static int key_updateaddresses(struct socket *, struct mbuf *, const struct sadb_msghdr *, struct secasvar *, struct secasindex *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t); + u_int8_t, u_int32_t, u_int32_t, struct rm_priotracker *); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); @@ -1227,6 +1248,11 @@ key_freesp(struct secpolicy **spp) KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); *spp = NULL; +#ifdef IPSEC_OFFLOAD + KASSERT(CK_LIST_EMPTY(&sp->accel_ifps), + ("key_freesp: sp %p still offloaded", sp)); + free(__DECONST(char *, sp->accel_ifname), M_IPSEC_MISC); +#endif while (sp->tcount > 0) ipsec_delisr(sp->req[--sp->tcount]); free(sp, M_IPSEC_SP); @@ -1240,6 +1266,7 @@ key_unlink(struct secpolicy *sp) SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); key_freesp(&sp); } @@ -1258,6 +1285,7 @@ key_detach(struct secpolicy *sp) return; } sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); V_spd_size--; LIST_REMOVE(sp, idhash); @@ -1285,6 +1313,7 @@ done: newsp->state = IPSEC_SPSTATE_ALIVE; V_spd_size++; V_sp_genid++; + ipsec_accel_spdadd(newsp, NULL); } /* @@ -1329,6 +1358,7 @@ key_register_ifnet(struct secpolicy **spp, u_int count) */ 
LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); spp[i]->state = IPSEC_SPSTATE_IFNET; + ipsec_accel_spdadd(spp[i], NULL); } SPTREE_WUNLOCK(); /* @@ -1357,6 +1387,7 @@ key_unregister_ifnet(struct secpolicy **spp, u_int count) if (spp[i]->state != IPSEC_SPSTATE_IFNET) continue; spp[i]->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(spp[i]); TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], spp[i], chain); V_spd_size--; @@ -1365,6 +1396,7 @@ key_unregister_ifnet(struct secpolicy **spp, u_int count) SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); for (i = 0; i < count; i++) { m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); @@ -1424,6 +1456,7 @@ key_unlinksav(struct secasvar *sav) /* Unlink from SPI hash */ LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sah = sav->sah; SAHTREE_WUNLOCK(); key_freesav(&sav); @@ -1821,6 +1854,9 @@ key_sp2msg(struct secpolicy *sp, void *request, size_t *len) size_t xlen, ilen; caddr_t p; int error, i; +#ifdef IPSEC_OFFLOAD + struct sadb_x_if_hw_offl *xif; +#endif IPSEC_ASSERT(sp != NULL, ("null policy")); @@ -1876,6 +1912,18 @@ key_sp2msg(struct secpolicy *sp, void *request, size_t *len) } } xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); +#ifdef IPSEC_OFFLOAD + if (error == 0 && sp->accel_ifname != NULL) { + xif = (struct sadb_x_if_hw_offl *)(xpl + 1); + bzero(xif, sizeof(*xif)); + xif->sadb_x_if_hw_offl_len = PFKEY_UNIT64(sizeof(*xif)); + xif->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + xif->sadb_x_if_hw_offl_flags = 0; + strncpy(xif->sadb_x_if_hw_offl_if, sp->accel_ifname, + sizeof(xif->sadb_x_if_hw_offl_if)); + xlen += sizeof(*xif); + } +#endif if (error == 0) *len = xlen; else @@ -2088,6 +2136,27 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? 
lft->sadb_lifetime_usetime : 0; bcopy(&spidx, &newsp->spidx, sizeof(spidx)); +#ifdef IPSEC_OFFLOAD + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + newsp->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsp->accel_ifname == NULL) { + ipseclog((LOG_DEBUG, "%s: cannot alloc accel_ifname.\n", + __func__)); + key_freesp(&newsp); + return (key_senderror(so, m, ENOBUFS)); + } + strncpy(__DECONST(char *, newsp->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } + +#endif SPTREE_WLOCK(); if ((newsp->id = key_getnewspid()) == 0) { @@ -2095,6 +2164,7 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) key_detach(oldsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2109,6 +2179,7 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) key_insertsp(newsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2290,6 +2361,7 @@ key_spddelete(struct socket *so, struct mbuf *m, KEYDBG(KEY_STAMP, printf("%s: SP(%p)\n", __func__, sp)); KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + ipsec_accel_spddel(sp); key_unlink(sp); key_freesp(&sp); @@ -2561,6 +2633,7 @@ key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) */ TAILQ_FOREACH(sp, &drainq, chain) { sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); LIST_REMOVE(sp, idhash); } V_sp_genid++; @@ -2764,6 +2837,10 @@ key_getspreqmsglen(struct secpolicy *sp) tlen += PFKEY_ALIGN8(len); } 
+#ifdef IPSEC_OFFLOAD + if (sp->accel_ifname != NULL) + tlen += sizeof(struct sadb_x_if_hw_offl); +#endif return (tlen); } @@ -3005,6 +3082,32 @@ key_newsav(const struct sadb_msghdr *mhp, struct secasindex *saidx, sav->state = SADB_SASTATE_LARVAL; sav->pid = (pid_t)mhp->msg->sadb_msg_pid; SAV_INITREF(sav); +#ifdef IPSEC_OFFLOAD + CK_LIST_INIT(&sav->accel_ifps); + sav->accel_forget_tq = 0; + sav->accel_lft_sw = uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (sav->accel_lft_sw == NULL) { + *errp = ENOBUFS; + goto done; + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + sav->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (sav->accel_ifname == NULL) { + *errp = ENOBUFS; + goto done; + } + strncpy(__DECONST(char *, sav->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } +#endif again: sah = key_getsah(saidx); if (sah == NULL) { @@ -3068,9 +3171,10 @@ again: SAH_ADDREF(sah); } /* Link SAV with SAH */ - if (sav->state == SADB_SASTATE_MATURE) + if (sav->state == SADB_SASTATE_MATURE) { TAILQ_INSERT_HEAD(&sah->savtree_alive, sav, chain); - else + ipsec_accel_sa_newkey(sav); + } else TAILQ_INSERT_HEAD(&sah->savtree_larval, sav, chain); /* Add SAV into SPI hash */ LIST_INSERT_HEAD(SAVHASH_HASH(sav->spi), sav, spihash); @@ -3085,6 +3189,13 @@ done: } if (sav->lft_c != NULL) uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); +#ifdef IPSEC_OFFLOAD + if (sav->accel_lft_sw != NULL) + uma_zfree_pcpu(ipsec_key_lft_zone, + sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), + M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA), sav = NULL; } if (sah != NULL) @@ -3153,6 +3264,10 @@ key_delsav(struct secasvar *sav) ("attempt to free non DEAD SA %p", sav)); IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); 
+#ifdef IPSEC_OFFLOAD + KASSERT(CK_LIST_EMPTY(&sav->accel_ifps), + ("key_unlinksav: sav %p still offloaded", sav)); +#endif /* * SA must be unlinked from the chain and hashtbl. @@ -3165,6 +3280,11 @@ key_delsav(struct secasvar *sav) free(sav->lock, M_IPSEC_MISC); uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); } +#ifdef IPSEC_OFFLOAD + /* XXXKIB should this be moved to key_cleansav()? */ + uma_zfree_pcpu(ipsec_key_lft_zone, sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA); } @@ -3588,7 +3708,7 @@ fail: */ static struct mbuf * key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, - uint32_t seq, uint32_t pid) + uint32_t seq, uint32_t pid, struct rm_priotracker *sahtree_trackerp) { struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; @@ -3604,8 +3724,15 @@ key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, +#ifdef IPSEC_OFFLOAD + SADB_X_EXT_LFT_CUR_SW_OFFL, SADB_X_EXT_LFT_CUR_HW_OFFL, + SADB_X_EXT_IF_HW_OFFL, +#endif }; uint32_t replay_count; +#ifdef IPSEC_OFFLOAD + int error; +#endif SECASVAR_RLOCK_TRACKER; @@ -3752,6 +3879,44 @@ key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. 
*/ continue; +#ifdef IPSEC_OFFLOAD + case SADB_X_EXT_LFT_CUR_SW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + SAV_ADDREF(sav); + error = ipsec_accel_sa_lifetime_op(sav, &lft_c, + NULL, IF_SA_CNT_TOTAL_SW_VAL, sahtree_trackerp); + if (error != 0) { + m = NULL; + goto fail; + } + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + key_freesav(&sav); + if (sav == NULL) { + m_freem(m); + goto fail; + } + break; + case SADB_X_EXT_LFT_CUR_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + memset(&lft_c, 0, sizeof(lft_c)); + lft_c.bytes = sav->accel_hw_octets; + lft_c.allocations = sav->accel_hw_allocs; + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + break; + case SADB_X_EXT_IF_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + m = ipsec_accel_key_setaccelif(sav); + if (m == NULL) + continue; /* benign */ + break; +#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: @@ -4502,6 +4667,7 @@ key_flush_spd(time_t now) V_spd_size--; LIST_REMOVE(sp, idhash); sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); sp = nextsp; } V_sp_genid++; @@ -4625,6 +4791,7 @@ key_flush_sad(time_t now) TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Unlink all SAs with expired HARD lifetime */ @@ -4641,6 +4808,7 @@ key_flush_sad(time_t now) TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Mark all SAs with expired SOFT lifetime as DYING */ @@ -5239,6 +5407,30 @@ key_updateaddresses(struct socket *so, struct mbuf *m, /* Clone SA's content into newsav */ SAV_INITREF(newsav); bcopy(sav, newsav, offsetof(struct secasvar, chain)); +#ifdef IPSEC_OFFLOAD + CK_LIST_INIT(&newsav->accel_ifps); + newsav->accel_forget_tq = 0; + newsav->accel_lft_sw = 
uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (newsav->accel_lft_sw == NULL) { + error = ENOBUFS; + goto fail; + } + if (sav->accel_ifname != NULL) { + struct sadb_x_if_hw_offl xof; + + newsav->accel_ifname = malloc(sizeof(xof.sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsav->accel_ifname == NULL) { + error = ENOBUFS; + goto fail; + } + strncpy(__DECONST(char *, newsav->accel_ifname), + sav->accel_ifname, + sizeof(xof.sadb_x_if_hw_offl_if)); + } +#endif + /* * We create new NAT-T config if it is needed. * Old NAT-T config will be freed by key_cleansav() when @@ -5269,6 +5461,7 @@ key_updateaddresses(struct socket *so, struct mbuf *m, TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); /* * Link new SA with SAH. Keep SAs ordered by @@ -5326,6 +5519,10 @@ fail: if (isnew != 0) key_freesah(&sah); if (newsav != NULL) { +#ifdef IPSEC_OFFLOAD + uma_zfree_pcpu(ipsec_key_lft_zone, newsav->accel_lft_sw); + free(__DECONST(char *, newsav->accel_ifname), M_IPSEC_MISC); +#endif if (newsav->natt != NULL) free(newsav->natt, M_IPSEC_MISC); free(newsav, M_IPSEC_SA); @@ -5540,6 +5737,7 @@ key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) KEYDBG(KEY_STAMP, printf("%s: SA(%p)\n", __func__, sav)); KEYDBG(KEY_DATA, kdebug_secasv(sav)); + ipsec_accel_sa_newkey(sav); key_freesav(&sav); { @@ -5692,6 +5890,7 @@ key_add(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) KEYDBG(KEY_STAMP, printf("%s: return SA(%p)\n", __func__, sav)); KEYDBG(KEY_DATA, kdebug_secasv(sav)); + ipsec_accel_sa_newkey(sav); /* * If SADB_ADD was in response to SADB_ACQUIRE, we need to schedule * ACQ for deletion. 
@@ -6196,6 +6395,7 @@ key_delete_all(struct socket *so, struct mbuf *m, /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6264,6 +6464,7 @@ key_delete_xform(const struct xformsw *xsp) /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6372,7 +6573,7 @@ key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, - mhp->msg->sadb_msg_pid); + mhp->msg->sadb_msg_pid, NULL); key_freesav(&sav); if (!n) @@ -7614,9 +7815,11 @@ key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) */ TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -7638,10 +7841,12 @@ key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } /* Add SAH into flushq */ TAILQ_INSERT_HEAD(&flushq, sah, chain); @@ -7705,6 +7910,7 @@ key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* count sav entries to be sent to the userland. 
*/ cnt = 0; + IFNET_RLOCK(); SAHTREE_RLOCK(); TAILQ_FOREACH(sah, &V_sahtree, chain) { if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && @@ -7719,6 +7925,7 @@ key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) if (cnt == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOENT); } @@ -7731,30 +7938,34 @@ key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); m_freem(m); return (0); } @@ -8175,6 +8386,11 @@ key_align(struct mbuf *m, struct sadb_msghdr *mhp) case SADB_X_EXT_SA_REPLAY: case SADB_X_EXT_NEW_ADDRESS_SRC: case SADB_X_EXT_NEW_ADDRESS_DST: +#ifdef IPSEC_OFFLOAD + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + case SADB_X_EXT_IF_HW_OFFL: +#endif /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -8483,9 +8699,11 @@ key_vnet_destroy(void *arg __unused) sah->state = SADB_SASTATE_DEAD; TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state 
= SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -8633,6 +8851,32 @@ key_setkey(struct seckey *src, uint16_t exttype) return m; } +#ifdef IPSEC_OFFLOAD +struct mbuf * +key_setaccelif(const char *ifname) +{ + struct mbuf *m = NULL; + struct sadb_x_if_hw_offl *p; + int len = PFKEY_ALIGN8(sizeof(*p)); + + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (m); + m_align(m, len); + m->m_len = len; + p = mtod(m, struct sadb_x_if_hw_offl *); + + bzero(p, len); + p->sadb_x_if_hw_offl_len = PFKEY_UNIT64(len); + p->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + p->sadb_x_if_hw_offl_flags = 0; + strncpy(p->sadb_x_if_hw_offl_if, ifname, + sizeof(p->sadb_x_if_hw_offl_if)); + + return (m); +} +#endif + /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to @@ -8708,3 +8952,15 @@ comp_algorithm_lookup(int alg) return (supported_calgs[i].xform); return (NULL); } + +void +ipsec_sahtree_runlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_runlock(&sahtree_lock, sahtree_trackerp); +} + +void +ipsec_sahtree_rlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_rlock(&sahtree_lock, sahtree_trackerp); +} diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h index d62426e6733e..ca0c9036800a 100644 --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -36,6 +36,7 @@ #ifdef _KERNEL +struct mbuf; struct secpolicy; struct secpolicyindex; struct secasvar; @@ -60,6 +61,7 @@ int key_havesp_any(void); void key_bumpspgen(void); uint32_t key_getspgen(void); uint32_t key_newreqid(void); +struct mbuf *key_setaccelif(const char *ifname); struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); struct secasvar *key_allocsa_tunnel(union sockaddr_union *, @@ -85,6 +87,10 @@ extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); uint16_t key_portfromsaddr(struct sockaddr *); void key_porttosaddr(struct sockaddr *, uint16_t port); +struct 
rm_priotracker; +void ipsec_sahtree_runlock(struct rm_priotracker *); +void ipsec_sahtree_rlock(struct rm_priotracker *); + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); MALLOC_DECLARE(M_IPSEC_SAH); diff --git a/sys/netipsec/key_debug.c b/sys/netipsec/key_debug.c index dcb542b22ad8..ead5fe80115b 100644 --- a/sys/netipsec/key_debug.c +++ b/sys/netipsec/key_debug.c @@ -155,6 +155,8 @@ kdebug_sadb_exttype(uint16_t type) X_NAME(SA_REPLAY); X_NAME(NEW_ADDRESS_SRC); X_NAME(NEW_ADDRESS_DST); + X_NAME(LFT_CUR_SW_OFFL); + X_NAME(LFT_CUR_HW_OFFL); default: return ("UNKNOWN"); }; @@ -251,6 +253,10 @@ kdebug_sadb(struct sadb_msg *base) case SADB_X_EXT_NAT_T_DPORT: kdebug_sadb_x_natt(ext); break; + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + kdebug_sadb_lifetime(ext); + break; default: printf("%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type); diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h index 041a5ce1293c..ccc4a68e78fb 100644 --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -36,9 +36,11 @@ #ifdef _KERNEL #include <sys/counter.h> +#include <sys/ck.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/rmlock.h> +#include <sys/_task.h> #include <netipsec/key_var.h> #include <opencrypto/_cryptodev.h> @@ -125,6 +127,7 @@ struct xformsw; struct enc_xform; struct auth_hash; struct comp_algo; +struct ifp_handle_sav; /* * Security Association @@ -185,8 +188,19 @@ struct secasvar { uint64_t cntr; /* counter for GCM and CTR */ volatile u_int refcnt; /* reference count */ + CK_LIST_HEAD(, ifp_handle_sav) accel_ifps; + uintptr_t accel_forget_tq; + const char *accel_ifname; + uint32_t accel_flags; + counter_u64_t accel_lft_sw; + uint64_t accel_hw_allocs; + uint64_t accel_hw_octets; + uint64_t accel_firstused; }; +#define SADB_KEY_ACCEL_INST 0x00000001 +#define SADB_KEY_ACCEL_DEINST 0x00000002 + #define SECASVAR_RLOCK_TRACKER struct rm_priotracker _secas_tracker #define SECASVAR_RLOCK(_sav) rm_rlock((_sav)->lock, &_secas_tracker) 
#define SECASVAR_RUNLOCK(_sav) rm_runlock((_sav)->lock, &_secas_tracker) |
