path: root/sys/contrib/openzfs/module/icp
author     Matt Macy <mmacy@FreeBSD.org>  2020-08-24 23:31:26 +0000
committer  Matt Macy <mmacy@FreeBSD.org>  2020-08-24 23:31:26 +0000
commit     eda14cbc264d6969b02f2b1994cef11148e914f1 (patch)
tree       54766ce51e901d5ec66cdce87973bb1e210588e1 /sys/contrib/openzfs/module/icp
parent     8d9b400f9d02116e528968fa4e7d3c479e326e2a (diff)
parent     3b0ce0e28db46d0403929aba45c682285e1ac217 (diff)
download   src-eda14cbc264d6969b02f2b1994cef11148e914f1.tar.gz
           src-eda14cbc264d6969b02f2b1994cef11148e914f1.zip
Initial import from vendor-sys branch of openzfs
Notes: svn path=/head/; revision=364740
Diffstat (limited to 'sys/contrib/openzfs/module/icp')
-rw-r--r--  sys/contrib/openzfs/module/icp/Makefile.in | 96
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c | 443
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c | 124
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c | 1242
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c | 63
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c | 135
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/edonr/edonr.c | 746
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h | 216
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/cbc.c | 273
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/ccm.c | 907
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/ctr.c | 228
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/ecb.c | 128
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm.c | 1543
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c | 83
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c | 64
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/modes.c | 157
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha1/sha1.c | 835
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha2/sha2.c | 956
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE | 3
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE.descrip | 1
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein.c | 911
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_block.c | 790
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h | 292
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c | 185
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_port.h | 116
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_cipher.c | 930
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_ctxops.c | 151
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_digest.c | 491
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_mac.c | 645
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_miscapi.c | 127
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman | 23
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip | 1
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl | 127
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip | 1
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S | 748
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S | 906
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aeskey.c | 580
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aesopt.h | 770
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab.h | 165
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h | 594
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams | 36
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip | 1
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl | 177
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip | 1
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 1245
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S | 254
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S | 714
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S | 1353
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S | 2063
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S | 2088
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_callprov.c | 1567
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c | 791
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c | 227
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c | 645
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_sched.c | 1782
-rw-r--r--  sys/contrib/openzfs/module/icp/illumos-crypto.c | 158
-rw-r--r--  sys/contrib/openzfs/module/icp/include/aes/aes_impl.h | 227
-rw-r--r--  sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h | 75
-rw-r--r--  sys/contrib/openzfs/module/icp/include/modes/modes.h | 411
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1.h | 61
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h | 65
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h | 73
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h | 219
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h | 64
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h | 46
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/bitmap.h | 183
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h | 137
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h | 1363
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h | 1480
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h | 136
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h | 630
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h | 531
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h | 726
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h | 307
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h | 160
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h | 107
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/modctl.h | 477
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/modhash.h | 147
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h | 108
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/stack.h | 36
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/trap.h | 36
-rw-r--r--  sys/contrib/openzfs/module/icp/io/aes.c | 1439
-rw-r--r--  sys/contrib/openzfs/module/icp/io/edonr_mod.c | 63
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha1_mod.c | 1230
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha2_mod.c | 1399
-rw-r--r--  sys/contrib/openzfs/module/icp/io/skein_mod.c | 729
-rw-r--r--  sys/contrib/openzfs/module/icp/os/modconf.c | 173
-rw-r--r--  sys/contrib/openzfs/module/icp/os/modhash.c | 927
-rw-r--r--  sys/contrib/openzfs/module/icp/spi/kcf_spi.c | 925
89 files changed, 44588 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/module/icp/Makefile.in b/sys/contrib/openzfs/module/icp/Makefile.in
new file mode 100644
index 000000000000..7a01b2f08b8e
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/Makefile.in
@@ -0,0 +1,96 @@
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
+obj = @abs_builddir@
+icp_include = $(src)/include
+else
+icp_include = $(srctree)/$(src)/include
+endif
+
+MODULE := icp
+
+obj-$(CONFIG_ZFS) := $(MODULE).o
+
+asflags-y := -I$(icp_include)
+ccflags-y := -I$(icp_include)
+
+$(MODULE)-objs += illumos-crypto.o
+$(MODULE)-objs += api/kcf_cipher.o
+$(MODULE)-objs += api/kcf_digest.o
+$(MODULE)-objs += api/kcf_mac.o
+$(MODULE)-objs += api/kcf_miscapi.o
+$(MODULE)-objs += api/kcf_ctxops.o
+$(MODULE)-objs += core/kcf_callprov.o
+$(MODULE)-objs += core/kcf_prov_tabs.o
+$(MODULE)-objs += core/kcf_sched.o
+$(MODULE)-objs += core/kcf_mech_tabs.o
+$(MODULE)-objs += core/kcf_prov_lib.o
+$(MODULE)-objs += spi/kcf_spi.o
+$(MODULE)-objs += io/aes.o
+$(MODULE)-objs += io/edonr_mod.o
+$(MODULE)-objs += io/sha1_mod.o
+$(MODULE)-objs += io/sha2_mod.o
+$(MODULE)-objs += io/skein_mod.o
+$(MODULE)-objs += os/modhash.o
+$(MODULE)-objs += os/modconf.o
+$(MODULE)-objs += algs/modes/cbc.o
+$(MODULE)-objs += algs/modes/ccm.o
+$(MODULE)-objs += algs/modes/ctr.o
+$(MODULE)-objs += algs/modes/ecb.o
+$(MODULE)-objs += algs/modes/gcm_generic.o
+$(MODULE)-objs += algs/modes/gcm.o
+$(MODULE)-objs += algs/modes/modes.o
+$(MODULE)-objs += algs/aes/aes_impl_generic.o
+$(MODULE)-objs += algs/aes/aes_impl.o
+$(MODULE)-objs += algs/aes/aes_modes.o
+$(MODULE)-objs += algs/edonr/edonr.o
+$(MODULE)-objs += algs/sha1/sha1.o
+$(MODULE)-objs += algs/sha2/sha2.o
+$(MODULE)-objs += algs/sha1/sha1.o
+$(MODULE)-objs += algs/skein/skein.o
+$(MODULE)-objs += algs/skein/skein_block.o
+$(MODULE)-objs += algs/skein/skein_iv.o
+
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aeskey.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_amd64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_aesni.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/gcm_pclmulqdq.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha1/sha1-x86_64.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha256_impl.o
+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha512_impl.o
+
+$(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o
+$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o
+$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o
+
+# Suppress objtool "can't find jump dest instruction at" warnings. They
+# are caused by the constants which are defined in the text section of the
+# assembly file using .byte instructions (e.g. bswap_mask). The objtool
+# utility tries to interpret them as opcodes and obviously fails doing so.
+OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
+OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
+
+ICP_DIRS = \
+ api \
+ core \
+ spi \
+ io \
+ os \
+ algs \
+ algs/aes \
+ algs/edonr \
+ algs/modes \
+ algs/sha1 \
+ algs/sha2 \
+ algs/skein \
+ asm-x86_64 \
+ asm-x86_64/aes \
+ asm-x86_64/modes \
+ asm-x86_64/sha1 \
+ asm-x86_64/sha2 \
+ asm-i386 \
+ asm-generic
+
+all:
+ mkdir -p $(ICP_DIRS)
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c
new file mode 100644
index 000000000000..037be0db60d7
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c
@@ -0,0 +1,443 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/icp.h>
+#include <sys/crypto/spi.h>
+#include <sys/simd.h>
+#include <modes/modes.h>
+#include <aes/aes_impl.h>
+
+/*
+ * Initialize AES encryption and decryption key schedules.
+ *
+ * Parameters:
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ * keysched AES key schedule to be initialized, of type aes_key_t.
+ * Allocated by aes_alloc_keysched().
+ */
+void
+aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
+{
+ const aes_impl_ops_t *ops = aes_impl_get_ops();
+ aes_key_t *newbie = keysched;
+ uint_t keysize, i, j;
+ union {
+ uint64_t ka64[4];
+ uint32_t ka32[8];
+ } keyarr;
+
+ switch (keyBits) {
+ case 128:
+ newbie->nr = 10;
+ break;
+
+ case 192:
+ newbie->nr = 12;
+ break;
+
+ case 256:
+ newbie->nr = 14;
+ break;
+
+ default:
+ /* should never get here */
+ return;
+ }
+ keysize = CRYPTO_BITS2BYTES(keyBits);
+
+ /*
+ * Generic C implementation requires byteswap for little endian
+ * machines, various accelerated implementations for various
+ * architectures may not.
+ */
+ if (!ops->needs_byteswap) {
+ /* no byteswap needed */
+ if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) {
+ for (i = 0, j = 0; j < keysize; i++, j += 8) {
+ /* LINTED: pointer alignment */
+ keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]);
+ }
+ } else {
+ bcopy(cipherKey, keyarr.ka32, keysize);
+ }
+ } else {
+ /* byte swap */
+ for (i = 0, j = 0; j < keysize; i++, j += 4) {
+ keyarr.ka32[i] =
+ htonl(*(uint32_t *)(void *)&cipherKey[j]);
+ }
+ }
+
+ ops->generate(newbie, keyarr.ka32, keyBits);
+ newbie->ops = ops;
+
+ /*
+ * Note: if there are systems that need the AES_64BIT_KS type in the
+ * future, move setting key schedule type to individual implementations
+ */
+ newbie->type = AES_32BIT_KS;
+}
+
+
+/*
+ * Encrypt one block using AES.
+ * Align if needed and (for x86 32-bit only) byte-swap.
+ *
+ * Parameters:
+ * ks Key schedule, of type aes_key_t
+ * pt Input block (plain text)
+ * ct Output block (crypto text). Can overlap with pt
+ */
+int
+aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
+{
+ aes_key_t *ksch = (aes_key_t *)ks;
+ const aes_impl_ops_t *ops = ksch->ops;
+
+ if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t)) && !ops->needs_byteswap) {
+ /* LINTED: pointer alignment */
+ ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr,
+ /* LINTED: pointer alignment */
+ (uint32_t *)pt, (uint32_t *)ct);
+ } else {
+ uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
+
+ /* Copy input block into buffer */
+ if (ops->needs_byteswap) {
+ buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]);
+ buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]);
+ buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
+ buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
+ } else
+ bcopy(pt, &buffer, AES_BLOCK_LEN);
+
+ ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, buffer, buffer);
+
+ /* Copy result from buffer to output block */
+ if (ops->needs_byteswap) {
+ *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]);
+ *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]);
+ *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
+ *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
+ } else
+ bcopy(&buffer, ct, AES_BLOCK_LEN);
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+
+/*
+ * Decrypt one block using AES.
+ * Align and byte-swap if needed.
+ *
+ * Parameters:
+ * ks Key schedule, of type aes_key_t
+ * ct Input block (crypto text)
+ * pt Output block (plain text). Can overlap with ct
+ */
+int
+aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
+{
+ aes_key_t *ksch = (aes_key_t *)ks;
+ const aes_impl_ops_t *ops = ksch->ops;
+
+ if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t)) && !ops->needs_byteswap) {
+ /* LINTED: pointer alignment */
+ ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr,
+ /* LINTED: pointer alignment */
+ (uint32_t *)ct, (uint32_t *)pt);
+ } else {
+ uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
+
+ /* Copy input block into buffer */
+ if (ops->needs_byteswap) {
+ buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]);
+ buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]);
+ buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
+ buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
+ } else
+ bcopy(ct, &buffer, AES_BLOCK_LEN);
+
+ ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, buffer, buffer);
+
+ /* Copy result from buffer to output block */
+ if (ops->needs_byteswap) {
+ *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]);
+ *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]);
+ *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
+ *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
+ } else
+ bcopy(&buffer, pt, AES_BLOCK_LEN);
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+
+/*
+ * Allocate key schedule for AES.
+ *
+ * Return the pointer and set size to the number of bytes allocated.
+ * Memory allocated must be freed by the caller when done.
+ *
+ * Parameters:
+ * size Size of key schedule allocated, in bytes
+ * kmflag Flag passed to kmem_alloc(9F); ignored in userland.
+ */
+/* ARGSUSED */
+void *
+aes_alloc_keysched(size_t *size, int kmflag)
+{
+ aes_key_t *keysched;
+
+ keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
+ if (keysched != NULL) {
+ *size = sizeof (aes_key_t);
+ return (keysched);
+ }
+ return (NULL);
+}
+
+/* AES implementation that contains the fastest methods */
+static aes_impl_ops_t aes_fastest_impl = {
+ .name = "fastest"
+};
+
+/* All compiled in implementations */
+const aes_impl_ops_t *aes_all_impl[] = {
+ &aes_generic_impl,
+#if defined(__x86_64)
+ &aes_x86_64_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AES)
+ &aes_aesni_impl,
+#endif
+};
+
+/* Indicate that benchmark has been completed */
+static boolean_t aes_impl_initialized = B_FALSE;
+
+/* Select aes implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX-1)
+
+#define AES_IMPL_READ(i) (*(volatile uint32_t *) &(i))
+
+static uint32_t icp_aes_impl = IMPL_FASTEST;
+static uint32_t user_sel_impl = IMPL_FASTEST;
+
+/* Hold all supported implementations */
+static size_t aes_supp_impl_cnt = 0;
+static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)];
+
+/*
+ * Returns the AES operations for encrypt/decrypt/key setup. When a
+ * SIMD implementation is not allowed in the current context, then
+ * fallback to the fastest generic implementation.
+ */
+const aes_impl_ops_t *
+aes_impl_get_ops(void)
+{
+ if (!kfpu_allowed())
+ return (&aes_generic_impl);
+
+ const aes_impl_ops_t *ops = NULL;
+ const uint32_t impl = AES_IMPL_READ(icp_aes_impl);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(aes_impl_initialized);
+ ops = &aes_fastest_impl;
+ break;
+ case IMPL_CYCLE:
+ /* Cycle through supported implementations */
+ ASSERT(aes_impl_initialized);
+ ASSERT3U(aes_supp_impl_cnt, >, 0);
+ static size_t cycle_impl_idx = 0;
+ size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt;
+ ops = aes_supp_impl[idx];
+ break;
+ default:
+ ASSERT3U(impl, <, aes_supp_impl_cnt);
+ ASSERT3U(aes_supp_impl_cnt, >, 0);
+ if (impl < ARRAY_SIZE(aes_all_impl))
+ ops = aes_supp_impl[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+/*
+ * Initialize all supported implementations.
+ */
+void
+aes_impl_init(void)
+{
+ aes_impl_ops_t *curr_impl;
+ int i, c;
+
+ /* Move supported implementations into aes_supp_impls */
+ for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) {
+ curr_impl = (aes_impl_ops_t *)aes_all_impl[i];
+
+ if (curr_impl->is_supported())
+ aes_supp_impl[c++] = (aes_impl_ops_t *)curr_impl;
+ }
+ aes_supp_impl_cnt = c;
+
+ /*
+ * Set the fastest implementation given the assumption that the
+ * hardware accelerated version is the fastest.
+ */
+#if defined(__x86_64)
+#if defined(HAVE_AES)
+ if (aes_aesni_impl.is_supported()) {
+ memcpy(&aes_fastest_impl, &aes_aesni_impl,
+ sizeof (aes_fastest_impl));
+ } else
+#endif
+ {
+ memcpy(&aes_fastest_impl, &aes_x86_64_impl,
+ sizeof (aes_fastest_impl));
+ }
+#else
+ memcpy(&aes_fastest_impl, &aes_generic_impl,
+ sizeof (aes_fastest_impl));
+#endif
+
+ strlcpy(aes_fastest_impl.name, "fastest", AES_IMPL_NAME_MAX);
+
+ /* Finish initialization */
+ atomic_swap_32(&icp_aes_impl, user_sel_impl);
+ aes_impl_initialized = B_TRUE;
+}
+
+static const struct {
+ char *name;
+ uint32_t sel;
+} aes_impl_opts[] = {
+ { "cycle", IMPL_CYCLE },
+ { "fastest", IMPL_FASTEST },
+};
+
+/*
+ * Function sets desired aes implementation.
+ *
+ * If we are called before init(), user preference will be saved in
+ * user_sel_impl, and applied in later init() call. This occurs when module
+ * parameter is specified on module load. Otherwise, directly update
+ * icp_aes_impl.
+ *
+ * @val Name of aes implementation to use
+ * @param Unused.
+ */
+int
+aes_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ char req_name[AES_IMPL_NAME_MAX];
+ uint32_t impl = AES_IMPL_READ(user_sel_impl);
+ size_t i;
+
+ /* sanitize input */
+ i = strnlen(val, AES_IMPL_NAME_MAX);
+ if (i == 0 || i >= AES_IMPL_NAME_MAX)
+ return (err);
+
+ strlcpy(req_name, val, AES_IMPL_NAME_MAX);
+ while (i > 0 && isspace(req_name[i-1]))
+ i--;
+ req_name[i] = '\0';
+
+ /* Check mandatory options */
+ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) {
+ if (strcmp(req_name, aes_impl_opts[i].name) == 0) {
+ impl = aes_impl_opts[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported impl if init() was already called */
+ if (err != 0 && aes_impl_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < aes_supp_impl_cnt; i++) {
+ if (strcmp(req_name, aes_supp_impl[i]->name) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ if (err == 0) {
+ if (aes_impl_initialized)
+ atomic_swap_32(&icp_aes_impl, impl);
+ else
+ atomic_swap_32(&user_sel_impl, impl);
+ }
+
+ return (err);
+}
+
+#if defined(_KERNEL) && defined(__linux__)
+
+static int
+icp_aes_impl_set(const char *val, zfs_kernel_param_t *kp)
+{
+ return (aes_impl_set(val));
+}
+
+static int
+icp_aes_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+ int i, cnt = 0;
+ char *fmt;
+ const uint32_t impl = AES_IMPL_READ(icp_aes_impl);
+
+ ASSERT(aes_impl_initialized);
+
+ /* list mandatory options */
+ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) {
+ fmt = (impl == aes_impl_opts[i].sel) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, aes_impl_opts[i].name);
+ }
+
+ /* list all supported implementations */
+ for (i = 0; i < aes_supp_impl_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, aes_supp_impl[i]->name);
+ }
+
+ return (cnt);
+}
+
+module_param_call(icp_aes_impl, icp_aes_impl_set, icp_aes_impl_get,
+ NULL, 0644);
+MODULE_PARM_DESC(icp_aes_impl, "Select aes implementation.");
+#endif
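
For orientation only (not part of the imported sources): a minimal sketch of how a caller inside the ICP could drive the key-schedule API that aes_impl.c exports above, assuming the usual kmem_alloc()/CRYPTO_* conventions pulled in via sys/zfs_context.h. The helper name is hypothetical.

/* Illustrative sketch only -- not part of commit eda14cbc264d. */
#include <sys/zfs_context.h>
#include <aes/aes_impl.h>

static int
aes_one_block_example(const uint8_t key[32],
    const uint8_t pt[AES_BLOCK_LEN], uint8_t ct[AES_BLOCK_LEN])
{
	size_t ks_size;
	void *ks;
	int ret;

	/* aes_alloc_keysched() returns an aes_key_t sized for any key length. */
	ks = aes_alloc_keysched(&ks_size, KM_SLEEP);
	if (ks == NULL)
		return (CRYPTO_HOST_MEMORY);

	/* Expand a 256-bit key into the encryption/decryption schedules. */
	aes_init_keysched(key, 256, ks);

	/* Encrypt a single 16-byte block through the selected implementation. */
	ret = aes_encrypt_block(ks, pt, ct);

	bzero(ks, ks_size);
	kmem_free(ks, ks_size);
	return (ret);
}

In the imported code a caller such as io/aes.c reaches these entry points through the block-cipher mode framework in algs/modes/ rather than directly.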
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c
new file mode 100644
index 000000000000..4b5eefd71b17
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c
@@ -0,0 +1,124 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(__x86_64) && defined(HAVE_AES)
+
+#include <sys/simd.h>
+#include <sys/types.h>
+
+/* These functions are used to execute AES-NI instructions: */
+extern int rijndael_key_setup_enc_intel(uint32_t rk[],
+ const uint32_t cipherKey[], uint64_t keyBits);
+extern int rijndael_key_setup_dec_intel(uint32_t rk[],
+ const uint32_t cipherKey[], uint64_t keyBits);
+extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
+ const uint32_t pt[4], uint32_t ct[4]);
+extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
+ const uint32_t ct[4], uint32_t pt[4]);
+
+
+#include <aes/aes_impl.h>
+
+/*
+ * Expand the 32-bit AES cipher key array into the encryption and decryption
+ * key schedules.
+ *
+ * Parameters:
+ * key AES key schedule to be initialized
+ * keyarr32 User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static void
+aes_aesni_generate(aes_key_t *key, const uint32_t *keyarr32, int keybits)
+{
+ kfpu_begin();
+ key->nr = rijndael_key_setup_enc_intel(&(key->encr_ks.ks32[0]),
+ keyarr32, keybits);
+ key->nr = rijndael_key_setup_dec_intel(&(key->decr_ks.ks32[0]),
+ keyarr32, keybits);
+ kfpu_end();
+}
+
+/*
+ * Encrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copy for alignment (and byte-order
+ * reversal for little endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * pt Input block (plain text)
+ * ct Output block (crypto text). Can overlap with pt
+ */
+static void
+aes_aesni_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4])
+{
+ kfpu_begin();
+ aes_encrypt_intel(rk, Nr, pt, ct);
+ kfpu_end();
+}
+
+/*
+ * Decrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copy for alignment (and byte-order
+ * reversal for little endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * ct Input block (crypto text)
+ * pt Output block (plain text). Can overlap with ct
+ */
+static void
+aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4])
+{
+ kfpu_begin();
+ aes_decrypt_intel(rk, Nr, ct, pt);
+ kfpu_end();
+}
+
+static boolean_t
+aes_aesni_will_work(void)
+{
+ return (kfpu_allowed() && zfs_aes_available());
+}
+
+const aes_impl_ops_t aes_aesni_impl = {
+ .generate = &aes_aesni_generate,
+ .encrypt = &aes_aesni_encrypt,
+ .decrypt = &aes_aesni_decrypt,
+ .is_supported = &aes_aesni_will_work,
+ .needs_byteswap = B_FALSE,
+ .name = "aesni"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_AES) */
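
The aesni backend above illustrates the aes_impl_ops_t contract that aes_impl.c dispatches through. As a rough sketch (hypothetical "myarch" names, not part of the imported sources), a new backend only has to provide the same five entry points and, in aes_impl.c, an entry in aes_all_impl[]:

/* Hypothetical backend skeleton -- the myarch names do not exist in the import. */
#include <aes/aes_impl.h>

static boolean_t
aes_myarch_will_work(void)
{
	/* Report whether the required CPU feature is usable in this context. */
	return (B_FALSE);
}

static void
aes_myarch_generate(aes_key_t *key, const uint32_t *keyarr32, int keybits)
{
	/* Fill key->encr_ks, key->decr_ks and key->nr for the given key size. */
}

static void
aes_myarch_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	/* Encrypt one 128-bit block using the expanded schedule in rk[]. */
}

static void
aes_myarch_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	/* Decrypt one 128-bit block using the expanded schedule in rk[]. */
}

const aes_impl_ops_t aes_myarch_impl = {
	.generate = &aes_myarch_generate,
	.encrypt = &aes_myarch_encrypt,
	.decrypt = &aes_myarch_decrypt,
	.is_supported = &aes_myarch_will_work,
	.needs_byteswap = B_FALSE,
	.name = "myarch"
};

With that in place, aes_impl_init() would pick the backend up at module load and aes_impl_set() would accept "myarch" for the icp_aes_impl parameter, just as it accepts "aesni" for the implementation above.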
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c
new file mode 100644
index 000000000000..427c096c6ab3
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c
@@ -0,0 +1,1242 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <aes/aes_impl.h>
+
+/*
+ * This file is derived from the file rijndael-alg-fst.c taken from the
+ * "optimized C code v3.0" on the "rijndael home page"
+ * http://www.iaik.tu-graz.ac.at/research/krypto/AES/old/~rijmen/rijndael/
+ * pointed by the NIST web-site http://csrc.nist.gov/archive/aes/
+ *
+ * The following note is from the original file:
+ */
+
+/*
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Constant tables
+ */
+
+/*
+ * Te0[x] = S [x].[02, 01, 01, 03];
+ * Te1[x] = S [x].[03, 02, 01, 01];
+ * Te2[x] = S [x].[01, 03, 02, 01];
+ * Te3[x] = S [x].[01, 01, 03, 02];
+ * Te4[x] = S [x].[01, 01, 01, 01];
+ *
+ * Td0[x] = Si[x].[0e, 09, 0d, 0b];
+ * Td1[x] = Si[x].[0b, 0e, 09, 0d];
+ * Td2[x] = Si[x].[0d, 0b, 0e, 09];
+ * Td3[x] = Si[x].[09, 0d, 0b, 0e];
+ * Td4[x] = Si[x].[01, 01, 01, 01];
+ */
+
+/* Encrypt Sbox constants (for the substitute bytes operation) */
+
+static const uint32_t Te0[256] =
+{
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU
+};
+
+
+static const uint32_t Te1[256] =
+{
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U
+};
+
+
+static const uint32_t Te2[256] =
+{
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U
+};
+
+
+static const uint32_t Te3[256] =
+{
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU
+};
+
+static const uint32_t Te4[256] =
+{
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U
+};
+
+/* Decrypt Sbox constants (for the substitute bytes operation) */
+
+static const uint32_t Td0[256] =
+{
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U
+};
+
+static const uint32_t Td1[256] =
+{
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U
+};
+
+static const uint32_t Td2[256] =
+{
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U
+};
+
+static const uint32_t Td3[256] =
+{
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U
+};
+
+static const uint32_t Td4[256] =
+{
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU
+};
+
+/* Rcon is Round Constant; used for encryption key expansion */
+static const uint32_t rcon[RC_LENGTH] =
+{
+ /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000
+};
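
The ten constants above are the successive powers of x in GF(2^8) (doubling modulo the AES
polynomial, 0x11b), placed in the most significant byte. A minimal standalone sketch that
reproduces them, for illustration only:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint8_t c = 0x01;

		for (int i = 0; i < 10; i++) {
			printf("0x%08X\n", (uint32_t)c << 24);
			/* multiply by x in GF(2^8): shift, reduce by 0x1b on carry */
			c = (uint8_t)((c << 1) ^ ((c & 0x80) ? 0x1b : 0x00));
		}
		return (0);
	}
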
+
+
+/*
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static int
+rijndael_key_setup_enc(uint32_t rk[], const uint32_t cipherKey[],
+ int keyBits)
+{
+ int i = 0;
+ uint32_t temp;
+
+ rk[0] = cipherKey[0];
+ rk[1] = cipherKey[1];
+ rk[2] = cipherKey[2];
+ rk[3] = cipherKey[3];
+
+ if (keyBits == 128) {
+ for (;;) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[temp & 0xff] & 0x0000ff00) ^
+ (Te4[temp >> 24] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+
+ if (++i == 10) {
+ return (10);
+ }
+ rk += 4;
+ }
+ }
+
+ rk[4] = cipherKey[4];
+ rk[5] = cipherKey[5];
+
+ if (keyBits == 192) {
+ for (;;) {
+ temp = rk[5];
+ rk[6] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[temp & 0xff] & 0x0000ff00) ^
+ (Te4[temp >> 24] & 0x000000ff) ^
+ rcon[i];
+ rk[7] = rk[1] ^ rk[6];
+ rk[8] = rk[2] ^ rk[7];
+ rk[9] = rk[3] ^ rk[8];
+
+ if (++i == 8) {
+ return (12);
+ }
+
+ rk[10] = rk[4] ^ rk[9];
+ rk[11] = rk[5] ^ rk[10];
+ rk += 6;
+ }
+ }
+
+ rk[6] = cipherKey[6];
+ rk[7] = cipherKey[7];
+
+ if (keyBits == 256) {
+ for (;;) {
+ temp = rk[7];
+ rk[8] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[temp & 0xff] & 0x0000ff00) ^
+ (Te4[temp >> 24] & 0x000000ff) ^
+ rcon[i];
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+
+ if (++i == 7) {
+ return (14);
+ }
+ temp = rk[11];
+ rk[12] = rk[4] ^
+ (Te4[temp >> 24] & 0xff000000) ^
+ (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[temp & 0xff] & 0x000000ff);
+ rk[13] = rk[5] ^ rk[12];
+ rk[14] = rk[6] ^ rk[13];
+ rk[15] = rk[7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+
+ return (0);
+}
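
In FIPS-197 terms, each pass through the 128-bit loop above computes the key-expansion
recurrence below; the masked Te4 lookups assemble SubWord(RotWord(temp)) one byte at a time
(this reading assumes Te4, defined earlier in this file, replicates the S-box output across
all four bytes of the word):

	rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon[i]
	rk[5] = rk[1] ^ rk[4]
	rk[6] = rk[2] ^ rk[5]
	rk[7] = rk[3] ^ rk[6]

The return values 10, 12 and 14 are the AES round counts Nr for 128-, 192- and 256-bit keys,
giving key schedules of 4*(Nr + 1) = 44, 52 and 60 32-bit words respectively.
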
+
+/*
+ * Expand the cipher key into the decryption key schedule.
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static int
+rijndael_key_setup_dec(uint32_t rk[], const uint32_t cipherKey[], int keyBits)
+{
+ int Nr, i, j;
+ uint32_t temp;
+
+ /* expand the cipher key: */
+ Nr = rijndael_key_setup_enc(rk, cipherKey, keyBits);
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4 * Nr; i < j; i += 4, j -= 4) {
+ temp = rk[i];
+ rk[i] = rk[j];
+ rk[j] = temp;
+ temp = rk[i + 1];
+ rk[i + 1] = rk[j + 1];
+ rk[j + 1] = temp;
+ temp = rk[i + 2];
+ rk[i + 2] = rk[j + 2];
+ rk[j + 2] = temp;
+ temp = rk[i + 3];
+ rk[i + 3] = rk[j + 3];
+ rk[j + 3] = temp;
+ }
+
+ /*
+ * apply the inverse MixColumn transform to all
+ * round keys but the first and the last:
+ */
+ for (i = 1; i < Nr; i++) {
+ rk += 4;
+ rk[0] = Td0[Te4[rk[0] >> 24] & 0xff] ^
+ Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[0] & 0xff] & 0xff];
+ rk[1] = Td0[Te4[rk[1] >> 24] & 0xff] ^
+ Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[1] & 0xff] & 0xff];
+ rk[2] = Td0[Te4[rk[2] >> 24] & 0xff] ^
+ Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[2] & 0xff] & 0xff];
+ rk[3] = Td0[Te4[rk[3] >> 24] & 0xff] ^
+ Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[3] & 0xff] & 0xff];
+ }
+
+ return (Nr);
+}
+
+/*
+ * Expand the 32-bit AES cipher key array into the encryption and decryption
+ * key schedules.
+ *
+ * Parameters:
+ * key AES key schedule to be initialized
+ * keyarr32 User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static void
+aes_generic_generate(aes_key_t *key, const uint32_t *keyarr32, int keybits)
+{
+ key->nr = rijndael_key_setup_enc(&(key->encr_ks.ks32[0]), keyarr32,
+ keybits);
+ key->nr = rijndael_key_setup_dec(&(key->decr_ks.ks32[0]), keyarr32,
+ keybits);
+}
+
+/*
+ * Encrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copying for alignment (and byte-order
+ * reversal on little-endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * pt Input block (plain text)
+ * ct Output block (crypto text). Can overlap with pt
+ */
+static void
+aes_generic_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4])
+{
+ uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
+ int r;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+
+ s0 = pt[0] ^ rk[0];
+ s1 = pt[1] ^ rk[1];
+ s2 = pt[2] ^ rk[2];
+ s3 = pt[3] ^ rk[3];
+
+ /*
+ * Nr - 1 full rounds:
+ */
+
+ r = Nr >> 1;
+
+ for (;;) {
+ t0 = Te0[s0 >> 24] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[s3 & 0xff] ^
+ rk[4];
+
+ t1 = Te0[s1 >> 24] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[s0 & 0xff] ^
+ rk[5];
+
+ t2 = Te0[s2 >> 24] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[s1 & 0xff] ^
+ rk[6];
+
+ t3 = Te0[s3 >> 24] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[s2 & 0xff] ^
+ rk[7];
+
+ rk += 8;
+
+ if (--r == 0) {
+ break;
+ }
+
+ s0 = Te0[t0 >> 24] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[t3 & 0xff] ^
+ rk[0];
+
+ s1 = Te0[t1 >> 24] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[t0 & 0xff] ^
+ rk[1];
+
+ s2 = Te0[t2 >> 24] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[t1 & 0xff] ^
+ rk[2];
+
+ s3 = Te0[t3 >> 24] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[t2 & 0xff] ^
+ rk[3];
+ }
+
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+
+ s0 = (Te4[(t0 >> 24)] & 0xff000000) ^
+ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t3 & 0xff] & 0x000000ff) ^
+ rk[0];
+ ct[0] = s0;
+
+ s1 = (Te4[(t1 >> 24)] & 0xff000000) ^
+ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t0 & 0xff] & 0x000000ff) ^
+ rk[1];
+ ct[1] = s1;
+
+ s2 = (Te4[(t2 >> 24)] & 0xff000000) ^
+ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t1 & 0xff] & 0x000000ff) ^
+ rk[2];
+ ct[2] = s2;
+
+ s3 = (Te4[(t3 >> 24)] & 0xff000000) ^
+ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t2 & 0xff] & 0x000000ff) ^
+ rk[3];
+ ct[3] = s3;
+}
+
+
+/*
+ * Decrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copying for alignment (and byte-order
+ * reversal on little-endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * ct Input block (crypto text)
+ * pt Output block (plain text). Can overlap with ct
+ */
+static void
+aes_generic_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4])
+{
+ uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
+ int r;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = ct[0] ^ rk[0];
+ s1 = ct[1] ^ rk[1];
+ s2 = ct[2] ^ rk[2];
+ s3 = ct[3] ^ rk[3];
+
+ /*
+ * Nr - 1 full rounds:
+ */
+
+ r = Nr >> 1;
+
+ for (;;) {
+ t0 = Td0[s0 >> 24] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[s1 & 0xff] ^
+ rk[4];
+
+ t1 = Td0[s1 >> 24] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[s2 & 0xff] ^
+ rk[5];
+
+ t2 = Td0[s2 >> 24] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[s3 & 0xff] ^
+ rk[6];
+
+ t3 = Td0[s3 >> 24] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[s0 & 0xff] ^
+ rk[7];
+
+ rk += 8;
+
+ if (--r == 0) {
+ break;
+ }
+
+ s0 = Td0[t0 >> 24] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[t1 & 0xff] ^
+ rk[0];
+
+ s1 = Td0[t1 >> 24] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[t2 & 0xff] ^
+ rk[1];
+
+ s2 = Td0[t2 >> 24] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[t3 & 0xff] ^
+ rk[2];
+
+ s3 = Td0[t3 >> 24] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[t0 & 0xff] ^
+ rk[3];
+ }
+
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+
+ s0 = (Td4[t0 >> 24] & 0xff000000) ^
+ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t1 & 0xff] & 0x000000ff) ^
+ rk[0];
+ pt[0] = s0;
+
+ s1 = (Td4[t1 >> 24] & 0xff000000) ^
+ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t2 & 0xff] & 0x000000ff) ^
+ rk[1];
+ pt[1] = s1;
+
+ s2 = (Td4[t2 >> 24] & 0xff000000) ^
+ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t3 & 0xff] & 0x000000ff) ^
+ rk[2];
+ pt[2] = s2;
+
+ s3 = (Td4[t3 >> 24] & 0xff000000) ^
+ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t0 & 0xff] & 0x000000ff) ^
+ rk[3];
+ pt[3] = s3;
+}
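
A minimal single-block round trip through the three routines above might look like the
following sketch (key and plaintext contents are placeholders; any byte-order handling is
left to the caller, as the comments above note):

	uint32_t key32[4] = { 0 };	/* 128-bit key */
	uint32_t ks[60];		/* big enough for any key size */
	uint32_t pt[4] = { 0 }, ct[4], out[4];
	int nr;

	nr = rijndael_key_setup_enc(ks, key32, 128);	/* nr == 10 */
	aes_generic_encrypt(ks, nr, pt, ct);

	nr = rijndael_key_setup_dec(ks, key32, 128);
	aes_generic_decrypt(ks, nr, ct, out);		/* out matches pt */
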
+
+static boolean_t
+aes_generic_will_work(void)
+{
+ return (B_TRUE);
+}
+
+/*
+ * For _LITTLE_ENDIAN machines, reverse every 4 bytes in the key.
+ * On _BIG_ENDIAN, copy the key without reversing bytes.
+ *
+ * SPARCv8/v9 uses a key schedule array with 64-bit elements.
+ * X86/AMD64 uses a key schedule array with 32-bit elements.
+ */
+const aes_impl_ops_t aes_generic_impl = {
+ .generate = &aes_generic_generate,
+ .encrypt = &aes_generic_encrypt,
+ .decrypt = &aes_generic_decrypt,
+ .is_supported = &aes_generic_will_work,
+#if defined(_ZFS_LITTLE_ENDIAN)
+ .needs_byteswap = B_TRUE,
+#else
+ .needs_byteswap = B_FALSE,
+#endif
+ .name = "generic"
+};
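
Callers are expected to reach this implementation through the ops table rather than the
static functions directly; a rough sketch of such dispatch (the aes_key_t and key buffer
here are placeholders):

	const aes_impl_ops_t *ops = &aes_generic_impl;
	aes_key_t key;
	uint32_t keyarr32[8];	/* byteswapped first if ops->needs_byteswap */

	if (ops->is_supported())
		ops->generate(&key, keyarr32, 256);
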
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c
new file mode 100644
index 000000000000..19f8fd5012cf
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(__x86_64)
+
+#include <sys/simd.h>
+#include <aes/aes_impl.h>
+
+/*
+ * Expand the 32-bit AES cipher key array into the encryption and decryption
+ * key schedules.
+ *
+ * Parameters:
+ * key AES key schedule to be initialized
+ * keyarr32 User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static void
+aes_x86_64_generate(aes_key_t *key, const uint32_t *keyarr32, int keybits)
+{
+ key->nr = rijndael_key_setup_enc_amd64(&(key->encr_ks.ks32[0]),
+ keyarr32, keybits);
+ key->nr = rijndael_key_setup_dec_amd64(&(key->decr_ks.ks32[0]),
+ keyarr32, keybits);
+}
+
+static boolean_t
+aes_x86_64_will_work(void)
+{
+ return (B_TRUE);
+}
+
+const aes_impl_ops_t aes_x86_64_impl = {
+ .generate = &aes_x86_64_generate,
+ .encrypt = &aes_encrypt_amd64,
+ .decrypt = &aes_decrypt_amd64,
+ .is_supported = &aes_x86_64_will_work,
+ .needs_byteswap = B_FALSE,
+ .name = "x86_64"
+};
+
+#endif /* defined(__x86_64) */
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c
new file mode 100644
index 000000000000..9e4b498fffcb
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <aes/aes_impl.h>
+
+/* Copy a 16-byte AES block from "in" to "out" */
+void
+aes_copy_block(uint8_t *in, uint8_t *out)
+{
+ if (IS_P2ALIGNED2(in, out, sizeof (uint32_t))) {
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[0] = *(uint32_t *)&in[0];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[4] = *(uint32_t *)&in[4];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[8] = *(uint32_t *)&in[8];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[12] = *(uint32_t *)&in[12];
+ } else {
+ AES_COPY_BLOCK(in, out);
+ }
+}
+
+
+/* XOR a 16-byte AES block of data into dst */
+void
+aes_xor_block(uint8_t *data, uint8_t *dst)
+{
+ if (IS_P2ALIGNED2(dst, data, sizeof (uint32_t))) {
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[0] ^= *(uint32_t *)&data[0];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[4] ^= *(uint32_t *)&data[4];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[8] ^= *(uint32_t *)&data[8];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[12] ^= *(uint32_t *)&data[12];
+ } else {
+ AES_XOR_BLOCK(data, dst);
+ }
+}
+
+
+/*
+ * Encrypt multiple blocks of data according to mode.
+ */
+int
+aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
+ crypto_data_t *out)
+{
+ aes_ctx_t *aes_ctx = ctx;
+ int rv;
+
+ if (aes_ctx->ac_flags & CTR_MODE) {
+ rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ } else if (aes_ctx->ac_flags & CCM_MODE) {
+ rv = ccm_mode_encrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ rv = gcm_mode_encrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & CBC_MODE) {
+ rv = cbc_encrypt_contiguous_blocks(ctx,
+ data, length, out, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_copy_block, aes_xor_block);
+ } else {
+ rv = ecb_cipher_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_encrypt_block);
+ }
+ return (rv);
+}
+
+
+/*
+ * Decrypt multiple blocks of data according to mode.
+ */
+int
+aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
+ crypto_data_t *out)
+{
+ aes_ctx_t *aes_ctx = ctx;
+ int rv;
+
+ if (aes_ctx->ac_flags & CTR_MODE) {
+ rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ if (rv == CRYPTO_DATA_LEN_RANGE)
+ rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ } else if (aes_ctx->ac_flags & CCM_MODE) {
+ rv = ccm_mode_decrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ rv = gcm_mode_decrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & CBC_MODE) {
+ rv = cbc_decrypt_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_decrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else {
+ rv = ecb_cipher_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_decrypt_block);
+ if (rv == CRYPTO_DATA_LEN_RANGE)
+ rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ }
+ return (rv);
+}
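
Note that the CTR branch above passes aes_encrypt_block even on the decrypt path: CTR mode
turns the block cipher into a keystream generator, so both directions reduce to the same
XOR. Roughly, and with placeholder buffers:

	uint8_t keystream[AES_BLOCK_LEN];

	aes_encrypt_block(keysched, counter_block, keystream);
	for (int i = 0; i < AES_BLOCK_LEN; i++)
		out[i] = in[i] ^ keystream[i];	/* encrypts and decrypts alike */
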
diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
new file mode 100644
index 000000000000..7c677095f1ef
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
@@ -0,0 +1,746 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
+ * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
+ *
+ * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+#include <sys/strings.h>
+#include <sys/edonr.h>
+#include <sys/debug.h>
+
+/* big endian support, provides no-op's if run on little endian hosts */
+#include "edonr_byteorder.h"
+
+#define hashState224(x) ((x)->pipe->p256)
+#define hashState256(x) ((x)->pipe->p256)
+#define hashState384(x) ((x)->pipe->p512)
+#define hashState512(x) ((x)->pipe->p512)
+
+/* shift and rotate shortcuts */
+#define shl(x, n) ((x) << n)
+#define shr(x, n) ((x) >> n)
+
+#define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+
+#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+
+#if !defined(__C99_RESTRICT)
+#define restrict /* restrict */
+#endif
+
+#define EDONR_VALID_HASHBITLEN(x) \
+ ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224)
+
+/* EdonR224 initial double chaining pipe */
+static const uint32_t i224p2[16] = {
+ 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful,
+ 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful,
+ 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful,
+ 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful,
+};
+
+/* EdonR256 initial double chaining pipe */
+static const uint32_t i256p2[16] = {
+ 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful,
+ 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful,
+ 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful,
+ 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful,
+};
+
+/* EdonR384 initial double chaining pipe */
+static const uint64_t i384p2[16] = {
+ 0x0001020304050607ull, 0x08090a0b0c0d0e0full,
+ 0x1011121314151617ull, 0x18191a1b1c1d1e1full,
+ 0x2021222324252627ull, 0x28292a2b2c2d2e2full,
+ 0x3031323334353637ull, 0x38393a3b3c3d3e3full,
+ 0x4041424344454647ull, 0x48494a4b4c4d4e4full,
+ 0x5051525354555657ull, 0x58595a5b5c5d5e5full,
+ 0x6061626364656667ull, 0x68696a6b6c6d6e6full,
+ 0x7071727374757677ull, 0x78797a7b7c7d7e7full
+};
+
+/* EdonR512 initial double chaining pipe */
+static const uint64_t i512p2[16] = {
+ 0x8081828384858687ull, 0x88898a8b8c8d8e8full,
+ 0x9091929394959697ull, 0x98999a9b9c9d9e9full,
+ 0xa0a1a2a3a4a5a6a7ull, 0xa8a9aaabacadaeafull,
+ 0xb0b1b2b3b4b5b6b7ull, 0xb8b9babbbcbdbebfull,
+ 0xc0c1c2c3c4c5c6c7ull, 0xc8c9cacbcccdcecfull,
+ 0xd0d1d2d3d4d5d6d7ull, 0xd8d9dadbdcdddedfull,
+ 0xe0e1e2e3e4e5e6e7ull, 0xe8e9eaebecedeeefull,
+ 0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull
+};
+
+/*
+ * First Latin Square
+ * 0 7 1 3 2 4 6 5
+ * 4 1 7 6 3 0 5 2
+ * 7 0 4 2 5 3 1 6
+ * 1 4 0 5 6 2 7 3
+ * 2 3 6 7 1 5 0 4
+ * 5 2 3 1 7 6 4 0
+ * 3 6 5 0 4 7 2 1
+ * 6 5 2 4 0 1 3 7
+ */
+#define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \
+{ \
+ uint32_t x04, x17, x23, x56, x07, x26; \
+ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \
+ s0 = c + x07 + x2; \
+ s1 = rotl32(x07 + x3, 4); \
+ s2 = rotl32(x07 + x6, 8); \
+ x23 = x2 + x3; \
+ s5 = rotl32(x04 + x23 + x5, 22); \
+ x56 = x5 + x6; \
+ s6 = rotl32(x17 + x56 + x0, 24); \
+ x26 = x23+x56; \
+ s3 = rotl32(x26 + x7, 13); \
+ s4 = rotl32(x26 + x1, 17); \
+ s7 = rotl32(x26 + x4, 29); \
+}
+
+#define LS1_512(c, x0, x1, x2, x3, x4, x5, x6, x7) \
+{ \
+ uint64_t x04, x17, x23, x56, x07, x26; \
+ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \
+ s0 = c + x07 + x2; \
+ s1 = rotl64(x07 + x3, 5); \
+ s2 = rotl64(x07 + x6, 15); \
+ x23 = x2 + x3; \
+ s5 = rotl64(x04 + x23 + x5, 40); \
+ x56 = x5 + x6; \
+ s6 = rotl64(x17 + x56 + x0, 50); \
+ x26 = x23+x56; \
+ s3 = rotl64(x26 + x7, 22); \
+ s4 = rotl64(x26 + x1, 31); \
+ s7 = rotl64(x26 + x4, 59); \
+}
+
+/*
+ * Second Orthogonal Latin Square
+ * 0 4 2 3 1 6 5 7
+ * 7 6 3 2 5 4 1 0
+ * 5 3 1 6 0 2 7 4
+ * 1 0 5 4 3 7 2 6
+ * 2 1 0 7 4 5 6 3
+ * 3 5 7 0 6 1 4 2
+ * 4 7 6 1 2 0 3 5
+ * 6 2 4 5 7 3 0 1
+ */
+#define LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7) \
+{ \
+ uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \
+ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \
+ t0 = ~c + y05 + y7; \
+ t2 = rotl32(y05 + y3, 9); \
+ y34 = y3+y4, y04 = y01+y34; \
+ t1 = rotl32(y04 + y6, 5); \
+ t4 = rotl32(y04 + y5, 15); \
+ y67 = y6+y7, y37 = y34+y67; \
+ t3 = rotl32(y37 + y2, 11); \
+ t7 = rotl32(y37 + y0, 27); \
+ y27 = y25+y67; \
+ t5 = rotl32(y27 + y4, 20); \
+ t6 = rotl32(y27 + y1, 25); \
+}
+
+#define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \
+{ \
+ uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \
+ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \
+ t0 = ~c + y05 + y7; \
+ t2 = rotl64(y05 + y3, 19); \
+ y34 = y3+y4, y04 = y01+y34; \
+ t1 = rotl64(y04 + y6, 10); \
+ t4 = rotl64(y04 + y5, 36); \
+ y67 = y6+y7, y37 = y34+y67; \
+ t3 = rotl64(y37 + y2, 29); \
+ t7 = rotl64(y37 + y0, 55); \
+ y27 = y25+y67; \
+ t5 = rotl64(y27 + y4, 44); \
+ t6 = rotl64(y27 + y1, 48); \
+}
+
+#define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \
+{ \
+ uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \
+ s04 = s0 ^ s4, t01 = t0 ^ t1; \
+ r0 = (s04 ^ s1) + (t01 ^ t5); \
+ t67 = t6 ^ t7; \
+ r1 = (s04 ^ s7) + (t2 ^ t67); \
+ s23 = s2 ^ s3; \
+ r7 = (s23 ^ s5) + (t4 ^ t67); \
+ t34 = t3 ^ t4; \
+ r3 = (s23 ^ s4) + (t0 ^ t34); \
+ s56 = s5 ^ s6; \
+ r5 = (s3 ^ s56) + (t34 ^ t6); \
+ t25 = t2 ^ t5; \
+ r6 = (s2 ^ s56) + (t25 ^ t7); \
+ s17 = s1 ^ s7; \
+ r4 = (s0 ^ s17) + (t1 ^ t25); \
+ r2 = (s17 ^ s6) + (t01 ^ t3); \
+}
+
+#define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \
+{ \
+ uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \
+ s04 = s0 ^ s4, t01 = t0 ^ t1; \
+ r0 = (s04 ^ s1) + (t01 ^ t5); \
+ t67 = t6 ^ t7; \
+ r1 = (s04 ^ s7) + (t2 ^ t67); \
+ s23 = s2 ^ s3; \
+ r7 = (s23 ^ s5) + (t4 ^ t67); \
+ t34 = t3 ^ t4; \
+ r3 = (s23 ^ s4) + (t0 ^ t34); \
+ s56 = s5 ^ s6; \
+ r5 = (s3 ^ s56) + (t34 ^ t6); \
+ t25 = t2 ^ t5; \
+ r6 = (s2 ^ s56) + (t25 ^ t7); \
+ s17 = s1 ^ s7; \
+ r4 = (s0 ^ s17) + (t1 ^ t25); \
+ r2 = (s17 ^ s6) + (t01 ^ t3); \
+}
+
+static size_t
+Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
+{
+ size_t bl;
+
+ for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE;
+ bl -= EdonR256_BLOCK_BITSIZE, data += 16) {
+ uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
+ t5, t6, t7;
+ uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
+ q5, q6, q7;
+ const uint32_t defix = 0xaaaaaaaa;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
+ swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+#define d(j) swp ## j
+#define s32(j) ld_swap32((uint32_t *)data + j, swp ## j)
+#else
+#define d(j) data[j]
+#endif
+
+ /* First row of quasigroup e-transformations */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s32(8);
+ s32(9);
+ s32(10);
+ s32(11);
+ s32(12);
+ s32(13);
+ s32(14);
+ s32(15);
+#endif
+ LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
+ d(8));
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s32(0);
+ s32(1);
+ s32(2);
+ s32(3);
+ s32(4);
+ s32(5);
+ s32(6);
+ s32(7);
+#undef s32
+#endif
+ LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
+ d(15));
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Second row of quasigroup e-transformations */
+ LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
+ p[15]);
+ LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Third row of quasigroup e-transformations */
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Fourth row of quasigroup e-transformations */
+ LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+ LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Edon-R tweak on the original SHA-3 Edon-R submission. */
+ p[0] ^= d(8) ^ p0;
+ p[1] ^= d(9) ^ p1;
+ p[2] ^= d(10) ^ p2;
+ p[3] ^= d(11) ^ p3;
+ p[4] ^= d(12) ^ p4;
+ p[5] ^= d(13) ^ p5;
+ p[6] ^= d(14) ^ p6;
+ p[7] ^= d(15) ^ p7;
+ p[8] ^= d(0) ^ q0;
+ p[9] ^= d(1) ^ q1;
+ p[10] ^= d(2) ^ q2;
+ p[11] ^= d(3) ^ q3;
+ p[12] ^= d(4) ^ q4;
+ p[13] ^= d(5) ^ q5;
+ p[14] ^= d(6) ^ q6;
+ p[15] ^= d(7) ^ q7;
+ }
+
+#undef d
+ return (bitlen - bl);
+}
+
+/*
+ * Why is this #pragma here?
+ *
+ * Checksum functions like this one can go over the stack frame size check
+ * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can
+ * safely ignore the compiler error since we know that in ZoL,
+ * the function will be called from a worker thread that won't be using
+ * much stack. The only function that goes over the 1k limit is Q512(),
+ * which only goes over it by a hair (1248 bytes on ARM32).
+ */
+#include <sys/isa_defs.h> /* for _ILP32 */
+#ifdef _ILP32 /* We're 32-bit, assume small stack frames */
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
+
+#if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
+static inline size_t
+#else
+static size_t
+#endif
+Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p)
+{
+ size_t bl;
+
+ for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE;
+ bl -= EdonR512_BLOCK_BITSIZE, data += 16) {
+ uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
+ t5, t6, t7;
+ uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
+ q5, q6, q7;
+ const uint64_t defix = 0xaaaaaaaaaaaaaaaaull;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
+ swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+#define d(j) swp##j
+#define s64(j) ld_swap64((uint64_t *)data+j, swp##j)
+#else
+#define d(j) data[j]
+#endif
+
+ /* First row of quasigroup e-transformations */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s64(8);
+ s64(9);
+ s64(10);
+ s64(11);
+ s64(12);
+ s64(13);
+ s64(14);
+ s64(15);
+#endif
+ LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
+ d(8));
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s64(0);
+ s64(1);
+ s64(2);
+ s64(3);
+ s64(4);
+ s64(5);
+ s64(6);
+ s64(7);
+#undef s64
+#endif
+ LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
+ d(15));
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Second row of quasigroup e-transformations */
+ LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
+ p[15]);
+ LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Third row of quasigroup e-transformations */
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Fourth row of quasigroup e-transformations */
+ LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+ LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Edon-R tweak on the original SHA-3 Edon-R submission. */
+ p[0] ^= d(8) ^ p0;
+ p[1] ^= d(9) ^ p1;
+ p[2] ^= d(10) ^ p2;
+ p[3] ^= d(11) ^ p3;
+ p[4] ^= d(12) ^ p4;
+ p[5] ^= d(13) ^ p5;
+ p[6] ^= d(14) ^ p6;
+ p[7] ^= d(15) ^ p7;
+ p[8] ^= d(0) ^ q0;
+ p[9] ^= d(1) ^ q1;
+ p[10] ^= d(2) ^ q2;
+ p[11] ^= d(3) ^ q3;
+ p[12] ^= d(4) ^ q4;
+ p[13] ^= d(5) ^ q5;
+ p[14] ^= d(6) ^ q6;
+ p[15] ^= d(7) ^ q7;
+ }
+
+#undef d
+ return (bitlen - bl);
+}
+
+void
+EdonRInit(EdonRState *state, size_t hashbitlen)
+{
+ ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen));
+ switch (hashbitlen) {
+ case 224:
+ state->hashbitlen = 224;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i224p2, hashState224(state)->DoublePipe,
+ 16 * sizeof (uint32_t));
+ break;
+
+ case 256:
+ state->hashbitlen = 256;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i256p2, hashState256(state)->DoublePipe,
+ 16 * sizeof (uint32_t));
+ break;
+
+ case 384:
+ state->hashbitlen = 384;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i384p2, hashState384(state)->DoublePipe,
+ 16 * sizeof (uint64_t));
+ break;
+
+ case 512:
+ state->hashbitlen = 512;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i512p2, hashState224(state)->DoublePipe,
+ 16 * sizeof (uint64_t));
+ break;
+ }
+}
+
+
+void
+EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen)
+{
+ uint32_t *data32;
+ uint64_t *data64;
+
+ size_t bits_processed;
+
+ ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
+ switch (state->hashbitlen) {
+ case 224:
+ case 256:
+ if (state->unprocessed_bits > 0) {
+ /* LastBytes = databitlen / 8 */
+ int LastBytes = (int)databitlen >> 3;
+
+ ASSERT(state->unprocessed_bits + databitlen <=
+ EdonR256_BLOCK_SIZE * 8);
+
+ bcopy(data, hashState256(state)->LastPart
+ + (state->unprocessed_bits >> 3), LastBytes);
+ state->unprocessed_bits += (int)databitlen;
+ databitlen = state->unprocessed_bits;
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data32 = (uint32_t *)hashState256(state)->LastPart;
+ } else
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data32 = (uint32_t *)data;
+
+ bits_processed = Q256(databitlen, data32,
+ hashState256(state)->DoublePipe);
+ state->bits_processed += bits_processed;
+ databitlen -= bits_processed;
+ state->unprocessed_bits = (int)databitlen;
+ if (databitlen > 0) {
+ /* LastBytes = Ceil(databitlen / 8) */
+ int LastBytes =
+ ((~(((-(int)databitlen) >> 3) & 0x01ff)) +
+ 1) & 0x01ff;
+
+ data32 += bits_processed >> 5; /* byte size update */
+ bcopy(data32, hashState256(state)->LastPart, LastBytes);
+ }
+ break;
+
+ case 384:
+ case 512:
+ if (state->unprocessed_bits > 0) {
+ /* LastBytes = databitlen / 8 */
+ int LastBytes = (int)databitlen >> 3;
+
+ ASSERT(state->unprocessed_bits + databitlen <=
+ EdonR512_BLOCK_SIZE * 8);
+
+ bcopy(data, hashState512(state)->LastPart
+ + (state->unprocessed_bits >> 3), LastBytes);
+ state->unprocessed_bits += (int)databitlen;
+ databitlen = state->unprocessed_bits;
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState512(state)->LastPart;
+ } else
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)data;
+
+ bits_processed = Q512(databitlen, data64,
+ hashState512(state)->DoublePipe);
+ state->bits_processed += bits_processed;
+ databitlen -= bits_processed;
+ state->unprocessed_bits = (int)databitlen;
+ if (databitlen > 0) {
+ /* LastBytes = Ceil(databitlen / 8) */
+ int LastBytes =
+ ((~(((-(int)databitlen) >> 3) & 0x03ff)) +
+ 1) & 0x03ff;
+
+ data64 += bits_processed >> 6; /* byte size update */
+ bcopy(data64, hashState512(state)->LastPart, LastBytes);
+ }
+ break;
+ }
+}
+
+void
+EdonRFinal(EdonRState *state, uint8_t *hashval)
+{
+ uint32_t *data32;
+ uint64_t *data64, num_bits;
+
+ size_t databitlen;
+ int LastByte, PadOnePosition;
+
+ num_bits = state->bits_processed + state->unprocessed_bits;
+ ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
+ switch (state->hashbitlen) {
+ case 224:
+ case 256:
+ LastByte = (int)state->unprocessed_bits >> 3;
+ PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+ hashState256(state)->LastPart[LastByte] =
+ (hashState256(state)->LastPart[LastByte]
+ & (0xff << (PadOnePosition + 1))) ^
+ (0x01 << PadOnePosition);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState256(state)->LastPart;
+
+ if (state->unprocessed_bits < 448) {
+ (void) memset((hashState256(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR256_BLOCK_SIZE - LastByte - 9);
+ databitlen = EdonR256_BLOCK_SIZE * 8;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 7);
+#else
+ data64[7] = num_bits;
+#endif
+ } else {
+ (void) memset((hashState256(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR256_BLOCK_SIZE * 2 - LastByte - 9);
+ databitlen = EdonR256_BLOCK_SIZE * 16;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 15);
+#else
+ data64[15] = num_bits;
+#endif
+ }
+
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data32 = (uint32_t *)hashState256(state)->LastPart;
+ state->bits_processed += Q256(databitlen, data32,
+ hashState256(state)->DoublePipe);
+ break;
+
+ case 384:
+ case 512:
+ LastByte = (int)state->unprocessed_bits >> 3;
+ PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+ hashState512(state)->LastPart[LastByte] =
+ (hashState512(state)->LastPart[LastByte]
+ & (0xff << (PadOnePosition + 1))) ^
+ (0x01 << PadOnePosition);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState512(state)->LastPart;
+
+ if (state->unprocessed_bits < 960) {
+ (void) memset((hashState512(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR512_BLOCK_SIZE - LastByte - 9);
+ databitlen = EdonR512_BLOCK_SIZE * 8;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 15);
+#else
+ data64[15] = num_bits;
+#endif
+ } else {
+ (void) memset((hashState512(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR512_BLOCK_SIZE * 2 - LastByte - 9);
+ databitlen = EdonR512_BLOCK_SIZE * 16;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 31);
+#else
+ data64[31] = num_bits;
+#endif
+ }
+
+ state->bits_processed += Q512(databitlen, data64,
+ hashState512(state)->DoublePipe);
+ break;
+ }
+
+ switch (state->hashbitlen) {
+ case 224: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint32_t *d32 = (uint32_t *)hashval;
+ uint32_t *s32 = hashState224(state)->DoublePipe + 9;
+ int j;
+
+ for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++)
+ st_swap32(s32[j], d32 + j);
+#else
+ bcopy(hashState256(state)->DoublePipe + 9, hashval,
+ EdonR224_DIGEST_SIZE);
+#endif
+ break;
+ }
+ case 256: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint32_t *d32 = (uint32_t *)hashval;
+ uint32_t *s32 = hashState224(state)->DoublePipe + 8;
+ int j;
+
+ for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++)
+ st_swap32(s32[j], d32 + j);
+#else
+ bcopy(hashState256(state)->DoublePipe + 8, hashval,
+ EdonR256_DIGEST_SIZE);
+#endif
+ break;
+ }
+ case 384: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint64_t *d64 = (uint64_t *)hashval;
+ uint64_t *s64 = hashState384(state)->DoublePipe + 10;
+ int j;
+
+ for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++)
+ st_swap64(s64[j], d64 + j);
+#else
+ bcopy(hashState384(state)->DoublePipe + 10, hashval,
+ EdonR384_DIGEST_SIZE);
+#endif
+ break;
+ }
+ case 512: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint64_t *d64 = (uint64_t *)hashval;
+ uint64_t *s64 = hashState512(state)->DoublePipe + 8;
+ int j;
+
+ for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++)
+ st_swap64(s64[j], d64 + j);
+#else
+ bcopy(hashState512(state)->DoublePipe + 8, hashval,
+ EdonR512_DIGEST_SIZE);
+#endif
+ break;
+ }
+ }
+}
+
+
+void
+EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
+ uint8_t *hashval)
+{
+ EdonRState state;
+
+ EdonRInit(&state, hashbitlen);
+ EdonRUpdate(&state, data, databitlen);
+ EdonRFinal(&state, hashval);
+}
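
A minimal usage sketch of the streaming interface above; note that EdonRUpdate() and
EdonRHash() take lengths in bits, not bytes (buffer contents here are placeholders):

	EdonRState state;
	uint8_t buf[4096];
	uint8_t digest[EdonR512_DIGEST_SIZE];

	EdonRInit(&state, 512);
	EdonRUpdate(&state, buf, sizeof (buf) * 8);	/* length in bits */
	EdonRFinal(&state, digest);
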
+
+#ifdef _KERNEL
+EXPORT_SYMBOL(EdonRInit);
+EXPORT_SYMBOL(EdonRUpdate);
+EXPORT_SYMBOL(EdonRHash);
+EXPORT_SYMBOL(EdonRFinal);
+#endif
diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h b/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h
new file mode 100644
index 000000000000..2b5d48287f26
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h
@@ -0,0 +1,216 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
+ *
+ * C header file to determine the compile-time machine byte order. Take care
+ * when cross-compiling.
+ *
+ * $Id: byteorder.h 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+#ifndef _CRYPTO_EDONR_BYTEORDER_H
+#define _CRYPTO_EDONR_BYTEORDER_H
+
+#include <sys/sysmacros.h>
+#include <sys/param.h>
+
+#if defined(__BYTE_ORDER)
+#if (__BYTE_ORDER == __BIG_ENDIAN)
+#define MACHINE_IS_BIG_ENDIAN
+#elif (__BYTE_ORDER == __LITTLE_ENDIAN)
+#define MACHINE_IS_LITTLE_ENDIAN
+#endif
+#elif defined(BYTE_ORDER)
+#if (BYTE_ORDER == BIG_ENDIAN)
+#define MACHINE_IS_BIG_ENDIAN
+#elif (BYTE_ORDER == LITTLE_ENDIAN)
+#define MACHINE_IS_LITTLE_ENDIAN
+#endif
+#endif /* __BYTE_ORDER || BYTE_ORDER */
+
+#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
+#if defined(_ZFS_BIG_ENDIAN) || defined(_MIPSEB)
+#define MACHINE_IS_BIG_ENDIAN
+#endif
+#if defined(_ZFS_LITTLE_ENDIAN) || defined(_MIPSEL)
+#define MACHINE_IS_LITTLE_ENDIAN
+#endif
+#endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */
+
+#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
+#error unknown machine byte sex
+#endif
+
+#define BYTEORDER_INCLUDED
+
+#if defined(MACHINE_IS_BIG_ENDIAN)
+/*
+ * Byte swapping macros for big endian architectures and compilers;
+ * add as appropriate for other architectures and/or compilers.
+ *
+ * ld_swap64(src,dst) : uint64_t dst = *(src)
+ * st_swap64(src,dst) : *(dst) = uint64_t src
+ */
+
+#if defined(__PPC__) || defined(_ARCH_PPC)
+
+#if defined(__64BIT__)
+#if defined(_ARCH_PWR7)
+#define aix_ld_swap64(s64, d64)\
+ __asm__("ldbrx %0,0,%1" : "=r"(d64) : "r"(s64))
+#define aix_st_swap64(s64, d64)\
+ __asm__ volatile("stdbrx %1,0,%0" : : "r"(d64), "r"(s64))
+#else
+#define aix_ld_swap64(s64, d64) \
+{ \
+ uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \
+ \
+ __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0;rldimi %1,%2,32,0"\
+ : "+r"(s4), "=r"(d64), "=r"(h) : "b"(s64)); \
+}
+
+#define aix_st_swap64(s64, d64) \
+{ \
+ uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \
+ h = (s64) >> 32; \
+ __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \
+ : "+r"(s4) : "r"(s64), "r"(h), "b"(d64)); \
+}
+#endif /* 64BIT && PWR7 */
+#else
+#define aix_ld_swap64(s64, d64) \
+{ \
+ uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+ __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0" \
+ : "+r"(s4), "=r"(l), "=r"(h) : "b"(s64)); \
+ d64 = ((uint64_t)h<<32) | l; \
+}
+
+#define aix_st_swap64(s64, d64) \
+{ \
+ uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+ l = (s64) & 0xfffffffful, h = (s64) >> 32; \
+ __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \
+ : "+r"(s4) : "r"(l), "r"(h), "b"(d64)); \
+}
+#endif /* __64BIT__ */
+#define aix_ld_swap32(s32, d32)\
+ __asm__("lwbrx %0,0,%1" : "=r"(d32) : "r"(s32))
+#define aix_st_swap32(s32, d32)\
+ __asm__ volatile("stwbrx %1,0,%0" : : "r"(d32), "r"(s32))
+#define ld_swap32(s, d) aix_ld_swap32(s, d)
+#define st_swap32(s, d) aix_st_swap32(s, d)
+#define ld_swap64(s, d) aix_ld_swap64(s, d)
+#define st_swap64(s, d) aix_st_swap64(s, d)
+#endif /* __PPC__ || _ARCH_PPC */
+
+#if defined(__sparc)
+#if !defined(__arch64__) && !defined(__sparcv8) && defined(__sparcv9)
+#define __arch64__
+#endif
+#if defined(__GNUC__) || (defined(__SUNPRO_C) && __SUNPRO_C > 0x590)
+/* need Sun Studio C 5.10 and above for GNU inline assembly */
+#if defined(__arch64__)
+#define sparc_ld_swap64(s64, d64) \
+ __asm__("ldxa [%1]0x88,%0" : "=r"(d64) : "r"(s64))
+#define sparc_st_swap64(s64, d64) \
+ __asm__ volatile("stxa %0,[%1]0x88" : : "r"(s64), "r"(d64))
+#define st_swap64(s, d) sparc_st_swap64(s, d)
+#else
+#define sparc_ld_swap64(s64, d64) \
+{ \
+ uint32_t *s4, h, l; \
+ __asm__("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" \
+ : "+r"(s4), "=r"(l), "=r"(h) : "r"(s64)); \
+ d64 = ((uint64_t)h<<32) | l; \
+}
+#define sparc_st_swap64(s64, d64) \
+{ \
+ uint32_t *s4, h, l; \
+ l = (s64) & 0xfffffffful, h = (s64) >> 32; \
+ __asm__ volatile("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"\
+ : "+r"(s4) : "r"(l), "r"(h), "r"(d64)); \
+}
+#endif /* sparc64 */
+#define sparc_ld_swap32(s32, d32)\
+ __asm__("lda [%1]0x88,%0" : "=r"(d32) : "r"(s32))
+#define sparc_st_swap32(s32, d32)\
+ __asm__ volatile("sta %0,[%1]0x88" : : "r"(s32), "r"(d32))
+#define ld_swap32(s, d) sparc_ld_swap32(s, d)
+#define st_swap32(s, d) sparc_st_swap32(s, d)
+#define ld_swap64(s, d) sparc_ld_swap64(s, d)
+#define st_swap64(s, d) sparc_st_swap64(s, d)
+#endif /* GCC || Sun Studio C > 5.9 */
+#endif /* sparc */
+
+/* GCC fallback */
+#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap32)
+#define ld_swap32(s, d) (d = __builtin_bswap32(*(s)))
+#define st_swap32(s, d) (*(d) = __builtin_bswap32(s))
+#endif /* GCC4/PGIC && !swap32 */
+#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap64)
+#define ld_swap64(s, d) (d = __builtin_bswap64(*(s)))
+#define st_swap64(s, d) (*(d) = __builtin_bswap64(s))
+#endif /* GCC4/PGIC && !swap64 */
+
+/* generic fallback */
+#if !defined(ld_swap32)
+#define ld_swap32(s, d) \
+ (d = (*(s) >> 24) | (*(s) >> 8 & 0xff00) | \
+ (*(s) << 8 & 0xff0000) | (*(s) << 24))
+#define st_swap32(s, d) \
+ (*(d) = ((s) >> 24) | ((s) >> 8 & 0xff00) | \
+ ((s) << 8 & 0xff0000) | ((s) << 24))
+#endif
+#if !defined(ld_swap64)
+#define ld_swap64(s, d) \
+ (d = (*(s) >> 56) | (*(s) >> 40 & 0xff00) | \
+ (*(s) >> 24 & 0xff0000) | (*(s) >> 8 & 0xff000000) | \
+ (*(s) & 0xff000000) << 8 | (*(s) & 0xff0000) << 24 | \
+ (*(s) & 0xff00) << 40 | *(s) << 56)
+#define st_swap64(s, d) \
+ (*(d) = ((s) >> 56) | ((s) >> 40 & 0xff00) | \
+ ((s) >> 24 & 0xff0000) | ((s) >> 8 & 0xff000000) | \
+ ((s) & 0xff000000) << 8 | ((s) & 0xff0000) << 24 | \
+ ((s) & 0xff00) << 40 | (s) << 56)
+#endif
+
+#endif /* MACHINE_IS_BIG_ENDIAN */
+
+
+#if defined(MACHINE_IS_LITTLE_ENDIAN)
+/* replace swaps with simple assignments on little endian systems */
+#undef ld_swap32
+#undef st_swap32
+#define ld_swap32(s, d) (d = *(s))
+#define st_swap32(s, d) (*(d) = s)
+#undef ld_swap64
+#undef st_swap64
+#define ld_swap64(s, d) (d = *(s))
+#define st_swap64(s, d) (*(d) = s)
+#endif /* MACHINE_IS_LITTLE_ENDIAN */
+
+#endif /* _CRYPTO_EDONR_BYTEORDER_H */
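
Whichever branch above was selected, the resulting macros present a single load/store
interface; a small sketch of how the Edon-R code uses them (values are placeholders):

	uint32_t src = 0x11223344, dst;

	ld_swap32(&src, dst);	/* big-endian host: dst == 0x44332211 */
				/* little-endian host: plain assignment */
	st_swap32(dst, &src);	/* the corresponding byte-swapping store */
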
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/cbc.c b/sys/contrib/openzfs/module/icp/algs/modes/cbc.c
new file mode 100644
index 000000000000..85864f56dead
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/cbc.c
@@ -0,0 +1,273 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Algorithm independent CBC functions.
+ */
+int
+cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+
+ if (length + ctx->cbc_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ length);
+ ctx->cbc_remainder_len += length;
+ ctx->cbc_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->cbc_iv;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->cbc_remainder_len > 0) {
+ need = block_size - ctx->cbc_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
+ [ctx->cbc_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->cbc_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /*
+ * XOR the previous cipher block or IV with the
+ * current clear block.
+ */
+ xor_block(blockp, lastp);
+ encrypt(ctx->cbc_keysched, lastp, lastp);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->cbc_remainder_len != 0) {
+ datap += need;
+ ctx->cbc_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->cbc_remainder, remainder);
+ ctx->cbc_remainder_len = remainder;
+ ctx->cbc_copy_to = datap;
+ goto out;
+ }
+ ctx->cbc_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ /*
+ * Save the last encrypted block in the context.
+ */
+ if (ctx->cbc_lastp != NULL) {
+ copy_block((uint8_t *)ctx->cbc_lastp, (uint8_t *)ctx->cbc_iv);
+ ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv;
+ }
+
+ return (CRYPTO_SUCCESS);
+}
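+
+/*
+ * For reference, the loop above implements the standard CBC recurrence
+ *	C_0 = IV,  C_i = E_K(P_i XOR C_(i-1))
+ * with lastp tracking C_(i-1); decryption below computes
+ *	P_i = D_K(C_i) XOR C_(i-1).
+ */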
+
+#define OTHER(a, ctx) \
+ (((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock)
+
+/* ARGSUSED */
+int
+cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*decrypt)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+
+ if (length + ctx->cbc_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ length);
+ ctx->cbc_remainder_len += length;
+ ctx->cbc_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = ctx->cbc_lastp;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->cbc_remainder_len > 0) {
+ need = block_size - ctx->cbc_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
+ [ctx->cbc_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->cbc_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /* LINTED: pointer alignment */
+ copy_block(blockp, (uint8_t *)OTHER((uint64_t *)lastp, ctx));
+
+ decrypt(ctx->cbc_keysched, blockp,
+ (uint8_t *)ctx->cbc_remainder);
+ blockp = (uint8_t *)ctx->cbc_remainder;
+
+ /*
+ * XOR the previous cipher block or IV with the
+ * currently decrypted block.
+ */
+ xor_block(lastp, blockp);
+
+ /* LINTED: pointer alignment */
+ lastp = (uint8_t *)OTHER((uint64_t *)lastp, ctx);
+
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ bcopy(blockp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(blockp + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->cbc_remainder_len != 0) {
+ datap += need;
+ ctx->cbc_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->cbc_remainder, remainder);
+ ctx->cbc_remainder_len = remainder;
+ ctx->cbc_lastp = lastp;
+ ctx->cbc_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+ ctx->cbc_copy_to = NULL;
+
+ } while (remainder > 0);
+
+ ctx->cbc_lastp = lastp;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len,
+ size_t block_size, void (*copy_block)(uint8_t *, uint64_t *))
+{
+ /*
+ * Copy IV into context.
+ *
+	 * If param == NULL then the IV comes from the
+ * cd_miscdata field in the crypto_data structure.
+ */
+ if (param != NULL) {
+ ASSERT(param_len == block_size);
+ copy_block((uchar_t *)param, cbc_ctx->cbc_iv);
+ }
+
+ cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0];
+ cbc_ctx->cbc_flags |= CBC_MODE;
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+void *
+cbc_alloc_ctx(int kmflag)
+{
+ cbc_ctx_t *cbc_ctx;
+
+ if ((cbc_ctx = kmem_zalloc(sizeof (cbc_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ cbc_ctx->cbc_flags = CBC_MODE;
+ return (cbc_ctx);
+}
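+
+/*
+ * Illustrative sketch (for exposition only, not used by this file): the
+ * shape of the block callbacks the routines above expect, assuming a
+ * 16-byte (AES-sized) block.  The names below are hypothetical; the real
+ * callbacks are supplied by the cipher implementation.
+ */
+#include <stdint.h>
+#include <string.h>
+
+#define	EXAMPLE_BLOCK_LEN	16
+
+/* XOR the block at src into the block at dst, as xor_block() does. */
+static void
+example_xor_block(uint8_t *src, uint8_t *dst)
+{
+	int i;
+
+	for (i = 0; i < EXAMPLE_BLOCK_LEN; i++)
+		dst[i] ^= src[i];
+}
+
+/* Copy one 16-byte block from src to dst, as copy_block() does. */
+static void
+example_copy_block(uint8_t *src, uint8_t *dst)
+{
+	memcpy(dst, src, EXAMPLE_BLOCK_LEN);
+}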
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
new file mode 100644
index 000000000000..5d6507c49db1
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
@@ -0,0 +1,907 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
+#include <sys/byteorder.h>
+#define UNALIGNED_POINTERS_PERMITTED
+#endif
+
+/*
+ * Encrypt multiple blocks of data in CCM mode. Decrypt for CCM mode
+ * is done in another function.
+ */
+int
+ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t counter;
+ uint8_t *mac_buf;
+
+ if (length + ctx->ccm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ length);
+ ctx->ccm_remainder_len += length;
+ ctx->ccm_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->ccm_cb;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ccm_remainder_len > 0) {
+ need = block_size - ctx->ccm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ccm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /*
+ * do CBC MAC
+ *
+	 * XOR the previous cipher block with the current clear block.
+	 * mac_buf always contains the previous cipher block.
+ */
+ xor_block(blockp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ /* ccm_cb is the counter block */
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb,
+ (uint8_t *)ctx->ccm_tmp);
+
+ lastp = (uint8_t *)ctx->ccm_tmp;
+
+ /*
+ * Increment counter. Counter bits are confined
+ * to the bottom 64 bits of the counter block.
+ */
+#ifdef _ZFS_LITTLE_ENDIAN
+ counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask);
+ counter = htonll(counter + 1);
+#else
+ counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask;
+ counter++;
+#endif /* _ZFS_LITTLE_ENDIAN */
+ counter &= ctx->ccm_counter_mask;
+ ctx->ccm_cb[1] =
+ (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
+
+ /*
+ * XOR encrypted counter block with the current clear block.
+ */
+ xor_block(blockp, lastp);
+
+ ctx->ccm_processed_data_len += block_size;
+
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ccm_remainder_len != 0) {
+ datap += need;
+ ctx->ccm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ccm_remainder, remainder);
+ ctx->ccm_remainder_len = remainder;
+ ctx->ccm_copy_to = datap;
+ goto out;
+ }
+ ctx->ccm_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
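+
+/*
+ * Illustrative sketch (for exposition only): the masked counter increment
+ * done in the loop above, collapsed into a single self-contained helper.
+ * The counter bytes sit big-endian inside the region selected by
+ * counter_mask, so they are swapped to host order, incremented, swapped
+ * back and re-masked.  This sketch assumes a glibc-style <endian.h>; the
+ * code above uses ntohll()/htonll() from sys/byteorder.h instead.
+ */
+#include <stdint.h>
+#include <endian.h>
+
+static uint64_t
+example_ccm_increment_counter(uint64_t cb_word, uint64_t counter_mask)
+{
+	uint64_t counter;
+
+	counter = be64toh(cb_word & counter_mask);
+	counter = htobe64(counter + 1) & counter_mask;
+	return ((cb_word & ~counter_mask) | counter);
+}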
+
+void
+calculate_ccm_mac(ccm_ctx_t *ctx, uint8_t *ccm_mac,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+{
+ uint64_t counter;
+ uint8_t *counterp, *mac_buf;
+ int i;
+
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+	/* the first counter block starts with index 0 */
+ counter = 0;
+ ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
+
+ counterp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp);
+
+ /* calculate XOR of MAC with first counter block */
+ for (i = 0; i < ctx->ccm_mac_len; i++) {
+ ccm_mac[i] = mac_buf[i] ^ counterp[i];
+ }
+}
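+
+/*
+ * For reference, the function above computes the CCM tag as
+ *	T = first ccm_mac_len bytes of (CBC-MAC XOR E_K(A_0))
+ * where A_0 is the counter block with its counter field set to zero.
+ */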
+
+/* ARGSUSED */
+int
+ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *lastp, *mac_buf, *ccm_mac_p, *macp = NULL;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ int i;
+
+ if (out->cd_length < (ctx->ccm_remainder_len + ctx->ccm_mac_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ /*
+	 * When we get here, the number of payload bytes processed,
+	 * plus whatever data remains, if any, should equal the
+	 * payload length that was supplied at init time.
+ */
+ if ((ctx->ccm_processed_data_len + ctx->ccm_remainder_len)
+ != (ctx->ccm_data_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ if (ctx->ccm_remainder_len > 0) {
+
+ /* ccm_mac_input_buf is not used for encryption */
+ macp = (uint8_t *)ctx->ccm_mac_input_buf;
+ bzero(macp, block_size);
+
+ /* copy remainder to temporary buffer */
+ bcopy(ctx->ccm_remainder, macp, ctx->ccm_remainder_len);
+
+ /* calculate the CBC MAC */
+ xor_block(macp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ /* calculate the counter mode */
+ lastp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, lastp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->ccm_remainder_len; i++) {
+ macp[i] ^= lastp[i];
+ }
+ ctx->ccm_processed_data_len += ctx->ccm_remainder_len;
+ }
+
+ /* Calculate the CCM MAC */
+ ccm_mac_p = (uint8_t *)ctx->ccm_tmp;
+ calculate_ccm_mac(ctx, ccm_mac_p, encrypt_block);
+
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2,
+ ctx->ccm_remainder_len + ctx->ccm_mac_len);
+
+ if (ctx->ccm_remainder_len > 0) {
+
+ /* copy temporary block to where it belongs */
+ if (out_data_2 == NULL) {
+ /* everything will fit in out_data_1 */
+ bcopy(macp, out_data_1, ctx->ccm_remainder_len);
+ bcopy(ccm_mac_p, out_data_1 + ctx->ccm_remainder_len,
+ ctx->ccm_mac_len);
+ } else {
+
+ if (out_data_1_len < ctx->ccm_remainder_len) {
+
+ size_t data_2_len_used;
+
+ bcopy(macp, out_data_1, out_data_1_len);
+
+ data_2_len_used = ctx->ccm_remainder_len
+ - out_data_1_len;
+
+ bcopy((uint8_t *)macp + out_data_1_len,
+ out_data_2, data_2_len_used);
+ bcopy(ccm_mac_p, out_data_2 + data_2_len_used,
+ ctx->ccm_mac_len);
+ } else {
+ bcopy(macp, out_data_1, out_data_1_len);
+ if (out_data_1_len == ctx->ccm_remainder_len) {
+ /* mac will be in out_data_2 */
+ bcopy(ccm_mac_p, out_data_2,
+ ctx->ccm_mac_len);
+ } else {
+ size_t len_not_used = out_data_1_len -
+ ctx->ccm_remainder_len;
+ /*
+				 * part of the mac will be in
+				 * out_data_1, part of the mac will be
+				 * in out_data_2
+ */
+ bcopy(ccm_mac_p,
+ out_data_1 + ctx->ccm_remainder_len,
+ len_not_used);
+ bcopy(ccm_mac_p + len_not_used,
+ out_data_2,
+ ctx->ccm_mac_len - len_not_used);
+
+ }
+ }
+ }
+ } else {
+ /* copy block to where it belongs */
+ bcopy(ccm_mac_p, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(ccm_mac_p + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ out->cd_offset += ctx->ccm_remainder_len + ctx->ccm_mac_len;
+ ctx->ccm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * This will only deal with decrypting the last block of the input that
+ * might not be a multiple of block length.
+ */
+static void
+ccm_decrypt_incomplete_block(ccm_ctx_t *ctx,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+{
+ uint8_t *datap, *outp, *counterp;
+ int i;
+
+ datap = (uint8_t *)ctx->ccm_remainder;
+ outp = &((ctx->ccm_pt_buf)[ctx->ccm_processed_data_len]);
+
+ counterp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->ccm_remainder_len; i++) {
+ outp[i] = datap[i] ^ counterp[i];
+ }
+}
+
+/*
+ * This will decrypt the ciphertext. However, the plaintext won't be
+ * returned to the caller. It will only be returned when decrypt_final()
+ * is called and the MAC matches.
+ */
+/* ARGSUSED */
+int
+ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *cbp;
+ uint64_t counter;
+ size_t pt_len, total_decrypted_len, mac_len, pm_len, pd_len;
+ uint8_t *resultp;
+
+
+ pm_len = ctx->ccm_processed_mac_len;
+
+ if (pm_len > 0) {
+ uint8_t *tmp;
+ /*
+		 * all ciphertext has been processed; we are just waiting
+		 * for the remaining bytes of the mac
+ */
+ if ((pm_len + length) > ctx->ccm_mac_len) {
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ }
+ tmp = (uint8_t *)ctx->ccm_mac_input_buf;
+
+ bcopy(datap, tmp + pm_len, length);
+
+ ctx->ccm_processed_mac_len += length;
+ return (CRYPTO_SUCCESS);
+ }
+
+ /*
+ * If we decrypt the given data, what total amount of data would
+ * have been decrypted?
+ */
+ pd_len = ctx->ccm_processed_data_len;
+ total_decrypted_len = pd_len + length + ctx->ccm_remainder_len;
+
+ if (total_decrypted_len >
+ (ctx->ccm_data_len + ctx->ccm_mac_len)) {
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ }
+
+ pt_len = ctx->ccm_data_len;
+
+ if (total_decrypted_len > pt_len) {
+ /*
+		 * part of the input will be the MAC; isolate it so it can
+		 * be dealt with later. The left-over data in
+		 * ccm_remainder_len from the previous call cannot be part
+		 * of the MAC; otherwise it would already have been taken
+		 * out when that call was made.
+ */
+ size_t pt_part = pt_len - pd_len - ctx->ccm_remainder_len;
+
+ mac_len = length - pt_part;
+
+ ctx->ccm_processed_mac_len = mac_len;
+ bcopy(data + pt_part, ctx->ccm_mac_input_buf, mac_len);
+
+ if (pt_part + ctx->ccm_remainder_len < block_size) {
+ /*
+			 * since this is the last of the ciphertext, just
+			 * decrypt it here
+ */
+ bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], pt_part);
+ ctx->ccm_remainder_len += pt_part;
+ ccm_decrypt_incomplete_block(ctx, encrypt_block);
+ ctx->ccm_processed_data_len += ctx->ccm_remainder_len;
+ ctx->ccm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+ } else {
+ /* let rest of the code handle this */
+ length = pt_part;
+ }
+ } else if (length + ctx->ccm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ length);
+ ctx->ccm_remainder_len += length;
+ ctx->ccm_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ccm_remainder_len > 0) {
+ need = block_size - ctx->ccm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ccm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /* Calculate the counter mode, ccm_cb is the counter block */
+ cbp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, cbp);
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 64 bits
+ */
+#ifdef _ZFS_LITTLE_ENDIAN
+ counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask);
+ counter = htonll(counter + 1);
+#else
+ counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask;
+ counter++;
+#endif /* _ZFS_LITTLE_ENDIAN */
+ counter &= ctx->ccm_counter_mask;
+ ctx->ccm_cb[1] =
+ (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
+
+ /* XOR with the ciphertext */
+ xor_block(blockp, cbp);
+
+ /* Copy the plaintext to the "holding buffer" */
+ resultp = (uint8_t *)ctx->ccm_pt_buf +
+ ctx->ccm_processed_data_len;
+ copy_block(cbp, resultp);
+
+ ctx->ccm_processed_data_len += block_size;
+
+ ctx->ccm_lastp = blockp;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ccm_remainder_len != 0) {
+ datap += need;
+ ctx->ccm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ccm_remainder, remainder);
+ ctx->ccm_remainder_len = remainder;
+ ctx->ccm_copy_to = datap;
+ if (ctx->ccm_processed_mac_len > 0) {
+ /*
+				 * not expecting any more ciphertext, just
+				 * compute plaintext for the remaining input
+ */
+ ccm_decrypt_incomplete_block(ctx,
+ encrypt_block);
+ ctx->ccm_processed_data_len += remainder;
+ ctx->ccm_remainder_len = 0;
+ }
+ goto out;
+ }
+ ctx->ccm_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t mac_remain, pt_len;
+ uint8_t *pt, *mac_buf, *macp, *ccm_mac_p;
+ int rv;
+
+ pt_len = ctx->ccm_data_len;
+
+ /* Make sure output buffer can fit all of the plaintext */
+ if (out->cd_length < pt_len) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ pt = ctx->ccm_pt_buf;
+ mac_remain = ctx->ccm_processed_data_len;
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ macp = (uint8_t *)ctx->ccm_tmp;
+
+ while (mac_remain > 0) {
+
+ if (mac_remain < block_size) {
+ bzero(macp, block_size);
+ bcopy(pt, macp, mac_remain);
+ mac_remain = 0;
+ } else {
+ copy_block(pt, macp);
+ mac_remain -= block_size;
+ pt += block_size;
+ }
+
+ /* calculate the CBC MAC */
+ xor_block(macp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+ }
+
+ /* Calculate the CCM MAC */
+ ccm_mac_p = (uint8_t *)ctx->ccm_tmp;
+ calculate_ccm_mac((ccm_ctx_t *)ctx, ccm_mac_p, encrypt_block);
+
+ /* compare the input CCM MAC value with what we calculated */
+ if (bcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) {
+ /* They don't match */
+ return (CRYPTO_INVALID_MAC);
+ } else {
+ rv = crypto_put_output_data(ctx->ccm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += pt_len;
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+static int
+ccm_validate_args(CK_AES_CCM_PARAMS *ccm_param, boolean_t is_encrypt_init)
+{
+ size_t macSize, nonceSize;
+ uint8_t q;
+ uint64_t maxValue;
+
+ /*
+ * Check the length of the MAC. The only valid
+ * lengths for the MAC are: 4, 6, 8, 10, 12, 14, 16
+ */
+ macSize = ccm_param->ulMACSize;
+ if ((macSize < 4) || (macSize > 16) || ((macSize % 2) != 0)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ /* Check the nonce length. Valid values are 7, 8, 9, 10, 11, 12, 13 */
+ nonceSize = ccm_param->ulNonceSize;
+ if ((nonceSize < 7) || (nonceSize > 13)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ /* q is the length of the field storing the length, in bytes */
+ q = (uint8_t)((15 - nonceSize) & 0xFF);
+
+
+ /*
+	 * For decryption, make sure the size of the ciphertext is at
+	 * least as large as the MAC length
+ */
+ if ((!is_encrypt_init) && (ccm_param->ulDataSize < macSize)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ /*
+ * Check to make sure the length of the payload is within the
+ * range of values allowed by q
+ */
+ if (q < 8) {
+ maxValue = (1ULL << (q * 8)) - 1;
+ } else {
+ maxValue = ULONG_MAX;
+ }
+
+ if (ccm_param->ulDataSize > maxValue) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Format the first block used in CBC-MAC (B0) and the initial counter
+ * block based on formatting functions and counter generation functions
+ * specified in RFC 3610 and NIST publication 800-38C, appendix A
+ *
+ * b0 is the first block used in CBC-MAC
+ * cb0 is the first counter block
+ *
+ * It's assumed that b0 and the counter block (stored in aes_ctx) are
+ * preallocated AES blocks
+ *
+ */
+static void
+ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize,
+ ulong_t authDataSize, uint8_t *b0, ccm_ctx_t *aes_ctx)
+{
+ uint64_t payloadSize;
+ uint8_t t, q, have_adata = 0;
+ size_t limit;
+ int i, j, k;
+ uint64_t mask = 0;
+ uint8_t *cb;
+
+ q = (uint8_t)((15 - nonceSize) & 0xFF);
+ t = (uint8_t)((aes_ctx->ccm_mac_len) & 0xFF);
+
+ /* Construct the first octet of b0 */
+ if (authDataSize > 0) {
+ have_adata = 1;
+ }
+ b0[0] = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1);
+
+ /* copy the nonce value into b0 */
+ bcopy(nonce, &(b0[1]), nonceSize);
+
+ /* store the length of the payload into b0 */
+ bzero(&(b0[1+nonceSize]), q);
+
+ payloadSize = aes_ctx->ccm_data_len;
+ limit = 8 < q ? 8 : q;
+
+ for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) {
+ b0[k] = (uint8_t)((payloadSize >> j) & 0xFF);
+ }
+
+ /* format the counter block */
+
+ cb = (uint8_t *)aes_ctx->ccm_cb;
+
+ cb[0] = 0x07 & (q-1); /* first byte */
+
+ /* copy the nonce value into the counter block */
+ bcopy(nonce, &(cb[1]), nonceSize);
+
+ bzero(&(cb[1+nonceSize]), q);
+
+ /* Create the mask for the counter field based on the size of nonce */
+ q <<= 3;
+ while (q-- > 0) {
+ mask |= (1ULL << q);
+ }
+
+#ifdef _ZFS_LITTLE_ENDIAN
+ mask = htonll(mask);
+#endif
+ aes_ctx->ccm_counter_mask = mask;
+
+ /*
+	 * The calculation starts from counter block 1, so set it up
+	 * right here.
+	 * We can just set the last byte to the value 1, because even
+	 * with the biggest nonce of 13, the last byte of the counter
+	 * block is still part of the counter value.
+ */
+ cb[15] = 0x01;
+}
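+
+/*
+ * Worked example of the flags octet built above: with associated data
+ * present, a 16-byte MAC (t = 16) and a 13-byte nonce (q = 2),
+ *	b0[0] = (1 << 6) | (((16 - 2) / 2) << 3) | (2 - 1)
+ *	      = 0x40 | 0x38 | 0x01 = 0x79
+ * and the first octet of the counter block is simply q - 1 = 0x01.
+ */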
+
+/*
+ * Encode the length of the associated data as
+ * specified in RFC 3610 and NIST publication 800-38C, appendix A
+ */
+static void
+encode_adata_len(ulong_t auth_data_len, uint8_t *encoded, size_t *encoded_len)
+{
+#ifdef UNALIGNED_POINTERS_PERMITTED
+ uint32_t *lencoded_ptr;
+#ifdef _LP64
+ uint64_t *llencoded_ptr;
+#endif
+#endif /* UNALIGNED_POINTERS_PERMITTED */
+
+ if (auth_data_len < ((1ULL<<16) - (1ULL<<8))) {
+ /* 0 < a < (2^16-2^8) */
+ *encoded_len = 2;
+ encoded[0] = (auth_data_len & 0xff00) >> 8;
+ encoded[1] = auth_data_len & 0xff;
+
+ } else if ((auth_data_len >= ((1ULL<<16) - (1ULL<<8))) &&
+ (auth_data_len < (1ULL << 31))) {
+ /* (2^16-2^8) <= a < 2^32 */
+ *encoded_len = 6;
+ encoded[0] = 0xff;
+ encoded[1] = 0xfe;
+#ifdef UNALIGNED_POINTERS_PERMITTED
+ lencoded_ptr = (uint32_t *)&encoded[2];
+ *lencoded_ptr = htonl(auth_data_len);
+#else
+ encoded[2] = (auth_data_len & 0xff000000) >> 24;
+ encoded[3] = (auth_data_len & 0xff0000) >> 16;
+ encoded[4] = (auth_data_len & 0xff00) >> 8;
+ encoded[5] = auth_data_len & 0xff;
+#endif /* UNALIGNED_POINTERS_PERMITTED */
+
+#ifdef _LP64
+ } else {
+ /* 2^32 <= a < 2^64 */
+ *encoded_len = 10;
+ encoded[0] = 0xff;
+ encoded[1] = 0xff;
+#ifdef UNALIGNED_POINTERS_PERMITTED
+ llencoded_ptr = (uint64_t *)&encoded[2];
+		*llencoded_ptr = htonll(auth_data_len);
+#else
+ encoded[2] = (auth_data_len & 0xff00000000000000) >> 56;
+ encoded[3] = (auth_data_len & 0xff000000000000) >> 48;
+ encoded[4] = (auth_data_len & 0xff0000000000) >> 40;
+ encoded[5] = (auth_data_len & 0xff00000000) >> 32;
+ encoded[6] = (auth_data_len & 0xff000000) >> 24;
+ encoded[7] = (auth_data_len & 0xff0000) >> 16;
+ encoded[8] = (auth_data_len & 0xff00) >> 8;
+ encoded[9] = auth_data_len & 0xff;
+#endif /* UNALIGNED_POINTERS_PERMITTED */
+#endif /* _LP64 */
+ }
+}
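+
+/*
+ * Worked examples of the encoding above: for auth_data_len = 7000
+ * (< 2^16 - 2^8) the length is encoded in two octets as 1b 58; for
+ * auth_data_len = 100000 it is encoded in six octets as
+ *	ff fe 00 01 86 a0
+ * since 100000 == 0x000186a0.
+ */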
+
+static int
+ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *mac_buf, *datap, *ivp, *authp;
+ size_t remainder, processed;
+ uint8_t encoded_a[10]; /* max encoded auth data length is 10 octets */
+ size_t encoded_a_len = 0;
+
+ mac_buf = (uint8_t *)&(ctx->ccm_mac_buf);
+
+ /*
+ * Format the 1st block for CBC-MAC and construct the
+ * 1st counter block.
+ *
+ * aes_ctx->ccm_iv is used for storing the counter block
+ * mac_buf will store b0 at this time.
+ */
+ ccm_format_initial_blocks(nonce, nonce_len,
+ auth_data_len, mac_buf, ctx);
+
+ /* The IV for CBC MAC for AES CCM mode is always zero */
+ ivp = (uint8_t *)ctx->ccm_tmp;
+ bzero(ivp, block_size);
+
+ xor_block(ivp, mac_buf);
+
+ /* encrypt the nonce */
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ /* take care of the associated data, if any */
+ if (auth_data_len == 0) {
+ return (CRYPTO_SUCCESS);
+ }
+
+ encode_adata_len(auth_data_len, encoded_a, &encoded_a_len);
+
+ remainder = auth_data_len;
+
+ /* 1st block: it contains encoded associated data, and some data */
+ authp = (uint8_t *)ctx->ccm_tmp;
+ bzero(authp, block_size);
+ bcopy(encoded_a, authp, encoded_a_len);
+ processed = block_size - encoded_a_len;
+ if (processed > auth_data_len) {
+ /* in case auth_data is very small */
+ processed = auth_data_len;
+ }
+ bcopy(auth_data, authp+encoded_a_len, processed);
+ /* xor with previous buffer */
+ xor_block(authp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+ remainder -= processed;
+ if (remainder == 0) {
+ /* a small amount of associated data, it's all done now */
+ return (CRYPTO_SUCCESS);
+ }
+
+ do {
+ if (remainder < block_size) {
+ /*
+ * There's not a block full of data, pad rest of
+ * buffer with zero
+ */
+ bzero(authp, block_size);
+ bcopy(&(auth_data[processed]), authp, remainder);
+ datap = (uint8_t *)authp;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(auth_data[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+
+ xor_block(datap, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ } while (remainder > 0);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * The following function should be called at encrypt or decrypt init time
+ * for AES CCM mode.
+ */
+int
+ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag,
+ boolean_t is_encrypt_init, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_CCM_PARAMS *ccm_param;
+
+ if (param != NULL) {
+ ccm_param = (CK_AES_CCM_PARAMS *)param;
+
+ if ((rv = ccm_validate_args(ccm_param,
+ is_encrypt_init)) != 0) {
+ return (rv);
+ }
+
+ ccm_ctx->ccm_mac_len = ccm_param->ulMACSize;
+ if (is_encrypt_init) {
+ ccm_ctx->ccm_data_len = ccm_param->ulDataSize;
+ } else {
+ ccm_ctx->ccm_data_len =
+ ccm_param->ulDataSize - ccm_ctx->ccm_mac_len;
+ ccm_ctx->ccm_processed_mac_len = 0;
+ }
+ ccm_ctx->ccm_processed_data_len = 0;
+
+ ccm_ctx->ccm_flags |= CCM_MODE;
+ } else {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ if (ccm_init(ccm_ctx, ccm_param->nonce, ccm_param->ulNonceSize,
+ ccm_param->authData, ccm_param->ulAuthDataSize, block_size,
+ encrypt_block, xor_block) != 0) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ if (!is_encrypt_init) {
+ /* allocate buffer for storing decrypted plaintext */
+ ccm_ctx->ccm_pt_buf = vmem_alloc(ccm_ctx->ccm_data_len,
+ kmflag);
+ if (ccm_ctx->ccm_pt_buf == NULL) {
+ rv = CRYPTO_HOST_MEMORY;
+ }
+ }
+ return (rv);
+}
+
+void *
+ccm_alloc_ctx(int kmflag)
+{
+ ccm_ctx_t *ccm_ctx;
+
+ if ((ccm_ctx = kmem_zalloc(sizeof (ccm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ ccm_ctx->ccm_flags = CCM_MODE;
+ return (ccm_ctx);
+}
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
new file mode 100644
index 000000000000..0188bdd395ff
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
@@ -0,0 +1,228 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/byteorder.h>
+
+/*
+ * Encrypt and decrypt multiple blocks of data in counter mode.
+ */
+int
+ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t lower_counter, upper_counter;
+
+ if (length + ctx->ctr_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len,
+ length);
+ ctx->ctr_remainder_len += length;
+ ctx->ctr_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->ctr_cb;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ctr_remainder_len > 0) {
+ need = block_size - ctx->ctr_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ctr_remainder)
+ [ctx->ctr_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ctr_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /* ctr_cb is the counter block */
+ cipher(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
+ (uint8_t *)ctx->ctr_tmp);
+
+ lastp = (uint8_t *)ctx->ctr_tmp;
+
+ /*
+ * Increment Counter.
+ */
+ lower_counter = ntohll(ctx->ctr_cb[1] & ctx->ctr_lower_mask);
+ lower_counter = htonll(lower_counter + 1);
+ lower_counter &= ctx->ctr_lower_mask;
+ ctx->ctr_cb[1] = (ctx->ctr_cb[1] & ~(ctx->ctr_lower_mask)) |
+ lower_counter;
+
+ /* wrap around */
+ if (lower_counter == 0) {
+ upper_counter =
+ ntohll(ctx->ctr_cb[0] & ctx->ctr_upper_mask);
+ upper_counter = htonll(upper_counter + 1);
+ upper_counter &= ctx->ctr_upper_mask;
+ ctx->ctr_cb[0] =
+ (ctx->ctr_cb[0] & ~(ctx->ctr_upper_mask)) |
+ upper_counter;
+ }
+
+ /*
+ * XOR encrypted counter block with the current clear block.
+ */
+ xor_block(blockp, lastp);
+
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ctr_remainder_len != 0) {
+ datap += need;
+ ctx->ctr_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ctr_remainder, remainder);
+ ctx->ctr_remainder_len = remainder;
+ ctx->ctr_copy_to = datap;
+ goto out;
+ }
+ ctx->ctr_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+{
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint8_t *p;
+ int i;
+
+ if (out->cd_length < ctx->ctr_remainder_len)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ encrypt_block(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
+ (uint8_t *)ctx->ctr_tmp);
+
+ lastp = (uint8_t *)ctx->ctr_tmp;
+ p = (uint8_t *)ctx->ctr_remainder;
+ for (i = 0; i < ctx->ctr_remainder_len; i++) {
+ p[i] ^= lastp[i];
+ }
+
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, ctx->ctr_remainder_len);
+
+ bcopy(p, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy((uint8_t *)p + out_data_1_len,
+ out_data_2, ctx->ctr_remainder_len - out_data_1_len);
+ }
+ out->cd_offset += ctx->ctr_remainder_len;
+ ctx->ctr_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb,
+ void (*copy_block)(uint8_t *, uint8_t *))
+{
+ uint64_t upper_mask = 0;
+ uint64_t lower_mask = 0;
+
+ if (count == 0 || count > 128) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ /* upper 64 bits of the mask */
+ if (count >= 64) {
+ count -= 64;
+ upper_mask = (count == 64) ? UINT64_MAX : (1ULL << count) - 1;
+ lower_mask = UINT64_MAX;
+ } else {
+ /* now the lower 63 bits */
+ lower_mask = (1ULL << count) - 1;
+ }
+ ctr_ctx->ctr_lower_mask = htonll(lower_mask);
+ ctr_ctx->ctr_upper_mask = htonll(upper_mask);
+
+ copy_block(cb, (uchar_t *)ctr_ctx->ctr_cb);
+ ctr_ctx->ctr_lastp = (uint8_t *)&ctr_ctx->ctr_cb[0];
+ ctr_ctx->ctr_flags |= CTR_MODE;
+ return (CRYPTO_SUCCESS);
+}
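+
+/*
+ * For example, the common case of count = 32 counter bits leaves
+ * upper_mask = 0 and lower_mask = 0x00000000ffffffff, while count = 96
+ * yields upper_mask = 0x00000000ffffffff and lower_mask = UINT64_MAX.
+ * Both masks are stored big-endian so they line up with the counter
+ * block as it sits in memory.
+ */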
+
+/* ARGSUSED */
+void *
+ctr_alloc_ctx(int kmflag)
+{
+ ctr_ctx_t *ctr_ctx;
+
+ if ((ctr_ctx = kmem_zalloc(sizeof (ctr_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ ctr_ctx->ctr_flags = CTR_MODE;
+ return (ctr_ctx);
+}
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ecb.c b/sys/contrib/openzfs/module/icp/algs/modes/ecb.c
new file mode 100644
index 000000000000..025f5825cf04
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ecb.c
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Algorithm independent ECB functions.
+ */
+int
+ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+
+ if (length + ctx->ecb_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len,
+ length);
+ ctx->ecb_remainder_len += length;
+ ctx->ecb_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->ecb_iv;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ecb_remainder_len > 0) {
+ need = block_size - ctx->ecb_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ecb_remainder)
+ [ctx->ecb_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ecb_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ cipher(ctx->ecb_keysched, blockp, lastp);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ecb_remainder_len != 0) {
+ datap += need;
+ ctx->ecb_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ecb_remainder, remainder);
+ ctx->ecb_remainder_len = remainder;
+ ctx->ecb_copy_to = datap;
+ goto out;
+ }
+ ctx->ecb_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+void *
+ecb_alloc_ctx(int kmflag)
+{
+ ecb_ctx_t *ecb_ctx;
+
+ if ((ecb_ctx = kmem_zalloc(sizeof (ecb_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ ecb_ctx->ecb_flags = ECB_MODE;
+ return (ecb_ctx);
+}
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
new file mode 100644
index 000000000000..5553c55e11cd
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
@@ -0,0 +1,1543 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/icp.h>
+#include <sys/crypto/impl.h>
+#include <sys/byteorder.h>
+#include <sys/simd.h>
+#include <modes/gcm_impl.h>
+#ifdef CAN_USE_GCM_ASM
+#include <aes/aes_impl.h>
+#endif
+
+#define GHASH(c, d, t, o) \
+ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
+ (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
+ (uint64_t *)(void *)(t));
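+
+/*
+ * The macro above performs one GHASH step, i.e. the recurrence
+ *	Y_i = (Y_(i-1) XOR X_i) * H
+ * with carry-less multiplication in GF(2^128): Y lives in (c)->gcm_ghash,
+ * the 16-byte input block is d, and the product is written to t through
+ * the selected implementation's mul() hook.
+ */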
+
+/* Select GCM implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX-1)
+#ifdef CAN_USE_GCM_ASM
+#define IMPL_AVX (UINT32_MAX-2)
+#endif
+#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
+static uint32_t icp_gcm_impl = IMPL_FASTEST;
+static uint32_t user_sel_impl = IMPL_FASTEST;
+
+#ifdef CAN_USE_GCM_ASM
+/* Does the architecture we run on support the MOVBE instruction? */
+boolean_t gcm_avx_can_use_movbe = B_FALSE;
+/*
+ * Whether to use the optimized openssl gcm and ghash implementations.
+ * Set to true if module parameter icp_gcm_impl == "avx".
+ */
+static boolean_t gcm_use_avx = B_FALSE;
+#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx)
+
+static inline boolean_t gcm_avx_will_work(void);
+static inline void gcm_set_avx(boolean_t);
+static inline boolean_t gcm_toggle_avx(void);
+extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+
+static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t);
+
+static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
+static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
+static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
+ size_t, size_t);
+#endif /* ifdef CAN_USE_GCM_ASM */
+
+/*
+ * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
+ * is done in another function.
+ */
+int
+gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+#ifdef CAN_USE_GCM_ASM
+ if (ctx->gcm_use_avx == B_TRUE)
+ return (gcm_mode_encrypt_contiguous_blocks_avx(
+ ctx, data, length, out, block_size));
+#endif
+
+ const gcm_impl_ops_t *gops;
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+
+ if (length + ctx->gcm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
+ length);
+ ctx->gcm_remainder_len += length;
+ if (ctx->gcm_copy_to == NULL) {
+ ctx->gcm_copy_to = datap;
+ }
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->gcm_cb;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ gops = gcm_impl_get_ops();
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->gcm_remainder_len > 0) {
+ need = block_size - ctx->gcm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
+ [ctx->gcm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->gcm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /*
+ * Increment counter. Counter bits are confined
+ * to the bottom 32 bits of the counter block.
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
+ (uint8_t *)ctx->gcm_tmp);
+ xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
+
+ lastp = (uint8_t *)ctx->gcm_tmp;
+
+ ctx->gcm_processed_data_len += block_size;
+
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->gcm_remainder_len != 0) {
+ datap += need;
+ ctx->gcm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->gcm_remainder, remainder);
+ ctx->gcm_remainder_len = remainder;
+ ctx->gcm_copy_to = datap;
+ goto out;
+ }
+ ctx->gcm_copy_to = NULL;
+
+ } while (remainder > 0);
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+int
+gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+#ifdef CAN_USE_GCM_ASM
+ if (ctx->gcm_use_avx == B_TRUE)
+ return (gcm_encrypt_final_avx(ctx, out, block_size));
+#endif
+
+ const gcm_impl_ops_t *gops;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ uint8_t *ghash, *macp = NULL;
+ int i, rv;
+
+ if (out->cd_length <
+ (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ gops = gcm_impl_get_ops();
+ ghash = (uint8_t *)ctx->gcm_ghash;
+
+ if (ctx->gcm_remainder_len > 0) {
+ uint64_t counter;
+ uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
+
+ /*
+ * Here is where we deal with data that is not a
+ * multiple of the block size.
+ */
+
+ /*
+ * Increment counter.
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
+ (uint8_t *)ctx->gcm_tmp);
+
+ macp = (uint8_t *)ctx->gcm_remainder;
+ bzero(macp + ctx->gcm_remainder_len,
+ block_size - ctx->gcm_remainder_len);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->gcm_remainder_len; i++) {
+ macp[i] ^= tmpp[i];
+ }
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, macp, ghash, gops);
+
+ ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
+ }
+
+ ctx->gcm_len_a_len_c[1] =
+ htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
+ GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
+ (uint8_t *)ctx->gcm_J0);
+ xor_block((uint8_t *)ctx->gcm_J0, ghash);
+
+ if (ctx->gcm_remainder_len > 0) {
+ rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ out->cd_offset += ctx->gcm_remainder_len;
+ ctx->gcm_remainder_len = 0;
+ rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += ctx->gcm_tag_len;
+
+ return (CRYPTO_SUCCESS);
+}
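+
+/*
+ * For reference, the tag emitted above is the standard GCM value
+ *	T = GHASH(H, A, C) XOR E_K(J_0)
+ * truncated to gcm_tag_len bytes, with the AAD and ciphertext bit lengths
+ * folded into the hash via gcm_len_a_len_c just before the final XOR.
+ */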
+
+/*
+ * This will only deal with decrypting the last block of the input that
+ * might not be a multiple of block length.
+ */
+static void
+gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *datap, *outp, *counterp;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int i;
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 32 bits
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ datap = (uint8_t *)ctx->gcm_remainder;
+ outp = &((ctx->gcm_pt_buf)[index]);
+ counterp = (uint8_t *)ctx->gcm_tmp;
+
+ /* authentication tag */
+ bzero((uint8_t *)ctx->gcm_tmp, block_size);
+ bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
+
+ /* decrypt remaining ciphertext */
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->gcm_remainder_len; i++) {
+ outp[i] = datap[i] ^ counterp[i];
+ }
+}
+
+/* ARGSUSED */
+int
+gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t new_len;
+ uint8_t *new;
+
+ /*
+ * Copy contiguous ciphertext input blocks to plaintext buffer.
+ * Ciphertext will be decrypted in the final.
+ */
+ if (length > 0) {
+ new_len = ctx->gcm_pt_buf_len + length;
+ new = vmem_alloc(new_len, ctx->gcm_kmflag);
+ if (new == NULL) {
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ ctx->gcm_pt_buf = NULL;
+ return (CRYPTO_HOST_MEMORY);
+ }
+ bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ ctx->gcm_pt_buf = new;
+ ctx->gcm_pt_buf_len = new_len;
+ bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
+ length);
+ ctx->gcm_processed_data_len += length;
+ }
+
+ ctx->gcm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+#ifdef CAN_USE_GCM_ASM
+ if (ctx->gcm_use_avx == B_TRUE)
+ return (gcm_decrypt_final_avx(ctx, out, block_size));
+#endif
+
+ const gcm_impl_ops_t *gops;
+ size_t pt_len;
+ size_t remainder;
+ uint8_t *ghash;
+ uint8_t *blockp;
+ uint8_t *cbp;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int processed = 0, rv;
+
+ ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
+
+ gops = gcm_impl_get_ops();
+ pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ blockp = ctx->gcm_pt_buf;
+ remainder = pt_len;
+ while (remainder > 0) {
+ /* Incomplete last block */
+ if (remainder < block_size) {
+ bcopy(blockp, ctx->gcm_remainder, remainder);
+ ctx->gcm_remainder_len = remainder;
+ /*
+			 * not expecting any more ciphertext, just
+ * compute plaintext for the remaining input
+ */
+ gcm_decrypt_incomplete_block(ctx, block_size,
+ processed, encrypt_block, xor_block);
+ ctx->gcm_remainder_len = 0;
+ goto out;
+ }
+ /* add ciphertext to the hash */
+ GHASH(ctx, blockp, ghash, gops);
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 32 bits
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ cbp = (uint8_t *)ctx->gcm_tmp;
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
+
+ /* XOR with ciphertext */
+ xor_block(cbp, blockp);
+
+ processed += block_size;
+ blockp += block_size;
+ remainder -= block_size;
+ }
+out:
+ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
+ GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
+ (uint8_t *)ctx->gcm_J0);
+ xor_block((uint8_t *)ctx->gcm_J0, ghash);
+
+ /* compare the input authentication tag with what we calculated */
+ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ /* They don't match */
+ return (CRYPTO_INVALID_MAC);
+ } else {
+ rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += pt_len;
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+static int
+gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
+{
+ size_t tag_len;
+
+ /*
+ * Check the length of the authentication tag (in bits).
+ */
+ tag_len = gcm_param->ulTagBits;
+ switch (tag_len) {
+ case 32:
+ case 64:
+ case 96:
+ case 104:
+ case 112:
+ case 120:
+ case 128:
+ break;
+ default:
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ if (gcm_param->ulIvLen == 0)
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+
+ return (CRYPTO_SUCCESS);
+}
+
+static void
+gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
+ gcm_ctx_t *ctx, size_t block_size,
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ const gcm_impl_ops_t *gops;
+ uint8_t *cb;
+ ulong_t remainder = iv_len;
+ ulong_t processed = 0;
+ uint8_t *datap, *ghash;
+ uint64_t len_a_len_c[2];
+
+ gops = gcm_impl_get_ops();
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ cb = (uint8_t *)ctx->gcm_cb;
+ if (iv_len == 12) {
+ bcopy(iv, cb, 12);
+ cb[12] = 0;
+ cb[13] = 0;
+ cb[14] = 0;
+ cb[15] = 1;
+ /* J0 will be used again in the final */
+ copy_block(cb, (uint8_t *)ctx->gcm_J0);
+ } else {
+ /* GHASH the IV */
+ do {
+ if (remainder < block_size) {
+ bzero(cb, block_size);
+ bcopy(&(iv[processed]), cb, remainder);
+ datap = (uint8_t *)cb;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(iv[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+ GHASH(ctx, datap, ghash, gops);
+ } while (remainder > 0);
+
+ len_a_len_c[0] = 0;
+ len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
+ GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
+
+ /* J0 will be used again in the final */
+ copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
+ }
+}
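+
+/*
+ * This is the standard J_0 derivation: for the common 96-bit IV,
+ *	J_0 = IV || 0x00000001
+ * while for any other IV length the IV is hashed in full blocks and
+ * closed with its bit length, i.e. J_0 = GHASH(H, {}, IV).
+ */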
+
+static int
+gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ const gcm_impl_ops_t *gops;
+ uint8_t *ghash, *datap, *authp;
+ size_t remainder, processed;
+
+ /* encrypt zero block to get subkey H */
+ bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
+ (uint8_t *)ctx->gcm_H);
+
+ gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
+ copy_block, xor_block);
+
+ gops = gcm_impl_get_ops();
+ authp = (uint8_t *)ctx->gcm_tmp;
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ bzero(authp, block_size);
+ bzero(ghash, block_size);
+
+ processed = 0;
+ remainder = auth_data_len;
+ do {
+ if (remainder < block_size) {
+ /*
+ * There's not a block full of data, pad rest of
+ * buffer with zero
+ */
+ bzero(authp, block_size);
+ bcopy(&(auth_data[processed]), authp, remainder);
+ datap = (uint8_t *)authp;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(auth_data[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+
+ /* add auth data to the hash */
+ GHASH(ctx, datap, ghash, gops);
+
+ } while (remainder > 0);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * The following function is called at encrypt or decrypt init time
+ * for AES GCM mode.
+ *
+ * Init the GCM context struct. Handle the cycle and avx implementations here.
+ */
+int
+gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_GCM_PARAMS *gcm_param;
+
+ if (param != NULL) {
+ gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
+
+ if ((rv = gcm_validate_args(gcm_param)) != 0) {
+ return (rv);
+ }
+
+ gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
+ gcm_ctx->gcm_tag_len >>= 3;
+ gcm_ctx->gcm_processed_data_len = 0;
+
+ /* these values are in bits */
+ gcm_ctx->gcm_len_a_len_c[0]
+ = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
+
+ rv = CRYPTO_SUCCESS;
+ gcm_ctx->gcm_flags |= GCM_MODE;
+ } else {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+#ifdef CAN_USE_GCM_ASM
+ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
+ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
+ } else {
+ /*
+ * Handle the "cycle" implementation by creating avx and
+ * non-avx contexts alternately.
+ */
+ gcm_ctx->gcm_use_avx = gcm_toggle_avx();
+ /*
+ * We don't handle byte swapped key schedules in the avx
+ * code path.
+ */
+ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
+ if (ks->ops->needs_byteswap == B_TRUE) {
+ gcm_ctx->gcm_use_avx = B_FALSE;
+ }
+ /* Use the MOVBE and the BSWAP variants alternately. */
+ if (gcm_ctx->gcm_use_avx == B_TRUE &&
+ zfs_movbe_available() == B_TRUE) {
+ (void) atomic_toggle_boolean_nv(
+ (volatile boolean_t *)&gcm_avx_can_use_movbe);
+ }
+ }
+	/* AVX and non-AVX context initialization differ from here on. */
+ if (gcm_ctx->gcm_use_avx == B_FALSE) {
+#endif /* ifdef CAN_USE_GCM_ASM */
+ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
+ gcm_param->pAAD, gcm_param->ulAADLen, block_size,
+ encrypt_block, copy_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+#ifdef CAN_USE_GCM_ASM
+ } else {
+ if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
+ gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+ }
+#endif /* ifdef CAN_USE_GCM_ASM */
+
+ return (rv);
+}
+
+int
+gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_GMAC_PARAMS *gmac_param;
+
+ if (param != NULL) {
+ gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
+
+ gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
+ gcm_ctx->gcm_processed_data_len = 0;
+
+ /* these values are in bits */
+ gcm_ctx->gcm_len_a_len_c[0]
+ = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
+
+ rv = CRYPTO_SUCCESS;
+ gcm_ctx->gcm_flags |= GMAC_MODE;
+ } else {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+#ifdef CAN_USE_GCM_ASM
+ /*
+	 * Handle the "cycle" implementation by creating avx and non-avx
+ * contexts alternately.
+ */
+ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
+ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
+ } else {
+ gcm_ctx->gcm_use_avx = gcm_toggle_avx();
+ }
+ /* We don't handle byte swapped key schedules in the avx code path. */
+ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
+ if (ks->ops->needs_byteswap == B_TRUE) {
+ gcm_ctx->gcm_use_avx = B_FALSE;
+ }
+	/* AVX and non-AVX context initialization differ from here on. */
+ if (gcm_ctx->gcm_use_avx == B_FALSE) {
+#endif /* ifdef CAN_USE_GCM_ASM */
+ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
+ gmac_param->pAAD, gmac_param->ulAADLen, block_size,
+ encrypt_block, copy_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+#ifdef CAN_USE_GCM_ASM
+ } else {
+ if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
+ gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+ }
+#endif /* ifdef CAN_USE_GCM_ASM */
+
+ return (rv);
+}
+
+void *
+gcm_alloc_ctx(int kmflag)
+{
+ gcm_ctx_t *gcm_ctx;
+
+ if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ gcm_ctx->gcm_flags = GCM_MODE;
+ return (gcm_ctx);
+}
+
+void *
+gmac_alloc_ctx(int kmflag)
+{
+ gcm_ctx_t *gcm_ctx;
+
+ if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ gcm_ctx->gcm_flags = GMAC_MODE;
+ return (gcm_ctx);
+}
+
+void
+gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
+{
+ ctx->gcm_kmflag = kmflag;
+}
+
+/* GCM implementation that contains the fastest methods */
+static gcm_impl_ops_t gcm_fastest_impl = {
+ .name = "fastest"
+};
+
+/* All compiled in implementations */
+const gcm_impl_ops_t *gcm_all_impl[] = {
+ &gcm_generic_impl,
+#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
+ &gcm_pclmulqdq_impl,
+#endif
+};
+
+/* Indicate that implementation initialization has been completed */
+static boolean_t gcm_impl_initialized = B_FALSE;
+
+/* Hold all supported implementations */
+static size_t gcm_supp_impl_cnt = 0;
+static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
+
+/*
+ * Returns the GCM operations for encrypt/decrypt/key setup. When a
+ * SIMD implementation is not allowed in the current context, fall back
+ * to the generic implementation.
+ */
+const gcm_impl_ops_t *
+gcm_impl_get_ops()
+{
+ if (!kfpu_allowed())
+ return (&gcm_generic_impl);
+
+ const gcm_impl_ops_t *ops = NULL;
+ const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(gcm_impl_initialized);
+ ops = &gcm_fastest_impl;
+ break;
+ case IMPL_CYCLE:
+ /* Cycle through supported implementations */
+ ASSERT(gcm_impl_initialized);
+ ASSERT3U(gcm_supp_impl_cnt, >, 0);
+ static size_t cycle_impl_idx = 0;
+ size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
+ ops = gcm_supp_impl[idx];
+ break;
+#ifdef CAN_USE_GCM_ASM
+ case IMPL_AVX:
+ /*
+ * Make sure that we return a valid implementation while
+ * switching to the avx implementation since there still
+ * may be unfinished non-avx contexts around.
+ */
+ ops = &gcm_generic_impl;
+ break;
+#endif
+ default:
+ ASSERT3U(impl, <, gcm_supp_impl_cnt);
+ ASSERT3U(gcm_supp_impl_cnt, >, 0);
+ if (impl < ARRAY_SIZE(gcm_all_impl))
+ ops = gcm_supp_impl[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+/*
+ * Initialize all supported implementations.
+ */
+void
+gcm_impl_init(void)
+{
+ gcm_impl_ops_t *curr_impl;
+ int i, c;
+
+	/* Move supported implementations into gcm_supp_impl */
+ for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
+ curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
+
+ if (curr_impl->is_supported())
+ gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
+ }
+ gcm_supp_impl_cnt = c;
+
+ /*
+ * Set the fastest implementation given the assumption that the
+ * hardware accelerated version is the fastest.
+ */
+#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
+ if (gcm_pclmulqdq_impl.is_supported()) {
+ memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
+ sizeof (gcm_fastest_impl));
+ } else
+#endif
+ {
+ memcpy(&gcm_fastest_impl, &gcm_generic_impl,
+ sizeof (gcm_fastest_impl));
+ }
+
+ strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
+
+#ifdef CAN_USE_GCM_ASM
+ /*
+ * Use the avx implementation if it's available and the implementation
+ * hasn't changed from its default value of fastest on module load.
+ */
+ if (gcm_avx_will_work()) {
+#ifdef HAVE_MOVBE
+ if (zfs_movbe_available() == B_TRUE) {
+ atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
+ }
+#endif
+ if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
+ gcm_set_avx(B_TRUE);
+ }
+ }
+#endif
+ /* Finish initialization */
+ atomic_swap_32(&icp_gcm_impl, user_sel_impl);
+ gcm_impl_initialized = B_TRUE;
+}
+
+static const struct {
+ char *name;
+ uint32_t sel;
+} gcm_impl_opts[] = {
+ { "cycle", IMPL_CYCLE },
+ { "fastest", IMPL_FASTEST },
+#ifdef CAN_USE_GCM_ASM
+ { "avx", IMPL_AVX },
+#endif
+};
+
+/*
+ * Set the desired gcm implementation.
+ *
+ * If we are called before init(), the user preference is saved in
+ * user_sel_impl and applied in a later init() call. This occurs when the
+ * module parameter is specified on module load. Otherwise, icp_gcm_impl
+ * is updated directly.
+ *
+ * @val Name of gcm implementation to use
+ * @param Unused.
+ */
+int
+gcm_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ char req_name[GCM_IMPL_NAME_MAX];
+ uint32_t impl = GCM_IMPL_READ(user_sel_impl);
+ size_t i;
+
+ /* sanitize input */
+ i = strnlen(val, GCM_IMPL_NAME_MAX);
+ if (i == 0 || i >= GCM_IMPL_NAME_MAX)
+ return (err);
+
+ strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
+ while (i > 0 && isspace(req_name[i-1]))
+ i--;
+ req_name[i] = '\0';
+
+ /* Check mandatory options */
+ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
+#ifdef CAN_USE_GCM_ASM
+ /* Ignore avx implementation if it won't work. */
+ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
+ continue;
+ }
+#endif
+ if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
+ impl = gcm_impl_opts[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported impl if init() was already called */
+ if (err != 0 && gcm_impl_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < gcm_supp_impl_cnt; i++) {
+ if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+#ifdef CAN_USE_GCM_ASM
+ /*
+ * Use the avx implementation if available and the requested one is
+ * avx or fastest.
+ */
+ if (gcm_avx_will_work() == B_TRUE &&
+ (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
+ gcm_set_avx(B_TRUE);
+ } else {
+ gcm_set_avx(B_FALSE);
+ }
+#endif
+
+ if (err == 0) {
+ if (gcm_impl_initialized)
+ atomic_swap_32(&icp_gcm_impl, impl);
+ else
+ atomic_swap_32(&user_sel_impl, impl);
+ }
+
+ return (err);
+}
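+
+/*
+ * Illustrative sketch (hypothetical usage, not part of the imported
+ * sources): on Linux the handlers below expose this setter as the module
+ * parameter icp_gcm_impl (typically reachable via
+ * /sys/module/icp/parameters/icp_gcm_impl); in-kernel consumers may also
+ * call gcm_impl_set() directly, as in the disabled example below.
+ */
+#if 0
+/* Illustration only. */
+static void
+gcm_impl_select_sketch(void)
+{
+	/* gcm_impl_set() returns 0 on success, -EINVAL for unusable names. */
+	if (gcm_impl_set("avx") != 0)
+		(void) gcm_impl_set("fastest");
+}
+#endif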
+
+#if defined(_KERNEL) && defined(__linux__)
+
+static int
+icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
+{
+ return (gcm_impl_set(val));
+}
+
+static int
+icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+ int i, cnt = 0;
+ char *fmt;
+ const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
+
+ ASSERT(gcm_impl_initialized);
+
+ /* list mandatory options */
+ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
+#ifdef CAN_USE_GCM_ASM
+ /* Ignore avx implementation if it won't work. */
+ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
+ continue;
+ }
+#endif
+ fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
+ }
+
+ /* list all supported implementations */
+ for (i = 0; i < gcm_supp_impl_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
+ }
+
+ return (cnt);
+}
+
+module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
+ NULL, 0644);
+MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
+#endif /* defined(_KERNEL) && defined(__linux__) */
+
+#ifdef CAN_USE_GCM_ASM
+#define GCM_BLOCK_LEN 16
+/*
+ * The openssl asm routines are 6x aggregated and need that many bytes
+ * at minimum.
+ */
+#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
+#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
+/*
+ * Ensure the chunk size is reasonable since we are allocating a buffer of
+ * GCM_AVX_MAX_CHUNK_SIZE bytes and disabling preemption and interrupts.
+ */
+#define GCM_AVX_MAX_CHUNK_SIZE \
+ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
+
+/* Get the chunk size module parameter. */
+#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
+
+/* Clear the FPU registers since they hold sensitive internal state. */
+#define clear_fpu_regs() clear_fpu_regs_avx()
+#define GHASH_AVX(ctx, in, len) \
+ gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \
+ in, len)
+
+#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
+
+/*
+ * Module parameter: number of bytes to process at once while owning the FPU.
+ * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
+ * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
+ */
+static uint32_t gcm_avx_chunk_size =
+ ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
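+
+/*
+ * Illustrative sketch (hypothetical helper, not part of the imported
+ * sources): with GCM_AVX_MIN_DECRYPT_BYTES == 96 the default above works
+ * out to (32768 / 96) * 96 = 32736 bytes, and GCM_AVX_MAX_CHUNK_SIZE to
+ * (131072 / 96) * 96 = 131040 bytes. The disabled helper below shows the
+ * same round-down step the chunk size module parameter handler applies to
+ * a user supplied value.
+ */
+#if 0
+/* Illustration only. */
+static unsigned long
+gcm_avx_chunk_round_down_sketch(unsigned long requested)
+{
+	/* Round down to a whole multiple of GCM_AVX_MIN_DECRYPT_BYTES. */
+	return ((requested / GCM_AVX_MIN_DECRYPT_BYTES) *
+	    GCM_AVX_MIN_DECRYPT_BYTES);
+}
+#endif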
+
+extern void clear_fpu_regs_avx(void);
+extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
+extern void aes_encrypt_intel(const uint32_t rk[], int nr,
+ const uint32_t pt[4], uint32_t ct[4]);
+
+extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]);
+extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2],
+ const uint8_t *in, size_t len);
+
+extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
+ const void *, uint64_t *, uint64_t *);
+
+extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
+ const void *, uint64_t *, uint64_t *);
+
+static inline boolean_t
+gcm_avx_will_work(void)
+{
+ /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
+ return (kfpu_allowed() &&
+ zfs_avx_available() && zfs_aes_available() &&
+ zfs_pclmulqdq_available());
+}
+
+static inline void
+gcm_set_avx(boolean_t val)
+{
+ if (gcm_avx_will_work() == B_TRUE) {
+ atomic_swap_32(&gcm_use_avx, val);
+ }
+}
+
+static inline boolean_t
+gcm_toggle_avx(void)
+{
+ if (gcm_avx_will_work() == B_TRUE) {
+ return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
+ } else {
+ return (B_FALSE);
+ }
+}
+
+/*
+ * Clear sensitive data in the context.
+ *
+ * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
+ * ctx->gcm_Htable contain the hash sub key which protects authentication.
+ *
+ * Although it is extremely unlikely that ctx->gcm_J0 and ctx->gcm_tmp could
+ * be used for a known plaintext attack, they consist of the IV and the first
+ * and last counter block respectively. Whether they should be cleared is
+ * debatable.
+ */
+static inline void
+gcm_clear_ctx(gcm_ctx_t *ctx)
+{
+ bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
+ bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
+ bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable));
+ bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
+ bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
+}
+
+/* Increment the GCM counter block by n. */
+static inline void
+gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
+{
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+
+ counter = htonll(counter + n);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+}
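+
+/*
+ * Illustrative sketch (hypothetical helper, not part of the imported
+ * sources): the routine above treats the last four bytes of the 16 byte
+ * counter block as a big-endian 32-bit counter and increments it modulo
+ * 2^32, leaving the leading 12 bytes untouched. A byte-wise equivalent,
+ * disabled below.
+ */
+#if 0
+/* Illustration only. */
+static void
+gcm_ctr32_increment_sketch(uint8_t block[16], uint32_t n)
+{
+	uint32_t ctr;
+
+	/* Load the big-endian counter from bytes 12..15. */
+	ctr = ((uint32_t)block[12] << 24) | ((uint32_t)block[13] << 16) |
+	    ((uint32_t)block[14] << 8) | (uint32_t)block[15];
+	ctr += n;	/* wraps modulo 2^32, like the masking above */
+	/* Store it back in big-endian order. */
+	block[12] = (uint8_t)(ctr >> 24);
+	block[13] = (uint8_t)(ctr >> 16);
+	block[14] = (uint8_t)(ctr >> 8);
+	block[15] = (uint8_t)ctr;
+}
+#endif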
+
+/*
+ * Encrypt multiple blocks of data in GCM mode.
+ * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
+ * if possible. While processing a chunk the FPU is "locked".
+ */
+static int
+gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
+ size_t length, crypto_data_t *out, size_t block_size)
+{
+ size_t bleft = length;
+ size_t need = 0;
+ size_t done = 0;
+ uint8_t *datap = (uint8_t *)data;
+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
+ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
+ uint64_t *ghash = ctx->gcm_ghash;
+ uint64_t *cb = ctx->gcm_cb;
+ uint8_t *ct_buf = NULL;
+ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
+ int rv = CRYPTO_SUCCESS;
+
+ ASSERT(block_size == GCM_BLOCK_LEN);
+ /*
+ * If the last call left an incomplete block, try to fill
+ * it first.
+ */
+ if (ctx->gcm_remainder_len > 0) {
+ need = block_size - ctx->gcm_remainder_len;
+ if (length < need) {
+ /* Accumulate bytes here and return. */
+ bcopy(datap, (uint8_t *)ctx->gcm_remainder +
+ ctx->gcm_remainder_len, length);
+
+ ctx->gcm_remainder_len += length;
+ if (ctx->gcm_copy_to == NULL) {
+ ctx->gcm_copy_to = datap;
+ }
+ return (CRYPTO_SUCCESS);
+ } else {
+ /* Complete incomplete block. */
+ bcopy(datap, (uint8_t *)ctx->gcm_remainder +
+ ctx->gcm_remainder_len, need);
+
+ ctx->gcm_copy_to = NULL;
+ }
+ }
+
+ /* Allocate a buffer to encrypt to if there is enough input. */
+ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
+ ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
+ if (ct_buf == NULL) {
+ return (CRYPTO_HOST_MEMORY);
+ }
+ }
+
+ /* If we completed an incomplete block, encrypt and write it out. */
+ if (ctx->gcm_remainder_len > 0) {
+ kfpu_begin();
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr,
+ (const uint32_t *)cb, (uint32_t *)tmp);
+
+ gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
+ GHASH_AVX(ctx, tmp, block_size);
+ clear_fpu_regs();
+ kfpu_end();
+ rv = crypto_put_output_data(tmp, out, block_size);
+ out->cd_offset += block_size;
+ gcm_incr_counter_block(ctx);
+ ctx->gcm_processed_data_len += block_size;
+ bleft -= need;
+ datap += need;
+ ctx->gcm_remainder_len = 0;
+ }
+
+ /* Do the bulk encryption in chunk_size blocks. */
+ for (; bleft >= chunk_size; bleft -= chunk_size) {
+ kfpu_begin();
+ done = aesni_gcm_encrypt(
+ datap, ct_buf, chunk_size, key, cb, ghash);
+
+ clear_fpu_regs();
+ kfpu_end();
+ if (done != chunk_size) {
+ rv = CRYPTO_FAILED;
+ goto out_nofpu;
+ }
+ rv = crypto_put_output_data(ct_buf, out, chunk_size);
+ if (rv != CRYPTO_SUCCESS) {
+ goto out_nofpu;
+ }
+ out->cd_offset += chunk_size;
+ datap += chunk_size;
+ ctx->gcm_processed_data_len += chunk_size;
+ }
+ /* Check if we are already done. */
+ if (bleft == 0) {
+ goto out_nofpu;
+ }
+ /* Bulk encrypt the remaining data. */
+ kfpu_begin();
+ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
+ done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
+ if (done == 0) {
+ rv = CRYPTO_FAILED;
+ goto out;
+ }
+ rv = crypto_put_output_data(ct_buf, out, done);
+ if (rv != CRYPTO_SUCCESS) {
+ goto out;
+ }
+ out->cd_offset += done;
+ ctx->gcm_processed_data_len += done;
+ datap += done;
+ bleft -= done;
+
+ }
+ /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
+ while (bleft > 0) {
+ if (bleft < block_size) {
+ bcopy(datap, ctx->gcm_remainder, bleft);
+ ctx->gcm_remainder_len = bleft;
+ ctx->gcm_copy_to = datap;
+ goto out;
+ }
+ /* Encrypt, hash and write out. */
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr,
+ (const uint32_t *)cb, (uint32_t *)tmp);
+
+ gcm_xor_avx(datap, tmp);
+ GHASH_AVX(ctx, tmp, block_size);
+ rv = crypto_put_output_data(tmp, out, block_size);
+ if (rv != CRYPTO_SUCCESS) {
+ goto out;
+ }
+ out->cd_offset += block_size;
+ gcm_incr_counter_block(ctx);
+ ctx->gcm_processed_data_len += block_size;
+ datap += block_size;
+ bleft -= block_size;
+ }
+out:
+ clear_fpu_regs();
+ kfpu_end();
+out_nofpu:
+ if (ct_buf != NULL) {
+ vmem_free(ct_buf, chunk_size);
+ }
+ return (rv);
+}
+
+/*
+ * Finalize the encryption: Zero fill, encrypt, hash and write out any
+ * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
+ */
+static int
+gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
+{
+ uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
+ uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
+ uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
+ size_t rem_len = ctx->gcm_remainder_len;
+ const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
+ int aes_rounds = ((aes_key_t *)keysched)->nr;
+ int rv;
+
+ ASSERT(block_size == GCM_BLOCK_LEN);
+
+ if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ kfpu_begin();
+ /* Pad last incomplete block with zeros, encrypt and hash. */
+ if (rem_len > 0) {
+ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
+ const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
+
+ aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
+ bzero(remainder + rem_len, block_size - rem_len);
+ for (int i = 0; i < rem_len; i++) {
+ remainder[i] ^= tmp[i];
+ }
+ GHASH_AVX(ctx, remainder, block_size);
+ ctx->gcm_processed_data_len += rem_len;
+ /* No need to increment counter_block, it's the last block. */
+ }
+ /* Finish tag. */
+ ctx->gcm_len_a_len_c[1] =
+ htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
+ GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
+ aes_encrypt_intel(keysched, aes_rounds, J0, J0);
+
+ gcm_xor_avx((uint8_t *)J0, ghash);
+ clear_fpu_regs();
+ kfpu_end();
+
+ /* Output remainder. */
+ if (rem_len > 0) {
+ rv = crypto_put_output_data(remainder, out, rem_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ out->cd_offset += rem_len;
+ ctx->gcm_remainder_len = 0;
+ rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+
+ out->cd_offset += ctx->gcm_tag_len;
+ /* Clear sensitive data in the context before returning. */
+ gcm_clear_ctx(ctx);
+ return (CRYPTO_SUCCESS);
+}
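+
+/*
+ * Illustrative sketch (hypothetical helper, not part of the imported
+ * sources): the last GHASH input above is the standard GCM "length block",
+ * len(AAD) and len(ciphertext) each expressed in bits as a big-endian
+ * 64-bit value; the tag is then E(K, J0) XOR GHASH, truncated to
+ * gcm_tag_len bytes. The disabled helper below shows the layout of that
+ * length block.
+ */
+#if 0
+/* Illustration only. */
+static void
+gcm_length_block_sketch(uint64_t len_block[2], size_t aad_bytes,
+    size_t ct_bytes)
+{
+	/* Both lengths are stored in bits, big-endian. */
+	len_block[0] = htonll((uint64_t)aad_bytes << 3);
+	len_block[1] = htonll((uint64_t)ct_bytes << 3);
+}
+#endif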
+
+/*
+ * Finalize the decryption: So far we have only accumulated the ciphertext,
+ * so now we decrypt it here in place.
+ */
+static int
+gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
+{
+ ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
+ ASSERT3U(block_size, ==, 16);
+
+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
+ size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
+ uint8_t *datap = ctx->gcm_pt_buf;
+ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
+ uint32_t *cb = (uint32_t *)ctx->gcm_cb;
+ uint64_t *ghash = ctx->gcm_ghash;
+ uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
+ int rv = CRYPTO_SUCCESS;
+ size_t bleft, done;
+
+ /*
+ * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
+	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
+ * GCM_AVX_MIN_DECRYPT_BYTES.
+ */
+ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
+ kfpu_begin();
+ done = aesni_gcm_decrypt(datap, datap, chunk_size,
+ (const void *)key, ctx->gcm_cb, ghash);
+ clear_fpu_regs();
+ kfpu_end();
+ if (done != chunk_size) {
+ return (CRYPTO_FAILED);
+ }
+ datap += done;
+ }
+	/* Decrypt remainder, which is less than chunk size, in one go. */
+ kfpu_begin();
+ if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
+ done = aesni_gcm_decrypt(datap, datap, bleft,
+ (const void *)key, ctx->gcm_cb, ghash);
+ if (done == 0) {
+ clear_fpu_regs();
+ kfpu_end();
+ return (CRYPTO_FAILED);
+ }
+ datap += done;
+ bleft -= done;
+ }
+ ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
+
+ /*
+	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
+ * decrypt them block by block.
+ */
+ while (bleft > 0) {
+ /* Incomplete last block. */
+ if (bleft < block_size) {
+ uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
+
+ bzero(lastb, block_size);
+ bcopy(datap, lastb, bleft);
+ /* The GCM processing. */
+ GHASH_AVX(ctx, lastb, block_size);
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
+ for (size_t i = 0; i < bleft; i++) {
+ datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
+ }
+ break;
+ }
+ /* The GCM processing. */
+ GHASH_AVX(ctx, datap, block_size);
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
+ gcm_xor_avx((uint8_t *)tmp, datap);
+ gcm_incr_counter_block(ctx);
+
+ datap += block_size;
+ bleft -= block_size;
+ }
+ if (rv != CRYPTO_SUCCESS) {
+ clear_fpu_regs();
+ kfpu_end();
+ return (rv);
+ }
+ /* Decryption done, finish the tag. */
+ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
+ GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
+ (uint32_t *)ctx->gcm_J0);
+
+ gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
+
+ /* We are done with the FPU, restore its state. */
+ clear_fpu_regs();
+ kfpu_end();
+
+ /* Compare the input authentication tag with what we calculated. */
+ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ /* They don't match. */
+ return (CRYPTO_INVALID_MAC);
+ }
+ rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS) {
+ return (rv);
+ }
+ out->cd_offset += pt_len;
+ gcm_clear_ctx(ctx);
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Initialize the GCM params H, Htable and the counter block. Save the
+ * initial counter block.
+ */
+static int
+gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size)
+{
+ uint8_t *cb = (uint8_t *)ctx->gcm_cb;
+ uint64_t *H = ctx->gcm_H;
+ const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
+ int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
+ uint8_t *datap = auth_data;
+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
+ size_t bleft;
+
+ ASSERT(block_size == GCM_BLOCK_LEN);
+
+ /* Init H (encrypt zero block) and create the initial counter block. */
+ bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
+ bzero(H, sizeof (ctx->gcm_H));
+ kfpu_begin();
+ aes_encrypt_intel(keysched, aes_rounds,
+ (const uint32_t *)H, (uint32_t *)H);
+
+ gcm_init_htab_avx(ctx->gcm_Htable, H);
+
+ if (iv_len == 12) {
+ bcopy(iv, cb, 12);
+ cb[12] = 0;
+ cb[13] = 0;
+ cb[14] = 0;
+ cb[15] = 1;
+ /* We need the ICB later. */
+ bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
+ } else {
+ /*
+ * Most consumers use 12 byte IVs, so it's OK to use the
+ * original routines for other IV sizes, just avoid nesting
+ * kfpu_begin calls.
+ */
+ clear_fpu_regs();
+ kfpu_end();
+ gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
+ aes_copy_block, aes_xor_block);
+ kfpu_begin();
+ }
+
+	/* OpenSSL post-increments the counter, adjust for that. */
+ gcm_incr_counter_block(ctx);
+
+ /* Ghash AAD in chunk_size blocks. */
+ for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
+ GHASH_AVX(ctx, datap, chunk_size);
+ datap += chunk_size;
+ clear_fpu_regs();
+ kfpu_end();
+ kfpu_begin();
+ }
+ /* Ghash the remainder and handle possible incomplete GCM block. */
+ if (bleft > 0) {
+ size_t incomp = bleft % block_size;
+
+ bleft -= incomp;
+ if (bleft > 0) {
+ GHASH_AVX(ctx, datap, bleft);
+ datap += bleft;
+ }
+ if (incomp > 0) {
+ /* Zero pad and hash incomplete last block. */
+ uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
+
+ bzero(authp, block_size);
+ bcopy(datap, authp, incomp);
+ GHASH_AVX(ctx, authp, block_size);
+ }
+ }
+ clear_fpu_regs();
+ kfpu_end();
+ return (CRYPTO_SUCCESS);
+}
+
+#if defined(_KERNEL)
+static int
+icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
+{
+ unsigned long val;
+ char val_rounded[16];
+ int error = 0;
+
+ error = kstrtoul(buf, 0, &val);
+ if (error)
+ return (error);
+
+ val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
+
+ if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
+ return (-EINVAL);
+
+ snprintf(val_rounded, 16, "%u", (uint32_t)val);
+ error = param_set_uint(val_rounded, kp);
+ return (error);
+}
+
+module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
+ param_get_uint, &gcm_avx_chunk_size, 0644);
+
+MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
+ "How many bytes to process while owning the FPU");
+
+#endif /* defined(_KERNEL) */
+#endif /* ifdef CAN_USE_GCM_ASM */
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c
new file mode 100644
index 000000000000..16b57998a92f
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c
@@ -0,0 +1,83 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <modes/gcm_impl.h>
+
+struct aes_block {
+ uint64_t a;
+ uint64_t b;
+};
+
+/*
+ * Perform a carry-less multiplication (that is, use XOR instead of the
+ * multiply operator) on *x_in and *y and place the result in *res.
+ *
+ * Byte swap the input (*x_in and *y) and the output (*res).
+ *
+ * Note: x_in, y, and res all point to 16-byte numbers (an array of two
+ * 64-bit integers).
+ */
+static void
+gcm_generic_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
+{
+ static const uint64_t R = 0xe100000000000000ULL;
+ struct aes_block z = {0, 0};
+ struct aes_block v;
+ uint64_t x;
+ int i, j;
+
+ v.a = ntohll(y[0]);
+ v.b = ntohll(y[1]);
+
+ for (j = 0; j < 2; j++) {
+ x = ntohll(x_in[j]);
+ for (i = 0; i < 64; i++, x <<= 1) {
+ if (x & 0x8000000000000000ULL) {
+ z.a ^= v.a;
+ z.b ^= v.b;
+ }
+ if (v.b & 1ULL) {
+ v.b = (v.a << 63)|(v.b >> 1);
+ v.a = (v.a >> 1) ^ R;
+ } else {
+ v.b = (v.a << 63)|(v.b >> 1);
+ v.a = v.a >> 1;
+ }
+ }
+ }
+ res[0] = htonll(z.a);
+ res[1] = htonll(z.b);
+}
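+
+/*
+ * Illustrative sketch (hypothetical check, not part of the imported
+ * sources): in the bit-reflected GHASH representation used above, the
+ * 16 byte block 0x80 00 ... 00 behaves as the multiplicative identity,
+ * which makes for a cheap sanity check of the routine. Disabled below.
+ */
+#if 0
+/* Illustration only; returns nonzero when the identity property holds. */
+static int
+gcm_generic_mul_identity_check(void)
+{
+	uint64_t one[2] = { 0, 0 };
+	uint64_t h[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
+	uint64_t res[2];
+
+	((uint8_t *)one)[0] = 0x80;	/* identity element: 0x80, 0, .., 0 */
+	gcm_generic_mul(one, h, res);
+	return (res[0] == h[0] && res[1] == h[1]);
+}
+#endif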
+
+static boolean_t
+gcm_generic_will_work(void)
+{
+ return (B_TRUE);
+}
+
+const gcm_impl_ops_t gcm_generic_impl = {
+ .mul = &gcm_generic_mul,
+ .is_supported = &gcm_generic_will_work,
+ .name = "generic"
+};
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c
new file mode 100644
index 000000000000..05920115ce86
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
+
+#include <sys/types.h>
+#include <sys/simd.h>
+
+/* These functions are used to execute pclmulqdq based assembly methods */
+extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
+
+#include <modes/gcm_impl.h>
+
+/*
+ * Perform a carry-less multiplication (that is, use XOR instead of the
+ * multiply operator) on *x_in and *y and place the result in *res.
+ *
+ * Byte swap the input (*x_in and *y) and the output (*res).
+ *
+ * Note: x_in, y, and res all point to 16-byte numbers (an array of two
+ * 64-bit integers).
+ */
+static void
+gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
+{
+ kfpu_begin();
+ gcm_mul_pclmulqdq(x_in, y, res);
+ kfpu_end();
+}
+
+static boolean_t
+gcm_pclmulqdq_will_work(void)
+{
+ return (kfpu_allowed() && zfs_pclmulqdq_available());
+}
+
+const gcm_impl_ops_t gcm_pclmulqdq_impl = {
+ .mul = &gcm_pclmulqdq_mul,
+ .is_supported = &gcm_pclmulqdq_will_work,
+ .name = "pclmulqdq"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_PCLMULQDQ) */
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
new file mode 100644
index 000000000000..f07876a478e2
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
@@ -0,0 +1,157 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Initialize by setting iov_or_mp to point to the current iovec or mp,
+ * and by setting current_offset to an offset within the current iovec or mp.
+ */
+void
+crypto_init_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset)
+{
+ offset_t offset;
+
+ switch (out->cd_format) {
+ case CRYPTO_DATA_RAW:
+ *current_offset = out->cd_offset;
+ break;
+
+ case CRYPTO_DATA_UIO: {
+ uio_t *uiop = out->cd_uio;
+ uint_t vec_idx;
+
+ offset = out->cd_offset;
+ offset = uio_index_at_offset(uiop, offset, &vec_idx);
+
+ *current_offset = offset;
+ *iov_or_mp = (void *)(uintptr_t)vec_idx;
+ break;
+ }
+ } /* end switch */
+}
+
+/*
+ * Get pointers for where in the output to copy a block of encrypted or
+ * decrypted data. The iov_or_mp argument stores a pointer to the current
+ * iovec or mp, and offset stores an offset into the current iovec or mp.
+ */
+void
+crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
+ uint8_t **out_data_1, size_t *out_data_1_len, uint8_t **out_data_2,
+ size_t amt)
+{
+ offset_t offset;
+
+ switch (out->cd_format) {
+ case CRYPTO_DATA_RAW: {
+ iovec_t *iov;
+
+ offset = *current_offset;
+ iov = &out->cd_raw;
+ if ((offset + amt) <= iov->iov_len) {
+ /* one block fits */
+ *out_data_1 = (uint8_t *)iov->iov_base + offset;
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ }
+ break;
+ }
+
+ case CRYPTO_DATA_UIO: {
+ uio_t *uio = out->cd_uio;
+ offset_t offset;
+ uint_t vec_idx;
+ uint8_t *p;
+ uint64_t iov_len;
+ void *iov_base;
+
+ offset = *current_offset;
+ vec_idx = (uintptr_t)(*iov_or_mp);
+ uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
+ p = (uint8_t *)iov_base + offset;
+ *out_data_1 = p;
+
+ if (offset + amt <= iov_len) {
+ /* can fit one block into this iov */
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ } else {
+ /* one block spans two iovecs */
+ *out_data_1_len = iov_len - offset;
+ if (vec_idx == uio_iovcnt(uio))
+ return;
+ vec_idx++;
+ uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
+ *out_data_2 = (uint8_t *)iov_base;
+ *current_offset = amt - *out_data_1_len;
+ }
+ *iov_or_mp = (void *)(uintptr_t)vec_idx;
+ break;
+ }
+ } /* end switch */
+}
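+
+/*
+ * Illustrative sketch (hypothetical consumer, not part of the imported
+ * sources): callers initialize once with crypto_init_ptrs() and then ask
+ * for output pointers per block; a block that straddles two iovecs comes
+ * back as two pointer/length pairs. Disabled example below.
+ */
+#if 0
+/* Illustration only. */
+static void
+copy_block_to_out_sketch(crypto_data_t *out, void **iov_or_mp,
+    offset_t *offset, const uint8_t *block, size_t block_size)
+{
+	uint8_t *dst1, *dst2;
+	size_t dst1_len;
+
+	crypto_get_ptrs(out, iov_or_mp, offset, &dst1, &dst1_len,
+	    &dst2, block_size);
+	bcopy(block, dst1, dst1_len);
+	if (dst2 != NULL)
+		bcopy(block + dst1_len, dst2, block_size - dst1_len);
+}
+#endif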
+
+void
+crypto_free_mode_ctx(void *ctx)
+{
+ common_ctx_t *common_ctx = (common_ctx_t *)ctx;
+
+ switch (common_ctx->cc_flags &
+ (ECB_MODE|CBC_MODE|CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) {
+ case ECB_MODE:
+ kmem_free(common_ctx, sizeof (ecb_ctx_t));
+ break;
+
+ case CBC_MODE:
+ kmem_free(common_ctx, sizeof (cbc_ctx_t));
+ break;
+
+ case CTR_MODE:
+ kmem_free(common_ctx, sizeof (ctr_ctx_t));
+ break;
+
+ case CCM_MODE:
+ if (((ccm_ctx_t *)ctx)->ccm_pt_buf != NULL)
+ vmem_free(((ccm_ctx_t *)ctx)->ccm_pt_buf,
+ ((ccm_ctx_t *)ctx)->ccm_data_len);
+
+ kmem_free(ctx, sizeof (ccm_ctx_t));
+ break;
+
+ case GCM_MODE:
+ case GMAC_MODE:
+ if (((gcm_ctx_t *)ctx)->gcm_pt_buf != NULL)
+ vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf,
+ ((gcm_ctx_t *)ctx)->gcm_pt_buf_len);
+
+ kmem_free(ctx, sizeof (gcm_ctx_t));
+ }
+}
diff --git a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c b/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c
new file mode 100644
index 000000000000..da34222c8fc3
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * The basic framework for this code came from the reference
+ * implementation for MD5. That implementation is Copyright (C)
+ * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ *
+ * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
+ * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ * Not as fast as one would like -- further optimizations are encouraged
+ * and appreciated.
+ */
+
+#include <sys/zfs_context.h>
+#include <sha1/sha1.h>
+#include <sha1/sha1_consts.h>
+
+#ifdef _LITTLE_ENDIAN
+#include <sys/byteorder.h>
+#define HAVE_HTONL
+#endif
+
+#define _RESTRICT_KYWD
+
+static void Encode(uint8_t *, const uint32_t *, size_t);
+
+#if defined(__sparc)
+
+#define SHA1_TRANSFORM(ctx, in) \
+ SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
+ (ctx)->state[3], (ctx)->state[4], (ctx), (in))
+
+static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
+ SHA1_CTX *, const uint8_t *);
+
+#elif defined(__amd64)
+
+#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
+#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
+ (in), (num))
+
+void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
+
+#else
+
+#define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
+
+static void SHA1Transform(SHA1_CTX *, const uint8_t *);
+
+#endif
+
+
+static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
+
+/*
+ * F, G, and H are the basic SHA1 round functions: F is used in rounds
+ * 0-19, G (parity) in rounds 20-39 and 60-79, and H (majority) in
+ * rounds 40-59.
+ */
+#define F(b, c, d) (((b) & (c)) | ((~b) & (d)))
+#define G(b, c, d) ((b) ^ (c) ^ (d))
+#define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d)))
+
+/*
+ * SHA1Init()
+ *
+ * purpose: initializes the sha1 context and begins an sha1 digest operation
+ * input:  SHA1_CTX *	: the context to initialize.
+ * output: void
+ */
+
+void
+SHA1Init(SHA1_CTX *ctx)
+{
+ ctx->count[0] = ctx->count[1] = 0;
+
+ /*
+ * load magic initialization constants. Tell lint
+ * that these constants are unsigned by using U.
+ */
+
+ ctx->state[0] = 0x67452301U;
+ ctx->state[1] = 0xefcdab89U;
+ ctx->state[2] = 0x98badcfeU;
+ ctx->state[3] = 0x10325476U;
+ ctx->state[4] = 0xc3d2e1f0U;
+}
+
+void
+SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
+{
+ uint32_t i, buf_index, buf_len;
+ const uint8_t *input = inptr;
+#if defined(__amd64)
+ uint32_t block_count;
+#endif /* __amd64 */
+
+ /* check for noop */
+ if (input_len == 0)
+ return;
+
+ /* compute number of bytes mod 64 */
+ buf_index = (ctx->count[1] >> 3) & 0x3F;
+
+ /* update number of bits */
+ if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
+ ctx->count[0]++;
+
+ ctx->count[0] += (input_len >> 29);
+
+ buf_len = 64 - buf_index;
+
+ /* transform as many times as possible */
+ i = 0;
+ if (input_len >= buf_len) {
+
+ /*
+ * general optimization:
+ *
+ * only do initial bcopy() and SHA1Transform() if
+ * buf_index != 0. if buf_index == 0, we're just
+ * wasting our time doing the bcopy() since there
+ * wasn't any data left over from a previous call to
+ * SHA1Update().
+ */
+
+ if (buf_index) {
+ bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
+ SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
+ i = buf_len;
+ }
+
+#if !defined(__amd64)
+ for (; i + 63 < input_len; i += 64)
+ SHA1_TRANSFORM(ctx, &input[i]);
+#else
+ block_count = (input_len - i) >> 6;
+ if (block_count > 0) {
+ SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
+ i += block_count << 6;
+ }
+#endif /* !__amd64 */
+
+ /*
+ * general optimization:
+ *
+ * if i and input_len are the same, return now instead
+ * of calling bcopy(), since the bcopy() in this case
+ * will be an expensive nop.
+ */
+
+ if (input_len == i)
+ return;
+
+ buf_index = 0;
+ }
+
+ /* buffer remaining input */
+ bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
+}
+
+/*
+ * SHA1Final()
+ *
+ * purpose: ends an sha1 digest operation, finalizing the message digest and
+ * zeroing the context.
+ * input: uchar_t * : A buffer to store the digest.
+ * : The function actually uses void* because many
+ * : callers pass things other than uchar_t here.
+ * SHA1_CTX * : the context to finalize, save, and zero
+ * output: void
+ */
+
+void
+SHA1Final(void *digest, SHA1_CTX *ctx)
+{
+ uint8_t bitcount_be[sizeof (ctx->count)];
+ uint32_t index = (ctx->count[1] >> 3) & 0x3f;
+
+ /* store bit count, big endian */
+ Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
+
+ /* pad out to 56 mod 64 */
+ SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
+
+ /* append length (before padding) */
+ SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
+
+ /* store state in digest */
+ Encode(digest, ctx->state, sizeof (ctx->state));
+
+ /* zeroize sensitive information */
+ bzero(ctx, sizeof (*ctx));
+}
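+
+/*
+ * Illustrative sketch (hypothetical usage, not part of the imported
+ * sources): the Init/Update/Final calls above compose in the usual
+ * streaming fashion; the disabled helper below digests a short message fed
+ * in two pieces. (For reference, SHA-1 of "abc" is
+ * a9993e364706816aba3e25717850c26c9cd0d89d.)
+ */
+#if 0
+/* Illustration only. */
+static void
+sha1_usage_sketch(uint8_t digest[20])
+{
+	SHA1_CTX ctx;
+
+	SHA1Init(&ctx);
+	SHA1Update(&ctx, "ab", 2);	/* data may arrive in pieces */
+	SHA1Update(&ctx, "c", 1);
+	SHA1Final(digest, &ctx);	/* 20 byte (160 bit) digest */
+}
+#endif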
+
+
+#if !defined(__amd64)
+
+typedef uint32_t sha1word;
+
+/*
+ * sparc optimization:
+ *
+ * on the sparc, we can load big endian 32-bit data easily. note that
+ * special care must be taken to ensure the address is 32-bit aligned.
+ * in the interest of speed, we don't check to make sure, since
+ * careful programming can guarantee this for us.
+ */
+
+#if defined(_ZFS_BIG_ENDIAN)
+#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
+
+#elif defined(HAVE_HTONL)
+#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
+
+#else
+#define LOAD_BIG_32(addr) BE_32(*((uint32_t *)(addr)))
+#endif /* _BIG_ENDIAN */
+
+/*
+ * SHA1Transform()
+ */
+#if defined(W_ARRAY)
+#define W(n) w[n]
+#else /* !defined(W_ARRAY) */
+#define W(n) w_ ## n
+#endif /* !defined(W_ARRAY) */
+
+/*
+ * ROTATE_LEFT rotates x left n bits.
+ */
+
+#if defined(__GNUC__) && defined(_LP64)
+static __inline__ uint64_t
+ROTATE_LEFT(uint64_t value, uint32_t n)
+{
+ uint32_t t32;
+
+ t32 = (uint32_t)value;
+ return ((t32 << n) | (t32 >> (32 - n)));
+}
+
+#else
+
+#define ROTATE_LEFT(x, n) \
+ (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
+
+#endif
+
+#if defined(__sparc)
+
+
+/*
+ * sparc register window optimization:
+ *
+ * `a', `b', `c', `d', and `e' are passed into SHA1Transform
+ * explicitly since it increases the number of registers available to
+ * the compiler. under this scheme, these variables can be held in
+ * %i0 - %i4, which leaves more local and out registers available.
+ *
+ * purpose: sha1 transformation -- updates the digest based on `block'
+ * input: uint32_t : bytes 1 - 4 of the digest
+ * uint32_t : bytes 5 - 8 of the digest
+ * uint32_t : bytes 9 - 12 of the digest
+ *		uint32_t : bytes 13 - 16 of the digest
+ *		uint32_t : bytes 17 - 20 of the digest
+ * SHA1_CTX * : the context to update
+ * uint8_t [64]: the block to use to update the digest
+ * output: void
+ */
+
+
+void
+SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
+ SHA1_CTX *ctx, const uint8_t blk[64])
+{
+ /*
+ * sparc optimization:
+ *
+ * while it is somewhat counter-intuitive, on sparc, it is
+ * more efficient to place all the constants used in this
+ * function in an array and load the values out of the array
+ * than to manually load the constants. this is because
+ * setting a register to a 32-bit value takes two ops in most
+ * cases: a `sethi' and an `or', but loading a 32-bit value
+ * from memory only takes one `ld' (or `lduw' on v9). while
+ * this increases memory usage, the compiler can find enough
+	 * other things to do while waiting so that the pipeline does
+ * not stall. additionally, it is likely that many of these
+ * constants are cached so that later accesses do not even go
+ * out to the bus.
+ *
+ * this array is declared `static' to keep the compiler from
+ * having to bcopy() this array onto the stack frame of
+ * SHA1Transform() each time it is called -- which is
+ * unacceptably expensive.
+ *
+ * the `const' is to ensure that callers are good citizens and
+ * do not try to munge the array. since these routines are
+ * going to be called from inside multithreaded kernelland,
+ * this is a good safety check. -- `sha1_consts' will end up in
+ * .rodata.
+ *
+ * unfortunately, loading from an array in this manner hurts
+ * performance under Intel. So, there is a macro,
+ * SHA1_CONST(), used in SHA1Transform(), that either expands to
+ * a reference to this array, or to the actual constant,
+ * depending on what platform this code is compiled for.
+ */
+
+
+ static const uint32_t sha1_consts[] = {
+ SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
+ };
+
+
+ /*
+ * general optimization:
+ *
+ * use individual integers instead of using an array. this is a
+ * win, although the amount it wins by seems to vary quite a bit.
+ */
+
+
+ uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
+ uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+
+
+ /*
+ * sparc optimization:
+ *
+ * if `block' is already aligned on a 4-byte boundary, use
+ * LOAD_BIG_32() directly. otherwise, bcopy() into a
+ * buffer that *is* aligned on a 4-byte boundary and then do
+ * the LOAD_BIG_32() on that buffer. benchmarks have shown
+ * that using the bcopy() is better than loading the bytes
+ * individually and doing the endian-swap by hand.
+ *
+	 * even though it's quite tempting to just do:
+ *
+ * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
+ *
+ * and only have one set of LOAD_BIG_32()'s, the compiler
+ * *does not* like that, so please resist the urge.
+ */
+
+
+ if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
+ bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
+ w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
+ w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
+ w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
+ w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
+ w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
+ w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
+ w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
+ w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
+ w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
+ w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
+ w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
+ w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
+ w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
+ w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
+ w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
+ w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
+ } else {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_15 = LOAD_BIG_32(blk + 60);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_14 = LOAD_BIG_32(blk + 56);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_13 = LOAD_BIG_32(blk + 52);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_12 = LOAD_BIG_32(blk + 48);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_11 = LOAD_BIG_32(blk + 44);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_10 = LOAD_BIG_32(blk + 40);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_9 = LOAD_BIG_32(blk + 36);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_8 = LOAD_BIG_32(blk + 32);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_7 = LOAD_BIG_32(blk + 28);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_6 = LOAD_BIG_32(blk + 24);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_5 = LOAD_BIG_32(blk + 20);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_4 = LOAD_BIG_32(blk + 16);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_3 = LOAD_BIG_32(blk + 12);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_2 = LOAD_BIG_32(blk + 8);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_1 = LOAD_BIG_32(blk + 4);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_0 = LOAD_BIG_32(blk + 0);
+ }
+#else /* !defined(__sparc) */
+
+void /* CSTYLED */
+SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
+{
+ /* CSTYLED */
+ sha1word a = ctx->state[0];
+ sha1word b = ctx->state[1];
+ sha1word c = ctx->state[2];
+ sha1word d = ctx->state[3];
+ sha1word e = ctx->state[4];
+
+#if defined(W_ARRAY)
+ sha1word w[16];
+#else /* !defined(W_ARRAY) */
+ sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
+ sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+#endif /* !defined(W_ARRAY) */
+
+ W(0) = LOAD_BIG_32((void *)(blk + 0));
+ W(1) = LOAD_BIG_32((void *)(blk + 4));
+ W(2) = LOAD_BIG_32((void *)(blk + 8));
+ W(3) = LOAD_BIG_32((void *)(blk + 12));
+ W(4) = LOAD_BIG_32((void *)(blk + 16));
+ W(5) = LOAD_BIG_32((void *)(blk + 20));
+ W(6) = LOAD_BIG_32((void *)(blk + 24));
+ W(7) = LOAD_BIG_32((void *)(blk + 28));
+ W(8) = LOAD_BIG_32((void *)(blk + 32));
+ W(9) = LOAD_BIG_32((void *)(blk + 36));
+ W(10) = LOAD_BIG_32((void *)(blk + 40));
+ W(11) = LOAD_BIG_32((void *)(blk + 44));
+ W(12) = LOAD_BIG_32((void *)(blk + 48));
+ W(13) = LOAD_BIG_32((void *)(blk + 52));
+ W(14) = LOAD_BIG_32((void *)(blk + 56));
+ W(15) = LOAD_BIG_32((void *)(blk + 60));
+
+#endif /* !defined(__sparc) */
+
+ /*
+ * general optimization:
+ *
+ * even though this approach is described in the standard as
+ * being slower algorithmically, it is 30-40% faster than the
+ * "faster" version under SPARC, because this version has more
+ * of the constraints specified at compile-time and uses fewer
+ * variables (and therefore has better register utilization)
+ * than its "speedier" brother. (i've tried both, trust me)
+ *
+ * for either method given in the spec, there is an "assignment"
+ * phase where the following takes place:
+ *
+ * tmp = (main_computation);
+ * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
+ *
+ * we can make the algorithm go faster by not doing this work,
+ * but just pretending that `d' is now `e', etc. this works
+ * really well and obviates the need for a temporary variable.
+ * however, we still explicitly perform the rotate action,
+ * since it is cheaper on SPARC to do it once than to have to
+ * do it over and over again.
+ */
+
+ /* round 1 */
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
+ b = ROTATE_LEFT(b, 30);
+
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
+ a = ROTATE_LEFT(a, 30);
+
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
+ e = ROTATE_LEFT(e, 30);
+
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
+ d = ROTATE_LEFT(d, 30);
+
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
+ c = ROTATE_LEFT(c, 30);
+
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
+ b = ROTATE_LEFT(b, 30);
+
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
+ a = ROTATE_LEFT(a, 30);
+
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
+ e = ROTATE_LEFT(e, 30);
+
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
+ d = ROTATE_LEFT(d, 30);
+
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
+ c = ROTATE_LEFT(c, 30);
+
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
+ b = ROTATE_LEFT(b, 30);
+
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
+ a = ROTATE_LEFT(a, 30);
+
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
+ e = ROTATE_LEFT(e, 30);
+
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
+ d = ROTATE_LEFT(d, 30);
+
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
+ c = ROTATE_LEFT(c, 30);
+
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
+ b = ROTATE_LEFT(b, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
+ a = ROTATE_LEFT(a, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 17 */
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
+ e = ROTATE_LEFT(e, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
+ d = ROTATE_LEFT(d, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
+ c = ROTATE_LEFT(c, 30);
+
+ /* round 2 */
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ /* round 3 */
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 42 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ /* round 4 */
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
+ c = ROTATE_LEFT(c, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 67 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
+ c = ROTATE_LEFT(c, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
+ c = ROTATE_LEFT(c, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */
+
+ ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
+ SHA1_CONST(3);
+ ctx->state[1] += b;
+ ctx->state[2] += ROTATE_LEFT(c, 30);
+ ctx->state[3] += d;
+ ctx->state[4] += e;
+
+ /* zeroize sensitive information */
+ W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
+ W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
+}
+#endif /* !__amd64 */
+
+
+/*
+ * Encode()
+ *
+ * purpose: to convert a list of 32-bit words from host byte order to big endian
+ * input: uint8_t * : place to store the converted big endian numbers
+ * uint32_t * : place to get numbers to convert from
+ * size_t : the length of the input in bytes
+ * output: void
+ */
+
+static void
+Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
+ size_t len)
+{
+ size_t i, j;
+
+#if defined(__sparc)
+ if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ *((uint32_t *)(output + j)) = input[i];
+ }
+ } else {
+#endif /* little endian -- will work on big endian, but slowly */
+
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ output[j] = (input[i] >> 24) & 0xff;
+ output[j + 1] = (input[i] >> 16) & 0xff;
+ output[j + 2] = (input[i] >> 8) & 0xff;
+ output[j + 3] = input[i] & 0xff;
+ }
+#if defined(__sparc)
+ }
+#endif
+}
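Encode() above serializes the 32-bit state words into a big-endian byte stream regardless of host byte order. The following standalone sketch (function and variable names are hypothetical, not part of this module) applies the same generic byte-by-byte path to the two well-known SHA-1 initial-state words and prints them in big-endian order on either a little- or big-endian host.

/* Minimal sketch of the generic Encode() path (hypothetical names). */
#include <stdint.h>
#include <stdio.h>

static void
encode_be32(uint8_t *out, const uint32_t *in, size_t len)
{
	size_t i, j;

	for (i = 0, j = 0; j < len; i++, j += 4) {
		out[j] = (in[i] >> 24) & 0xff;
		out[j + 1] = (in[i] >> 16) & 0xff;
		out[j + 2] = (in[i] >> 8) & 0xff;
		out[j + 3] = in[i] & 0xff;
	}
}

int
main(void)
{
	uint32_t words[2] = { 0x67452301U, 0xefcdab89U };	/* SHA-1 H0, H1 */
	uint8_t bytes[8];
	size_t j;

	encode_be32(bytes, words, sizeof (bytes));
	/* Prints: 67 45 23 01 ef cd ab 89 on any host. */
	for (j = 0; j < sizeof (bytes); j++)
		printf("%02x ", bytes[j]);
	printf("\n");
	return (0);
}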
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c
new file mode 100644
index 000000000000..75f6a3c1af4b
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c
@@ -0,0 +1,956 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+/*
+ * The basic framework for this code came from the reference
+ * implementation for MD5. That implementation is Copyright (C)
+ * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ *
+ * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2
+ * standard, available at
+ * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
+ * Not as fast as one would like -- further optimizations are encouraged
+ * and appreciated.
+ */
+
+#include <sys/zfs_context.h>
+#define _SHA2_IMPL
+#include <sys/sha2.h>
+#include <sha2/sha2_consts.h>
+
+#define _RESTRICT_KYWD
+
+#ifdef _ZFS_LITTLE_ENDIAN
+#include <sys/byteorder.h>
+#define HAVE_HTONL
+#endif
+#include <sys/isa_defs.h> /* for _ILP32 */
+
+static void Encode(uint8_t *, uint32_t *, size_t);
+static void Encode64(uint8_t *, uint64_t *, size_t);
+
+/* userspace only supports the generic version */
+#if defined(__amd64) && defined(_KERNEL)
+#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
+#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
+
+void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
+void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
+
+#else
+static void SHA256Transform(SHA2_CTX *, const uint8_t *);
+static void SHA512Transform(SHA2_CTX *, const uint8_t *);
+#endif /* __amd64 && _KERNEL */
+
+static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
+
+/*
+ * The low-level checksum routines use a lot of stack space. On systems where
+ * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
+ * barriers to reduce stack frame size. This can reduce the SHA512Transform()
+ * stack frame usage from 3k to <1k on ARM32, for example.
+ */
+#if defined(_ILP32) || defined(__powerpc) /* small stack */
+#define SMALL_STACK_MEMORY_BARRIER asm volatile("": : :"memory");
+#else
+#define SMALL_STACK_MEMORY_BARRIER
+#endif
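The comment above motivates compiler-only memory barriers as a way to keep the heavily unrolled transforms' stack frames small on 32-bit kernels. The sketch below, assuming a GCC/Clang-style asm extension and using hypothetical names, shows the shape of such a barrier and where it would sit inside a round loop; the actual stack-size effect depends entirely on the compiler, so treat this as an illustration rather than a guarantee.

/* Sketch of a compiler-only barrier and how it is dropped between rounds. */
#include <stdio.h>

#if defined(__GNUC__)
#define	COMPILER_BARRIER()	__asm__ __volatile__("" : : : "memory")
#else
#define	COMPILER_BARRIER()	/* no-op fallback */
#endif

static unsigned int
toy_rounds(const unsigned int w[16])
{
	unsigned int acc = 0;

	for (int i = 0; i < 16; i++) {
		acc += (w[i] ^ acc) * 0x9e3779b9u;
		/*
		 * Optimization fence: the compiler may not assume anything
		 * about memory across this point, which discourages it from
		 * keeping large sets of intermediates live (and spilled to
		 * the stack) across the whole unrolled computation.
		 */
		COMPILER_BARRIER();
	}
	return (acc);
}

int
main(void)
{
	unsigned int w[16] = { 1, 2, 3 };	/* remaining entries zero */

	printf("%08x\n", toy_rounds(w));
	return (0);
}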
+
+/* Ch and Maj are the basic SHA2 functions. */
+#define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d)))
+#define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
+
+/* Rotates x right n bits. */
+#define ROTR(x, n) \
+ (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
+
+/* Shift x right n bits */
+#define SHR(x, n) ((x) >> (n))
+
+/* SHA256 Functions */
+#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
+#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
+#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
+#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
+
+#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
+ T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \
+ d += T1; \
+ T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \
+ h = T1 + T2
+
+/* SHA384/512 Functions */
+#define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
+#define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
+#define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
+#define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
+#define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \
+ T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \
+ d += T1; \
+ T2 = BIGSIGMA0(a) + Maj(a, b, c); \
+ h = T1 + T2; \
+ SMALL_STACK_MEMORY_BARRIER;
+
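The SHA256ROUND()/SHA512ROUND() macros above each encode one FIPS 180-2 compression round; they avoid rotating the eight working variables by permuting their arguments from call to call. As a sketch (hypothetical helper, not part of this module), here is the same SHA-256 round written as a plain function that rotates the variables explicitly; k is the round constant (SHA256_CONST(t)) and w the scheduled message word for round t.

/* One SHA-256 round written out explicitly (sketch only). */
#include <stdint.h>
#include <stdio.h>

#define	ROTR32(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))

static void
sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

	uint32_t ch = (e & f) ^ (~e & g);
	uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
	uint32_t bsig0 = ROTR32(a, 2) ^ ROTR32(a, 13) ^ ROTR32(a, 22);
	uint32_t bsig1 = ROTR32(e, 6) ^ ROTR32(e, 11) ^ ROTR32(e, 25);
	uint32_t t1 = h + bsig1 + ch + k + w;
	uint32_t t2 = bsig0 + maj;

	/* Shift the working variables down one slot and fold in t1/t2. */
	s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
	s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
}

int
main(void)
{
	/* SHA-256 initial state (same values as SHA2Init() uses below). */
	uint32_t s[8] = {
		0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU,
		0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U
	};

	sha256_round(s, 0x428a2f98U /* K[0] */, 0 /* W[0] */);
	printf("a after round 0: %08x\n", s[0]);
	return (0);
}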
+/*
+ * sparc optimization:
+ *
+ * on the sparc, we can load big endian 32-bit data easily. note that
+ * special care must be taken to ensure the address is 32-bit aligned.
+ * in the interest of speed, we don't check to make sure, since
+ * careful programming can guarantee this for us.
+ */
+
+#if defined(_ZFS_BIG_ENDIAN)
+#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
+#define LOAD_BIG_64(addr) (*(uint64_t *)(addr))
+
+#elif defined(HAVE_HTONL)
+#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
+#define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
+
+#else
+/* little endian -- will work on big endian, but slowly */
+#define LOAD_BIG_32(addr) \
+ (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
+#define LOAD_BIG_64(addr) \
+ (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \
+ ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
+ ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
+ ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
+#endif /* _BIG_ENDIAN */
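The three LOAD_BIG_32/LOAD_BIG_64 variants above all answer the same question: read a big-endian word out of the message block. A minimal sketch of the portable byte-by-byte fallback, with explicit casts added for clarity (names are hypothetical):

/* Sketch: the portable fallback yields the big-endian value on any host. */
#include <stdint.h>
#include <assert.h>

static uint32_t
load_be32_portable(const uint8_t *p)
{
	return (((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	    ((uint32_t)p[2] << 8) | (uint32_t)p[3]);
}

int
main(void)
{
	const uint8_t msg[4] = { 0xde, 0xad, 0xbe, 0xef };

	assert(load_be32_portable(msg) == 0xdeadbeefU);
	return (0);
}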
+
+
+#if !defined(__amd64) || !defined(_KERNEL)
+/* SHA256 Transform */
+
+static void
+SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
+{
+ uint32_t a = ctx->state.s32[0];
+ uint32_t b = ctx->state.s32[1];
+ uint32_t c = ctx->state.s32[2];
+ uint32_t d = ctx->state.s32[3];
+ uint32_t e = ctx->state.s32[4];
+ uint32_t f = ctx->state.s32[5];
+ uint32_t g = ctx->state.s32[6];
+ uint32_t h = ctx->state.s32[7];
+
+ uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
+ uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
+ uint32_t T1, T2;
+
+#if defined(__sparc)
+ static const uint32_t sha256_consts[] = {
+ SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
+ SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
+ SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
+ SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
+ SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
+ SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
+ SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
+ SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
+ SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
+ SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
+ SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
+ SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
+ SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
+ SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
+ SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
+ SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
+ SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
+ SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
+ SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
+ SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
+ SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
+ SHA256_CONST_63
+ };
+#endif /* __sparc */
+
+ if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
+ bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
+ blk = (uint8_t *)ctx->buf_un.buf32;
+ }
+
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w0 = LOAD_BIG_32(blk + 4 * 0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w1 = LOAD_BIG_32(blk + 4 * 1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w2 = LOAD_BIG_32(blk + 4 * 2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w3 = LOAD_BIG_32(blk + 4 * 3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w4 = LOAD_BIG_32(blk + 4 * 4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w5 = LOAD_BIG_32(blk + 4 * 5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w6 = LOAD_BIG_32(blk + 4 * 6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w7 = LOAD_BIG_32(blk + 4 * 7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w8 = LOAD_BIG_32(blk + 4 * 8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w9 = LOAD_BIG_32(blk + 4 * 9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w10 = LOAD_BIG_32(blk + 4 * 10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w11 = LOAD_BIG_32(blk + 4 * 11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w12 = LOAD_BIG_32(blk + 4 * 12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w13 = LOAD_BIG_32(blk + 4 * 13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w14 = LOAD_BIG_32(blk + 4 * 14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w15 = LOAD_BIG_32(blk + 4 * 15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
+
+ w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
+ w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
+ w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
+ w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
+ w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
+ w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
+ w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
+ w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
+ w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
+ w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
+ w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
+ w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
+ w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
+ w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
+ w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
+ w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+ w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
+ w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
+ w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
+ w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
+ w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
+ w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
+ w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
+ w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
+ w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
+ w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
+ w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
+ w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
+ w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
+ w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
+ w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
+ w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+ w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
+ w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
+ w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
+ w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
+ w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
+ w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
+ w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
+ w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
+ w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
+ w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
+ w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
+ w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
+ w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
+ w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
+ w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
+ w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+ ctx->state.s32[0] += a;
+ ctx->state.s32[1] += b;
+ ctx->state.s32[2] += c;
+ ctx->state.s32[3] += d;
+ ctx->state.s32[4] += e;
+ ctx->state.s32[5] += f;
+ ctx->state.s32[6] += g;
+ ctx->state.s32[7] += h;
+}
+
+
+/* SHA384 and SHA512 Transform */
+
+static void
+SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
+{
+
+ uint64_t a = ctx->state.s64[0];
+ uint64_t b = ctx->state.s64[1];
+ uint64_t c = ctx->state.s64[2];
+ uint64_t d = ctx->state.s64[3];
+ uint64_t e = ctx->state.s64[4];
+ uint64_t f = ctx->state.s64[5];
+ uint64_t g = ctx->state.s64[6];
+ uint64_t h = ctx->state.s64[7];
+
+ uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
+ uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
+ uint64_t T1, T2;
+
+#if defined(__sparc)
+ static const uint64_t sha512_consts[] = {
+ SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
+ SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
+ SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
+ SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
+ SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
+ SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
+ SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
+ SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
+ SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
+ SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
+ SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
+ SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
+ SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
+ SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
+ SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
+ SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
+ SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
+ SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
+ SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
+ SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
+ SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
+ SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
+ SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
+ SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
+ SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
+ SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
+ SHA512_CONST_78, SHA512_CONST_79
+ };
+#endif /* __sparc */
+
+
+ if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? */
+ bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64));
+ blk = (uint8_t *)ctx->buf_un.buf64;
+ }
+
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w0 = LOAD_BIG_64(blk + 8 * 0);
+ SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w1 = LOAD_BIG_64(blk + 8 * 1);
+ SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w2 = LOAD_BIG_64(blk + 8 * 2);
+ SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w3 = LOAD_BIG_64(blk + 8 * 3);
+ SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w4 = LOAD_BIG_64(blk + 8 * 4);
+ SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w5 = LOAD_BIG_64(blk + 8 * 5);
+ SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w6 = LOAD_BIG_64(blk + 8 * 6);
+ SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w7 = LOAD_BIG_64(blk + 8 * 7);
+ SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w8 = LOAD_BIG_64(blk + 8 * 8);
+ SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w9 = LOAD_BIG_64(blk + 8 * 9);
+ SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w10 = LOAD_BIG_64(blk + 8 * 10);
+ SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w11 = LOAD_BIG_64(blk + 8 * 11);
+ SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w12 = LOAD_BIG_64(blk + 8 * 12);
+ SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w13 = LOAD_BIG_64(blk + 8 * 13);
+ SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w14 = LOAD_BIG_64(blk + 8 * 14);
+ SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w15 = LOAD_BIG_64(blk + 8 * 15);
+ SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
+
+ ctx->state.s64[0] += a;
+ ctx->state.s64[1] += b;
+ ctx->state.s64[2] += c;
+ ctx->state.s64[3] += d;
+ ctx->state.s64[4] += e;
+ ctx->state.s64[5] += f;
+ ctx->state.s64[6] += g;
+ ctx->state.s64[7] += h;
+
+}
+#endif /* !__amd64 || !_KERNEL */
+
+
+/*
+ * Encode()
+ *
+ * purpose: to convert a list of 32-bit words from host byte order to big endian
+ * input: uint8_t * : place to store the converted big endian numbers
+ * uint32_t * : place to get numbers to convert from
+ * size_t : the length of the input in bytes
+ * output: void
+ */
+
+static void
+Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
+ size_t len)
+{
+ size_t i, j;
+
+#if defined(__sparc)
+ if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ *((uint32_t *)(output + j)) = input[i];
+ }
+ } else {
+#endif /* little endian -- will work on big endian, but slowly */
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ output[j] = (input[i] >> 24) & 0xff;
+ output[j + 1] = (input[i] >> 16) & 0xff;
+ output[j + 2] = (input[i] >> 8) & 0xff;
+ output[j + 3] = input[i] & 0xff;
+ }
+#if defined(__sparc)
+ }
+#endif
+}
+
+static void
+Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
+ size_t len)
+{
+ size_t i, j;
+
+#if defined(__sparc)
+ if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
+ for (i = 0, j = 0; j < len; i++, j += 8) {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ *((uint64_t *)(output + j)) = input[i];
+ }
+ } else {
+#endif /* little endian -- will work on big endian, but slowly */
+ for (i = 0, j = 0; j < len; i++, j += 8) {
+
+ output[j] = (input[i] >> 56) & 0xff;
+ output[j + 1] = (input[i] >> 48) & 0xff;
+ output[j + 2] = (input[i] >> 40) & 0xff;
+ output[j + 3] = (input[i] >> 32) & 0xff;
+ output[j + 4] = (input[i] >> 24) & 0xff;
+ output[j + 5] = (input[i] >> 16) & 0xff;
+ output[j + 6] = (input[i] >> 8) & 0xff;
+ output[j + 7] = input[i] & 0xff;
+ }
+#if defined(__sparc)
+ }
+#endif
+}
+
+
+void
+SHA2Init(uint64_t mech, SHA2_CTX *ctx)
+{
+
+ switch (mech) {
+ case SHA256_MECH_INFO_TYPE:
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ ctx->state.s32[0] = 0x6a09e667U;
+ ctx->state.s32[1] = 0xbb67ae85U;
+ ctx->state.s32[2] = 0x3c6ef372U;
+ ctx->state.s32[3] = 0xa54ff53aU;
+ ctx->state.s32[4] = 0x510e527fU;
+ ctx->state.s32[5] = 0x9b05688cU;
+ ctx->state.s32[6] = 0x1f83d9abU;
+ ctx->state.s32[7] = 0x5be0cd19U;
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
+ ctx->state.s64[1] = 0x629a292a367cd507ULL;
+ ctx->state.s64[2] = 0x9159015a3070dd17ULL;
+ ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
+ ctx->state.s64[4] = 0x67332667ffc00b31ULL;
+ ctx->state.s64[5] = 0x8eb44a8768581511ULL;
+ ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
+ ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
+ ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
+ ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
+ ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
+ ctx->state.s64[4] = 0x510e527fade682d1ULL;
+ ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
+ ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
+ ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
+ break;
+ case SHA512_224_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
+ ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
+ ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
+ ctx->state.s64[3] = 0x679DD514582F9FCFULL;
+ ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
+ ctx->state.s64[5] = 0x77E36F7304C48942ULL;
+ ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
+ ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
+ break;
+ case SHA512_256_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0x22312194FC2BF72CULL;
+ ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
+ ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
+ ctx->state.s64[3] = 0x963877195940EABDULL;
+ ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
+ ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
+ ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
+ ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
+ break;
+#ifdef _KERNEL
+ default:
+ cmn_err(CE_PANIC,
+ "sha2_init: failed to find a supported algorithm: 0x%x",
+ (uint32_t)mech);
+
+#endif /* _KERNEL */
+ }
+
+ ctx->algotype = (uint32_t)mech;
+ ctx->count.c64[0] = ctx->count.c64[1] = 0;
+}
+
+#ifndef _KERNEL
+
+// #pragma inline(SHA256Init, SHA384Init, SHA512Init)
+void
+SHA256Init(SHA256_CTX *ctx)
+{
+ SHA2Init(SHA256, ctx);
+}
+
+void
+SHA384Init(SHA384_CTX *ctx)
+{
+ SHA2Init(SHA384, ctx);
+}
+
+void
+SHA512Init(SHA512_CTX *ctx)
+{
+ SHA2Init(SHA512, ctx);
+}
+
+#endif /* _KERNEL */
+
+/*
+ * SHA2Update()
+ *
+ * purpose: continues an sha2 digest operation, using the message block
+ * to update the context.
+ * input: SHA2_CTX * : the context to update
+ * void * : the message block
+ * size_t : the length of the message block, in bytes
+ * output: void
+ */
+
+void
+SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
+{
+ uint32_t i, buf_index, buf_len, buf_limit;
+ const uint8_t *input = inptr;
+ uint32_t algotype = ctx->algotype;
+
+ /* check for noop */
+ if (input_len == 0)
+ return;
+
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ buf_limit = 64;
+
+ /* compute number of bytes mod 64 */
+ buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
+
+ /* update number of bits */
+ if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
+ ctx->count.c32[0]++;
+
+ ctx->count.c32[0] += (input_len >> 29);
+
+ } else {
+ buf_limit = 128;
+
+ /* compute number of bytes mod 128 */
+ buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
+
+ /* update number of bits */
+ if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
+ ctx->count.c64[0]++;
+
+ ctx->count.c64[0] += (input_len >> 29);
+ }
+
+ buf_len = buf_limit - buf_index;
+
+ /* transform as many times as possible */
+ i = 0;
+ if (input_len >= buf_len) {
+
+ /*
+ * general optimization:
+ *
+ * only do initial bcopy() and SHA2Transform() if
+ * buf_index != 0. if buf_index == 0, we're just
+ * wasting our time doing the bcopy() since there
+ * wasn't any data left over from a previous call to
+ * SHA2Update().
+ */
+ if (buf_index) {
+ bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
+ SHA256Transform(ctx, ctx->buf_un.buf8);
+ else
+ SHA512Transform(ctx, ctx->buf_un.buf8);
+
+ i = buf_len;
+ }
+
+#if !defined(__amd64) || !defined(_KERNEL)
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ for (; i + buf_limit - 1 < input_len; i += buf_limit) {
+ SHA256Transform(ctx, &input[i]);
+ }
+ } else {
+ for (; i + buf_limit - 1 < input_len; i += buf_limit) {
+ SHA512Transform(ctx, &input[i]);
+ }
+ }
+
+#else
+ uint32_t block_count;
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ block_count = (input_len - i) >> 6;
+ if (block_count > 0) {
+ SHA256TransformBlocks(ctx, &input[i],
+ block_count);
+ i += block_count << 6;
+ }
+ } else {
+ block_count = (input_len - i) >> 7;
+ if (block_count > 0) {
+ SHA512TransformBlocks(ctx, &input[i],
+ block_count);
+ i += block_count << 7;
+ }
+ }
+#endif /* !__amd64 || !_KERNEL */
+
+ /*
+ * general optimization:
+ *
+ * if i and input_len are the same, return now instead
+ * of calling bcopy(), since the bcopy() in this case
+ * will be an expensive noop.
+ */
+
+ if (input_len == i)
+ return;
+
+ buf_index = 0;
+ }
+
+ /* buffer remaining input */
+ bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
+}
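SHA2Update() keeps the running message length in bits, split across two words of ctx->count. For the SHA-256 family the pair is two 32-bit words: the low word takes (input_len << 3) with a carry check, and the high word absorbs the bits shifted out, input_len >> 29. A small self-check sketch of that arithmetic (hypothetical names, 32-bit path only):

/* Sketch of the 32-bit-pair bit-count bookkeeping used above. */
#include <stdint.h>
#include <assert.h>

static void
add_bits(uint32_t count[2], uint32_t input_len)
{
	/* Low word: low 32 bits of input_len * 8, with carry detection. */
	if ((count[1] += (input_len << 3)) < (input_len << 3))
		count[0]++;
	/* High word: the bits shifted out of input_len << 3. */
	count[0] += (input_len >> 29);
}

int
main(void)
{
	uint32_t count[2] = { 0, 0 };

	add_bits(count, 0x90000000U);	/* one 2.25 GiB update */
	/* 0x90000000 bytes * 8 = 0x4.80000000 bits, split across the pair. */
	assert(count[0] == 0x4U && count[1] == 0x80000000U);
	return (0);
}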
+
+
+/*
+ * SHA2Final()
+ *
+ * purpose: ends an sha2 digest operation, finalizing the message digest and
+ * zeroing the context.
+ * input: uchar_t * : a buffer to store the digest
+ * : The function actually uses void* because many
+ * : callers pass things other than uchar_t here.
+ * SHA2_CTX * : the context to finalize, save, and zero
+ * output: void
+ */
+
+void
+SHA2Final(void *digest, SHA2_CTX *ctx)
+{
+ uint8_t bitcount_be[sizeof (ctx->count.c32)];
+ uint8_t bitcount_be64[sizeof (ctx->count.c64)];
+ uint32_t index;
+ uint32_t algotype = ctx->algotype;
+
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ index = (ctx->count.c32[1] >> 3) & 0x3f;
+ Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
+ SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
+ SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
+ Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
+ } else {
+ index = (ctx->count.c64[1] >> 3) & 0x7f;
+ Encode64(bitcount_be64, ctx->count.c64,
+ sizeof (bitcount_be64));
+ SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
+ SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
+ if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
+ ctx->state.s64[6] = ctx->state.s64[7] = 0;
+ Encode64(digest, ctx->state.s64,
+ sizeof (uint64_t) * 6);
+ } else if (algotype == SHA512_224_MECH_INFO_TYPE) {
+ uint8_t last[sizeof (uint64_t)];
+ /*
+ * Since SHA-512/224 doesn't align well to 64-bit
+ * boundaries, we must do the encoding in three steps:
+ * 1) encode the three 64-bit words that fit neatly
+ * 2) encode the last 64-bit word to a temp buffer
+ * 3) chop out the lower 32-bits from the temp buffer
+ * and append them to the digest
+ */
+ Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
+ Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
+ bcopy(last, (uint8_t *)digest + 24, 4);
+ } else if (algotype == SHA512_256_MECH_INFO_TYPE) {
+ Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
+ } else {
+ Encode64(digest, ctx->state.s64,
+ sizeof (ctx->state.s64));
+ }
+ }
+
+ /* zeroize sensitive information */
+ bzero(ctx, sizeof (*ctx));
+}
+
+#ifdef _KERNEL
+EXPORT_SYMBOL(SHA2Init);
+EXPORT_SYMBOL(SHA2Update);
+EXPORT_SYMBOL(SHA2Final);
+#endif
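For reference, a minimal usage sketch of the Init/Update/Final interface defined in this file. It is not a standalone program (it needs the surrounding OpenZFS headers), and SHA256_DIGEST_LENGTH is assumed to be provided by <sys/sha2.h>; the point is only that splitting the input across several SHA2Update() calls yields the same digest as a single call.

/* Usage sketch: incremental SHA-256 matches one-shot SHA-256. */
#include <stdint.h>
#include <string.h>
#include <sys/sha2.h>

static int
sha256_incremental_matches(const uint8_t *msg, size_t len)
{
	SHA2_CTX ctx;
	uint8_t d1[SHA256_DIGEST_LENGTH], d2[SHA256_DIGEST_LENGTH];

	SHA2Init(SHA256_MECH_INFO_TYPE, &ctx);
	SHA2Update(&ctx, msg, len);
	SHA2Final(d1, &ctx);

	SHA2Init(SHA256_MECH_INFO_TYPE, &ctx);
	SHA2Update(&ctx, msg, len / 2);			/* first half */
	SHA2Update(&ctx, msg + len / 2, len - len / 2);	/* remainder */
	SHA2Final(d2, &ctx);

	return (memcmp(d1, d2, sizeof (d1)) == 0);
}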
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE b/sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE
new file mode 100644
index 000000000000..b7434fd17872
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE
@@ -0,0 +1,3 @@
+Implementation of the Skein hash function.
+Source code author: Doug Whiting, 2008.
+This algorithm and source code is released to the public domain.
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE.descrip b/sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE.descrip
new file mode 100644
index 000000000000..0ae89cfdf5ce
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+LICENSE TERMS OF SKEIN HASH ALGORITHM IMPLEMENTATION
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein.c b/sys/contrib/openzfs/module/icp/algs/skein/skein.c
new file mode 100644
index 000000000000..83fe84260307
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein.c
@@ -0,0 +1,911 @@
+/*
+ * Implementation of the Skein hash function.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/skein.h> /* get the Skein API definitions */
+#include "skein_impl.h" /* get internal definitions */
+
+/* 256-bit Skein */
+/* init the context for a straight hashing operation */
+int
+Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
+{
+ union {
+ uint8_t b[SKEIN_256_STATE_BYTES];
+ uint64_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen) { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 256:
+ bcopy(SKEIN_256_IV_256, ctx->X, sizeof (ctx->X));
+ break;
+ case 224:
+ bcopy(SKEIN_256_IV_224, ctx->X, sizeof (ctx->X));
+ break;
+ case 160:
+ bcopy(SKEIN_256_IV_160, ctx->X, sizeof (ctx->X));
+ break;
+ case 128:
+ bcopy(SKEIN_256_IV_128, ctx->X, sizeof (ctx->X));
+ break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed)
+ */
+ /* set tweaks: T0=0; T1=CFG | FINAL */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ /* set the schema, version */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ /* zero pad config block */
+ bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+ /* compute the initial chaining values from config block */
+ /* zero the chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+ break;
+ }
+ /*
+ * The chaining vars ctx->X are now initialized for the given
+ * hashBitLen.
+ * Set up to process the data message portion of the hash (default)
+ */
+ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+ return (SKEIN_SUCCESS);
+}
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein_256_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+ const uint8_t *key, size_t keyBytes)
+{
+ union {
+ uint8_t b[SKEIN_256_STATE_BYTES];
+ uint64_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) { /* is there a key? */
+ /* no key: use all zeroes as key for config block */
+ bzero(ctx->X, sizeof (ctx->X));
+ } else { /* here to pre-process a key */
+
+ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+ /* do a mini-Init right here */
+ /* set output hash bit count = state size */
+ ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+ /* set tweaks: T0 = 0; T1 = KEY type */
+ Skein_Start_New_Type(ctx, KEY);
+ /* zero the initial chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ /* hash the key */
+ (void) Skein_256_Update(ctx, key, keyBytes);
+ /* put result into cfg.b[] */
+ (void) Skein_256_Final_Pad(ctx, cfg.b);
+ /* copy over into ctx->X[] */
+ bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* convert key bytes to context words */
+ for (i = 0; i < SKEIN_256_STATE_WORDS; i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed for each key)
+ */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+ cfg.w[2] = Skein_Swap64(treeInfo);
+
+ Skein_Show_Key(256, &ctx->h, key, keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx, MSG);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* process the input bytes */
+int
+Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) {
+ /* finish up any buffered message data */
+ if (ctx->h.bCnt) {
+ /* # bytes free in buffer b[] */
+ n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt;
+ if (n) {
+ /* check on our logic here */
+ Skein_assert(n < msgByteCnt);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+ Skein_256_Process_Block(ctx, ctx->b, 1,
+ SKEIN_256_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /*
+ * now process any remaining full blocks, directly from input
+ * message data
+ */
+ if (msgByteCnt > SKEIN_256_BLOCK_BYTES) {
+ /* number of full blocks to process */
+ n = (msgByteCnt - 1) / SKEIN_256_BLOCK_BYTES;
+ Skein_256_Process_Block(ctx, msg, n,
+ SKEIN_256_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+ msg += n * SKEIN_256_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt) {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the result */
+int
+Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_256_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+
+ /* process the final block */
+ Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_256_BLOCK_BYTES;
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256, &ctx->h, n,
+ hashVal + i * SKEIN_256_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
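As with the SHA-2 code, here is a minimal usage sketch of the Skein-256 entry points defined above, assuming only the declarations from <sys/skein.h>; a hashBitLen of 256 produces a 32-byte digest. This is an illustration of the calling sequence, not part of the module.

/* Usage sketch: one-shot Skein-256-256 digest via the functions above. */
#include <stdint.h>
#include <sys/skein.h>

static int
skein256_digest(const uint8_t *msg, size_t len, uint8_t out[32])
{
	Skein_256_Ctxt_t ctx;
	int rc;

	rc = Skein_256_Init(&ctx, 256);	/* 256-bit output */
	if (rc != SKEIN_SUCCESS)
		return (rc);
	rc = Skein_256_Update(&ctx, msg, len);
	if (rc != SKEIN_SUCCESS)
		return (rc);
	return (Skein_256_Final(&ctx, out));
}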
+
+/* 512-bit Skein */
+
+/* init the context for a straight hashing operation */
+int
+Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
+{
+ union {
+ uint8_t b[SKEIN_512_STATE_BYTES];
+ uint64_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen) { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512:
+ bcopy(SKEIN_512_IV_512, ctx->X, sizeof (ctx->X));
+ break;
+ case 384:
+ bcopy(SKEIN_512_IV_384, ctx->X, sizeof (ctx->X));
+ break;
+ case 256:
+ bcopy(SKEIN_512_IV_256, ctx->X, sizeof (ctx->X));
+ break;
+ case 224:
+ bcopy(SKEIN_512_IV_224, ctx->X, sizeof (ctx->X));
+ break;
+#endif
+ default:
+ /*
+ * here if there is no precomputed IV value available
+ * build/process the config block, type == CONFIG (could be
+ * precomputed)
+ */
+ /* set tweaks: T0=0; T1=CFG | FINAL */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ /* set the schema, version */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ /* zero pad config block */
+ bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+ /* compute the initial chaining values from config block */
+ /* zero the chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /*
+ * The chaining vars ctx->X are now initialized for the given
+ * hashBitLen. Set up to process the data message portion of the
+ * hash (default)
+ */
+ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+ return (SKEIN_SUCCESS);
+}
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein_512_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+ const uint8_t *key, size_t keyBytes)
+{
+ union {
+ uint8_t b[SKEIN_512_STATE_BYTES];
+ uint64_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) { /* is there a key? */
+ /* no key: use all zeroes as key for config block */
+ bzero(ctx->X, sizeof (ctx->X));
+ } else { /* here to pre-process a key */
+
+ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+ /* do a mini-Init right here */
+ /* set output hash bit count = state size */
+ ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+ /* set tweaks: T0 = 0; T1 = KEY type */
+ Skein_Start_New_Type(ctx, KEY);
+ /* zero the initial chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ (void) Skein_512_Update(ctx, key, keyBytes); /* hash the key */
+ /* put result into cfg.b[] */
+ (void) Skein_512_Final_Pad(ctx, cfg.b);
+ /* copy over into ctx->X[] */
+ bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* convert key bytes to context words */
+ for (i = 0; i < SKEIN_512_STATE_WORDS; i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed for each key)
+ */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+ cfg.w[2] = Skein_Swap64(treeInfo);
+
+ Skein_Show_Key(512, &ctx->h, key, keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx, MSG);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* process the input bytes */
+int
+Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) {
+ /* finish up any buffered message data */
+ if (ctx->h.bCnt) {
+ /* # bytes free in buffer b[] */
+ n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
+ if (n) {
+ /* check on our logic here */
+ Skein_assert(n < msgByteCnt);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+ Skein_512_Process_Block(ctx, ctx->b, 1,
+ SKEIN_512_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /*
+ * now process any remaining full blocks, directly from input
+ * message data
+ */
+ if (msgByteCnt > SKEIN_512_BLOCK_BYTES) {
+ /* number of full blocks to process */
+ n = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES;
+ Skein_512_Process_Block(ctx, msg, n,
+ SKEIN_512_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+ msg += n * SKEIN_512_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt) {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the result */
+int
+Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_512_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+
+ /* process the final block */
+ Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_512_BLOCK_BYTES;
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512, &ctx->h, n,
+ hashVal + i * SKEIN_512_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* 1024-bit Skein */
+
+/* init the context for a straight hashing operation */
+int
+Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
+{
+ union {
+ uint8_t b[SKEIN1024_STATE_BYTES];
+ uint64_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen) { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512:
+ bcopy(SKEIN1024_IV_512, ctx->X, sizeof (ctx->X));
+ break;
+ case 384:
+ bcopy(SKEIN1024_IV_384, ctx->X, sizeof (ctx->X));
+ break;
+ case 1024:
+ bcopy(SKEIN1024_IV_1024, ctx->X, sizeof (ctx->X));
+ break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed)
+ */
+ /* set tweaks: T0=0; T1=CFG | FINAL */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ /* set the schema, version */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ /* zero pad config block */
+ bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+ /* compute the initial chaining values from config block */
+ /* zero the chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /*
+ * The chaining vars ctx->X are now initialized for the given
+ * hashBitLen. Set up to process the data message portion of the hash
+ * (default)
+ */
+ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+ return (SKEIN_SUCCESS);
+}
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein1024_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+ const uint8_t *key, size_t keyBytes)
+{
+ union {
+ uint8_t b[SKEIN1024_STATE_BYTES];
+ uint64_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) { /* is there a key? */
+ /* no key: use all zeroes as key for config block */
+ bzero(ctx->X, sizeof (ctx->X));
+ } else { /* here to pre-process a key */
+ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+ /* do a mini-Init right here */
+ /* set output hash bit count = state size */
+ ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+ /* set tweaks: T0 = 0; T1 = KEY type */
+ Skein_Start_New_Type(ctx, KEY);
+ /* zero the initial chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ (void) Skein1024_Update(ctx, key, keyBytes); /* hash the key */
+ /* put result into cfg.b[] */
+ (void) Skein1024_Final_Pad(ctx, cfg.b);
+ /* copy over into ctx->X[] */
+ bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* convert key bytes to context words */
+ for (i = 0; i < SKEIN1024_STATE_WORDS; i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed for each key)
+ */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+ cfg.w[2] = Skein_Swap64(treeInfo);
+
+ Skein_Show_Key(1024, &ctx->h, key, keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx, MSG);
+
+ return (SKEIN_SUCCESS);
+}
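+
+/*
+ * Illustrative MAC usage sketch (the buffer names and lengths are
+ * placeholders, not part of the API): the key is absorbed via InitExt()
+ * and the message is then hashed as usual.
+ *
+ *	Skein1024_Ctxt_t ctx;
+ *	uint8_t mac[64];	/. 512-bit MAC output ./
+ *
+ *	(void) Skein1024_InitExt(&ctx, 512, SKEIN_CFG_TREE_INFO_SEQUENTIAL,
+ *	    key, keylen);
+ *	(void) Skein1024_Update(&ctx, msg, msglen);
+ *	(void) Skein1024_Final(&ctx, mac);
+ */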
+
+/* process the input bytes */
+int
+Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) {
+ /* finish up any buffered message data */
+ if (ctx->h.bCnt) {
+ /* # bytes free in buffer b[] */
+ n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt;
+ if (n) {
+ /* check on our logic here */
+ Skein_assert(n < msgByteCnt);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+ Skein1024_Process_Block(ctx, ctx->b, 1,
+ SKEIN1024_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /*
+ * now process any remaining full blocks, directly from
+ * input message data
+ */
+ if (msgByteCnt > SKEIN1024_BLOCK_BYTES) {
+ /* number of full blocks to process */
+ n = (msgByteCnt - 1) / SKEIN1024_BLOCK_BYTES;
+ Skein1024_Process_Block(ctx, msg, n,
+ SKEIN1024_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
+ msg += n * SKEIN1024_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt) {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the result */
+int
+Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN1024_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+
+ /* process the final block */
+ Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN1024_BLOCK_BYTES;
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024, &ctx->h, n,
+ hashVal + i * SKEIN1024_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
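+
+/*
+ * For clarity: the output stage in the *_Final() routines above is Threefish
+ * run in counter mode.  The final chaining value serves as the "key", output
+ * block i is the state produced by processing an 8-byte little-endian
+ * counter i (zero-padded to a full block) under the OUT_FINAL tweak type,
+ * and only the first byteCnt bytes of the concatenated blocks are emitted.
+ */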
+
+/* Functions to support MAC/tree hashing */
+/* (this code is identical for Optimized and Reference versions) */
+
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int
+Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+ /* process the final block */
+ Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* "output" the state bytes */
+ Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_256_BLOCK_BYTES);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int
+Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+ /* process the final block */
+ Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* "output" the state bytes */
+ Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_512_BLOCK_BYTES);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int
+Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* tag as the final block */
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+ /* process the final block */
+ Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* "output" the state bytes */
+ Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN1024_BLOCK_BYTES);
+
+ return (SKEIN_SUCCESS);
+}
+
+#if SKEIN_TREE_HASH
+/* just do the OUTPUT stage */
+int
+Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_256_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_256_BLOCK_BYTES;
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256, &ctx->h, n,
+ hashVal + i * SKEIN_256_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* just do the OUTPUT stage */
+int
+Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_512_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_512_BLOCK_BYTES;
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+		Skein_Show_Final(512, &ctx->h, n,
+ hashVal + i * SKEIN_512_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* just do the OUTPUT stage */
+int
+Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN1024_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN1024_BLOCK_BYTES;
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+		Skein_Show_Final(1024, &ctx->h, n,
+ hashVal + i * SKEIN1024_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+#endif
+
+#ifdef _KERNEL
+EXPORT_SYMBOL(Skein_512_Init);
+EXPORT_SYMBOL(Skein_512_InitExt);
+EXPORT_SYMBOL(Skein_512_Update);
+EXPORT_SYMBOL(Skein_512_Final);
+#endif
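+
+/*
+ * Illustrative usage sketch (buffer names and sizes are placeholders):
+ * computing a 256-bit Skein-512 digest with the streaming interface above.
+ *
+ *	Skein_512_Ctxt_t ctx;
+ *	uint8_t digest[32];			/. 256 bits of output ./
+ *
+ *	(void) Skein_512_Init(&ctx, 256);	/. output length in bits ./
+ *	(void) Skein_512_Update(&ctx, data, datalen);
+ *	(void) Skein_512_Final(&ctx, digest);	/. runs the OUTPUT stage ./
+ */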
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c b/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c
new file mode 100644
index 000000000000..7ba165a48511
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c
@@ -0,0 +1,790 @@
+/*
+ * Implementation of the Skein block functions.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ * Compile-time switches:
+ * SKEIN_USE_ASM -- set bits (256/512/1024) to select which
+ * versions use ASM code for block processing
+ * [default: use C for all block sizes]
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#include <sys/skein.h>
+#include "skein_impl.h"
+#include <sys/isa_defs.h> /* for _ILP32 */
+
+#ifndef SKEIN_USE_ASM
+#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
+#endif
+
+#ifndef SKEIN_LOOP
+/*
+ * The low-level checksum routines use a lot of stack space. On systems where
+ * small stack frames are enforced (like 32-bit kernel builds), do not unroll
+ * the checksum calculations, to save stack space.
+ *
+ * Even with no loops unrolled, we can still exceed the 1k stack frame limit
+ * in Skein1024_Process_Block() (it hits 1272 bytes on ARM32). We can
+ * safely ignore it though, since the checksum functions will be called
+ * from a worker thread that won't be using much stack. That's why we have
+ * the #pragma here to ignore the warning.
+ */
+#if defined(_ILP32) || defined(__powerpc) /* Assume small stack */
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+/*
+ * We're running on 32-bit, so don't unroll loops, to save stack frame space
+ *
+ * Due to the ways the calculations on SKEIN_LOOP are done in
+ * Skein_*_Process_Block(), a value of 111 disables unrolling loops
+ * in any of those functions.
+ */
+#define SKEIN_LOOP 111
+#else
+/* We're compiling with large stacks */
+#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
+#endif
+#endif
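+
+/*
+ * For reference, the decimal digits of SKEIN_LOOP select the per-size unroll
+ * factors decoded below: hundreds digit -> Skein-256, tens -> Skein-512,
+ * ones -> Skein-1024, with a digit of 0 meaning "fully unroll".  E.g.,
+ * SKEIN_LOOP == 111 loops (no unrolling) for all three sizes, while
+ * SKEIN_LOOP == 001 fully unrolls 256 and 512 but loops for 1024.
+ */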
+
+/* some useful definitions for code here */
+#define BLK_BITS (WCNT*64)
+#define KW_TWK_BASE (0)
+#define KW_KEY_BASE (3)
+#define ks (kw + KW_KEY_BASE)
+#define ts (kw + KW_TWK_BASE)
+
+/* no debugging in Illumos version */
+#define DebugSaveTweak(ctx)
+
+/* Skein_256 */
+#if !(SKEIN_USE_ASM & 256)
+void
+Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd)
+{
+ enum {
+ WCNT = SKEIN_256_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_256_ROUNDS_TOTAL / 8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10)
+#else
+#define SKEIN_UNROLL_256 (0)
+#endif
+
+#if SKEIN_UNROLL_256
+#if (RCNT % SKEIN_UNROLL_256)
+#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
+#endif
+ size_t r;
+ /* key schedule words : chaining vars + tweak + "rotation" */
+ uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+ uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */
+#endif
+ /* local copy of context vars, for speed */
+ uint64_t X0, X1, X2, X3;
+ uint64_t w[WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ /* use for debugging (help compiler put Xn in registers) */
+ const uint64_t *Xptr[4];
+ Xptr[0] = &X0;
+ Xptr[1] = &X1;
+ Xptr[2] = &X2;
+ Xptr[3] = &X3;
+#endif
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /*
+ * this implementation only supports 2**64 input bytes
+ * (no carry out here)
+ */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ /* get input block in little-endian format */
+ Skein_Get64_LSB_First(w, blkPtr, WCNT);
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1] + ts[0];
+ X2 = w[2] + ks[2] + ts[1];
+ X3 = w[3] + ks[3];
+
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+ Xptr); /* show starting state values */
+
+ blkPtr += SKEIN_256_BLOCK_BYTES;
+
+ /* run the rounds */
+
+#define Round256(p0, p1, p2, p3, ROT, rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
+	X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;
+
+#if SKEIN_UNROLL_256 == 0
+#define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \
+ Round256(p0, p1, p2, p3, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
+
+#define I256(R) \
+ X0 += ks[((R) + 1) % 5]; /* inject the key schedule value */ \
+ X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \
+ X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \
+ X3 += ks[((R) + 4) % 5] + (R) + 1; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else /* looping version */
+#define R256(p0, p1, p2, p3, ROT, rNum) \
+ Round256(p0, p1, p2, p3, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
+
+#define I256(R) \
+ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \
+ X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \
+ X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \
+ X3 += ks[r + (R) + 3] + r + (R); \
+ ks[r + (R) + 4] = ks[r + (R) - 1]; /* rotate key schedule */ \
+ ts[r + (R) + 2] = ts[r + (R) - 1]; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+ /* loop through it */
+ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256)
+#endif
+ {
+#define R256_8_rounds(R) \
+ R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \
+ R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \
+ R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \
+ R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \
+ I256(2 * (R)); \
+ R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \
+ R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \
+ R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \
+ R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \
+ I256(2 * (R) + 1);
+
+ R256_8_rounds(0);
+
+#define R256_Unroll_R(NN) \
+ ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \
+ (SKEIN_UNROLL_256 > (NN)))
+
+#if R256_Unroll_R(1)
+ R256_8_rounds(1);
+#endif
+#if R256_Unroll_R(2)
+ R256_8_rounds(2);
+#endif
+#if R256_Unroll_R(3)
+ R256_8_rounds(3);
+#endif
+#if R256_Unroll_R(4)
+ R256_8_rounds(4);
+#endif
+#if R256_Unroll_R(5)
+ R256_8_rounds(5);
+#endif
+#if R256_Unroll_R(6)
+ R256_8_rounds(6);
+#endif
+#if R256_Unroll_R(7)
+ R256_8_rounds(7);
+#endif
+#if R256_Unroll_R(8)
+ R256_8_rounds(8);
+#endif
+#if R256_Unroll_R(9)
+ R256_8_rounds(9);
+#endif
+#if R256_Unroll_R(10)
+ R256_8_rounds(10);
+#endif
+#if R256_Unroll_R(11)
+ R256_8_rounds(11);
+#endif
+#if R256_Unroll_R(12)
+ R256_8_rounds(12);
+#endif
+#if R256_Unroll_R(13)
+ R256_8_rounds(13);
+#endif
+#if R256_Unroll_R(14)
+ R256_8_rounds(14);
+#endif
+#if (SKEIN_UNROLL_256 > 14)
+#error "need more unrolling in Skein_256_Process_Block"
+#endif
+ }
+ /*
+ * do the final "feedforward" xor, update context chaining vars
+ */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+
+ Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ } while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein_256_Process_Block_CodeSize(void)
+{
+ return ((uint8_t *)Skein_256_Process_Block_CodeSize) -
+ ((uint8_t *)Skein_256_Process_Block);
+}
+
+uint_t
+Skein_256_Unroll_Cnt(void)
+{
+ return (SKEIN_UNROLL_256);
+}
+#endif
+#endif
+
+/* Skein_512 */
+#if !(SKEIN_USE_ASM & 512)
+void
+Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd)
+{
+ enum {
+ WCNT = SKEIN_512_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_512_ROUNDS_TOTAL / 8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10)
+#else
+#define SKEIN_UNROLL_512 (0)
+#endif
+
+#if SKEIN_UNROLL_512
+#if (RCNT % SKEIN_UNROLL_512)
+#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
+#endif
+ size_t r;
+ /* key schedule words : chaining vars + tweak + "rotation" */
+ uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+ uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */
+#endif
+ /* local copy of vars, for speed */
+ uint64_t X0, X1, X2, X3, X4, X5, X6, X7;
+ uint64_t w[WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ /* use for debugging (help compiler put Xn in registers) */
+ const uint64_t *Xptr[8];
+ Xptr[0] = &X0;
+ Xptr[1] = &X1;
+ Xptr[2] = &X2;
+ Xptr[3] = &X3;
+ Xptr[4] = &X4;
+ Xptr[5] = &X5;
+ Xptr[6] = &X6;
+ Xptr[7] = &X7;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /*
+ * this implementation only supports 2**64 input bytes
+ * (no carry out here)
+ */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ /* get input block in little-endian format */
+ Skein_Get64_LSB_First(w, blkPtr, WCNT);
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1];
+ X2 = w[2] + ks[2];
+ X3 = w[3] + ks[3];
+ X4 = w[4] + ks[4];
+ X5 = w[5] + ks[5] + ts[0];
+ X6 = w[6] + ks[6] + ts[1];
+ X7 = w[7] + ks[7];
+
+ blkPtr += SKEIN_512_BLOCK_BYTES;
+
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+ Xptr);
+ /* run the rounds */
+#define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;
+
+#if SKEIN_UNROLL_512 == 0
+#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \
+ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
+
+#define I512(R) \
+ X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */\
+ X1 += ks[((R) + 2) % 9]; \
+ X2 += ks[((R) + 3) % 9]; \
+ X3 += ks[((R) + 4) % 9]; \
+ X4 += ks[((R) + 5) % 9]; \
+ X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \
+ X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \
+ X7 += ks[((R) + 8) % 9] + (R) + 1; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else /* looping version */
+#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
+
+#define I512(R) \
+ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \
+ X1 += ks[r + (R) + 1]; \
+ X2 += ks[r + (R) + 2]; \
+ X3 += ks[r + (R) + 3]; \
+ X4 += ks[r + (R) + 4]; \
+ X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \
+ X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \
+ X7 += ks[r + (R) + 7] + r + (R); \
+	ks[r + (R) + 8] = ks[r + (R) - 1]; /* rotate key schedule */ \
+	ts[r + (R) + 2] = ts[r + (R) - 1]; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+ /* loop through it */
+ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512)
+#endif /* end of looped code definitions */
+ {
+#define R512_8_rounds(R) /* do 8 full rounds */ \
+ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \
+ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \
+ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \
+ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \
+ I512(2 * (R)); \
+ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \
+ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \
+ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \
+ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \
+ I512(2*(R) + 1); /* and key injection */
+
+ R512_8_rounds(0);
+
+#define R512_Unroll_R(NN) \
+ ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \
+ (SKEIN_UNROLL_512 > (NN)))
+
+#if R512_Unroll_R(1)
+ R512_8_rounds(1);
+#endif
+#if R512_Unroll_R(2)
+ R512_8_rounds(2);
+#endif
+#if R512_Unroll_R(3)
+ R512_8_rounds(3);
+#endif
+#if R512_Unroll_R(4)
+ R512_8_rounds(4);
+#endif
+#if R512_Unroll_R(5)
+ R512_8_rounds(5);
+#endif
+#if R512_Unroll_R(6)
+ R512_8_rounds(6);
+#endif
+#if R512_Unroll_R(7)
+ R512_8_rounds(7);
+#endif
+#if R512_Unroll_R(8)
+ R512_8_rounds(8);
+#endif
+#if R512_Unroll_R(9)
+ R512_8_rounds(9);
+#endif
+#if R512_Unroll_R(10)
+ R512_8_rounds(10);
+#endif
+#if R512_Unroll_R(11)
+ R512_8_rounds(11);
+#endif
+#if R512_Unroll_R(12)
+ R512_8_rounds(12);
+#endif
+#if R512_Unroll_R(13)
+ R512_8_rounds(13);
+#endif
+#if R512_Unroll_R(14)
+ R512_8_rounds(14);
+#endif
+#if (SKEIN_UNROLL_512 > 14)
+#error "need more unrolling in Skein_512_Process_Block"
+#endif
+ }
+
+ /*
+ * do the final "feedforward" xor, update context chaining vars
+ */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+ ctx->X[4] = X4 ^ w[4];
+ ctx->X[5] = X5 ^ w[5];
+ ctx->X[6] = X6 ^ w[6];
+ ctx->X[7] = X7 ^ w[7];
+ Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ } while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein_512_Process_Block_CodeSize(void)
+{
+ return ((uint8_t *)Skein_512_Process_Block_CodeSize) -
+ ((uint8_t *)Skein_512_Process_Block);
+}
+
+uint_t
+Skein_512_Unroll_Cnt(void)
+{
+ return (SKEIN_UNROLL_512);
+}
+#endif
+#endif
+
+/* Skein1024 */
+#if !(SKEIN_USE_ASM & 1024)
+void
+Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd)
+{
+ /* do it in C, always looping (unrolled is bigger AND slower!) */
+ enum {
+ WCNT = SKEIN1024_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN1024_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
+#else
+#define SKEIN_UNROLL_1024 (0)
+#endif
+
+#if (SKEIN_UNROLL_1024 != 0)
+#if (RCNT % SKEIN_UNROLL_1024)
+#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
+#endif
+ size_t r;
+ /* key schedule words : chaining vars + tweak + "rotation" */
+ uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+ uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */
+#endif
+
+ /* local copy of vars, for speed */
+ uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11,
+ X12, X13, X14, X15;
+ uint64_t w[WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ /* use for debugging (help compiler put Xn in registers) */
+ const uint64_t *Xptr[16];
+ Xptr[0] = &X00;
+ Xptr[1] = &X01;
+ Xptr[2] = &X02;
+ Xptr[3] = &X03;
+ Xptr[4] = &X04;
+ Xptr[5] = &X05;
+ Xptr[6] = &X06;
+ Xptr[7] = &X07;
+ Xptr[8] = &X08;
+ Xptr[9] = &X09;
+ Xptr[10] = &X10;
+ Xptr[11] = &X11;
+ Xptr[12] = &X12;
+ Xptr[13] = &X13;
+ Xptr[14] = &X14;
+ Xptr[15] = &X15;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /*
+ * this implementation only supports 2**64 input bytes
+ * (no carry out here)
+ */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ctx->X[8];
+ ks[9] = ctx->X[9];
+ ks[10] = ctx->X[10];
+ ks[11] = ctx->X[11];
+ ks[12] = ctx->X[12];
+ ks[13] = ctx->X[13];
+ ks[14] = ctx->X[14];
+ ks[15] = ctx->X[15];
+ ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
+ ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
+ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ /* get input block in little-endian format */
+ Skein_Get64_LSB_First(w, blkPtr, WCNT);
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+ X00 = w[0] + ks[0]; /* do the first full key injection */
+ X01 = w[1] + ks[1];
+ X02 = w[2] + ks[2];
+ X03 = w[3] + ks[3];
+ X04 = w[4] + ks[4];
+ X05 = w[5] + ks[5];
+ X06 = w[6] + ks[6];
+ X07 = w[7] + ks[7];
+ X08 = w[8] + ks[8];
+ X09 = w[9] + ks[9];
+ X10 = w[10] + ks[10];
+ X11 = w[11] + ks[11];
+ X12 = w[12] + ks[12];
+ X13 = w[13] + ks[13] + ts[0];
+ X14 = w[14] + ks[14] + ts[1];
+ X15 = w[15] + ks[15];
+
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+ Xptr);
+
+#define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \
+ pD, pE, pF, ROT, rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\
+ X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\
+ X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\
+ X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\
+ X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE;
+
+#if SKEIN_UNROLL_1024 == 0
+#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \
+ pE, pF, ROT, rn) \
+ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \
+ pD, pE, pF, ROT, rn) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
+
+#define I1024(R) \
+ X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */\
+ X01 += ks[((R) + 2) % 17]; \
+ X02 += ks[((R) + 3) % 17]; \
+ X03 += ks[((R) + 4) % 17]; \
+ X04 += ks[((R) + 5) % 17]; \
+ X05 += ks[((R) + 6) % 17]; \
+ X06 += ks[((R) + 7) % 17]; \
+ X07 += ks[((R) + 8) % 17]; \
+ X08 += ks[((R) + 9) % 17]; \
+ X09 += ks[((R) + 10) % 17]; \
+ X10 += ks[((R) + 11) % 17]; \
+ X11 += ks[((R) + 12) % 17]; \
+ X12 += ks[((R) + 13) % 17]; \
+ X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \
+ X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \
+	X15 += ks[((R) + 16) % 17] + (R) + 1; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else /* looping version */
+#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \
+ pE, pF, ROT, rn) \
+ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \
+ pD, pE, pF, ROT, rn) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
+
+#define I1024(R) \
+ X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \
+ X01 += ks[r + (R) + 1]; \
+ X02 += ks[r + (R) + 2]; \
+ X03 += ks[r + (R) + 3]; \
+ X04 += ks[r + (R) + 4]; \
+ X05 += ks[r + (R) + 5]; \
+ X06 += ks[r + (R) + 6]; \
+ X07 += ks[r + (R) + 7]; \
+ X08 += ks[r + (R) + 8]; \
+ X09 += ks[r + (R) + 9]; \
+ X10 += ks[r + (R) + 10]; \
+ X11 += ks[r + (R) + 11]; \
+ X12 += ks[r + (R) + 12]; \
+ X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \
+ X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \
+ X15 += ks[r + (R) + 15] + r + (R); \
+ ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\
+ ts[r + (R) + 2] = ts[r + (R) - 1]; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+ /* loop through it */
+ for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024)
+#endif
+ {
+#define R1024_8_rounds(R) /* do 8 full rounds */ \
+ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \
+ 14, 15, R1024_0, 8 * (R) + 1); \
+ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \
+ 08, 01, R1024_1, 8 * (R) + 2); \
+ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \
+ 10, 09, R1024_2, 8 * (R) + 3); \
+ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \
+ 12, 07, R1024_3, 8 * (R) + 4); \
+ I1024(2 * (R)); \
+ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \
+ 14, 15, R1024_4, 8 * (R) + 5); \
+ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \
+ 08, 01, R1024_5, 8 * (R) + 6); \
+ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \
+ 10, 09, R1024_6, 8 * (R) + 7); \
+ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \
+ 12, 07, R1024_7, 8 * (R) + 8); \
+ I1024(2 * (R) + 1);
+
+ R1024_8_rounds(0);
+
+#define R1024_Unroll_R(NN) \
+ ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \
+ (SKEIN_UNROLL_1024 > (NN)))
+
+#if R1024_Unroll_R(1)
+ R1024_8_rounds(1);
+#endif
+#if R1024_Unroll_R(2)
+ R1024_8_rounds(2);
+#endif
+#if R1024_Unroll_R(3)
+ R1024_8_rounds(3);
+#endif
+#if R1024_Unroll_R(4)
+ R1024_8_rounds(4);
+#endif
+#if R1024_Unroll_R(5)
+ R1024_8_rounds(5);
+#endif
+#if R1024_Unroll_R(6)
+ R1024_8_rounds(6);
+#endif
+#if R1024_Unroll_R(7)
+ R1024_8_rounds(7);
+#endif
+#if R1024_Unroll_R(8)
+ R1024_8_rounds(8);
+#endif
+#if R1024_Unroll_R(9)
+ R1024_8_rounds(9);
+#endif
+#if R1024_Unroll_R(10)
+ R1024_8_rounds(10);
+#endif
+#if R1024_Unroll_R(11)
+ R1024_8_rounds(11);
+#endif
+#if R1024_Unroll_R(12)
+ R1024_8_rounds(12);
+#endif
+#if R1024_Unroll_R(13)
+ R1024_8_rounds(13);
+#endif
+#if R1024_Unroll_R(14)
+ R1024_8_rounds(14);
+#endif
+#if (SKEIN_UNROLL_1024 > 14)
+#error "need more unrolling in Skein_1024_Process_Block"
+#endif
+ }
+ /*
+ * do the final "feedforward" xor, update context chaining vars
+ */
+
+ ctx->X[0] = X00 ^ w[0];
+ ctx->X[1] = X01 ^ w[1];
+ ctx->X[2] = X02 ^ w[2];
+ ctx->X[3] = X03 ^ w[3];
+ ctx->X[4] = X04 ^ w[4];
+ ctx->X[5] = X05 ^ w[5];
+ ctx->X[6] = X06 ^ w[6];
+ ctx->X[7] = X07 ^ w[7];
+ ctx->X[8] = X08 ^ w[8];
+ ctx->X[9] = X09 ^ w[9];
+ ctx->X[10] = X10 ^ w[10];
+ ctx->X[11] = X11 ^ w[11];
+ ctx->X[12] = X12 ^ w[12];
+ ctx->X[13] = X13 ^ w[13];
+ ctx->X[14] = X14 ^ w[14];
+ ctx->X[15] = X15 ^ w[15];
+
+ Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ blkPtr += SKEIN1024_BLOCK_BYTES;
+ } while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein1024_Process_Block_CodeSize(void)
+{
+ return ((uint8_t *)Skein1024_Process_Block_CodeSize) -
+ ((uint8_t *)Skein1024_Process_Block);
+}
+
+uint_t
+Skein1024_Unroll_Cnt(void)
+{
+ return (SKEIN_UNROLL_1024);
+}
+#endif
+#endif
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h b/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h
new file mode 100644
index 000000000000..205a517d69db
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h
@@ -0,0 +1,292 @@
+/*
+ * Internal definitions for Skein hashing.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ *
+ * The following compile-time switches may be defined to control some
+ * tradeoffs between speed, code size, error checking, and security.
+ *
+ * The "default" note explains what happens when the switch is not defined.
+ *
+ * SKEIN_DEBUG -- make callouts from inside Skein code
+ * to examine/display intermediate values.
+ * [default: no callouts (no overhead)]
+ *
+ * SKEIN_ERR_CHECK -- how error checking is handled inside Skein
+ * code. If not defined, most error checking
+ * is disabled (for performance). Otherwise,
+ * the switch value is interpreted as:
+ * 0: use assert() to flag errors
+ * 1: return SKEIN_FAIL to flag errors
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#ifndef _SKEIN_IMPL_H_
+#define _SKEIN_IMPL_H_
+
+#include <sys/skein.h>
+#include <sys/strings.h>
+#include <sys/note.h>
+#include "skein_impl.h"
+#include "skein_port.h"
+
+/*
+ * "Internal" Skein definitions
+ * -- not needed for sequential hashing API, but will be
+ * helpful for other uses of Skein (e.g., tree hash mode).
+ * -- included here so that they can be shared between
+ * reference and optimized code.
+ */
+
+/* tweak word T[1]: bit field starting positions */
+/* offset 64 because it's the second word */
+#define SKEIN_T1_BIT(BIT) ((BIT) - 64)
+
+/* bits 112..118: level in hash tree */
+#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112)
+/* bit 119: partial final input byte */
+#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119)
+/* bits 120..125: type field */
+#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120)
+/* bit 126: first block flag */
+#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126)
+/* bit 127: final block flag */
+#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127)
+
+/* tweak word T[1]: flag bit definition(s) */
+#define SKEIN_T1_FLAG_FIRST (((uint64_t)1) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_FINAL (((uint64_t)1) << SKEIN_T1_POS_FINAL)
+#define SKEIN_T1_FLAG_BIT_PAD (((uint64_t)1) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask */
+#define SKEIN_T1_TREE_LVL_MASK (((uint64_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define SKEIN_T1_TREE_LEVEL(n) (((uint64_t)(n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field */
+#define SKEIN_BLK_TYPE_KEY (0) /* key, for MAC and KDF */
+#define SKEIN_BLK_TYPE_CFG (4) /* configuration block */
+#define SKEIN_BLK_TYPE_PERS (8) /* personalization string */
+#define SKEIN_BLK_TYPE_PK (12) /* public key (for signature hashing) */
+#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
+#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
+#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
+#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
+#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
+
+#define SKEIN_T1_BLK_TYPE(T) \
+ (((uint64_t)(SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+/* key, for MAC and KDF */
+#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY)
+/* configuration block */
+#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG)
+/* personalization string */
+#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS)
+/* public key (for digital signature hashing) */
+#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK)
+/* key identifier for KDF */
+#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF)
+/* nonce for PRNG */
+#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)
+/* message processing */
+#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG)
+/* output stage */
+#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT)
+/* field bit mask */
+#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK)
+
+#define SKEIN_T1_BLK_TYPE_CFG_FINAL \
+ (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL \
+ (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define SKEIN_VERSION (1)
+
+#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */
+#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian) */
+#endif
+
+#define SKEIN_MK_64(hi32, lo32) ((lo32) + (((uint64_t)(hi32)) << 32))
+#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION, SKEIN_ID_STRING_LE)
+#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA, 0xA9FC1A22)
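+
+/*
+ * For illustration: SKEIN_SCHEMA_VER packs the version number above the
+ * little-endian "SHA3" tag, i.e. SKEIN_MK_64(1, 0x33414853) ==
+ * 0x0000000133414853; Init() stores this (byte-swapped as needed) in the
+ * first word of the configuration block.
+ */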
+
+#define SKEIN_CFG_STR_LEN (4*8)
+
+/* bit field definitions in config block treeInfo word */
+#define SKEIN_CFG_TREE_LEAF_SIZE_POS (0)
+#define SKEIN_CFG_TREE_NODE_SIZE_POS (8)
+#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
+
+#define SKEIN_CFG_TREE_LEAF_SIZE_MSK \
+ (((uint64_t)0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define SKEIN_CFG_TREE_NODE_SIZE_MSK \
+ (((uint64_t)0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define SKEIN_CFG_TREE_MAX_LEVEL_MSK \
+ (((uint64_t)0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+
+#define SKEIN_CFG_TREE_INFO(leaf, node, maxLvl) \
+ ((((uint64_t)(leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
+ (((uint64_t)(node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
+ (((uint64_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS))
+
+/* use as treeInfo in InitExt() call for sequential processing */
+#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0, 0, 0)
+
+/*
+ * Skein macros for getting/setting tweak words, etc.
+ * These are useful for partial input bytes, hash tree init/update, etc.
+ */
+#define Skein_Get_Tweak(ctxPtr, TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
+#define Skein_Set_Tweak(ctxPtr, TWK_NUM, tVal) \
+ do { \
+ (ctxPtr)->h.T[TWK_NUM] = (tVal); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr, 0)
+#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr, 1)
+#define Skein_Set_T0(ctxPtr, T0) Skein_Set_Tweak(ctxPtr, 0, T0)
+#define Skein_Set_T1(ctxPtr, T1) Skein_Set_Tweak(ctxPtr, 1, T1)
+
+/* set both tweak words at once */
+#define Skein_Set_T0_T1(ctxPtr, T0, T1) \
+ do { \
+ Skein_Set_T0(ctxPtr, (T0)); \
+ Skein_Set_T1(ctxPtr, (T1)); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+#define Skein_Set_Type(ctxPtr, BLK_TYPE) \
+ Skein_Set_T1(ctxPtr, SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/*
+ * set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0;
+ */
+#define Skein_Start_New_Type(ctxPtr, BLK_TYPE) \
+ do { \
+ Skein_Set_T0_T1(ctxPtr, 0, SKEIN_T1_FLAG_FIRST | \
+ SKEIN_T1_BLK_TYPE_ ## BLK_TYPE); \
+ (ctxPtr)->h.bCnt = 0; \
+ _NOTE(CONSTCOND) \
+ } while (0)
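+
+/*
+ * Worked example: Skein_Start_New_Type(ctx, MSG) loads the tweak with
+ * T[0] = 0 and T[1] = SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_MSG, i.e.
+ * (1ULL << 62) | ((uint64_t)48 << 56) == 0x7000000000000000, and clears
+ * h.bCnt so the next Update() call starts with an empty block buffer.
+ */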
+
+#define Skein_Clear_First_Flag(hdr) \
+ do { \
+ (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; \
+ _NOTE(CONSTCOND) \
+ } while (0)
+#define Skein_Set_Bit_Pad_Flag(hdr) \
+ do { \
+ (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+#define Skein_Set_Tree_Level(hdr, height) \
+ do { \
+ (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+/*
+ * "Internal" Skein definitions for debugging and error checking
+ * Note: in Illumos we always disable debugging features.
+ */
+#define Skein_Show_Block(bits, ctx, X, blkPtr, wPtr, ksEvenPtr, ksOddPtr)
+#define Skein_Show_Round(bits, ctx, r, X)
+#define Skein_Show_R_Ptr(bits, ctx, r, X_ptr)
+#define Skein_Show_Final(bits, ctx, cnt, outPtr)
+#define Skein_Show_Key(bits, ctx, key, keyBytes)
+
+/* run-time checks (e.g., bad params, uninitialized context)? */
+#ifndef SKEIN_ERR_CHECK
+/* default: ignore all Asserts, for performance */
+#define Skein_Assert(x, retCode)
+#define Skein_assert(x)
+#elif defined(SKEIN_ASSERT)
+#include <sys/debug.h>
+#define Skein_Assert(x, retCode) ASSERT(x)
+#define Skein_assert(x) ASSERT(x)
+#else
+#include <sys/debug.h>
+/* caller error */
+#define Skein_Assert(x, retCode) \
+ do { \
+ if (!(x)) \
+ return (retCode); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+/* internal error */
+#define Skein_assert(x) ASSERT(x)
+#endif
+
+/*
+ * Skein block function constants (shared across Ref and Opt code)
+ */
+enum {
+ /* Skein_256 round rotation constants */
+ R_256_0_0 = 14, R_256_0_1 = 16,
+ R_256_1_0 = 52, R_256_1_1 = 57,
+ R_256_2_0 = 23, R_256_2_1 = 40,
+ R_256_3_0 = 5, R_256_3_1 = 37,
+ R_256_4_0 = 25, R_256_4_1 = 33,
+ R_256_5_0 = 46, R_256_5_1 = 12,
+ R_256_6_0 = 58, R_256_6_1 = 22,
+ R_256_7_0 = 32, R_256_7_1 = 32,
+
+ /* Skein_512 round rotation constants */
+ R_512_0_0 = 46, R_512_0_1 = 36, R_512_0_2 = 19, R_512_0_3 = 37,
+ R_512_1_0 = 33, R_512_1_1 = 27, R_512_1_2 = 14, R_512_1_3 = 42,
+ R_512_2_0 = 17, R_512_2_1 = 49, R_512_2_2 = 36, R_512_2_3 = 39,
+ R_512_3_0 = 44, R_512_3_1 = 9, R_512_3_2 = 54, R_512_3_3 = 56,
+ R_512_4_0 = 39, R_512_4_1 = 30, R_512_4_2 = 34, R_512_4_3 = 24,
+ R_512_5_0 = 13, R_512_5_1 = 50, R_512_5_2 = 10, R_512_5_3 = 17,
+ R_512_6_0 = 25, R_512_6_1 = 29, R_512_6_2 = 39, R_512_6_3 = 43,
+ R_512_7_0 = 8, R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22,
+
+ /* Skein1024 round rotation constants */
+	R1024_0_0 = 24, R1024_0_1 = 13, R1024_0_2 = 8, R1024_0_3 = 47,
+	R1024_0_4 = 8, R1024_0_5 = 17, R1024_0_6 = 22, R1024_0_7 = 37,
+	R1024_1_0 = 38, R1024_1_1 = 19, R1024_1_2 = 10, R1024_1_3 = 55,
+	R1024_1_4 = 49, R1024_1_5 = 18, R1024_1_6 = 23, R1024_1_7 = 52,
+	R1024_2_0 = 33, R1024_2_1 = 4, R1024_2_2 = 51, R1024_2_3 = 13,
+	R1024_2_4 = 34, R1024_2_5 = 41, R1024_2_6 = 59, R1024_2_7 = 17,
+	R1024_3_0 = 5, R1024_3_1 = 20, R1024_3_2 = 48, R1024_3_3 = 41,
+	R1024_3_4 = 47, R1024_3_5 = 28, R1024_3_6 = 16, R1024_3_7 = 25,
+	R1024_4_0 = 41, R1024_4_1 = 9, R1024_4_2 = 37, R1024_4_3 = 31,
+	R1024_4_4 = 12, R1024_4_5 = 47, R1024_4_6 = 44, R1024_4_7 = 30,
+	R1024_5_0 = 16, R1024_5_1 = 34, R1024_5_2 = 56, R1024_5_3 = 51,
+	R1024_5_4 = 4, R1024_5_5 = 53, R1024_5_6 = 42, R1024_5_7 = 41,
+	R1024_6_0 = 31, R1024_6_1 = 44, R1024_6_2 = 47, R1024_6_3 = 46,
+	R1024_6_4 = 19, R1024_6_5 = 42, R1024_6_6 = 44, R1024_6_7 = 25,
+	R1024_7_0 = 9, R1024_7_1 = 48, R1024_7_2 = 35, R1024_7_3 = 52,
+	R1024_7_4 = 23, R1024_7_5 = 31, R1024_7_6 = 37, R1024_7_7 = 20
+};
+
+/* number of rounds for the different block sizes */
+#define SKEIN_256_ROUNDS_TOTAL (72)
+#define SKEIN_512_ROUNDS_TOTAL (72)
+#define SKEIN1024_ROUNDS_TOTAL (80)
+
+
+extern const uint64_t SKEIN_256_IV_128[];
+extern const uint64_t SKEIN_256_IV_160[];
+extern const uint64_t SKEIN_256_IV_224[];
+extern const uint64_t SKEIN_256_IV_256[];
+extern const uint64_t SKEIN_512_IV_128[];
+extern const uint64_t SKEIN_512_IV_160[];
+extern const uint64_t SKEIN_512_IV_224[];
+extern const uint64_t SKEIN_512_IV_256[];
+extern const uint64_t SKEIN_512_IV_384[];
+extern const uint64_t SKEIN_512_IV_512[];
+extern const uint64_t SKEIN1024_IV_384[];
+extern const uint64_t SKEIN1024_IV_512[];
+extern const uint64_t SKEIN1024_IV_1024[];
+
+/* Functions to process blkCnt (nonzero) full block(s) of data. */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd);
+
+#endif /* _SKEIN_IMPL_H_ */
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c b/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c
new file mode 100644
index 000000000000..140d38f76547
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c
@@ -0,0 +1,185 @@
+/*
+ * Pre-computed Skein IVs
+ *
+ * NOTE: these values are not "magic" constants, but
+ * are generated using the Threefish block function.
+ * They are pre-computed here only for speed; i.e., to
+ * avoid the need for a Threefish call during Init().
+ *
+ * The IV for any fixed hash length may be pre-computed.
+ * Only the most common values are included here.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+/*
+ * Illumos implementation note: these constants are for Skein v1.3 as per:
+ * http://www.skein-hash.info/sites/default/files/skein1.3.pdf
+ */
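+/*
+ * For illustration, each table below is the chaining value left in ctx->X
+ * after the corresponding *_Init() routine processes its CFG_FINAL
+ * configuration block (schema/version, output length, sequential tree info)
+ * starting from an all-zero state -- the same computation performed by the
+ * "default:" path in Skein_*_Init() when no precomputed IV is available.
+ */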
+
+#include <sys/skein.h> /* get Skein macros and types */
+#include "skein_impl.h" /* get internal definitions */
+
+#define MK_64 SKEIN_MK_64
+
+/* blkSize = 256 bits. hashSize = 128 bits */
+const uint64_t SKEIN_256_IV_128[] = {
+ MK_64(0xE1111906, 0x964D7260),
+ MK_64(0x883DAAA7, 0x7C8D811C),
+ MK_64(0x10080DF4, 0x91960F7A),
+ MK_64(0xCCF7DDE5, 0xB45BC1C2)
+};
+
+/* blkSize = 256 bits. hashSize = 160 bits */
+const uint64_t SKEIN_256_IV_160[] = {
+ MK_64(0x14202314, 0x72825E98),
+ MK_64(0x2AC4E9A2, 0x5A77E590),
+ MK_64(0xD47A5856, 0x8838D63E),
+ MK_64(0x2DD2E496, 0x8586AB7D)
+};
+
+/* blkSize = 256 bits. hashSize = 224 bits */
+const uint64_t SKEIN_256_IV_224[] = {
+ MK_64(0xC6098A8C, 0x9AE5EA0B),
+ MK_64(0x876D5686, 0x08C5191C),
+ MK_64(0x99CB88D7, 0xD7F53884),
+ MK_64(0x384BDDB1, 0xAEDDB5DE)
+};
+
+/* blkSize = 256 bits. hashSize = 256 bits */
+const uint64_t SKEIN_256_IV_256[] = {
+ MK_64(0xFC9DA860, 0xD048B449),
+ MK_64(0x2FCA6647, 0x9FA7D833),
+ MK_64(0xB33BC389, 0x6656840F),
+ MK_64(0x6A54E920, 0xFDE8DA69)
+};
+
+/* blkSize = 512 bits. hashSize = 128 bits */
+const uint64_t SKEIN_512_IV_128[] = {
+ MK_64(0xA8BC7BF3, 0x6FBF9F52),
+ MK_64(0x1E9872CE, 0xBD1AF0AA),
+ MK_64(0x309B1790, 0xB32190D3),
+ MK_64(0xBCFBB854, 0x3F94805C),
+ MK_64(0x0DA61BCD, 0x6E31B11B),
+ MK_64(0x1A18EBEA, 0xD46A32E3),
+ MK_64(0xA2CC5B18, 0xCE84AA82),
+ MK_64(0x6982AB28, 0x9D46982D)
+};
+
+/* blkSize = 512 bits. hashSize = 160 bits */
+const uint64_t SKEIN_512_IV_160[] = {
+ MK_64(0x28B81A2A, 0xE013BD91),
+ MK_64(0xC2F11668, 0xB5BDF78F),
+ MK_64(0x1760D8F3, 0xF6A56F12),
+ MK_64(0x4FB74758, 0x8239904F),
+ MK_64(0x21EDE07F, 0x7EAF5056),
+ MK_64(0xD908922E, 0x63ED70B8),
+ MK_64(0xB8EC76FF, 0xECCB52FA),
+ MK_64(0x01A47BB8, 0xA3F27A6E)
+};
+
+/* blkSize = 512 bits. hashSize = 224 bits */
+const uint64_t SKEIN_512_IV_224[] = {
+ MK_64(0xCCD06162, 0x48677224),
+ MK_64(0xCBA65CF3, 0xA92339EF),
+ MK_64(0x8CCD69D6, 0x52FF4B64),
+ MK_64(0x398AED7B, 0x3AB890B4),
+ MK_64(0x0F59D1B1, 0x457D2BD0),
+ MK_64(0x6776FE65, 0x75D4EB3D),
+ MK_64(0x99FBC70E, 0x997413E9),
+ MK_64(0x9E2CFCCF, 0xE1C41EF7)
+};
+
+/* blkSize = 512 bits. hashSize = 256 bits */
+const uint64_t SKEIN_512_IV_256[] = {
+ MK_64(0xCCD044A1, 0x2FDB3E13),
+ MK_64(0xE8359030, 0x1A79A9EB),
+ MK_64(0x55AEA061, 0x4F816E6F),
+ MK_64(0x2A2767A4, 0xAE9B94DB),
+ MK_64(0xEC06025E, 0x74DD7683),
+ MK_64(0xE7A436CD, 0xC4746251),
+ MK_64(0xC36FBAF9, 0x393AD185),
+ MK_64(0x3EEDBA18, 0x33EDFC13)
+};
+
+/* blkSize = 512 bits. hashSize = 384 bits */
+const uint64_t SKEIN_512_IV_384[] = {
+ MK_64(0xA3F6C6BF, 0x3A75EF5F),
+ MK_64(0xB0FEF9CC, 0xFD84FAA4),
+ MK_64(0x9D77DD66, 0x3D770CFE),
+ MK_64(0xD798CBF3, 0xB468FDDA),
+ MK_64(0x1BC4A666, 0x8A0E4465),
+ MK_64(0x7ED7D434, 0xE5807407),
+ MK_64(0x548FC1AC, 0xD4EC44D6),
+ MK_64(0x266E1754, 0x6AA18FF8)
+};
+
+/* blkSize = 512 bits. hashSize = 512 bits */
+const uint64_t SKEIN_512_IV_512[] = {
+ MK_64(0x4903ADFF, 0x749C51CE),
+ MK_64(0x0D95DE39, 0x9746DF03),
+ MK_64(0x8FD19341, 0x27C79BCE),
+ MK_64(0x9A255629, 0xFF352CB1),
+ MK_64(0x5DB62599, 0xDF6CA7B0),
+ MK_64(0xEABE394C, 0xA9D5C3F4),
+ MK_64(0x991112C7, 0x1A75B523),
+ MK_64(0xAE18A40B, 0x660FCC33)
+};
+
+/* blkSize = 1024 bits. hashSize = 384 bits */
+const uint64_t SKEIN1024_IV_384[] = {
+ MK_64(0x5102B6B8, 0xC1894A35),
+ MK_64(0xFEEBC9E3, 0xFE8AF11A),
+ MK_64(0x0C807F06, 0xE32BED71),
+ MK_64(0x60C13A52, 0xB41A91F6),
+ MK_64(0x9716D35D, 0xD4917C38),
+ MK_64(0xE780DF12, 0x6FD31D3A),
+ MK_64(0x797846B6, 0xC898303A),
+ MK_64(0xB172C2A8, 0xB3572A3B),
+ MK_64(0xC9BC8203, 0xA6104A6C),
+ MK_64(0x65909338, 0xD75624F4),
+ MK_64(0x94BCC568, 0x4B3F81A0),
+ MK_64(0x3EBBF51E, 0x10ECFD46),
+ MK_64(0x2DF50F0B, 0xEEB08542),
+ MK_64(0x3B5A6530, 0x0DBC6516),
+ MK_64(0x484B9CD2, 0x167BBCE1),
+ MK_64(0x2D136947, 0xD4CBAFEA)
+};
+
+/* blkSize = 1024 bits. hashSize = 512 bits */
+const uint64_t SKEIN1024_IV_512[] = {
+ MK_64(0xCAEC0E5D, 0x7C1B1B18),
+ MK_64(0xA01B0E04, 0x5F03E802),
+ MK_64(0x33840451, 0xED912885),
+ MK_64(0x374AFB04, 0xEAEC2E1C),
+ MK_64(0xDF25A0E2, 0x813581F7),
+ MK_64(0xE4004093, 0x8B12F9D2),
+ MK_64(0xA662D539, 0xC2ED39B6),
+ MK_64(0xFA8B85CF, 0x45D8C75A),
+ MK_64(0x8316ED8E, 0x29EDE796),
+ MK_64(0x053289C0, 0x2E9F91B8),
+ MK_64(0xC3F8EF1D, 0x6D518B73),
+ MK_64(0xBDCEC3C4, 0xD5EF332E),
+ MK_64(0x549A7E52, 0x22974487),
+ MK_64(0x67070872, 0x5B749816),
+ MK_64(0xB9CD28FB, 0xF0581BD1),
+ MK_64(0x0E2940B8, 0x15804974)
+};
+
+/* blkSize = 1024 bits. hashSize = 1024 bits */
+const uint64_t SKEIN1024_IV_1024[] = {
+ MK_64(0xD593DA07, 0x41E72355),
+ MK_64(0x15B5E511, 0xAC73E00C),
+ MK_64(0x5180E5AE, 0xBAF2C4F0),
+ MK_64(0x03BD41D3, 0xFCBCAFAF),
+ MK_64(0x1CAEC6FD, 0x1983A898),
+ MK_64(0x6E510B8B, 0xCDD0589F),
+ MK_64(0x77E2BDFD, 0xC6394ADA),
+ MK_64(0xC11E1DB5, 0x24DCB0A3),
+ MK_64(0xD6D14AF9, 0xC6329AB5),
+ MK_64(0x6A9B0BFC, 0x6EB67E0D),
+ MK_64(0x9243C60D, 0xCCFF1332),
+ MK_64(0x1A1F1DDE, 0x743F02D4),
+ MK_64(0x0996753C, 0x10ED0BB8),
+ MK_64(0x6572DD22, 0xF2B4969A),
+ MK_64(0x61FD3062, 0xD00A579A),
+ MK_64(0x1DE0536E, 0x8682E539)
+};
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h b/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h
new file mode 100644
index 000000000000..ce4353082552
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h
@@ -0,0 +1,116 @@
+/*
+ * Platform-specific definitions for Skein hash function.
+ *
+ * Source code author: Doug Whiting, 2008.
+ *
+ * This algorithm and source code is released to the public domain.
+ *
+ * Many thanks to Brian Gladman for his portable header files.
+ *
+ * To port Skein to an "unsupported" platform, change the definitions
+ * in this file appropriately.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#ifndef _SKEIN_PORT_H_
+#define _SKEIN_PORT_H_
+
+#include <sys/types.h> /* get integer type definitions */
+
+#ifndef RotL_64
+#define RotL_64(x, N) (((x) << (N)) | ((x) >> (64 - (N))))
+#endif
+
+/*
+ * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
+ * performance on x86 CPUs. The Skein code requires the following
+ * definitions for dealing with endianness:
+ *
+ * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
+ * Skein_Put64_LSB_First
+ * Skein_Get64_LSB_First
+ * Skein_Swap64
+ *
+ * If SKEIN_NEED_SWAP is defined at compile time, it is used here
+ * along with the portable versions of Put64/Get64/Swap64, which
+ * are slow in general.
+ *
+ * Otherwise, an "auto-detect" of endianness is attempted below.
+ * If the default handling doesn't work well, the user may insert
+ * platform-specific code instead (e.g., for big-endian CPUs).
+ *
+ */
+#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
+
+#include <sys/isa_defs.h> /* get endianness selection */
+
+#if defined(_ZFS_BIG_ENDIAN)
+/* here for big-endian CPUs */
+#define SKEIN_NEED_SWAP (1)
+#else
+/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+#define SKEIN_NEED_SWAP (0)
+#define Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt)
+#define Skein_Get64_LSB_First(dst64, src08, wCnt) \
+ bcopy(src08, dst64, 8 * (wCnt))
+#endif
+
+#endif /* ifndef SKEIN_NEED_SWAP */
+
+/*
+ * Provide any definitions still needed.
+ */
+#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
+#if SKEIN_NEED_SWAP
+#define Skein_Swap64(w64) \
+ (((((uint64_t)(w64)) & 0xFF) << 56) | \
+ (((((uint64_t)(w64)) >> 8) & 0xFF) << 48) | \
+ (((((uint64_t)(w64)) >> 16) & 0xFF) << 40) | \
+ (((((uint64_t)(w64)) >> 24) & 0xFF) << 32) | \
+ (((((uint64_t)(w64)) >> 32) & 0xFF) << 24) | \
+ (((((uint64_t)(w64)) >> 40) & 0xFF) << 16) | \
+ (((((uint64_t)(w64)) >> 48) & 0xFF) << 8) | \
+ (((((uint64_t)(w64)) >> 56) & 0xFF)))
+#else
+#define Skein_Swap64(w64) (w64)
+#endif
+#endif /* ifndef Skein_Swap64 */
+
+#ifndef Skein_Put64_LSB_First
+static inline void
+Skein_Put64_LSB_First(uint8_t *dst, const uint64_t *src, size_t bCnt)
+{
+ /*
+ * this version is fully portable (big-endian or little-endian),
+ * but slow
+ */
+ size_t n;
+
+ for (n = 0; n < bCnt; n++)
+ dst[n] = (uint8_t)(src[n >> 3] >> (8 * (n & 7)));
+}
+#endif /* ifndef Skein_Put64_LSB_First */
+
+#ifndef Skein_Get64_LSB_First
+static inline void
+Skein_Get64_LSB_First(uint64_t *dst, const uint8_t *src, size_t wCnt)
+{
+ /*
+ * this version is fully portable (big-endian or little-endian),
+ * but slow
+ */
+ size_t n;
+
+ for (n = 0; n < 8 * wCnt; n += 8)
+ dst[n / 8] = (((uint64_t)src[n])) +
+ (((uint64_t)src[n + 1]) << 8) +
+ (((uint64_t)src[n + 2]) << 16) +
+ (((uint64_t)src[n + 3]) << 24) +
+ (((uint64_t)src[n + 4]) << 32) +
+ (((uint64_t)src[n + 5]) << 40) +
+ (((uint64_t)src[n + 6]) << 48) +
+ (((uint64_t)src[n + 7]) << 56);
+}
+#endif /* ifndef Skein_Get64_LSB_First */
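+
+/*
+ * For clarity: whichever definitions are selected above,
+ * Skein_Put64_LSB_First() writes each 64-bit word least-significant byte
+ * first and Skein_Get64_LSB_First() reads it back the same way; e.g. the
+ * word 0x0807060504030201 travels through the byte stream as
+ * 01 02 03 04 05 06 07 08, matching Skein's native little-endian convention.
+ */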
+
+#endif /* _SKEIN_PORT_H_ */
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_cipher.c b/sys/contrib/openzfs/module/icp/api/kcf_cipher.c
new file mode 100644
index 000000000000..d6aa48147edb
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/api/kcf_cipher.c
@@ -0,0 +1,930 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * Encryption and decryption routines.
+ */
+
+/*
+ * The following are the possible return values common to all the routines
+ * below. The applicability of some of these return values depends on the
+ * presence of the arguments.
+ *
+ * CRYPTO_SUCCESS: The operation completed successfully.
+ * CRYPTO_QUEUED: A request was submitted successfully. The callback
+ * routine will be called when the operation is done.
+ * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or
+ * CRYPTO_INVALID_MECH for problems with the 'mech'.
+ * CRYPTO_INVALID_DATA for bogus 'data'
+ * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
+ * CRYPTO_INVALID_CONTEXT: Not a valid context.
+ * CRYPTO_BUSY: Cannot process the request now. Schedule a
+ * crypto_bufcall(), or try later.
+ * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is
+ * capable of a function or a mechanism.
+ * CRYPTO_INVALID_KEY: bogus 'key' argument.
+ * CRYPTO_INVALID_PLAINTEXT: bogus 'plaintext' argument.
+ * CRYPTO_INVALID_CIPHERTEXT: bogus 'ciphertext' argument.
+ */
+
+/*
+ * crypto_cipher_init_prov()
+ *
+ * Arguments:
+ *
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of an
+ * encryption or decryption with the 'mech' using 'key'.
+ * 'tmpl' is created by a previous call to
+ * crypto_create_ctx_template().
+ * ctxp: Pointer to a crypto_context_t.
+ * func: CRYPTO_FG_ENCRYPT or CRYPTO_FG_DECRYPT.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * This is a common function invoked internally by both
+ * crypto_encrypt_init() and crypto_decrypt_init().
+ * Asynchronously submits a request for, or synchronously performs the
+ * initialization of an encryption or a decryption operation.
+ * When possible and applicable, will internally use the pre-expanded key
+ * schedule from the context template, tmpl.
+ * When complete and successful, 'ctxp' will contain a crypto_context_t
+ * valid for later calls to encrypt_update() and encrypt_final(), or
+ * decrypt_update() and decrypt_final().
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+static int
+crypto_cipher_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_spi_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq, crypto_func_group_t func)
+{
+ int error;
+ crypto_ctx_t *ctx;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ if (func == CRYPTO_FG_ENCRYPT) {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_ENCRYPT);
+ } else {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_DECRYPT);
+ }
+
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ }
+
+ /* Allocate and initialize the canonical context */
+ if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+
+ if (func == CRYPTO_FG_ENCRYPT)
+ error = KCF_PROV_ENCRYPT_INIT(real_provider, ctx,
+ &lmech, key, tmpl, KCF_SWFP_RHNDL(crq));
+ else {
+ ASSERT(func == CRYPTO_FG_DECRYPT);
+
+ error = KCF_PROV_DECRYPT_INIT(real_provider, ctx,
+ &lmech, key, tmpl, KCF_SWFP_RHNDL(crq));
+ }
+ KCF_PROV_INCRSTATS(pd, error);
+
+ goto done;
+ }
+
+ /* Check if context sharing is possible */
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ key->ck_format == CRYPTO_KEY_RAW &&
+ KCF_CAN_SHARE_OPSTATE(pd, mech->cm_type)) {
+ kcf_context_t *tctxp = (kcf_context_t *)ctx;
+ kcf_provider_desc_t *tpd = NULL;
+ crypto_mech_info_t *sinfo;
+
+ if ((kcf_get_sw_prov(mech->cm_type, &tpd, &tctxp->kc_mech,
+ B_FALSE) == CRYPTO_SUCCESS)) {
+ int tlen;
+
+ sinfo = &(KCF_TO_PROV_MECHINFO(tpd, mech->cm_type));
+ /*
+ * key->ck_length from the consumer is always in bits.
+ * We convert it to be in the same unit registered by
+ * the provider in order to do a comparison.
+ */
+ if (sinfo->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES)
+ tlen = key->ck_length >> 3;
+ else
+ tlen = key->ck_length;
+ /*
+ * Check if the software provider can support context
+ * sharing and support this key length.
+ */
+ if ((sinfo->cm_mech_flags & CRYPTO_CAN_SHARE_OPSTATE) &&
+ (tlen >= sinfo->cm_min_key_length) &&
+ (tlen <= sinfo->cm_max_key_length)) {
+ ctx->cc_flags = CRYPTO_INIT_OPSTATE;
+ tctxp->kc_sw_prov_desc = tpd;
+ } else
+ KCF_PROV_REFRELE(tpd);
+ }
+ }
+
+ if (func == CRYPTO_FG_ENCRYPT) {
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid,
+ mech, key, NULL, NULL, tmpl);
+ } else {
+ ASSERT(func == CRYPTO_FG_DECRYPT);
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid,
+ mech, key, NULL, NULL, tmpl);
+ }
+
+ error = kcf_submit_request(real_provider, ctx, crq, &params,
+ B_FALSE);
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+done:
+ if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED))
+ *ctxp = (crypto_context_t)ctx;
+ else {
+ /* Release the hold done in kcf_new_ctx(). */
+ KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
+ }
+
+ return (error);
+}
+
+/*
+ * Same as crypto_cipher_init_prov(), but relies on the scheduler to pick
+ * an appropriate provider. See crypto_cipher_init_prov() comments for more
+ * details.
+ */
+static int
+crypto_cipher_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq, crypto_func_group_t func)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, func, CHECK_RESTRICT(crq), 0)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ error = crypto_cipher_init_prov(pd, pd->pd_sid, mech, key,
+ spi_ctx_tmpl, ctxp, crq, func);
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_encrypt_prov()
+ *
+ * Arguments:
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * plaintext: The message to be encrypted
+ * ciphertext: Storage for the encrypted message. The length needed
+ * depends on the mechanism, and the plaintext's size.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of an
+ * encryption with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * single-part encryption of 'plaintext' with the mechanism 'mech', using
+ * the key 'key'.
+ * When complete and successful, 'ciphertext' will contain the encrypted
+ * message.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_encrypt_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *plaintext, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
+ crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int error;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_ENCRYPT_ATOMIC);
+
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ }
+
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
+ plaintext, ciphertext, tmpl);
+
+ error = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (error);
+}
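+
+/*
+ * Illustrative sketch of a synchronous, single-part encryption through
+ * crypto_encrypt_prov() (not part of the original source).  The mechanism
+ * name, the raw-key fields and the CRYPTO_DATA_RAW setup of 'pt'/'ct' are
+ * assumptions made for the example; 'pd' is a provider descriptor on which
+ * the caller already holds a reference and 'sid' a session id for it.
+ *
+ *	crypto_mechanism_t mech = { 0 };
+ *	crypto_key_t key = { 0 };
+ *	crypto_data_t pt, ct;
+ *	int rv;
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_AES_ECB);
+ *	key.ck_format = CRYPTO_KEY_RAW;
+ *	key.ck_data = key_bytes;		// caller-supplied buffer
+ *	key.ck_length = 256;			// key length in bits
+ *
+ *	pt.cd_format = CRYPTO_DATA_RAW;
+ *	pt.cd_offset = 0;
+ *	pt.cd_length = buflen;
+ *	pt.cd_raw.iov_base = (char *)inbuf;
+ *	pt.cd_raw.iov_len = buflen;
+ *	// ... 'ct' is set up the same way over the output buffer ...
+ *
+ *	rv = crypto_encrypt_prov(pd, sid, &mech, &pt, &key, NULL, &ct, NULL);
+ *	// a NULL call_req makes the call synchronous
+ */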
+
+/*
+ * Same as crypto_encrypt_prov(), but relies on the scheduler to pick
+ * a provider. See crypto_encrypt_prov() for more details.
+ */
+int
+crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_ENCRYPT_ATOMIC, CHECK_RESTRICT(crq),
+ plaintext->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_ENCRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key,
+ plaintext, ciphertext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid,
+ mech, key, plaintext, ciphertext, spi_ctx_tmpl);
+ error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_encrypt_init_prov()
+ *
+ * Calls crypto_cipher_init_prov() to initialize an encryption operation.
+ */
+int
+crypto_encrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_ENCRYPT));
+}
+
+/*
+ * crypto_encrypt_init()
+ *
+ * Calls crypto_cipher_init() to initialize an encryption operation
+ */
+int
+crypto_encrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init(mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_ENCRYPT));
+}
+
+/*
+ * crypto_encrypt_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by encrypt_init().
+ * plaintext: The message part to be encrypted
+ * ciphertext: Storage for the encrypted message part.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of an encryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_encrypt_update(crypto_context_t context, crypto_data_t *plaintext,
+ crypto_data_t *ciphertext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext,
+ ciphertext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ return (error);
+ }
+
+ /* Check if we should use a software provider for small jobs */
+ if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) {
+ if (plaintext->cd_length < kcf_ctx->kc_mech->me_threshold &&
+ kcf_ctx->kc_sw_prov_desc != NULL &&
+ KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) {
+ pd = kcf_ctx->kc_sw_prov_desc;
+ }
+ }
+
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE,
+ ctx->cc_session, NULL, NULL, plaintext, ciphertext, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+
+ return (error);
+}
+
+/*
+ * crypto_encrypt_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by encrypt_init().
+ * ciphertext: Storage for the last part of encrypted message
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * final part of an encryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_FINAL,
+ ctx->cc_session, NULL, NULL, NULL, ciphertext, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
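+
+/*
+ * Illustrative multi-part flow (not part of the original source): the
+ * crypto_context_t handed back by crypto_encrypt_init() is threaded
+ * through the update calls and released by crypto_encrypt_final().
+ * Setup of 'mech', 'key' and the crypto_data_t descriptors, as well as
+ * error handling on the update/final calls, is omitted here.
+ *
+ *	crypto_context_t cc;
+ *
+ *	if (crypto_encrypt_init(&mech, &key, NULL, &cc, NULL) !=
+ *	    CRYPTO_SUCCESS)
+ *		return;
+ *	(void) crypto_encrypt_update(cc, &pt_part1, &ct_part1, NULL);
+ *	(void) crypto_encrypt_update(cc, &pt_part2, &ct_part2, NULL);
+ *	(void) crypto_encrypt_final(cc, &ct_tail, NULL);  // releases cc
+ */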
+
+/*
+ * crypto_decrypt_prov()
+ *
+ * Arguments:
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * ciphertext: The message to be decrypted
+ * plaintext: Storage for the decrypted message. The length needed
+ *		depends on the mechanism, and the ciphertext's size.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of a
+ *		decryption with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * single-part decryption of 'ciphertext' with the mechanism 'mech', using
+ * the key 'key'.
+ * When complete and successful, 'plaintext' will contain the decrypted
+ * message.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_decrypt_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *ciphertext, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
+ crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_DECRYPT_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
+ ciphertext, plaintext, tmpl);
+
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_decrypt_prov(), but relies on the KCF scheduler to
+ * choose a provider. See crypto_decrypt_prov() comments for more
+ * information.
+ */
+int
+crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_DECRYPT_ATOMIC, CHECK_RESTRICT(crq),
+ ciphertext->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_DECRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key,
+ ciphertext, plaintext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid,
+ mech, key, ciphertext, plaintext, spi_ctx_tmpl);
+ error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_decrypt_init_prov()
+ *
+ * Calls crypto_cipher_init_prov() to initialize a decryption operation
+ */
+int
+crypto_decrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_DECRYPT));
+}
+
+/*
+ * crypto_decrypt_init()
+ *
+ * Calls crypto_cipher_init() to initialize a decryption operation
+ */
+int
+crypto_decrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init(mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_DECRYPT));
+}
+
+/*
+ * crypto_decrypt_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by decrypt_init().
+ * ciphertext: The message part to be decrypted
+ * plaintext: Storage for the decrypted message part.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a decryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_decrypt_update(crypto_context_t context, crypto_data_t *ciphertext,
+ crypto_data_t *plaintext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext,
+ plaintext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ return (error);
+ }
+
+ /* Check if we should use a software provider for small jobs */
+ if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) {
+ if (ciphertext->cd_length < kcf_ctx->kc_mech->me_threshold &&
+ kcf_ctx->kc_sw_prov_desc != NULL &&
+ KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) {
+ pd = kcf_ctx->kc_sw_prov_desc;
+ }
+ }
+
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE,
+ ctx->cc_session, NULL, NULL, ciphertext, plaintext, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+
+ return (error);
+}
+
+/*
+ * crypto_decrypt_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by decrypt_init().
+ * plaintext: Storage for the last part of the decrypted message
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * final part of a decryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_decrypt_final(crypto_context_t context, crypto_data_t *plaintext,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext,
+ NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_FINAL,
+ ctx->cc_session, NULL, NULL, NULL, plaintext, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+/*
+ * See comments for crypto_encrypt_update().
+ */
+int
+crypto_encrypt_single(crypto_context_t context, crypto_data_t *plaintext,
+ crypto_data_t *ciphertext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_ENCRYPT(pd, ctx, plaintext,
+ ciphertext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+ NULL, NULL, plaintext, ciphertext, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+/*
+ * See comments for crypto_decrypt_update().
+ */
+int
+crypto_decrypt_single(crypto_context_t context, crypto_data_t *ciphertext,
+ crypto_data_t *plaintext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DECRYPT(pd, ctx, ciphertext,
+ plaintext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+ NULL, NULL, ciphertext, plaintext, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(crypto_encrypt_prov);
+EXPORT_SYMBOL(crypto_encrypt);
+EXPORT_SYMBOL(crypto_encrypt_init_prov);
+EXPORT_SYMBOL(crypto_encrypt_init);
+EXPORT_SYMBOL(crypto_encrypt_update);
+EXPORT_SYMBOL(crypto_encrypt_final);
+EXPORT_SYMBOL(crypto_decrypt_prov);
+EXPORT_SYMBOL(crypto_decrypt);
+EXPORT_SYMBOL(crypto_decrypt_init_prov);
+EXPORT_SYMBOL(crypto_decrypt_init);
+EXPORT_SYMBOL(crypto_decrypt_update);
+EXPORT_SYMBOL(crypto_decrypt_final);
+EXPORT_SYMBOL(crypto_encrypt_single);
+EXPORT_SYMBOL(crypto_decrypt_single);
+#endif
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c
new file mode 100644
index 000000000000..21b0977d3634
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c
@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * Crypto context manipulation routines
+ */
+
+/*
+ * crypto_create_ctx_template()
+ *
+ * Arguments:
+ *
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * ptmpl: a storage for the opaque crypto_ctx_template_t, allocated and
+ * ptmpl: storage for the opaque crypto_ctx_template_t, allocated and
+ * dispatched to.
+ * kmflag: KM_SLEEP/KM_NOSLEEP mem. alloc. flag.
+ *
+ * Description:
+ * Redirects the call to the software provider of the specified
+ * mechanism. That provider will allocate and pre-compute/pre-expand
+ * the context template, reusable by later calls to crypto_xxx_init().
+ * The size and address of that provider context template are stored
+ * in an internal structure, kcf_ctx_template_t. The address of that
+ * structure is given back to the caller in *ptmpl.
+ *
+ * Context:
+ * Process or interrupt.
+ *
+ * Returns:
+ * CRYPTO_SUCCESS when the context template is successfully created.
+ * CRYPTO_HOST_MEMORY: mem alloc failure
+ * CRYPTO_ARGUMENTS_BAD: NULL storage for the ctx template.
+ * CRYPTO_MECHANISM_INVALID: invalid mechanism 'mech'.
+ */
+int
+crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t *ptmpl, int kmflag)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_mechanism_t prov_mech;
+
+	/* Validate a few arguments */
+
+ if (ptmpl == NULL)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (mech == NULL)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ error = kcf_get_sw_prov(mech->cm_type, &pd, &me, B_TRUE);
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+
+ if ((ctx_tmpl = (kcf_ctx_template_t *)kmem_alloc(
+ sizeof (kcf_ctx_template_t), kmflag)) == NULL) {
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* Pass a mechtype that the provider understands */
+ prov_mech.cm_type = KCF_TO_PROV_MECHNUM(pd, mech->cm_type);
+ prov_mech.cm_param = mech->cm_param;
+ prov_mech.cm_param_len = mech->cm_param_len;
+
+ error = KCF_PROV_CREATE_CTX_TEMPLATE(pd, &prov_mech, key,
+ &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size), KCF_RHNDL(kmflag));
+
+ if (error == CRYPTO_SUCCESS) {
+ ctx_tmpl->ct_generation = me->me_gen_swprov;
+ *ptmpl = ctx_tmpl;
+ } else {
+ kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
+ }
+ KCF_PROV_REFRELE(pd);
+
+ return (error);
+}
+
+/*
+ * crypto_destroy_ctx_template()
+ *
+ * Arguments:
+ *
+ * tmpl: an opaque crypto_ctx_template_t previously created by
+ * crypto_create_ctx_template()
+ *
+ * Description:
+ * Frees the embedded crypto_spi_ctx_template_t, then the
+ * kcf_ctx_template_t.
+ *
+ * Context:
+ * Process or interrupt.
+ *
+ */
+void
+crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
+{
+ kcf_ctx_template_t *ctx_tmpl = (kcf_ctx_template_t *)tmpl;
+
+ if (ctx_tmpl == NULL)
+ return;
+
+ ASSERT(ctx_tmpl->ct_prov_tmpl != NULL);
+
+ bzero(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
+ kmem_free(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
+ kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
+}
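+
+/*
+ * Illustrative context-template lifecycle (not part of the original
+ * source).  Pre-expanding the key schedule once and reusing it for many
+ * single-part operations on the same key is the intended use; everything
+ * apart from the k-API calls themselves is an assumption of the example.
+ *
+ *	crypto_ctx_template_t tmpl = NULL;
+ *	int i;
+ *
+ *	if (crypto_create_ctx_template(&mech, &key, &tmpl, KM_SLEEP) ==
+ *	    CRYPTO_SUCCESS) {
+ *		for (i = 0; i < nbufs; i++)
+ *			(void) crypto_encrypt(&mech, &pt[i], &key, tmpl,
+ *			    &ct[i], NULL);
+ *		crypto_destroy_ctx_template(tmpl);
+ *	}
+ */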
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(crypto_create_ctx_template);
+EXPORT_SYMBOL(crypto_destroy_ctx_template);
+#endif
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_digest.c b/sys/contrib/openzfs/module/icp/api/kcf_digest.c
new file mode 100644
index 000000000000..aa68d69bc162
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/api/kcf_digest.c
@@ -0,0 +1,491 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * Message digest routines
+ */
+
+/*
+ * The following are the possible returned values common to all the routines
+ * below. The applicability of some of these return values depends on the
+ * presence of the arguments.
+ *
+ * CRYPTO_SUCCESS: The operation completed successfully.
+ * CRYPTO_QUEUED: A request was submitted successfully. The callback
+ * routine will be called when the operation is done.
+ * CRYPTO_MECHANISM_INVALID or CRYPTO_INVALID_MECH_PARAM
+ * for problems with the 'mech'.
+ * CRYPTO_INVALID_DATA for bogus 'data'
+ * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
+ * CRYPTO_INVALID_CONTEXT: Not a valid context.
+ * CRYPTO_BUSY: Cannot process the request now. Schedule a
+ * crypto_bufcall(), or try later.
+ * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED:
+ * No provider is capable of a function or a mechanism.
+ */
+
+
+/*
+ * crypto_digest_prov()
+ *
+ * Arguments:
+ * pd: pointer to the descriptor of the provider to use for this
+ * operation.
+ * sid: provider session id.
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * data: The message to be digested.
+ * digest: Storage for the digest. The length needed depends on the
+ * mechanism.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * digesting operation of 'data' on the specified
+ * provider with the specified session.
+ * When complete and successful, 'digest' will contain the digest value.
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_digest_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *data, crypto_data_t *digest,
+ crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq),
+ pd, &real_provider, CRYPTO_FG_DIGEST_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, NULL,
+ data, digest);
+
+ /* no crypto context to carry between multiple parts. */
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+
+/*
+ * Same as crypto_digest_prov(), but relies on the KCF scheduler to
+ * choose a provider. See crypto_digest_prov() comments for more information.
+ */
+int
+crypto_digest(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_data_t *digest, crypto_call_req_t *crq)
+{
+ int error;
+ kcf_provider_desc_t *pd;
+ kcf_req_params_t params;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error, list,
+ CRYPTO_FG_DIGEST_ATOMIC, CHECK_RESTRICT(crq),
+ data->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+ error = KCF_PROV_DIGEST_ATOMIC(pd, pd->pd_sid, &lmech, data,
+ digest, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
+ (data->cd_length > pd->pd_hash_limit)) {
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC,
+ pd->pd_sid, mech, NULL, data, digest);
+
+ /* no crypto context to carry between multiple parts. */
+ error = kcf_submit_request(pd, NULL, crq, &params,
+ B_FALSE);
+ }
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
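+
+/*
+ * Illustrative single-part digest (not part of the original source); the
+ * mechanism name and the CRYPTO_DATA_RAW setup of 'data' and 'digest' are
+ * assumptions of the example.
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256);
+ *	rv = crypto_digest(&mech, &data, &digest, NULL);  // synchronous
+ */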
+
+/*
+ * crypto_digest_init_prov()
+ *
+ * pd: pointer to the descriptor of the provider to use for this
+ * operation.
+ * sid: provider session id.
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * ctxp: Pointer to a crypto_context_t.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * initialization of a message digest operation on the specified
+ * provider with the specified session.
+ * When complete and successful, 'ctxp' will contain a crypto_context_t
+ * valid for later calls to digest_update() and digest_final().
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ */
+int
+crypto_digest_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_context_t *ctxp, crypto_call_req_t *crq)
+{
+ int error;
+ crypto_ctx_t *ctx;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_DIGEST);
+
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ }
+
+ /* Allocate and initialize the canonical context */
+ if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+ error = KCF_PROV_DIGEST_INIT(real_provider, ctx, &lmech,
+ KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_INIT, sid,
+ mech, NULL, NULL, NULL);
+ error = kcf_submit_request(real_provider, ctx, crq, &params,
+ B_FALSE);
+ }
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED))
+ *ctxp = (crypto_context_t)ctx;
+ else {
+ /* Release the hold done in kcf_new_ctx(). */
+ KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
+ }
+
+ return (error);
+}
+
+/*
+ * Same as crypto_digest_init_prov(), but relies on the KCF scheduler
+ * to choose a provider. See crypto_digest_init_prov() comments for
+ * more information.
+ */
+int
+crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_provider_desc_t *pd;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error,
+ list, CRYPTO_FG_DIGEST, CHECK_RESTRICT(crq), 0)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) {
+ /*
+ * The hardware provider has limited digest support.
+ * So, we fallback early here to using a software provider.
+		 * So, we fall back early here to using a software provider.
+		 *
+		 * XXX - need to enhance to do the fallback later in
+		 * crypto_digest_update() if the size of accumulated input data
+		 * exceeds the maximum size digestible by the hardware provider.
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ error = crypto_digest_init_prov(pd, pd->pd_sid,
+ mech, ctxp, crq);
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_digest_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by digest_init().
+ * data: The part of message to be digested.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a message digest operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_digest_update(crypto_context_t context, crypto_data_t *data,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST_UPDATE(pd, ctx, data, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_UPDATE,
+ ctx->cc_session, NULL, NULL, data, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ return (error);
+}
+
+/*
+ * crypto_digest_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by digest_init().
+ * digest: The storage for the digest.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * final part of a message digest operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_digest_final(crypto_context_t context, crypto_data_t *digest,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST_FINAL(pd, ctx, digest, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_FINAL,
+ ctx->cc_session, NULL, NULL, NULL, digest);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
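+
+/*
+ * Illustrative multi-part digest flow (not part of the original source);
+ * mechanism and data descriptor setup, as well as error handling on the
+ * update/final calls, are omitted.
+ *
+ *	crypto_context_t cc;
+ *
+ *	if (crypto_digest_init(&mech, &cc, NULL) != CRYPTO_SUCCESS)
+ *		return;
+ *	(void) crypto_digest_update(cc, &chunk1, NULL);
+ *	(void) crypto_digest_update(cc, &chunk2, NULL);
+ *	(void) crypto_digest_final(cc, &digest, NULL);  // releases cc
+ */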
+
+/*
+ * Performs a digest update on the specified key. Note that there is
+ * no k-API crypto_digest_key() equivalent of this function.
+ */
+int
+crypto_digest_key_prov(crypto_context_t context, crypto_key_t *key,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST_KEY(pd, ctx, key, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_DIGEST_KEY,
+ ctx->cc_session, NULL, key, NULL, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ return (error);
+}
+
+/*
+ * See comments for crypto_digest_update() and crypto_digest_final().
+ */
+int
+crypto_digest_single(crypto_context_t context, crypto_data_t *data,
+ crypto_data_t *digest, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST(pd, ctx, data, digest, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+ NULL, NULL, data, digest);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(crypto_digest_prov);
+EXPORT_SYMBOL(crypto_digest);
+EXPORT_SYMBOL(crypto_digest_init_prov);
+EXPORT_SYMBOL(crypto_digest_init);
+EXPORT_SYMBOL(crypto_digest_update);
+EXPORT_SYMBOL(crypto_digest_final);
+EXPORT_SYMBOL(crypto_digest_key_prov);
+EXPORT_SYMBOL(crypto_digest_single);
+#endif
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_mac.c b/sys/contrib/openzfs/module/icp/api/kcf_mac.c
new file mode 100644
index 000000000000..a7722d8f914c
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/api/kcf_mac.c
@@ -0,0 +1,645 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * Message authentication code routines.
+ */
+
+/*
+ * The following are the possible returned values common to all the routines
+ * below. The applicability of some of these return values depends on the
+ * presence of the arguments.
+ *
+ * CRYPTO_SUCCESS: The operation completed successfully.
+ * CRYPTO_QUEUED: A request was submitted successfully. The callback
+ * routine will be called when the operation is done.
+ * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or
+ * CRYPTO_INVALID_MECH for problems with the 'mech'.
+ * CRYPTO_INVALID_DATA for bogus 'data'
+ * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
+ * CRYPTO_INVALID_CONTEXT: Not a valid context.
+ * CRYPTO_BUSY: Cannot process the request now. Schedule a
+ * crypto_bufcall(), or try later.
+ * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is
+ * capable of a function or a mechanism.
+ * CRYPTO_INVALID_KEY: bogus 'key' argument.
+ * CRYPTO_INVALID_MAC: bogus 'mac' argument.
+ */
+
+/*
+ * crypto_mac_prov()
+ *
+ * Arguments:
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * data: The message to compute the MAC for.
+ * mac: Storage for the MAC. The length needed depends on the mechanism.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of a
+ * MAC with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * single-part message authentication of 'data' with the mechanism
+ * 'mech', using the key 'key', on the specified provider with
+ * the specified session id.
+ * When complete and successful, 'mac' will contain the message
+ * authentication code.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'crq'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_MAC_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
+ data, mac, tmpl);
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_mac_prov(), but relies on the KCF scheduler to choose
+ * a provider. See crypto_mac_prov() comments for more information.
+ */
+int
+crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq),
+ data->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_MAC_ATOMIC(pd, pd->pd_sid, &lmech, key, data,
+ mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
+ (data->cd_length > pd->pd_hash_limit)) {
+ /*
+ * XXX - We need a check to see if this is indeed
+ * a HMAC. So far, all kernel clients use
+ * this interface only for HMAC. So, this is fine
+ * for now.
+ */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_ATOMIC,
+ pd->pd_sid, mech, key, data, mac, spi_ctx_tmpl);
+
+ error = kcf_submit_request(pd, NULL, crq, &params,
+ KCF_ISDUALREQ(crq));
+ }
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
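+
+/*
+ * Illustrative single-part HMAC computation (not part of the original
+ * source); the SUN_CKM_SHA256_HMAC mechanism name and the raw-key setup
+ * are assumptions of the example.
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC);
+ *	key.ck_format = CRYPTO_KEY_RAW;
+ *	key.ck_data = hmac_key_bytes;
+ *	key.ck_length = hmac_key_bits;
+ *	rv = crypto_mac(&mech, &data, &key, NULL, &mac, NULL);
+ */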
+
+/*
+ * Single part operation to compute the MAC corresponding to the specified
+ * 'data' and to verify that it matches the MAC specified by 'mac'.
+ * The other arguments are the same as the function crypto_mac_prov().
+ */
+int
+crypto_mac_verify_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_MAC_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_MAC_VERIFY_ATOMIC, sid, mech,
+ key, data, mac, tmpl);
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_mac_verify_prov(), but relies on the KCF scheduler to choose
+ * a provider. See crypto_mac_verify_prov() comments for more information.
+ */
+int
+crypto_mac_verify(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq),
+ data->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_MAC_VERIFY_ATOMIC(pd, pd->pd_sid, &lmech, key,
+ data, mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
+ (data->cd_length > pd->pd_hash_limit)) {
+ /* see comments in crypto_mac() */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params,
+ KCF_OP_MAC_VERIFY_ATOMIC, pd->pd_sid, mech,
+ key, data, mac, spi_ctx_tmpl);
+
+ error = kcf_submit_request(pd, NULL, crq, &params,
+ KCF_ISDUALREQ(crq));
+ }
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
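As a usage illustration (editorial sketch, not part of the imported file), the single-part entry points above can be driven synchronously as below. The mechanism name SUN_CKM_SHA512_HMAC, the CRYPTO_BYTES2BITS() helper, and the buffer and key names are assumptions drawn from sys/crypto/common.h and typical ICP consumers; error handling is trimmed.

    /*
     * Illustrative only: compute an HMAC over 'msg' with crypto_mac(), then
     * check it again with crypto_mac_verify(). Passing a NULL
     * crypto_call_req_t makes both calls synchronous.
     */
    static int
    example_hmac_roundtrip(uint8_t *msg, size_t msglen,
        uint8_t *keybuf, size_t keylen, uint8_t *digest, size_t digestlen)
    {
            crypto_mechanism_t mech;
            crypto_key_t key;
            crypto_data_t data, mac;
            int ret;

            mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
            mech.cm_param = NULL;
            mech.cm_param_len = 0;
            if (mech.cm_type == CRYPTO_MECH_INVALID)
                    return (CRYPTO_MECHANISM_INVALID);

            key.ck_format = CRYPTO_KEY_RAW;
            key.ck_data = keybuf;
            key.ck_length = CRYPTO_BYTES2BITS(keylen);   /* key length in bits */

            data.cd_format = CRYPTO_DATA_RAW;
            data.cd_offset = 0;
            data.cd_length = msglen;
            data.cd_raw.iov_base = (char *)msg;
            data.cd_raw.iov_len = msglen;

            mac.cd_format = CRYPTO_DATA_RAW;
            mac.cd_offset = 0;
            mac.cd_length = digestlen;
            mac.cd_raw.iov_base = (char *)digest;
            mac.cd_raw.iov_len = digestlen;

            /* Compute the MAC, then verify it in a second single-part pass. */
            ret = crypto_mac(&mech, &data, &key, NULL, &mac, NULL);
            if (ret != CRYPTO_SUCCESS)
                    return (ret);
            return (crypto_mac_verify(&mech, &data, &key, NULL, &mac, NULL));
    }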
+
+/*
+ * crypto_mac_init_prov()
+ *
+ * Arguments:
+ * pd: pointer to the descriptor of the provider to use for this
+ * operation.
+ * sid: provider session id.
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of a
+ * MAC with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * ctxp: Pointer to a crypto_context_t.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * initialization of a MAC operation on the specified provider with
+ * the specified session.
+ * When possible and applicable, will internally use the pre-computed MAC
+ * context from the context template, tmpl.
+ * When complete and successful, 'ctxp' will contain a crypto_context_t
+ * valid for later calls to mac_update() and mac_final().
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key, crypto_spi_ctx_template_t tmpl,
+ crypto_context_t *ctxp, crypto_call_req_t *crq)
+{
+ int rv;
+ crypto_ctx_t *ctx;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_MAC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ /* Allocate and initialize the canonical context */
+ if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+ rv = KCF_PROV_MAC_INIT(real_provider, ctx, &lmech, key, tmpl,
+ KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, rv);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_INIT, sid, mech, key,
+ NULL, NULL, tmpl);
+ rv = kcf_submit_request(real_provider, ctx, crq, &params,
+ B_FALSE);
+ }
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ if ((rv == CRYPTO_SUCCESS) || (rv == CRYPTO_QUEUED))
+ *ctxp = (crypto_context_t)ctx;
+ else {
+ /* Release the hold done in kcf_new_ctx(). */
+ KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
+ }
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_mac_init_prov(), but relies on the KCF scheduler to
+ * choose a provider. See crypto_mac_init_prov() comments for more
+ * information.
+ */
+int
+crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_MAC, CHECK_RESTRICT(crq), 0)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+ * For SW providers, check the validity of the context template.
+ * It is very rare that the generation number mismatches, so it
+ * is acceptable to fail here and let the consumer recover by
+ * freeing this tmpl and creating a new one for the key and the
+ * new SW provider.
+ */
+
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) {
+ /*
+ * The hardware provider has limited HMAC support, so we fall
+ * back early here to using a software provider.
+ *
+ * XXX - need to enhance this to do the fallback later in
+ * crypto_mac_update() if the size of the accumulated input data
+ * exceeds the maximum size digestible by the hardware provider.
+ */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ error = crypto_mac_init_prov(pd, pd->pd_sid, mech, key,
+ spi_ctx_tmpl, ctxp, crq);
+ }
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_mac_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by mac_init().
+ * data: The message part to be MAC'ed
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a MAC operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_update(crypto_context_t context, crypto_data_t *data,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ kcf_req_params_t params;
+ int rv;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ rv = KCF_PROV_MAC_UPDATE(pd, ctx, data, NULL);
+ KCF_PROV_INCRSTATS(pd, rv);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_UPDATE,
+ ctx->cc_session, NULL, NULL, data, NULL, NULL);
+ rv = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ return (rv);
+}
+
+/*
+ * crypto_mac_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by mac_init().
+ * mac: Storage for the message authentication code.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs, the
+ * final part of a MAC operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_final(crypto_context_t context, crypto_data_t *mac,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ kcf_req_params_t params;
+ int rv;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ rv = KCF_PROV_MAC_FINAL(pd, ctx, mac, NULL);
+ KCF_PROV_INCRSTATS(pd, rv);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_FINAL,
+ ctx->cc_session, NULL, NULL, NULL, mac, NULL);
+ rv = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(rv, kcf_ctx);
+ return (rv);
+}
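For the multi-part path documented above, a hedged sketch (again editorial, not part of the file) of the init/update/final sequence follows. The fragment names are assumptions; 'mech', 'key', and the crypto_data_t arguments are assumed to be set up as in the single-part sketch earlier.

    /*
     * Illustrative only: stream two message fragments through one MAC
     * context synchronously and emit the MAC into 'mac'.
     */
    static int
    example_mac_multipart(crypto_mechanism_t *mech, crypto_key_t *key,
        crypto_data_t *part1, crypto_data_t *part2, crypto_data_t *mac)
    {
            crypto_context_t ctx;
            int ret;

            ret = crypto_mac_init(mech, key, NULL, &ctx, NULL);
            if (ret != CRYPTO_SUCCESS)
                    return (ret);

            /* Each fragment of the message is fed through the same context. */
            ret = crypto_mac_update(ctx, part1, NULL);
            if (ret == CRYPTO_SUCCESS)
                    ret = crypto_mac_update(ctx, part2, NULL);

            /*
             * crypto_mac_final() emits the MAC and, per the comment above,
             * releases the hold taken on the context at init time.
             * Error-path context cleanup is omitted here for brevity.
             */
            if (ret == CRYPTO_SUCCESS)
                    ret = crypto_mac_final(ctx, mac, NULL);

            return (ret);
    }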
+
+/*
+ * See comments for crypto_mac_update() and crypto_mac_final().
+ */
+int
+crypto_mac_single(crypto_context_t context, crypto_data_t *data,
+ crypto_data_t *mac, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_MAC(pd, ctx, data, mac, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+ NULL, NULL, data, mac, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(crypto_mac_prov);
+EXPORT_SYMBOL(crypto_mac);
+EXPORT_SYMBOL(crypto_mac_verify_prov);
+EXPORT_SYMBOL(crypto_mac_verify);
+EXPORT_SYMBOL(crypto_mac_init_prov);
+EXPORT_SYMBOL(crypto_mac_init);
+EXPORT_SYMBOL(crypto_mac_update);
+EXPORT_SYMBOL(crypto_mac_final);
+EXPORT_SYMBOL(crypto_mac_single);
+#endif
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c b/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c
new file mode 100644
index 000000000000..c0f415b264a7
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * All event subscribers are put on a list. ntfy_list_lock
+ * protects changes to this list.
+ *
+ * The following locking order is maintained in the code - the
+ * global ntfy_list_lock followed by the individual lock
+ * in a kcf_ntfy_elem structure (kn_lock).
+ */
+kmutex_t ntfy_list_lock;
+kcondvar_t ntfy_list_cv; /* cv the service thread waits on */
+static kcf_ntfy_elem_t *ntfy_list_head;
+
+/*
+ * crypto_mech2id()
+ *
+ * Arguments:
+ * . mechname: A null-terminated string identifying the mechanism name.
+ *
+ * Description:
+ * Walks the mechanism tables, looking for an entry that matches the
+ * mechname. Once it finds it, it builds the 64-bit mech_type and returns
+ * it. If there are no hardware or software providers for the mechanism,
+ * but there is an unloaded software provider, this routine will attempt
+ * to load it.
+ *
+ * Context:
+ * Process or interrupt.
+ *
+ * Returns:
+ * The unique mechanism identified by 'mechname', if found.
+ * CRYPTO_MECH_INVALID otherwise.
+ */
+crypto_mech_type_t
+crypto_mech2id(char *mechname)
+{
+ return (crypto_mech2id_common(mechname, B_TRUE));
+}
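A minimal, hypothetical caller (illustrative fragment, not part of this file; the mechanism name is an assumption) would resolve the name once at setup time and cache the result:

    /* Illustrative only: resolve a mechanism name once and cache the result. */
    static crypto_mech_type_t example_hmac_mech = CRYPTO_MECH_INVALID;

    static int
    example_resolve_mech(void)
    {
            example_hmac_mech = crypto_mech2id(SUN_CKM_SHA512_HMAC);
            return (example_hmac_mech == CRYPTO_MECH_INVALID ?
                CRYPTO_MECHANISM_INVALID : CRYPTO_SUCCESS);
    }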
+
+/*
+ * We walk the notification list and do the callbacks.
+ */
+void
+kcf_walk_ntfylist(uint32_t event, void *event_arg)
+{
+ kcf_ntfy_elem_t *nep;
+ int nelem = 0;
+
+ mutex_enter(&ntfy_list_lock);
+
+ /*
+ * Count how many clients are on the notification list. We need
+ * this count to ensure that clients which joined the list after we
+ * have started this walk are not wrongly notified.
+ */
+ for (nep = ntfy_list_head; nep != NULL; nep = nep->kn_next)
+ nelem++;
+
+ for (nep = ntfy_list_head; (nep != NULL && nelem); nep = nep->kn_next) {
+ nelem--;
+
+ /*
+ * Check if this client is interested in the
+ * event.
+ */
+ if (!(nep->kn_event_mask & event))
+ continue;
+
+ mutex_enter(&nep->kn_lock);
+ nep->kn_state = NTFY_RUNNING;
+ mutex_exit(&nep->kn_lock);
+ mutex_exit(&ntfy_list_lock);
+
+ /*
+ * We invoke the callback routine with no locks held. Another
+ * client could have joined the list meanwhile. This is fine
+ * as we maintain nelem as stated above. The NULL check in the
+ * for loop guards against shrinkage. Also, any callers of
+ * crypto_unnotify_events() at this point cv_wait() until kn_state
+ * changes to NTFY_WAITING. Hence, nep is assured to be valid.
+ */
+ (*nep->kn_func)(event, event_arg);
+
+ mutex_enter(&nep->kn_lock);
+ nep->kn_state = NTFY_WAITING;
+ cv_broadcast(&nep->kn_cv);
+ mutex_exit(&nep->kn_lock);
+
+ mutex_enter(&ntfy_list_lock);
+ }
+
+ mutex_exit(&ntfy_list_lock);
+}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(crypto_mech2id);
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman
new file mode 100644
index 000000000000..48fea7bb333e
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman
@@ -0,0 +1,23 @@
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software is allowed (with or without
+ changes) provided that:
+
+ 1. source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ 2. binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation;
+
+ 3. the name of the copyright holder is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip
new file mode 100644
index 000000000000..5f822cf27586
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip
@@ -0,0 +1 @@
+PORTIONS OF AES FUNCTIONALITY
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl
new file mode 100644
index 000000000000..92c9e196a318
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl
@@ -0,0 +1,127 @@
+
+ LICENSE ISSUES
+ ==============
+
+ The OpenSSL toolkit stays under a dual license, i.e. both the conditions of
+ the OpenSSL License and the original SSLeay license apply to the toolkit.
+ See below for the actual license texts. Actually both licenses are BSD-style
+ Open Source licenses. In case of any license issues related to OpenSSL
+ please contact openssl-core@openssl.org.
+
+ OpenSSL License
+ ---------------
+
+/* ====================================================================
+ * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+ Original SSLeay License
+ -----------------------
+
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the routines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip
new file mode 100644
index 000000000000..5f822cf27586
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip
@@ -0,0 +1 @@
+PORTIONS OF AES FUNCTIONALITY
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
new file mode 100644
index 000000000000..4a80c62097ae
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
@@ -0,0 +1,748 @@
+/*
+ * ====================================================================
+ * Written by Intel Corporation for the OpenSSL project to add support
+ * for Intel AES-NI instructions. Rights for redistribution and usage
+ * in source and binary forms are granted according to the OpenSSL
+ * license.
+ *
+ * Author: Huang Ying <ying.huang at intel dot com>
+ * Vinodh Gopal <vinodh.gopal at intel dot com>
+ * Kahraman Akdemir
+ *
+ * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
+ * instructions that are going to be introduced in the next generation
+ * of Intel processors, as of 2009. These instructions enable fast and
+ * secure data encryption and decryption, using the Advanced Encryption
+ * Standard (AES), defined by FIPS Publication number 197. The
+ * architecture introduces six instructions that offer full hardware
+ * support for AES. Four of them support high performance data
+ * encryption and decryption, and the other two instructions support
+ * the AES key expansion procedure.
+ * ====================================================================
+ */
+
+/*
+ * ====================================================================
+ * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+/*
+ * ====================================================================
+ * OpenSolaris OS modifications
+ *
+ * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
+ * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
+ * Huang Ying of Intel to the openssl-dev mailing list under the subject
+ * of "Add support to Intel AES-NI instruction set for x86_64 platform".
+ *
+ * This OpenSolaris version has these major changes from the original source:
+ *
+ * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
+ * definitions for lint.
+ *
+ * 2. Formatted code, added comments, and added #includes and #defines.
+ *
+ * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
+ * calling kpreempt_disable() and kpreempt_enable().
+ * If the TS bit is not set, save and restore the %xmm registers at the
+ * beginning and end of function calls (%xmm* registers are not saved and
+ * restored during kernel thread preemption).
+ *
+ * 4. Renamed functions, reordered parameters, and changed return value
+ * to match OpenSolaris:
+ *
+ * OpenSSL interface:
+ * int intel_AES_set_encrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * int intel_AES_set_decrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * Return values for above are non-zero on error, 0 on success.
+ *
+ * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key);
+ * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key);
+ * typedef struct aes_key_st {
+ * unsigned int rd_key[4 *(AES_MAXNR + 1)];
+ * int rounds;
+ * unsigned int pad[3];
+ * } AES_KEY;
+ * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
+ * (ks32) instead of 64-bit (ks64)).
+ * Number of rounds (aka round count) is at offset 240 of AES_KEY.
+ *
+ * OpenSolaris OS interface (#ifdefs removed for readability):
+ * int rijndael_key_setup_dec_intel(uint32_t rk[],
+ * const uint32_t cipherKey[], uint64_t keyBits);
+ * int rijndael_key_setup_enc_intel(uint32_t rk[],
+ * const uint32_t cipherKey[], uint64_t keyBits);
+ * Return values for above are 0 on error, number of rounds on success.
+ *
+ * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
+ * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
+ *
+ * typedef union {
+ * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
+ * } aes_ks_t;
+ * typedef struct aes_key {
+ * aes_ks_t encr_ks, decr_ks;
+ * long double align128;
+ * int flags, nr, type;
+ * } aes_key_t;
+ *
+ * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
+ * ct is crypto text, and MAX_AES_NR is 14.
+ * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
+ *
+ * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
+ *
+ * ====================================================================
+ */
+
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+
+/* ARGSUSED */
+void
+aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4]) {
+}
+/* ARGSUSED */
+void
+aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4]) {
+}
+/* ARGSUSED */
+int
+rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
+ uint64_t keyBits) {
+ return (0);
+}
+/* ARGSUSED */
+int
+rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
+ uint64_t keyBits) {
+ return (0);
+}
+
+
+#elif defined(HAVE_AES) /* guard by instruction set */
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+/*
+ * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
+ * _key_expansion_256a(), _key_expansion_256b()
+ *
+ * Helper functions called by rijndael_key_setup_enc_intel().
+ * Also used indirectly by rijndael_key_setup_dec_intel().
+ *
+ * Input:
+ * %xmm0 User-provided cipher key
+ * %xmm1 Round constant
+ * Output:
+ * (%rcx) AES key
+ */
+
+ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
+_key_expansion_128_local:
+_key_expansion_256a_local:
+ pshufd $0b11111111, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+ movups %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+ nop
+SET_SIZE(_key_expansion_128)
+SET_SIZE(_key_expansion_256a)
+
+
+ENTRY_NP(_key_expansion_192a)
+_key_expansion_192a_local:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movups %xmm2, %xmm5
+ movups %xmm2, %xmm6
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movups %xmm0, %xmm1
+ shufps $0b01000100, %xmm0, %xmm6
+ movups %xmm6, (%rcx)
+ shufps $0b01001110, %xmm2, %xmm1
+ movups %xmm1, 0x10(%rcx)
+ add $0x20, %rcx
+ ret
+SET_SIZE(_key_expansion_192a)
+
+
+ENTRY_NP(_key_expansion_192b)
+_key_expansion_192b_local:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movups %xmm2, %xmm5
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movups %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+SET_SIZE(_key_expansion_192b)
+
+
+ENTRY_NP(_key_expansion_256b)
+_key_expansion_256b_local:
+ pshufd $0b10101010, %xmm1, %xmm1
+ shufps $0b00010000, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ shufps $0b10001100, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ pxor %xmm1, %xmm2
+ movups %xmm2, (%rcx)
+ add $0x10, %rcx
+ ret
+SET_SIZE(_key_expansion_256b)
+
+
+/*
+ * rijndael_key_setup_enc_intel()
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * OpenSolaris interface:
+ * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
+ * uint64_t keyBits);
+ * Return value is 0 on error, number of rounds on success.
+ *
+ * Original Intel OpenSSL interface:
+ * int intel_AES_set_encrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * Return value is non-zero on error, 0 on success.
+ */
+
+#ifdef OPENSSL_INTERFACE
+#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
+#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
+
+#define USERCIPHERKEY rdi /* P1, 64 bits */
+#define KEYSIZE32 esi /* P2, 32 bits */
+#define KEYSIZE64 rsi /* P2, 64 bits */
+#define AESKEY rdx /* P3, 64 bits */
+
+#else /* OpenSolaris Interface */
+#define AESKEY rdi /* P1, 64 bits */
+#define USERCIPHERKEY rsi /* P2, 64 bits */
+#define KEYSIZE32 edx /* P3, 32 bits */
+#define KEYSIZE64 rdx /* P3, 64 bits */
+#endif /* OPENSSL_INTERFACE */
+
+#define ROUNDS32 KEYSIZE32 /* temp */
+#define ROUNDS64 KEYSIZE64 /* temp */
+#define ENDAESKEY USERCIPHERKEY /* temp */
+
+ENTRY_NP(rijndael_key_setup_enc_intel)
+rijndael_key_setup_enc_intel_local:
+ FRAME_BEGIN
+ // NULL pointer sanity check
+ test %USERCIPHERKEY, %USERCIPHERKEY
+ jz .Lenc_key_invalid_param
+ test %AESKEY, %AESKEY
+ jz .Lenc_key_invalid_param
+
+ movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
+ movups %xmm0, (%AESKEY)
+ lea 0x10(%AESKEY), %rcx // key addr
+ pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x
+
+ cmp $256, %KEYSIZE32
+ jnz .Lenc_key192
+
+ // AES 256: 14 rounds in encryption key schedule
+#ifdef OPENSSL_INTERFACE
+ mov $14, %ROUNDS32
+ movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14
+#endif /* OPENSSL_INTERFACE */
+
+ movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes)
+ movups %xmm2, (%rcx)
+ add $0x10, %rcx
+
+ aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x1, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x2, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x4, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x8, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x10, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x20, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+
+#ifdef OPENSSL_INTERFACE
+ xor %rax, %rax // return 0 (OK)
+#else /* Open Solaris Interface */
+ mov $14, %rax // return # rounds = 14
+#endif
+ FRAME_END
+ ret
+
+.align 4
+.Lenc_key192:
+ cmp $192, %KEYSIZE32
+ jnz .Lenc_key128
+
+ // AES 192: 12 rounds in encryption key schedule
+#ifdef OPENSSL_INTERFACE
+ mov $12, %ROUNDS32
+ movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12
+#endif /* OPENSSL_INTERFACE */
+
+ movq 0x10(%USERCIPHERKEY), %xmm2 // other user key
+ aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+ aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+ aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+ aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+
+#ifdef OPENSSL_INTERFACE
+ xor %rax, %rax // return 0 (OK)
+#else /* OpenSolaris Interface */
+ mov $12, %rax // return # rounds = 12
+#endif
+ FRAME_END
+ ret
+
+.align 4
+.Lenc_key128:
+ cmp $128, %KEYSIZE32
+ jnz .Lenc_key_invalid_key_bits
+
+ // AES 128: 10 rounds in encryption key schedule
+#ifdef OPENSSL_INTERFACE
+ mov $10, %ROUNDS32
+ movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10
+#endif /* OPENSSL_INTERFACE */
+
+ aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+
+#ifdef OPENSSL_INTERFACE
+ xor %rax, %rax // return 0 (OK)
+#else /* OpenSolaris Interface */
+ mov $10, %rax // return # rounds = 10
+#endif
+ FRAME_END
+ ret
+
+.Lenc_key_invalid_param:
+#ifdef OPENSSL_INTERFACE
+ mov $-1, %rax // user key or AES key pointer is NULL
+ FRAME_END
+ ret
+#else
+ /* FALLTHROUGH */
+#endif /* OPENSSL_INTERFACE */
+
+.Lenc_key_invalid_key_bits:
+#ifdef OPENSSL_INTERFACE
+ mov $-2, %rax // keysize is invalid
+#else /* Open Solaris Interface */
+ xor %rax, %rax // a key pointer is NULL or invalid keysize
+#endif /* OPENSSL_INTERFACE */
+ FRAME_END
+ ret
+ SET_SIZE(rijndael_key_setup_enc_intel)
+
+
+/*
+ * rijndael_key_setup_dec_intel()
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * OpenSolaris interface:
+ * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
+ * uint64_t keyBits);
+ * Return value is 0 on error, number of rounds on success.
+ * P1->P2, P2->P3, P3->P1
+ *
+ * Original Intel OpenSSL interface:
+ * int intel_AES_set_decrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * Return value is non-zero on error, 0 on success.
+ */
+
+ENTRY_NP(rijndael_key_setup_dec_intel)
+FRAME_BEGIN
+ // Generate round keys used for encryption
+ call rijndael_key_setup_enc_intel_local
+ test %rax, %rax
+#ifdef OPENSSL_INTERFACE
+ jnz .Ldec_key_exit // Failed if returned non-0
+#else /* OpenSolaris Interface */
+ jz .Ldec_key_exit // Failed if returned 0
+#endif /* OPENSSL_INTERFACE */
+
+ /*
+ * Convert round keys used for encryption
+ * to a form usable for decryption
+ */
+#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
+ mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14)
+ // (already set for OpenSSL)
+#endif
+
+ lea 0x10(%AESKEY), %rcx // key addr
+ shl $4, %ROUNDS32
+ add %AESKEY, %ROUNDS64
+ mov %ROUNDS64, %ENDAESKEY
+
+.align 4
+.Ldec_key_reorder_loop:
+ movups (%AESKEY), %xmm0
+ movups (%ROUNDS64), %xmm1
+ movups %xmm0, (%ROUNDS64)
+ movups %xmm1, (%AESKEY)
+ lea 0x10(%AESKEY), %AESKEY
+ lea -0x10(%ROUNDS64), %ROUNDS64
+ cmp %AESKEY, %ROUNDS64
+ ja .Ldec_key_reorder_loop
+
+.align 4
+.Ldec_key_inv_loop:
+ movups (%rcx), %xmm0
+ // Convert an encryption round key to a form usable for decryption
+ // with the "AES Inverse Mix Columns" instruction
+ aesimc %xmm0, %xmm1
+ movups %xmm1, (%rcx)
+ lea 0x10(%rcx), %rcx
+ cmp %ENDAESKEY, %rcx
+ jnz .Ldec_key_inv_loop
+
+.Ldec_key_exit:
+ // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
+ // OpenSSL: rax = 0 for OK, or non-zero for error
+ FRAME_END
+ ret
+ SET_SIZE(rijndael_key_setup_dec_intel)
+
+
+/*
+ * aes_encrypt_intel()
+ * Encrypt a single block (in and out can overlap).
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * Temporary register usage:
+ * %xmm0 State
+ * %xmm1 Key
+ *
+ * Original OpenSolaris Interface:
+ * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])
+ *
+ * Original Intel OpenSSL Interface:
+ * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key)
+ */
+
+#ifdef OPENSSL_INTERFACE
+#define aes_encrypt_intel intel_AES_encrypt
+#define aes_decrypt_intel intel_AES_decrypt
+
+#define INP rdi /* P1, 64 bits */
+#define OUTP rsi /* P2, 64 bits */
+#define KEYP rdx /* P3, 64 bits */
+
+/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
+#define NROUNDS32 ecx /* temporary, 32 bits */
+#define NROUNDS cl /* temporary, 8 bits */
+
+#else /* OpenSolaris Interface */
+#define KEYP rdi /* P1, 64 bits */
+#define NROUNDS esi /* P2, 32 bits */
+#define INP rdx /* P3, 64 bits */
+#define OUTP rcx /* P4, 64 bits */
+#endif /* OPENSSL_INTERFACE */
+
+#define STATE xmm0 /* temporary, 128 bits */
+#define KEY xmm1 /* temporary, 128 bits */
+
+
+ENTRY_NP(aes_encrypt_intel)
+
+ movups (%INP), %STATE // input
+ movups (%KEYP), %KEY // key
+#ifdef OPENSSL_INTERFACE
+ mov 240(%KEYP), %NROUNDS32 // round count
+#else /* OpenSolaris Interface */
+ /* Round count is already present as P2 in %rsi/%esi */
+#endif /* OPENSSL_INTERFACE */
+
+ pxor %KEY, %STATE // round 0
+ lea 0x30(%KEYP), %KEYP
+ cmp $12, %NROUNDS
+ jb .Lenc128
+ lea 0x20(%KEYP), %KEYP
+ je .Lenc192
+
+ // AES 256
+ lea 0x20(%KEYP), %KEYP
+ movups -0x60(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups -0x50(%KEYP), %KEY
+ aesenc %KEY, %STATE
+
+.align 4
+.Lenc192:
+ // AES 192 and 256
+ movups -0x40(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups -0x30(%KEYP), %KEY
+ aesenc %KEY, %STATE
+
+.align 4
+.Lenc128:
+ // AES 128, 192, and 256
+ movups -0x20(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups -0x10(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups (%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x10(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x20(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x30(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x40(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x50(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x60(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movups 0x70(%KEYP), %KEY
+ aesenclast %KEY, %STATE // last round
+ movups %STATE, (%OUTP) // output
+
+ ret
+ SET_SIZE(aes_encrypt_intel)
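A rough C-level sketch (editorial, not part of this file) of how the OpenSolaris-style entry points above might be exercised. The wrapper name, the direct array for the key schedule, and the key size are assumptions; real callers go through the ICP aes_impl framework, which uses the aes_ks_t union and handles the alignment requirement noted earlier.

    /*
     * Illustrative only: expand a 256-bit key and encrypt one 16-byte block
     * with rijndael_key_setup_enc_intel() and aes_encrypt_intel().
     * Per the block comments above, the caller must disable preemption
     * because the %xmm registers are not saved/restored.
     */
    static int
    example_aesni_encrypt_block(const uint32_t cipherKey[8],
        const uint32_t pt[4], uint32_t ct[4])
    {
            /* Real callers use aes_ks_t for the required alignment. */
            uint32_t rk[(14 + 1) * 4] __attribute__((aligned(16)));
            int nr;

            kpreempt_disable();             /* %xmm state is not preserved */
            nr = rijndael_key_setup_enc_intel(rk, cipherKey, 256);
            if (nr == 0) {                  /* NULL pointer or bad key size */
                    kpreempt_enable();
                    return (-1);
            }
            aes_encrypt_intel(rk, nr, pt, ct);
            kpreempt_enable();
            return (0);
    }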
+
+
+/*
+ * aes_decrypt_intel()
+ * Decrypt a single block (in and out can overlap).
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * Temporary register usage:
+ * %xmm0 State
+ * %xmm1 Key
+ *
+ * Original OpenSolaris Interface:
+ * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ *
+ * Original Intel OpenSSL Interface:
+ * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key);
+ */
+ENTRY_NP(aes_decrypt_intel)
+
+ movups (%INP), %STATE // input
+ movups (%KEYP), %KEY // key
+#ifdef OPENSSL_INTERFACE
+ mov 240(%KEYP), %NROUNDS32 // round count
+#else /* OpenSolaris Interface */
+ /* Round count is already present as P2 in %rsi/%esi */
+#endif /* OPENSSL_INTERFACE */
+
+ pxor %KEY, %STATE // round 0
+ lea 0x30(%KEYP), %KEYP
+ cmp $12, %NROUNDS
+ jb .Ldec128
+ lea 0x20(%KEYP), %KEYP
+ je .Ldec192
+
+ // AES 256
+ lea 0x20(%KEYP), %KEYP
+ movups -0x60(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups -0x50(%KEYP), %KEY
+ aesdec %KEY, %STATE
+
+.align 4
+.Ldec192:
+ // AES 192 and 256
+ movups -0x40(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups -0x30(%KEYP), %KEY
+ aesdec %KEY, %STATE
+
+.align 4
+.Ldec128:
+ // AES 128, 192, and 256
+ movups -0x20(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups -0x10(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups (%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x10(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x20(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x30(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x40(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x50(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x60(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movups 0x70(%KEYP), %KEY
+ aesdeclast %KEY, %STATE // last round
+ movups %STATE, (%OUTP) // output
+
+ ret
+ SET_SIZE(aes_decrypt_intel)
+
+#endif /* lint || __lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
new file mode 100644
index 000000000000..9db3a3179230
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
@@ -0,0 +1,906 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue 20/12/2007
+ *
+ * I am grateful to Dag Arne Osvik for many discussions of the techniques that
+ * can be used to optimise AES assembler code on AMD64/EM64T architectures.
+ * Some of the techniques used in this implementation are the result of
+ * suggestions made by him for which I am most grateful.
+ *
+ * An AES implementation for AMD64 processors using the YASM assembler. This
+ * implementation provides only encryption, decryption and hence requires key
+ * scheduling support in C. It uses 8k bytes of tables but its encryption and
+ * decryption performance is very close to that obtained using large tables.
+ * It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions,
+ * which are as follows:
+ * ms windows gnu/linux/opensolaris os
+ *
+ * in_blk rcx rdi
+ * out_blk rdx rsi
+ * context (cx) r8 rdx
+ *
+ * preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15
+ * registers rdi - on both
+ *
+ * destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11
+ * registers - rdi on both
+ *
+ * The convention used here is that for gnu/linux/opensolaris os.
+ *
+ * This code provides the standard AES block size (128 bits, 16 bytes) and the
+ * three standard AES key sizes (128, 192 and 256 bits). It has the same call
+ * interface as my C implementation. It uses the Microsoft C AMD64 calling
+ * conventions in which the three parameters are placed in rcx, rdx and r8
+ * respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved.
+ *
+ * OpenSolaris Note:
+ * Modified to use GNU/Linux/Solaris calling conventions.
+ * That is parameters are placed in rdi, rsi, rdx, and rcx, respectively.
+ *
+ * AES_RETURN aes_encrypt(const unsigned char in_blk[],
+ * unsigned char out_blk[], const aes_encrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_decrypt(const unsigned char in_blk[],
+ * unsigned char out_blk[], const aes_decrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
+ * const aes_encrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
+ * const aes_decrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_encrypt_key(const unsigned char key[],
+ * unsigned int len, const aes_decrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_decrypt_key(const unsigned char key[],
+ * unsigned int len, const aes_decrypt_ctx cx[1])/
+ *
+ * where <NNN> is 128, 192 or 256. In the last two calls the length can be in
+ * either bits or bytes.
+ *
+ * Comment in/out the following lines to obtain the desired subroutines. These
+ * selections MUST match those in the C header file aesopt.h
+ */
+#define AES_REV_DKS /* define if key decryption schedule is reversed */
+
+#define LAST_ROUND_TABLES /* define for the faster version using extra tables */
+
+/*
+ * The encryption key schedule has the following in-memory layout, where N
+ * is the number of rounds (10, 12 or 14):
+ *
+ * lo: | input key (round 0) | / each round is four 32-bit words
+ * | encryption round 1 |
+ * | encryption round 2 |
+ * ....
+ * | encryption round N-1 |
+ * hi: | encryption round N |
+ *
+ * The decryption key schedule is normally set up so that it has the same
+ * layout as above by actually reversing the order of the encryption key
+ * schedule in memory (this happens when AES_REV_DKS is set):
+ *
+ * lo: | decryption round 0 | = | encryption round N |
+ * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
+ * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
+ * .... ....
+ * | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
+ * hi: | decryption round N | = | input key (round 0) |
+ *
+ * with all rounds except the first and last modified using inv_mix_column().
+ * But if AES_REV_DKS is NOT set, the order of keys is left as it is for
+ * encryption, so that it has to be accessed in reverse when used for
+ * decryption (although the inverse mix column modifications are still done).
+ *
+ * lo: | decryption round 0 | = | input key (round 0) |
+ * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
+ * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
+ * .... ....
+ * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
+ * hi: | decryption round N | = | encryption round N |
+ *
+ * This layout is faster when the assembler key scheduling provided here
+ * is used.
+ *
+ * End of user defines
+ */
+
+/*
+ * ---------------------------------------------------------------------------
+ * OpenSolaris OS modifications
+ *
+ * This source originates from Brian Gladman file aes_amd64.asm
+ * in http://fp.gladman.plus.com/AES/aes-src-04-03-08.zip
+ * with these changes:
+ *
+ * 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and
+ * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION,
+ * AES_128, AES_192, AES_256, AES_VAR ifdefs.
+ *
+ * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
+ *
+ * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
+ *
+ * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
+ * (operands reversed, literals prefixed with "$", registers prefixed with "%",
+ * and "[register+offset]", addressing changed to "offset(register)",
+ * parenthesis in constant expressions "()" changed to square brackets "[]",
+ * "." removed from local (numeric) labels, and other changes.
+ * Examples:
+ * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax
+ * mov rax,(4*20h) mov $[4*0x20],%rax
+ * mov rax,[ebx+20h] mov 0x20(%ebx),%rax
+ * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax
+ * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax
+ *
+ * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
+ * definitions for lint.
+ *
+ * 6. Renamed functions and reordered parameters to match OpenSolaris:
+ * Original Gladman interface:
+ * int aes_encrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1])/
+ * int aes_decrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1])/
+ * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t,
+ * and a union type, inf., containing inf.l, a uint32_t and
+ * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is
+ * used and contains the key schedule length * 16, where the key schedule
+ * length (number of rounds) is 10, 12, or 14.
+ *
+ * OpenSolaris OS interface:
+ * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])/
+ * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])/
+ * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/
+ * uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/
+ * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
+ * ct is crypto text, and MAX_AES_NR is 14.
+ * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
+ */
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+/* ARGSUSED */
+void
+aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4]) {
+}
+/* ARGSUSED */
+void
+aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4]) {
+}
+
+
+#else
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+#define KS_LENGTH 60
+
+#define raxd eax
+#define rdxd edx
+#define rcxd ecx
+#define rbxd ebx
+#define rsid esi
+#define rdid edi
+
+#define raxb al
+#define rdxb dl
+#define rcxb cl
+#define rbxb bl
+#define rsib sil
+#define rdib dil
+
+// finite field multiplies by {02}, {04} and {08}
+
+#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]]
+#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]]
+#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]]
+
+// finite field multiplies required in table generation
+
+#define f3(x) [[f2(x)] ^ [x]]
+#define f9(x) [[f8(x)] ^ [x]]
+#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]]
+#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]]
+#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]]
+
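The f2/f4/f8 macros above express multiplication by {02}, {04} and {08} in GF(2^8), reduced by the AES polynomial 0x11b, evaluated at assembly time on 9-bit intermediates. A hedged C equivalent for readers following the table generation (byte-sized values, so the reduction constant becomes 0x1b after the shift); the helper names are hypothetical:

    /* Illustrative only: multiply a field element by {02} (xtime), {04}, {08}. */
    static unsigned char
    gf_xtime(unsigned char x)
    {
            /* Shift left; if the high bit fell off, reduce by the AES polynomial. */
            return ((unsigned char)((x << 1) ^ ((x >> 7) ? 0x1b : 0x00)));
    }

    static unsigned char gf_x4(unsigned char x) { return (gf_xtime(gf_xtime(x))); }
    static unsigned char gf_x8(unsigned char x) { return (gf_xtime(gf_x4(x))); }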
+// macros for expanding S-box data
+
+#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)]
+#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x]
+#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
+
+#define enc_vals(x) \
+ .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \
+ .byte x(0x30),x(0x01),x(0x67),x(0x2b),x(0xfe),x(0xd7),x(0xab),x(0x76); \
+ .byte x(0xca),x(0x82),x(0xc9),x(0x7d),x(0xfa),x(0x59),x(0x47),x(0xf0); \
+ .byte x(0xad),x(0xd4),x(0xa2),x(0xaf),x(0x9c),x(0xa4),x(0x72),x(0xc0); \
+ .byte x(0xb7),x(0xfd),x(0x93),x(0x26),x(0x36),x(0x3f),x(0xf7),x(0xcc); \
+ .byte x(0x34),x(0xa5),x(0xe5),x(0xf1),x(0x71),x(0xd8),x(0x31),x(0x15); \
+ .byte x(0x04),x(0xc7),x(0x23),x(0xc3),x(0x18),x(0x96),x(0x05),x(0x9a); \
+ .byte x(0x07),x(0x12),x(0x80),x(0xe2),x(0xeb),x(0x27),x(0xb2),x(0x75); \
+ .byte x(0x09),x(0x83),x(0x2c),x(0x1a),x(0x1b),x(0x6e),x(0x5a),x(0xa0); \
+ .byte x(0x52),x(0x3b),x(0xd6),x(0xb3),x(0x29),x(0xe3),x(0x2f),x(0x84); \
+ .byte x(0x53),x(0xd1),x(0x00),x(0xed),x(0x20),x(0xfc),x(0xb1),x(0x5b); \
+ .byte x(0x6a),x(0xcb),x(0xbe),x(0x39),x(0x4a),x(0x4c),x(0x58),x(0xcf); \
+ .byte x(0xd0),x(0xef),x(0xaa),x(0xfb),x(0x43),x(0x4d),x(0x33),x(0x85); \
+ .byte x(0x45),x(0xf9),x(0x02),x(0x7f),x(0x50),x(0x3c),x(0x9f),x(0xa8); \
+ .byte x(0x51),x(0xa3),x(0x40),x(0x8f),x(0x92),x(0x9d),x(0x38),x(0xf5); \
+ .byte x(0xbc),x(0xb6),x(0xda),x(0x21),x(0x10),x(0xff),x(0xf3),x(0xd2); \
+ .byte x(0xcd),x(0x0c),x(0x13),x(0xec),x(0x5f),x(0x97),x(0x44),x(0x17); \
+ .byte x(0xc4),x(0xa7),x(0x7e),x(0x3d),x(0x64),x(0x5d),x(0x19),x(0x73); \
+ .byte x(0x60),x(0x81),x(0x4f),x(0xdc),x(0x22),x(0x2a),x(0x90),x(0x88); \
+ .byte x(0x46),x(0xee),x(0xb8),x(0x14),x(0xde),x(0x5e),x(0x0b),x(0xdb); \
+ .byte x(0xe0),x(0x32),x(0x3a),x(0x0a),x(0x49),x(0x06),x(0x24),x(0x5c); \
+ .byte x(0xc2),x(0xd3),x(0xac),x(0x62),x(0x91),x(0x95),x(0xe4),x(0x79); \
+ .byte x(0xe7),x(0xc8),x(0x37),x(0x6d),x(0x8d),x(0xd5),x(0x4e),x(0xa9); \
+ .byte x(0x6c),x(0x56),x(0xf4),x(0xea),x(0x65),x(0x7a),x(0xae),x(0x08); \
+ .byte x(0xba),x(0x78),x(0x25),x(0x2e),x(0x1c),x(0xa6),x(0xb4),x(0xc6); \
+ .byte x(0xe8),x(0xdd),x(0x74),x(0x1f),x(0x4b),x(0xbd),x(0x8b),x(0x8a); \
+ .byte x(0x70),x(0x3e),x(0xb5),x(0x66),x(0x48),x(0x03),x(0xf6),x(0x0e); \
+ .byte x(0x61),x(0x35),x(0x57),x(0xb9),x(0x86),x(0xc1),x(0x1d),x(0x9e); \
+ .byte x(0xe1),x(0xf8),x(0x98),x(0x11),x(0x69),x(0xd9),x(0x8e),x(0x94); \
+ .byte x(0x9b),x(0x1e),x(0x87),x(0xe9),x(0xce),x(0x55),x(0x28),x(0xdf); \
+ .byte x(0x8c),x(0xa1),x(0x89),x(0x0d),x(0xbf),x(0xe6),x(0x42),x(0x68); \
+ .byte x(0x41),x(0x99),x(0x2d),x(0x0f),x(0xb0),x(0x54),x(0xbb),x(0x16)
+
+#define dec_vals(x) \
+ .byte x(0x52),x(0x09),x(0x6a),x(0xd5),x(0x30),x(0x36),x(0xa5),x(0x38); \
+ .byte x(0xbf),x(0x40),x(0xa3),x(0x9e),x(0x81),x(0xf3),x(0xd7),x(0xfb); \
+ .byte x(0x7c),x(0xe3),x(0x39),x(0x82),x(0x9b),x(0x2f),x(0xff),x(0x87); \
+ .byte x(0x34),x(0x8e),x(0x43),x(0x44),x(0xc4),x(0xde),x(0xe9),x(0xcb); \
+ .byte x(0x54),x(0x7b),x(0x94),x(0x32),x(0xa6),x(0xc2),x(0x23),x(0x3d); \
+ .byte x(0xee),x(0x4c),x(0x95),x(0x0b),x(0x42),x(0xfa),x(0xc3),x(0x4e); \
+ .byte x(0x08),x(0x2e),x(0xa1),x(0x66),x(0x28),x(0xd9),x(0x24),x(0xb2); \
+ .byte x(0x76),x(0x5b),x(0xa2),x(0x49),x(0x6d),x(0x8b),x(0xd1),x(0x25); \
+ .byte x(0x72),x(0xf8),x(0xf6),x(0x64),x(0x86),x(0x68),x(0x98),x(0x16); \
+ .byte x(0xd4),x(0xa4),x(0x5c),x(0xcc),x(0x5d),x(0x65),x(0xb6),x(0x92); \
+ .byte x(0x6c),x(0x70),x(0x48),x(0x50),x(0xfd),x(0xed),x(0xb9),x(0xda); \
+ .byte x(0x5e),x(0x15),x(0x46),x(0x57),x(0xa7),x(0x8d),x(0x9d),x(0x84); \
+ .byte x(0x90),x(0xd8),x(0xab),x(0x00),x(0x8c),x(0xbc),x(0xd3),x(0x0a); \
+ .byte x(0xf7),x(0xe4),x(0x58),x(0x05),x(0xb8),x(0xb3),x(0x45),x(0x06); \
+ .byte x(0xd0),x(0x2c),x(0x1e),x(0x8f),x(0xca),x(0x3f),x(0x0f),x(0x02); \
+ .byte x(0xc1),x(0xaf),x(0xbd),x(0x03),x(0x01),x(0x13),x(0x8a),x(0x6b); \
+ .byte x(0x3a),x(0x91),x(0x11),x(0x41),x(0x4f),x(0x67),x(0xdc),x(0xea); \
+ .byte x(0x97),x(0xf2),x(0xcf),x(0xce),x(0xf0),x(0xb4),x(0xe6),x(0x73); \
+ .byte x(0x96),x(0xac),x(0x74),x(0x22),x(0xe7),x(0xad),x(0x35),x(0x85); \
+ .byte x(0xe2),x(0xf9),x(0x37),x(0xe8),x(0x1c),x(0x75),x(0xdf),x(0x6e); \
+ .byte x(0x47),x(0xf1),x(0x1a),x(0x71),x(0x1d),x(0x29),x(0xc5),x(0x89); \
+ .byte x(0x6f),x(0xb7),x(0x62),x(0x0e),x(0xaa),x(0x18),x(0xbe),x(0x1b); \
+ .byte x(0xfc),x(0x56),x(0x3e),x(0x4b),x(0xc6),x(0xd2),x(0x79),x(0x20); \
+ .byte x(0x9a),x(0xdb),x(0xc0),x(0xfe),x(0x78),x(0xcd),x(0x5a),x(0xf4); \
+ .byte x(0x1f),x(0xdd),x(0xa8),x(0x33),x(0x88),x(0x07),x(0xc7),x(0x31); \
+ .byte x(0xb1),x(0x12),x(0x10),x(0x59),x(0x27),x(0x80),x(0xec),x(0x5f); \
+ .byte x(0x60),x(0x51),x(0x7f),x(0xa9),x(0x19),x(0xb5),x(0x4a),x(0x0d); \
+ .byte x(0x2d),x(0xe5),x(0x7a),x(0x9f),x(0x93),x(0xc9),x(0x9c),x(0xef); \
+ .byte x(0xa0),x(0xe0),x(0x3b),x(0x4d),x(0xae),x(0x2a),x(0xf5),x(0xb0); \
+ .byte x(0xc8),x(0xeb),x(0xbb),x(0x3c),x(0x83),x(0x53),x(0x99),x(0x61); \
+ .byte x(0x17),x(0x2b),x(0x04),x(0x7e),x(0xba),x(0x77),x(0xd6),x(0x26); \
+ .byte x(0xe1),x(0x69),x(0x14),x(0x63),x(0x55),x(0x21),x(0x0c),x(0x7d)
+
+#define tptr %rbp /* table pointer */
+#define kptr %r8 /* key schedule pointer */
+#define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */
+#define fk_ref(x, y) -16*x+fofs+4*y(kptr)
+
+#ifdef AES_REV_DKS
+#define rofs 128
+#define ik_ref(x, y) -16*x+rofs+4*y(kptr)
+
+#else
+#define rofs -128
+#define ik_ref(x, y) 16*x+rofs+4*y(kptr)
+#endif /* AES_REV_DKS */
+
+#define tab_0(x) (tptr,x,8)
+#define tab_1(x) 3(tptr,x,8)
+#define tab_2(x) 2(tptr,x,8)
+#define tab_3(x) 1(tptr,x,8)
+#define tab_f(x) 1(tptr,x,8)
+#define tab_i(x) 7(tptr,x,8)
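+
+// Note (illustrative): every table entry built by enc_vals/dec_vals above is
+// 8 bytes wide, so (tptr,x,8) indexes entry x with a stride of 8; the 32-bit
+// loads through tab_0..tab_3 start at byte offsets 0, 3, 2 and 1 of that
+// entry and thus pick up the four byte-rotated column contributions without
+// separate rotate instructions.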
+
+#define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \
+ mov fk_ref(round,0), p1; \
+ mov fk_ref(round,1), p2; \
+ mov fk_ref(round,2), p3; \
+ mov fk_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p4; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p1; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p3; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p2; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p3; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p1; \
+ \
+ mov p1, %eax; \
+ mov p2, %ebx; \
+ mov p3, %ecx; \
+ mov p4, %edx
+
+#ifdef LAST_ROUND_TABLES
+
+#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \
+ add $2048, tptr; \
+ mov fk_ref(round,0), p1; \
+ mov fk_ref(round,1), p2; \
+ mov fk_ref(round,2), p3; \
+ mov fk_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p4; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p1; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p3; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p2; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p3; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p1
+
+#else
+
+#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \
+ mov fk_ref(round,0), p1; \
+ mov fk_ref(round,1), p2; \
+ mov fk_ref(round,2), p3; \
+ mov fk_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ xor %esi, p1; \
+ rol $8, %edi; \
+ xor %edi, p4; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p3; \
+ xor %edi, p2; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ xor %esi, p2; \
+ rol $8, %edi; \
+ xor %edi, p1; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p4; \
+ xor %edi, p3; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ shr $16, %ecx; \
+ xor %esi, p3; \
+ rol $8, %edi; \
+ xor %edi, p2; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p1; \
+ xor %edi, p4; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ shr $16, %edx; \
+ xor %esi, p4; \
+ rol $8, %edi; \
+ xor %edi, p3; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p2; \
+ xor %edi, p1
+
+#endif /* LAST_ROUND_TABLES */
+
+#define ii_rnd(p1, p2, p3, p4, round) /* normal inverse round */ \
+ mov ik_ref(round,0), p1; \
+ mov ik_ref(round,1), p2; \
+ mov ik_ref(round,2), p3; \
+ mov ik_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p2; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p3; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p1; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p4; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p1; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p3; \
+ \
+ mov p1, %eax; \
+ mov p2, %ebx; \
+ mov p3, %ecx; \
+ mov p4, %edx
+
+#ifdef LAST_ROUND_TABLES
+
+#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \
+ add $2048, tptr; \
+ mov ik_ref(round,0), p1; \
+ mov ik_ref(round,1), p2; \
+ mov ik_ref(round,2), p3; \
+ mov ik_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p2; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p3; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p1; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p4; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p1; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p3
+
+#else
+
+#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \
+ mov ik_ref(round,0), p1; \
+ mov ik_ref(round,1), p2; \
+ mov ik_ref(round,2), p3; \
+ mov ik_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %eax; \
+ xor %esi, p1; \
+ rol $8, %edi; \
+ xor %edi, p2; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p3; \
+ xor %edi, p4; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %ebx; \
+ xor %esi, p2; \
+ rol $8, %edi; \
+ xor %edi, p3; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p4; \
+ xor %edi, p1; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %ecx; \
+ xor %esi, p3; \
+ rol $8, %edi; \
+ xor %edi, p4; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p1; \
+ xor %edi, p2; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %edx; \
+ xor %esi, p4; \
+ rol $8, %edi; \
+ xor %edi, p1; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p2; \
+ xor %edi, p3
+
+#endif /* LAST_ROUND_TABLES */
+
+/*
+ * OpenSolaris OS:
+ * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])/
+ *
+ * Original interface:
+ * int aes_encrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1])/
+ */
+.data
+.align 64
+enc_tab:
+ enc_vals(u8)
+#ifdef LAST_ROUND_TABLES
+ // Last Round Tables:
+ enc_vals(w8)
+#endif
+
+
+ENTRY_NP(aes_encrypt_amd64)
+#ifdef GLADMAN_INTERFACE
+ // Original interface
+ sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
+ mov %rsi, (%rsp) // output pointer (P2)
+ mov %rdx, %r8 // context (P3)
+
+ mov %rbx, 1*8(%rsp) // P1: input pointer in rdi
+ mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp)
+ mov %r12, 3*8(%rsp) // P3: context in r8
+ movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16
+
+#else
+ // OpenSolaris OS interface
+ sub $[4*8], %rsp // Make room on stack to save registers
+ mov %rcx, (%rsp) // Save output pointer (P4) on stack
+ mov %rdi, %r8 // context (P1)
+ mov %rdx, %rdi // P3: save input pointer
+ shl $4, %esi // P2: esi byte key length * 16
+
+ mov %rbx, 1*8(%rsp) // Save registers
+ mov %rbp, 2*8(%rsp)
+ mov %r12, 3*8(%rsp)
+ // P1: context in r8
+ // P2: byte key length * 16 in esi
+ // P3: input pointer in rdi
+ // P4: output pointer in (rsp)
+#endif /* GLADMAN_INTERFACE */
+
+ lea enc_tab(%rip), tptr
+ sub $fofs, kptr
+
+ // Load input block into registers
+ mov (%rdi), %eax
+ mov 1*4(%rdi), %ebx
+ mov 2*4(%rdi), %ecx
+ mov 3*4(%rdi), %edx
+
+ xor fofs(kptr), %eax
+ xor fofs+4(kptr), %ebx
+ xor fofs+8(kptr), %ecx
+ xor fofs+12(kptr), %edx
+
+ lea (kptr,%rsi), kptr
+ // Jump based on byte key length * 16:
+ cmp $[10*16], %esi
+ je 3f
+ cmp $[12*16], %esi
+ je 2f
+ cmp $[14*16], %esi
+ je 1f
+ mov $-1, %rax // error
+ jmp 4f
+
+ // Perform normal forward rounds
+1: ff_rnd(%r9d, %r10d, %r11d, %r12d, 13)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 12)
+2: ff_rnd(%r9d, %r10d, %r11d, %r12d, 11)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 10)
+3: ff_rnd(%r9d, %r10d, %r11d, %r12d, 9)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 8)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 7)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 6)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 5)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 4)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 3)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 2)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 1)
+ fl_rnd(%r9d, %r10d, %r11d, %r12d, 0)
+
+ // Copy results
+ mov (%rsp), %rbx
+ mov %r9d, (%rbx)
+ mov %r10d, 4(%rbx)
+ mov %r11d, 8(%rbx)
+ mov %r12d, 12(%rbx)
+ xor %rax, %rax
+4: // Restore registers
+ mov 1*8(%rsp), %rbx
+ mov 2*8(%rsp), %rbp
+ mov 3*8(%rsp), %r12
+ add $[4*8], %rsp
+ ret
+
+ SET_SIZE(aes_encrypt_amd64)
+
+/*
+ * OpenSolaris OS:
+ * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])/
+ *
+ * Original interface:
+ * int aes_decrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1])/
+ */
+.data
+.align 64
+dec_tab:
+ dec_vals(v8)
+#ifdef LAST_ROUND_TABLES
+ // Last Round Tables:
+ dec_vals(w8)
+#endif
+
+
+ENTRY_NP(aes_decrypt_amd64)
+#ifdef GLADMAN_INTERFACE
+ // Original interface
+ sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
+ mov %rsi, (%rsp) // output pointer (P2)
+ mov %rdx, %r8 // context (P3)
+
+ mov %rbx, 1*8(%rsp) // P1: input pointer in rdi
+ mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp)
+ mov %r12, 3*8(%rsp) // P3: context in r8
+ movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16
+
+#else
+ // OpenSolaris OS interface
+ sub $[4*8], %rsp // Make room on stack to save registers
+ mov %rcx, (%rsp) // Save output pointer (P4) on stack
+ mov %rdi, %r8 // context (P1)
+ mov %rdx, %rdi // P3: save input pointer
+ shl $4, %esi // P2: esi byte key length * 16
+
+ mov %rbx, 1*8(%rsp) // Save registers
+ mov %rbp, 2*8(%rsp)
+ mov %r12, 3*8(%rsp)
+ // P1: context in r8
+ // P2: byte key length * 16 in esi
+ // P3: input pointer in rdi
+ // P4: output pointer in (rsp)
+#endif /* GLADMAN_INTERFACE */
+
+ lea dec_tab(%rip), tptr
+ sub $rofs, kptr
+
+ // Load input block into registers
+ mov (%rdi), %eax
+ mov 1*4(%rdi), %ebx
+ mov 2*4(%rdi), %ecx
+ mov 3*4(%rdi), %edx
+
+#ifdef AES_REV_DKS
+ mov kptr, %rdi
+ lea (kptr,%rsi), kptr
+#else
+ lea (kptr,%rsi), %rdi
+#endif
+
+ xor rofs(%rdi), %eax
+ xor rofs+4(%rdi), %ebx
+ xor rofs+8(%rdi), %ecx
+ xor rofs+12(%rdi), %edx
+
+ // Jump based on byte key length * 16:
+ cmp $[10*16], %esi
+ je 3f
+ cmp $[12*16], %esi
+ je 2f
+ cmp $[14*16], %esi
+ je 1f
+ mov $-1, %rax // error
+ jmp 4f
+
+ // Perform normal inverse rounds
+1: ii_rnd(%r9d, %r10d, %r11d, %r12d, 13)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 12)
+2: ii_rnd(%r9d, %r10d, %r11d, %r12d, 11)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 10)
+3: ii_rnd(%r9d, %r10d, %r11d, %r12d, 9)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 8)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 7)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 6)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 5)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 4)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 3)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 2)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 1)
+ il_rnd(%r9d, %r10d, %r11d, %r12d, 0)
+
+ // Copy results
+ mov (%rsp), %rbx
+ mov %r9d, (%rbx)
+ mov %r10d, 4(%rbx)
+ mov %r11d, 8(%rbx)
+ mov %r12d, 12(%rbx)
+ xor %rax, %rax
+4: // Restore registers
+ mov 1*8(%rsp), %rbx
+ mov 2*8(%rsp), %rbp
+ mov 3*8(%rsp), %r12
+ add $[4*8], %rsp
+ ret
+
+ SET_SIZE(aes_decrypt_amd64)
+#endif /* lint || __lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aeskey.c b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aeskey.c
new file mode 100644
index 000000000000..c3d1f2990874
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aeskey.c
@@ -0,0 +1,580 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue Date: 20/12/2007
+ */
+
+#include <aes/aes_impl.h>
+#include "aesopt.h"
+#include "aestab.h"
+#include "aestab2.h"
+
+/*
+ * Initialise the key schedule from the user supplied key. The key
+ * length can be specified in bytes, with legal values of 16, 24
+ * and 32, or in bits, with legal values of 128, 192 and 256. These
+ * values correspond with Nk values of 4, 6 and 8 respectively.
+ *
+ * The following macros implement a single cycle in the key
+ * schedule generation process. The number of cycles needed
+ * for each cx->n_col and nk value is:
+ *
+ * nk = 4 5 6 7 8
+ * ------------------------------
+ * cx->n_col = 4 10 9 8 7 7
+ * cx->n_col = 5 14 11 10 9 9
+ * cx->n_col = 6 19 15 12 11 11
+ * cx->n_col = 7 21 19 16 13 14
+ * cx->n_col = 8 29 23 19 17 14
+ */
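+
+/*
+ * Illustrative cross-check (not part of the original comment): for AES-128
+ * (nk = 4, cx->n_col = 4) the table above gives 10 cycles, matching the ten
+ * ke4() invocations performed by aes_encrypt_key128() below (nine plus the
+ * final ke4(rk, 9)).
+ */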
+
+/*
+ * OpenSolaris changes
+ * 1. Added header files aes_impl.h and aestab2.h
+ * 2. Changed uint_8t and uint_32t to uint8_t and uint32_t
+ * 3. Removed code under ifdef USE_VIA_ACE_IF_PRESENT (always undefined)
+ * 4. Removed the always-defined FUNCS_IN_C, ENC_KEYING_IN_C,
+ * AES_128, AES_192, AES_256, and AES_VAR defines
+ * 5. Changed aes_encrypt_key* aes_decrypt_key* functions to "static void"
+ * 6. Changed N_COLS to MAX_AES_NB
+ * 7. Replaced functions aes_encrypt_key and aes_decrypt_key with
+ * OpenSolaris-compatible functions rijndael_key_setup_enc_amd64 and
+ * rijndael_key_setup_dec_amd64
+ * 8. cstyled code and removed lint warnings
+ */
+
+#if defined(REDUCE_CODE_SIZE)
+#define ls_box ls_sub
+ uint32_t ls_sub(const uint32_t t, const uint32_t n);
+#define inv_mcol im_sub
+ uint32_t im_sub(const uint32_t x);
+#ifdef ENC_KS_UNROLL
+#undef ENC_KS_UNROLL
+#endif
+#ifdef DEC_KS_UNROLL
+#undef DEC_KS_UNROLL
+#endif
+#endif /* REDUCE_CODE_SIZE */
+
+
+#define ke4(k, i) \
+{ k[4 * (i) + 4] = ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[4 * (i) + 5] = ss[1] ^= ss[0]; \
+ k[4 * (i) + 6] = ss[2] ^= ss[1]; \
+ k[4 * (i) + 7] = ss[3] ^= ss[2]; \
+}
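+
+/*
+ * Illustrative reading (a sketch, not from the original sources): ke4(k, i)
+ * performs one AES-128 key-schedule cycle, producing k[4i+4..4i+7] from the
+ * previous four words; ss[0] picks up the rotated and S-box-substituted last
+ * word XORed with the round constant t_use(r, c)[i], and each remaining word
+ * is the XOR of the word just formed with its counterpart from the previous
+ * round key.
+ */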
+
+static void
+aes_encrypt_key128(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[4];
+
+ rk[0] = ss[0] = word_in(key, 0);
+ rk[1] = ss[1] = word_in(key, 1);
+ rk[2] = ss[2] = word_in(key, 2);
+ rk[3] = ss[3] = word_in(key, 3);
+
+#ifdef ENC_KS_UNROLL
+ ke4(rk, 0); ke4(rk, 1);
+ ke4(rk, 2); ke4(rk, 3);
+ ke4(rk, 4); ke4(rk, 5);
+ ke4(rk, 6); ke4(rk, 7);
+ ke4(rk, 8);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 9; ++i)
+ ke4(rk, i);
+ }
+#endif /* ENC_KS_UNROLL */
+ ke4(rk, 9);
+}
+
+
+#define kef6(k, i) \
+{ k[6 * (i) + 6] = ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[6 * (i) + 7] = ss[1] ^= ss[0]; \
+ k[6 * (i) + 8] = ss[2] ^= ss[1]; \
+ k[6 * (i) + 9] = ss[3] ^= ss[2]; \
+}
+
+#define ke6(k, i) \
+{ kef6(k, i); \
+ k[6 * (i) + 10] = ss[4] ^= ss[3]; \
+ k[6 * (i) + 11] = ss[5] ^= ss[4]; \
+}
+
+static void
+aes_encrypt_key192(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[6];
+
+ rk[0] = ss[0] = word_in(key, 0);
+ rk[1] = ss[1] = word_in(key, 1);
+ rk[2] = ss[2] = word_in(key, 2);
+ rk[3] = ss[3] = word_in(key, 3);
+ rk[4] = ss[4] = word_in(key, 4);
+ rk[5] = ss[5] = word_in(key, 5);
+
+#ifdef ENC_KS_UNROLL
+ ke6(rk, 0); ke6(rk, 1);
+ ke6(rk, 2); ke6(rk, 3);
+ ke6(rk, 4); ke6(rk, 5);
+ ke6(rk, 6);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 7; ++i)
+ ke6(rk, i);
+ }
+#endif /* ENC_KS_UNROLL */
+ kef6(rk, 7);
+}
+
+
+
+#define kef8(k, i) \
+{ k[8 * (i) + 8] = ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[8 * (i) + 9] = ss[1] ^= ss[0]; \
+ k[8 * (i) + 10] = ss[2] ^= ss[1]; \
+ k[8 * (i) + 11] = ss[3] ^= ss[2]; \
+}
+
+#define ke8(k, i) \
+{ kef8(k, i); \
+ k[8 * (i) + 12] = ss[4] ^= ls_box(ss[3], 0); \
+ k[8 * (i) + 13] = ss[5] ^= ss[4]; \
+ k[8 * (i) + 14] = ss[6] ^= ss[5]; \
+ k[8 * (i) + 15] = ss[7] ^= ss[6]; \
+}
+
+static void
+aes_encrypt_key256(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[8];
+
+ rk[0] = ss[0] = word_in(key, 0);
+ rk[1] = ss[1] = word_in(key, 1);
+ rk[2] = ss[2] = word_in(key, 2);
+ rk[3] = ss[3] = word_in(key, 3);
+ rk[4] = ss[4] = word_in(key, 4);
+ rk[5] = ss[5] = word_in(key, 5);
+ rk[6] = ss[6] = word_in(key, 6);
+ rk[7] = ss[7] = word_in(key, 7);
+
+#ifdef ENC_KS_UNROLL
+ ke8(rk, 0); ke8(rk, 1);
+ ke8(rk, 2); ke8(rk, 3);
+ ke8(rk, 4); ke8(rk, 5);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 6; ++i)
+ ke8(rk, i);
+ }
+#endif /* ENC_KS_UNROLL */
+ kef8(rk, 6);
+}
+
+
+/*
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4 * (Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+int
+rijndael_key_setup_enc_amd64(uint32_t rk[], const uint32_t cipherKey[],
+ int keyBits)
+{
+ switch (keyBits) {
+ case 128:
+ aes_encrypt_key128((unsigned char *)&cipherKey[0], rk);
+ return (10);
+ case 192:
+ aes_encrypt_key192((unsigned char *)&cipherKey[0], rk);
+ return (12);
+ case 256:
+ aes_encrypt_key256((unsigned char *)&cipherKey[0], rk);
+ return (14);
+ default: /* should never get here */
+ break;
+ }
+
+ return (0);
+}
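+
+/*
+ * Illustrative use (a sketch with hypothetical variable names):
+ *
+ *	uint32_t rk[4 * (14 + 1)];	// large enough for any key size
+ *	int nr = rijndael_key_setup_enc_amd64(rk, key_words, 256);
+ *	// nr is now 14 and rk[] holds the expanded encryption schedule,
+ *	// ready to be passed to aes_encrypt_amd64() together with nr.
+ */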
+
+
+/* this is used to store the decryption round keys */
+/* in forward or reverse order */
+
+#ifdef AES_REV_DKS
+#define v(n, i) ((n) - (i) + 2 * ((i) & 3))
+#else
+#define v(n, i) (i)
+#endif
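+
+/*
+ * Example (illustrative): with AES_REV_DKS defined and n = 40, v(40, i) maps
+ * i = 0..3 to 40..43 and i = 4..7 to 36..39, so each 4-word round key keeps
+ * its internal word order while successive round keys are written from the
+ * end of the schedule back towards the start.
+ */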
+
+#if DEC_ROUND == NO_TABLES
+#define ff(x) (x)
+#else
+#define ff(x) inv_mcol(x)
+#if defined(dec_imvars)
+#define d_vars dec_imvars
+#endif
+#endif /* DEC_ROUND == NO_TABLES */
+
+
+#define k4e(k, i) \
+{ k[v(40, (4 * (i)) + 4)] = ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[v(40, (4 * (i)) + 5)] = ss[1] ^= ss[0]; \
+ k[v(40, (4 * (i)) + 6)] = ss[2] ^= ss[1]; \
+ k[v(40, (4 * (i)) + 7)] = ss[3] ^= ss[2]; \
+}
+
+#if 1
+
+#define kdf4(k, i) \
+{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
+ ss[1] = ss[1] ^ ss[3]; \
+ ss[2] = ss[2] ^ ss[3]; \
+ ss[4] = ls_box(ss[(i + 3) % 4], 3) ^ t_use(r, c)[i]; \
+ ss[i % 4] ^= ss[4]; \
+ ss[4] ^= k[v(40, (4 * (i)))]; k[v(40, (4 * (i)) + 4)] = ff(ss[4]); \
+ ss[4] ^= k[v(40, (4 * (i)) + 1)]; k[v(40, (4 * (i)) + 5)] = ff(ss[4]); \
+ ss[4] ^= k[v(40, (4 * (i)) + 2)]; k[v(40, (4 * (i)) + 6)] = ff(ss[4]); \
+ ss[4] ^= k[v(40, (4 * (i)) + 3)]; k[v(40, (4 * (i)) + 7)] = ff(ss[4]); \
+}
+
+#define kd4(k, i) \
+{ ss[4] = ls_box(ss[(i + 3) % 4], 3) ^ t_use(r, c)[i]; \
+ ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
+ k[v(40, (4 * (i)) + 4)] = ss[4] ^= k[v(40, (4 * (i)))]; \
+ k[v(40, (4 * (i)) + 5)] = ss[4] ^= k[v(40, (4 * (i)) + 1)]; \
+ k[v(40, (4 * (i)) + 6)] = ss[4] ^= k[v(40, (4 * (i)) + 2)]; \
+ k[v(40, (4 * (i)) + 7)] = ss[4] ^= k[v(40, (4 * (i)) + 3)]; \
+}
+
+#define kdl4(k, i) \
+{ ss[4] = ls_box(ss[(i + 3) % 4], 3) ^ t_use(r, c)[i]; \
+ ss[i % 4] ^= ss[4]; \
+ k[v(40, (4 * (i)) + 4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
+ k[v(40, (4 * (i)) + 5)] = ss[1] ^ ss[3]; \
+ k[v(40, (4 * (i)) + 6)] = ss[0]; \
+ k[v(40, (4 * (i)) + 7)] = ss[1]; \
+}
+
+#else
+
+#define kdf4(k, i) \
+{ ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[v(40, (4 * (i)) + 4)] = ff(ss[0]); \
+ ss[1] ^= ss[0]; k[v(40, (4 * (i)) + 5)] = ff(ss[1]); \
+ ss[2] ^= ss[1]; k[v(40, (4 * (i)) + 6)] = ff(ss[2]); \
+ ss[3] ^= ss[2]; k[v(40, (4 * (i)) + 7)] = ff(ss[3]); \
+}
+
+#define kd4(k, i) \
+{ ss[4] = ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ ss[0] ^= ss[4]; \
+ ss[4] = ff(ss[4]); \
+ k[v(40, (4 * (i)) + 4)] = ss[4] ^= k[v(40, (4 * (i)))]; \
+ ss[1] ^= ss[0]; \
+ k[v(40, (4 * (i)) + 5)] = ss[4] ^= k[v(40, (4 * (i)) + 1)]; \
+ ss[2] ^= ss[1]; \
+ k[v(40, (4 * (i)) + 6)] = ss[4] ^= k[v(40, (4 * (i)) + 2)]; \
+ ss[3] ^= ss[2]; \
+ k[v(40, (4 * (i)) + 7)] = ss[4] ^= k[v(40, (4 * (i)) + 3)]; \
+}
+
+#define kdl4(k, i) \
+{ ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[v(40, (4 * (i)) + 4)] = ss[0]; \
+ ss[1] ^= ss[0]; k[v(40, (4 * (i)) + 5)] = ss[1]; \
+ ss[2] ^= ss[1]; k[v(40, (4 * (i)) + 6)] = ss[2]; \
+ ss[3] ^= ss[2]; k[v(40, (4 * (i)) + 7)] = ss[3]; \
+}
+
+#endif
+
+static void
+aes_decrypt_key128(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[5];
+#if defined(d_vars)
+ d_vars;
+#endif
+ rk[v(40, (0))] = ss[0] = word_in(key, 0);
+ rk[v(40, (1))] = ss[1] = word_in(key, 1);
+ rk[v(40, (2))] = ss[2] = word_in(key, 2);
+ rk[v(40, (3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+ kdf4(rk, 0); kd4(rk, 1);
+ kd4(rk, 2); kd4(rk, 3);
+ kd4(rk, 4); kd4(rk, 5);
+ kd4(rk, 6); kd4(rk, 7);
+ kd4(rk, 8); kdl4(rk, 9);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 10; ++i)
+ k4e(rk, i);
+#if !(DEC_ROUND == NO_TABLES)
+ for (i = MAX_AES_NB; i < 10 * MAX_AES_NB; ++i)
+ rk[i] = inv_mcol(rk[i]);
+#endif
+ }
+#endif /* DEC_KS_UNROLL */
+}
+
+
+
+#define k6ef(k, i) \
+{ k[v(48, (6 * (i)) + 6)] = ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[v(48, (6 * (i)) + 7)] = ss[1] ^= ss[0]; \
+ k[v(48, (6 * (i)) + 8)] = ss[2] ^= ss[1]; \
+ k[v(48, (6 * (i)) + 9)] = ss[3] ^= ss[2]; \
+}
+
+#define k6e(k, i) \
+{ k6ef(k, i); \
+ k[v(48, (6 * (i)) + 10)] = ss[4] ^= ss[3]; \
+ k[v(48, (6 * (i)) + 11)] = ss[5] ^= ss[4]; \
+}
+
+#define kdf6(k, i) \
+{ ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[v(48, (6 * (i)) + 6)] = ff(ss[0]); \
+ ss[1] ^= ss[0]; k[v(48, (6 * (i)) + 7)] = ff(ss[1]); \
+ ss[2] ^= ss[1]; k[v(48, (6 * (i)) + 8)] = ff(ss[2]); \
+ ss[3] ^= ss[2]; k[v(48, (6 * (i)) + 9)] = ff(ss[3]); \
+ ss[4] ^= ss[3]; k[v(48, (6 * (i)) + 10)] = ff(ss[4]); \
+ ss[5] ^= ss[4]; k[v(48, (6 * (i)) + 11)] = ff(ss[5]); \
+}
+
+#define kd6(k, i) \
+{ ss[6] = ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
+ k[v(48, (6 * (i)) + 6)] = ss[6] ^= k[v(48, (6 * (i)))]; \
+ ss[1] ^= ss[0]; \
+ k[v(48, (6 * (i)) + 7)] = ss[6] ^= k[v(48, (6 * (i)) + 1)]; \
+ ss[2] ^= ss[1]; \
+ k[v(48, (6 * (i)) + 8)] = ss[6] ^= k[v(48, (6 * (i)) + 2)]; \
+ ss[3] ^= ss[2]; \
+ k[v(48, (6 * (i)) + 9)] = ss[6] ^= k[v(48, (6 * (i)) + 3)]; \
+ ss[4] ^= ss[3]; \
+ k[v(48, (6 * (i)) + 10)] = ss[6] ^= k[v(48, (6 * (i)) + 4)]; \
+ ss[5] ^= ss[4]; \
+ k[v(48, (6 * (i)) + 11)] = ss[6] ^= k[v(48, (6 * (i)) + 5)]; \
+}
+
+#define kdl6(k, i) \
+{ ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[v(48, (6 * (i)) + 6)] = ss[0]; \
+ ss[1] ^= ss[0]; k[v(48, (6 * (i)) + 7)] = ss[1]; \
+ ss[2] ^= ss[1]; k[v(48, (6 * (i)) + 8)] = ss[2]; \
+ ss[3] ^= ss[2]; k[v(48, (6 * (i)) + 9)] = ss[3]; \
+}
+
+static void
+aes_decrypt_key192(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[7];
+#if defined(d_vars)
+ d_vars;
+#endif
+ rk[v(48, (0))] = ss[0] = word_in(key, 0);
+ rk[v(48, (1))] = ss[1] = word_in(key, 1);
+ rk[v(48, (2))] = ss[2] = word_in(key, 2);
+ rk[v(48, (3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+ ss[4] = word_in(key, 4);
+ rk[v(48, (4))] = ff(ss[4]);
+ ss[5] = word_in(key, 5);
+ rk[v(48, (5))] = ff(ss[5]);
+ kdf6(rk, 0); kd6(rk, 1);
+ kd6(rk, 2); kd6(rk, 3);
+ kd6(rk, 4); kd6(rk, 5);
+ kd6(rk, 6); kdl6(rk, 7);
+#else
+ rk[v(48, (4))] = ss[4] = word_in(key, 4);
+ rk[v(48, (5))] = ss[5] = word_in(key, 5);
+ {
+ uint32_t i;
+
+ for (i = 0; i < 7; ++i)
+ k6e(rk, i);
+ k6ef(rk, 7);
+#if !(DEC_ROUND == NO_TABLES)
+ for (i = MAX_AES_NB; i < 12 * MAX_AES_NB; ++i)
+ rk[i] = inv_mcol(rk[i]);
+#endif
+ }
+#endif
+}
+
+
+
+#define k8ef(k, i) \
+{ k[v(56, (8 * (i)) + 8)] = ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[v(56, (8 * (i)) + 9)] = ss[1] ^= ss[0]; \
+ k[v(56, (8 * (i)) + 10)] = ss[2] ^= ss[1]; \
+ k[v(56, (8 * (i)) + 11)] = ss[3] ^= ss[2]; \
+}
+
+#define k8e(k, i) \
+{ k8ef(k, i); \
+ k[v(56, (8 * (i)) + 12)] = ss[4] ^= ls_box(ss[3], 0); \
+ k[v(56, (8 * (i)) + 13)] = ss[5] ^= ss[4]; \
+ k[v(56, (8 * (i)) + 14)] = ss[6] ^= ss[5]; \
+ k[v(56, (8 * (i)) + 15)] = ss[7] ^= ss[6]; \
+}
+
+#define kdf8(k, i) \
+{ ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[v(56, (8 * (i)) + 8)] = ff(ss[0]); \
+ ss[1] ^= ss[0]; k[v(56, (8 * (i)) + 9)] = ff(ss[1]); \
+ ss[2] ^= ss[1]; k[v(56, (8 * (i)) + 10)] = ff(ss[2]); \
+ ss[3] ^= ss[2]; k[v(56, (8 * (i)) + 11)] = ff(ss[3]); \
+ ss[4] ^= ls_box(ss[3], 0); k[v(56, (8 * (i)) + 12)] = ff(ss[4]); \
+ ss[5] ^= ss[4]; k[v(56, (8 * (i)) + 13)] = ff(ss[5]); \
+ ss[6] ^= ss[5]; k[v(56, (8 * (i)) + 14)] = ff(ss[6]); \
+ ss[7] ^= ss[6]; k[v(56, (8 * (i)) + 15)] = ff(ss[7]); \
+}
+
+#define kd8(k, i) \
+{ ss[8] = ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ ss[0] ^= ss[8]; \
+ ss[8] = ff(ss[8]); \
+ k[v(56, (8 * (i)) + 8)] = ss[8] ^= k[v(56, (8 * (i)))]; \
+ ss[1] ^= ss[0]; \
+ k[v(56, (8 * (i)) + 9)] = ss[8] ^= k[v(56, (8 * (i)) + 1)]; \
+ ss[2] ^= ss[1]; \
+ k[v(56, (8 * (i)) + 10)] = ss[8] ^= k[v(56, (8 * (i)) + 2)]; \
+ ss[3] ^= ss[2]; \
+ k[v(56, (8 * (i)) + 11)] = ss[8] ^= k[v(56, (8 * (i)) + 3)]; \
+ ss[8] = ls_box(ss[3], 0); \
+ ss[4] ^= ss[8]; \
+ ss[8] = ff(ss[8]); \
+ k[v(56, (8 * (i)) + 12)] = ss[8] ^= k[v(56, (8 * (i)) + 4)]; \
+ ss[5] ^= ss[4]; \
+ k[v(56, (8 * (i)) + 13)] = ss[8] ^= k[v(56, (8 * (i)) + 5)]; \
+ ss[6] ^= ss[5]; \
+ k[v(56, (8 * (i)) + 14)] = ss[8] ^= k[v(56, (8 * (i)) + 6)]; \
+ ss[7] ^= ss[6]; \
+ k[v(56, (8 * (i)) + 15)] = ss[8] ^= k[v(56, (8 * (i)) + 7)]; \
+}
+
+#define kdl8(k, i) \
+{ ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[v(56, (8 * (i)) + 8)] = ss[0]; \
+ ss[1] ^= ss[0]; k[v(56, (8 * (i)) + 9)] = ss[1]; \
+ ss[2] ^= ss[1]; k[v(56, (8 * (i)) + 10)] = ss[2]; \
+ ss[3] ^= ss[2]; k[v(56, (8 * (i)) + 11)] = ss[3]; \
+}
+
+static void
+aes_decrypt_key256(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[9];
+#if defined(d_vars)
+ d_vars;
+#endif
+ rk[v(56, (0))] = ss[0] = word_in(key, 0);
+ rk[v(56, (1))] = ss[1] = word_in(key, 1);
+ rk[v(56, (2))] = ss[2] = word_in(key, 2);
+ rk[v(56, (3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+ ss[4] = word_in(key, 4);
+ rk[v(56, (4))] = ff(ss[4]);
+ ss[5] = word_in(key, 5);
+ rk[v(56, (5))] = ff(ss[5]);
+ ss[6] = word_in(key, 6);
+ rk[v(56, (6))] = ff(ss[6]);
+ ss[7] = word_in(key, 7);
+ rk[v(56, (7))] = ff(ss[7]);
+ kdf8(rk, 0); kd8(rk, 1);
+ kd8(rk, 2); kd8(rk, 3);
+ kd8(rk, 4); kd8(rk, 5);
+ kdl8(rk, 6);
+#else
+ rk[v(56, (4))] = ss[4] = word_in(key, 4);
+ rk[v(56, (5))] = ss[5] = word_in(key, 5);
+ rk[v(56, (6))] = ss[6] = word_in(key, 6);
+ rk[v(56, (7))] = ss[7] = word_in(key, 7);
+ {
+ uint32_t i;
+
+ for (i = 0; i < 6; ++i)
+ k8e(rk, i);
+ k8ef(rk, 6);
+#if !(DEC_ROUND == NO_TABLES)
+ for (i = MAX_AES_NB; i < 14 * MAX_AES_NB; ++i)
+ rk[i] = inv_mcol(rk[i]);
+#endif
+ }
+#endif /* DEC_KS_UNROLL */
+}
+
+
+/*
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4 * (Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+int
+rijndael_key_setup_dec_amd64(uint32_t rk[], const uint32_t cipherKey[],
+ int keyBits)
+{
+ switch (keyBits) {
+ case 128:
+ aes_decrypt_key128((unsigned char *)&cipherKey[0], rk);
+ return (10);
+ case 192:
+ aes_decrypt_key192((unsigned char *)&cipherKey[0], rk);
+ return (12);
+ case 256:
+ aes_decrypt_key256((unsigned char *)&cipherKey[0], rk);
+ return (14);
+ default: /* should never get here */
+ break;
+ }
+
+ return (0);
+}
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aesopt.h b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aesopt.h
new file mode 100644
index 000000000000..472111f96e59
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aesopt.h
@@ -0,0 +1,770 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue Date: 20/12/2007
+ *
+ * This file contains the compilation options for AES (Rijndael) and code
+ * that is common across encryption, key scheduling and table generation.
+ *
+ * OPERATION
+ *
+ * These source code files implement the AES algorithm Rijndael designed by
+ * Joan Daemen and Vincent Rijmen. This version is designed for the standard
+ * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
+ * and 32 bytes).
+ *
+ * This version is designed for flexibility and speed using operations on
+ * 32-bit words rather than operations on bytes. It can be compiled with
+ * either big or little endian internal byte order but is faster when the
+ * native byte order for the processor is used.
+ *
+ * THE CIPHER INTERFACE
+ *
+ * The cipher interface is implemented as an array of bytes in which lower
+ * AES bit sequence indexes map to higher numeric significance within bytes.
+ */
+
+/*
+ * OpenSolaris changes
+ * 1. Added __cplusplus and _AESTAB_H header guards
+ * 2. Added header files sys/types.h and aes_impl.h
+ * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
+ * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
+ * from brg_endian.h
+ * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
+ * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
+ * 7. Defined aes_sw32 as htonl() for byte swapping
+ * 8. Cstyled and hdrchk code
+ *
+ */
+
+#ifndef _AESOPT_H
+#define _AESOPT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <aes/aes_impl.h>
+
+/* SUPPORT FEATURES */
+#define AES_ENCRYPT /* if support for encryption is needed */
+#define AES_DECRYPT /* if support for decryption is needed */
+
+/* PLATFORM-SPECIFIC FEATURES */
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#define AES_REV_DKS /* define to reverse decryption key schedule */
+
+
+/*
+ * CONFIGURATION - THE USE OF DEFINES
+ * Later in this section there are a number of defines that control the
+ * operation of the code. In each section, the purpose of each define is
+ * explained so that the relevant form can be included or excluded by
+ * setting either 1's or 0's respectively on the branches of the related
+ * #if clauses. The following local defines should not be changed.
+ */
+
+#define ENCRYPTION_IN_C 1
+#define DECRYPTION_IN_C 2
+#define ENC_KEYING_IN_C 4
+#define DEC_KEYING_IN_C 8
+
+#define NO_TABLES 0
+#define ONE_TABLE 1
+#define FOUR_TABLES 4
+#define NONE 0
+#define PARTIAL 1
+#define FULL 2
+
+/* --- START OF USER CONFIGURED OPTIONS --- */
+
+/*
+ * 1. BYTE ORDER WITHIN 32 BIT WORDS
+ *
+ * The fundamental data processing units in Rijndael are 8-bit bytes. The
+ * input, output and key input are all enumerated arrays of bytes in which
+ * bytes are numbered starting at zero and increasing to one less than the
+ * number of bytes in the array in question. This enumeration is only used
+ * for naming bytes and does not imply any adjacency or order relationship
+ * from one byte to another. When these inputs and outputs are considered
+ * as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
+ * byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
+ * In this implementation bits are numbered from 0 to 7 starting at the
+ * numerically least significant end of each byte. Bit n represents 2^n.
+ *
+ * However, Rijndael can be implemented more efficiently using 32-bit
+ * words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
+ * into word[n]. While in principle these bytes can be assembled into words
+ * in any positions, this implementation only supports the two formats in
+ * which bytes in adjacent positions within words also have adjacent byte
+ * numbers. This order is called big-endian if the lowest numbered bytes
+ * in words have the highest numeric significance and little-endian if the
+ * opposite applies.
+ *
+ * This code can work in either order irrespective of the order used by the
+ * machine on which it runs. Normally the internal byte order will be set
+ * to the order of the processor on which the code is to be run but this
+ * define can be used to reverse this in special situations
+ *
+ * WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
+ * This define will hence be redefined later (in section 4) if necessary
+ */
+
+#if 1
+#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#elif 0
+#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0
+#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#error The algorithm byte order is not defined
+#endif
+
+/* 2. VIA ACE SUPPORT */
+
+#if defined(__GNUC__) && defined(__i386__) || \
+ defined(_WIN32) && defined(_M_IX86) && \
+ !(defined(_WIN64) || defined(_WIN32_WCE) || \
+ defined(_MSC_VER) && (_MSC_VER <= 800))
+#define VIA_ACE_POSSIBLE
+#endif
+
+/*
+ * Define this option if support for the VIA ACE is required. This uses
+ * inline assembler instructions and is only implemented for the Microsoft,
+ * Intel and GCC compilers. If VIA ACE is known to be present, then defining
+ * ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
+ * code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
+ * it is detected (both present and enabled) but the normal AES code will
+ * also be present.
+ *
+ * When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
+ * aligned; other input/output buffers do not need to be 16 byte aligned
+ * but there are very large performance gains if this can be arranged.
+ * VIA ACE also requires the decryption key schedule to be in reverse
+ * order (which later checks below ensure).
+ */
+
+/* VIA ACE is not used here for OpenSolaris: */
+#undef VIA_ACE_POSSIBLE
+#undef ASSUME_VIA_ACE_PRESENT
+
+#if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)
+#define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)
+#define ASSUME_VIA_ACE_PRESENT
+#endif
+
+
+/*
+ * 3. ASSEMBLER SUPPORT
+ *
+ * This define (which can be on the command line) enables the use of the
+ * assembler code routines for encryption, decryption and key scheduling
+ * as follows:
+ *
+ * ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
+ * encryption and decryption but with key scheduling in C
+ * ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
+ * encryption, decryption and key scheduling
+ * ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
+ * encryption and decryption but with key scheduling in C
+ * ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
+ * encryption and decryption but with key scheduling in C
+ *
+ * Change one 'if 0' below to 'if 1' to select the version or define
+ * as a compilation option.
+ */
+
+#if 0 && !defined(ASM_X86_V1C)
+#define ASM_X86_V1C
+#elif 0 && !defined(ASM_X86_V2)
+#define ASM_X86_V2
+#elif 0 && !defined(ASM_X86_V2C)
+#define ASM_X86_V2C
+#elif 1 && !defined(ASM_AMD64_C)
+#define ASM_AMD64_C
+#endif
+
+#if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
+ !defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
+ !defined(__amd64)
+#error Assembler code is only available for x86 and AMD64 systems
+#endif
+
+/*
+ * 4. FAST INPUT/OUTPUT OPERATIONS.
+ *
+ * On some machines it is possible to improve speed by transferring the
+ * bytes in the input and output arrays to and from the internal 32-bit
+ * variables by addressing these arrays as if they are arrays of 32-bit
+ * words. On some machines this will always be possible but there may
+ * be a large performance penalty if the byte arrays are not aligned on
+ * the normal word boundaries. On other machines this technique will
+ * lead to memory access errors when such 32-bit word accesses are not
+ * properly aligned. The option SAFE_IO avoids such problems but will
+ * often be slower on those machines that support misaligned access
+ * (especially so if care is taken to align the input and output byte
+ * arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
+ * assumed that access to byte arrays as if they are arrays of 32-bit
+ * words will not cause problems when such accesses are misaligned.
+ */
+#if 1 && !defined(_MSC_VER)
+#define SAFE_IO
+#endif
+
+/*
+ * 5. LOOP UNROLLING
+ *
+ * The code for encryption and decryption cycles through a number of rounds
+ * that can be implemented either in a loop or by expanding the code into a
+ * long sequence of instructions, the latter producing a larger program but
+ * one that will often be much faster. The latter is called loop unrolling.
+ * There are also potential speed advantages in expanding two iterations in
+ * a loop with half the number of iterations, which is called partial loop
+ * unrolling. The following options allow partial or full loop unrolling
+ * to be set independently for encryption and decryption
+ */
+#if 1
+#define ENC_UNROLL FULL
+#elif 0
+#define ENC_UNROLL PARTIAL
+#else
+#define ENC_UNROLL NONE
+#endif
+
+#if 1
+#define DEC_UNROLL FULL
+#elif 0
+#define DEC_UNROLL PARTIAL
+#else
+#define DEC_UNROLL NONE
+#endif
+
+#if 1
+#define ENC_KS_UNROLL
+#endif
+
+#if 1
+#define DEC_KS_UNROLL
+#endif
+
+/*
+ * 6. FAST FINITE FIELD OPERATIONS
+ *
+ * If this section is included, tables are used to provide faster finite
+ * field arithmetic. This has no effect if FIXED_TABLES is defined.
+ */
+#if 1
+#define FF_TABLES
+#endif
+
+/*
+ * 7. INTERNAL STATE VARIABLE FORMAT
+ *
+ * The internal state of Rijndael is stored in a number of local 32-bit
+ * word variables which can be defined either as an array or as individually
+ * named variables. Include this section if you want to store these local
+ * variables in arrays. Otherwise individual local variables will be used.
+ */
+#if 1
+#define ARRAYS
+#endif
+
+/*
+ * 8. FIXED OR DYNAMIC TABLES
+ *
+ * When this section is included the tables used by the code are compiled
+ * statically into the binary file. Otherwise the subroutine aes_init()
+ * must be called to compute them before the code is first used.
+ */
+#if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))
+#define FIXED_TABLES
+#endif
+
+/*
+ * 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
+ *
+ * In some systems it is better to mask longer values to extract bytes
+ * rather than using a cast. This option allows this choice.
+ */
+#if 0
+#define to_byte(x) ((uint8_t)(x))
+#else
+#define to_byte(x) ((x) & 0xff)
+#endif
+
+/*
+ * 10. TABLE ALIGNMENT
+ *
+ * On some systems speed will be improved by aligning the AES large lookup
+ * tables on particular boundaries. This define should be set to a power of
+ * two giving the desired alignment. It can be left undefined if alignment
+ * is not needed. This option is specific to the Microsoft VC++ compiler -
+ * it seems to sometimes cause trouble for the VC++ version 6 compiler.
+ */
+
+#if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)
+#define TABLE_ALIGN 32
+#endif
+
+/*
+ * 11. REDUCE CODE AND TABLE SIZE
+ *
+ * This replaces some expanded macros with function calls if ASM_X86_V2 or
+ * ASM_X86_V2C is defined
+ */
+
+#if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
+#define REDUCE_CODE_SIZE
+#endif
+
+/*
+ * 12. TABLE OPTIONS
+ *
+ * This cipher proceeds by repeating a number of cycles known as rounds,
+ * each implemented by a round function that can optionally be speeded
+ * up using tables. The basic tables are 256 32-bit words, with either
+ * one or four tables being required for each round function depending on
+ * how much speed is required. Encryption and decryption round functions
+ * are different and the last encryption and decryption round functions are
+ * different again making four different round functions in all.
+ *
+ * This means that:
+ * 1. Normal encryption and decryption rounds can each use either 0, 1
+ * or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+ * 2. The last encryption and decryption rounds can also use either 0, 1
+ * or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+ *
+ * Include or exclude the appropriate definitions below to set the number
+ * of tables used by this implementation.
+ */
+
+#if 1 /* set tables for the normal encryption round */
+#define ENC_ROUND FOUR_TABLES
+#elif 0
+#define ENC_ROUND ONE_TABLE
+#else
+#define ENC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the last encryption round */
+#define LAST_ENC_ROUND FOUR_TABLES
+#elif 0
+#define LAST_ENC_ROUND ONE_TABLE
+#else
+#define LAST_ENC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the normal decryption round */
+#define DEC_ROUND FOUR_TABLES
+#elif 0
+#define DEC_ROUND ONE_TABLE
+#else
+#define DEC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the last decryption round */
+#define LAST_DEC_ROUND FOUR_TABLES
+#elif 0
+#define LAST_DEC_ROUND ONE_TABLE
+#else
+#define LAST_DEC_ROUND NO_TABLES
+#endif
+
+/*
+ * The decryption key schedule can be speeded up with tables in the same
+ * way that the round functions can. Include or exclude the following
+ * defines to set this requirement.
+ */
+#if 1
+#define KEY_SCHED FOUR_TABLES
+#elif 0
+#define KEY_SCHED ONE_TABLE
+#else
+#define KEY_SCHED NO_TABLES
+#endif
+
+/* ---- END OF USER CONFIGURED OPTIONS ---- */
+
+/* VIA ACE support is only available for VC++ and GCC */
+
+#if !defined(_MSC_VER) && !defined(__GNUC__)
+#if defined(ASSUME_VIA_ACE_PRESENT)
+#undef ASSUME_VIA_ACE_PRESENT
+#endif
+#if defined(USE_VIA_ACE_IF_PRESENT)
+#undef USE_VIA_ACE_IF_PRESENT
+#endif
+#endif
+
+#if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)
+#define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)
+#define AES_REV_DKS
+#endif
+
+/* Assembler support requires the use of platform byte order */
+
+#if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
+ (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
+#undef ALGORITHM_BYTE_ORDER
+#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#endif
+
+/*
+ * In this implementation the columns of the state array are each held in
+ * 32-bit words. The state array can be held in various ways: in an array
+ * of words, in a number of individual word variables or in a number of
+ * processor registers. The following define maps a variable name x and
+ * a column number c to the way the state array variable is to be held.
+ * The first define below maps the state into an array x[c] whereas the
+ * second form maps the state into a number of individual variables x0,
+ * x1, etc. Another form could map individual state columns to machine
+ * register names.
+ */
+
+#if defined(ARRAYS)
+#define s(x, c) x[c]
+#else
+#define s(x, c) x##c
+#endif
+
+/*
+ * This implementation provides subroutines for encryption, decryption
+ * and for setting the three key lengths (separately) for encryption
+ * and decryption. Since not all functions are needed, masks are set
+ * up here to determine which will be implemented in C
+ */
+
+#if !defined(AES_ENCRYPT)
+#define EFUNCS_IN_C 0
+#elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
+ defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
+#define EFUNCS_IN_C ENC_KEYING_IN_C
+#elif !defined(ASM_X86_V2)
+#define EFUNCS_IN_C (ENCRYPTION_IN_C | ENC_KEYING_IN_C)
+#else
+#define EFUNCS_IN_C 0
+#endif
+
+#if !defined(AES_DECRYPT)
+#define DFUNCS_IN_C 0
+#elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
+ defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
+#define DFUNCS_IN_C DEC_KEYING_IN_C
+#elif !defined(ASM_X86_V2)
+#define DFUNCS_IN_C (DECRYPTION_IN_C | DEC_KEYING_IN_C)
+#else
+#define DFUNCS_IN_C 0
+#endif
+
+#define FUNCS_IN_C (EFUNCS_IN_C | DFUNCS_IN_C)
+
+/* END OF CONFIGURATION OPTIONS */
+
+/* Disable or report errors on some combinations of options */
+
+#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
+#undef LAST_ENC_ROUND
+#define LAST_ENC_ROUND NO_TABLES
+#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
+#undef LAST_ENC_ROUND
+#define LAST_ENC_ROUND ONE_TABLE
+#endif
+
+#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
+#undef ENC_UNROLL
+#define ENC_UNROLL NONE
+#endif
+
+#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
+#undef LAST_DEC_ROUND
+#define LAST_DEC_ROUND NO_TABLES
+#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
+#undef LAST_DEC_ROUND
+#define LAST_DEC_ROUND ONE_TABLE
+#endif
+
+#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
+#undef DEC_UNROLL
+#define DEC_UNROLL NONE
+#endif
+
+#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#define aes_sw32 htonl
+#elif defined(bswap32)
+#define aes_sw32 bswap32
+#elif defined(bswap_32)
+#define aes_sw32 bswap_32
+#else
+#define brot(x, n) (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
+#define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
+#endif
+
+
+/*
+ * upr(x, n): rotates bytes within words by n positions, moving bytes to
+ * higher index positions with wrap around into low positions
+ * ups(x, n): moves bytes by n positions to higher index positions in
+ * words but without wrap around
+ * bval(x, n): extracts a byte from a word
+ *
+ * WARNING: The definitions given here are intended only for use with
+ * unsigned variables and with shift counts that are compile
+ * time constants
+ */
+
+#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#define upr(x, n) (((uint32_t)(x) << (8 * (n))) | \
+ ((uint32_t)(x) >> (32 - 8 * (n))))
+#define ups(x, n) ((uint32_t)(x) << (8 * (n)))
+#define bval(x, n) to_byte((x) >> (8 * (n)))
+#define bytes2word(b0, b1, b2, b3) \
+ (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
+ ((uint32_t)(b1) << 8) | (b0))
+#endif
+
+#if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
+#define upr(x, n) (((uint32_t)(x) >> (8 * (n))) | \
+ ((uint32_t)(x) << (32 - 8 * (n))))
+#define ups(x, n) ((uint32_t)(x) >> (8 * (n)))
+#define bval(x, n) to_byte((x) >> (24 - 8 * (n)))
+#define bytes2word(b0, b1, b2, b3) \
+ (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
+ ((uint32_t)(b2) << 8) | (b3))
+#endif
+
+#if defined(SAFE_IO)
+#define word_in(x, c) bytes2word(((const uint8_t *)(x) + 4 * c)[0], \
+ ((const uint8_t *)(x) + 4 * c)[1], \
+ ((const uint8_t *)(x) + 4 * c)[2], \
+ ((const uint8_t *)(x) + 4 * c)[3])
+#define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \
+ ((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \
+ ((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \
+ ((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }
+#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
+#define word_in(x, c) (*((uint32_t *)(x) + (c)))
+#define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
+#else
+#define word_in(x, c) aes_sw32(*((uint32_t *)(x) + (c)))
+#define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
+#endif
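+
+/*
+ * Example (illustrative): with SAFE_IO defined, word_in(p, 1) assembles the
+ * bytes p[4..7] through bytes2word(); on a little-endian build p[4] lands in
+ * the least significant byte of the result, which is what the direct 32-bit
+ * load used in the non-SAFE_IO case would produce on such a platform.
+ */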
+
+/* the finite field modular polynomial and elements */
+
+#define WPOLY 0x011b
+#define BPOLY 0x1b
+
+/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
+
+#define m1 0x80808080
+#define m2 0x7f7f7f7f
+#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
+
+/*
+ * The following defines provide alternative definitions of gf_mulx that might
+ * give improved performance if a fast 32-bit multiply is not available. Note
+ * that a temporary variable u needs to be defined where gf_mulx is used.
+ *
+ * #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
+ * ((u >> 3) | (u >> 6))
+ * #define m4 (0x01010101 * BPOLY)
+ * #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
+ * & m4)
+ */
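+
+/*
+ * Worked example (illustrative): for the single-lane value 0x000000c3,
+ * gf_mulx() computes ((0xc3 & 0x7f) << 1) ^ (((0xc3 & 0x80) >> 7) * 0x1b)
+ * = 0x86 ^ 0x1b = 0x9d, i.e. {02}.{c3} = {9d}; the same multiply happens
+ * independently in each of the four byte lanes of a full 32-bit word.
+ */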
+
+/* Work out which tables are needed for the different options */
+
+#if defined(ASM_X86_V1C)
+#if defined(ENC_ROUND)
+#undef ENC_ROUND
+#endif
+#define ENC_ROUND FOUR_TABLES
+#if defined(LAST_ENC_ROUND)
+#undef LAST_ENC_ROUND
+#endif
+#define LAST_ENC_ROUND FOUR_TABLES
+#if defined(DEC_ROUND)
+#undef DEC_ROUND
+#endif
+#define DEC_ROUND FOUR_TABLES
+#if defined(LAST_DEC_ROUND)
+#undef LAST_DEC_ROUND
+#endif
+#define LAST_DEC_ROUND FOUR_TABLES
+#if defined(KEY_SCHED)
+#undef KEY_SCHED
+#define KEY_SCHED FOUR_TABLES
+#endif
+#endif
+
+#if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)
+#if ENC_ROUND == ONE_TABLE
+#define FT1_SET
+#elif ENC_ROUND == FOUR_TABLES
+#define FT4_SET
+#else
+#define SBX_SET
+#endif
+#if LAST_ENC_ROUND == ONE_TABLE
+#define FL1_SET
+#elif LAST_ENC_ROUND == FOUR_TABLES
+#define FL4_SET
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+
+#if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C)
+#if DEC_ROUND == ONE_TABLE
+#define IT1_SET
+#elif DEC_ROUND == FOUR_TABLES
+#define IT4_SET
+#else
+#define ISB_SET
+#endif
+#if LAST_DEC_ROUND == ONE_TABLE
+#define IL1_SET
+#elif LAST_DEC_ROUND == FOUR_TABLES
+#define IL4_SET
+#elif !defined(ISB_SET)
+#define ISB_SET
+#endif
+#endif
+
+
+#if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
+ defined(ASM_X86_V2C)))
+#if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
+#if KEY_SCHED == ONE_TABLE
+#if !defined(FL1_SET) && !defined(FL4_SET)
+#define LS1_SET
+#endif
+#elif KEY_SCHED == FOUR_TABLES
+#if !defined(FL4_SET)
+#define LS4_SET
+#endif
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+#if (FUNCS_IN_C & DEC_KEYING_IN_C)
+#if KEY_SCHED == ONE_TABLE
+#define IM1_SET
+#elif KEY_SCHED == FOUR_TABLES
+#define IM4_SET
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+#endif
+
+/* generic definitions of Rijndael macros that use tables */
+
+#define no_table(x, box, vf, rf, c) bytes2word(\
+ box[bval(vf(x, 0, c), rf(0, c))], \
+ box[bval(vf(x, 1, c), rf(1, c))], \
+ box[bval(vf(x, 2, c), rf(2, c))], \
+ box[bval(vf(x, 3, c), rf(3, c))])
+
+#define one_table(x, op, tab, vf, rf, c) \
+ (tab[bval(vf(x, 0, c), rf(0, c))] \
+ ^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \
+ ^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \
+ ^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3))
+
+#define four_tables(x, tab, vf, rf, c) \
+ (tab[0][bval(vf(x, 0, c), rf(0, c))] \
+ ^ tab[1][bval(vf(x, 1, c), rf(1, c))] \
+ ^ tab[2][bval(vf(x, 2, c), rf(2, c))] \
+ ^ tab[3][bval(vf(x, 3, c), rf(3, c))])
+
+#define vf1(x, r, c) (x)
+#define rf1(r, c) (r)
+#define rf2(r, c) ((8+r-c)&3)
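+
+/*
+ * With vf1/rf1 and c == 0, four_tables(x, tab, vf1, rf1, 0) expands to
+ *
+ *     tab[0][bval(x, 0)] ^ tab[1][bval(x, 1)] ^
+ *     tab[2][bval(x, 2)] ^ tab[3][bval(x, 3)]
+ *
+ * i.e. one 32-bit lookup per state byte, XORed together so that each table
+ * can pre-combine the S-box with the column-mix contribution for its byte
+ * position (bval() and upr() are the byte-select and byte-rotate primitives
+ * defined earlier in this header).  one_table() produces the same result
+ * from a single table plus rotations, and no_table() falls back to the
+ * plain S-box.  rf2() rotates the byte selection by the column index c.
+ */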
+
+/*
+ * Perform forward and inverse column mix operation on four bytes in long word
+ * x in parallel. NOTE: x must be a simple variable, NOT an expression in
+ * these macros.
+ */
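+
+/*
+ * The restriction exists because these macros evaluate x several times
+ * (gf_mulx() alone reads it twice), so an expression with side effects
+ * would be expanded repeatedly.  In the table-free form below, fwd_mcol()
+ * computes the usual MixColumns row (02 03 01 01): with g2 = {02}.x per
+ * byte, the result is g2 ^ upr(x ^ g2, 3) ^ upr(x, 2) ^ upr(x, 1), where
+ * x ^ g2 = {03}.x and upr() (defined earlier) rotates whole bytes within
+ * the word.
+ */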
+
+#if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
+ defined(ASM_X86_V2C)))
+
+#if defined(FM4_SET) /* not currently used */
+#define fwd_mcol(x) four_tables(x, t_use(f, m), vf1, rf1, 0)
+#elif defined(FM1_SET) /* not currently used */
+#define fwd_mcol(x) one_table(x, upr, t_use(f, m), vf1, rf1, 0)
+#else
+#define dec_fmvars uint32_t g2
+#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \
+ upr((x), 2) ^ upr((x), 1))
+#endif
+
+#if defined(IM4_SET)
+#define inv_mcol(x) four_tables(x, t_use(i, m), vf1, rf1, 0)
+#elif defined(IM1_SET)
+#define inv_mcol(x) one_table(x, upr, t_use(i, m), vf1, rf1, 0)
+#else
+#define dec_imvars uint32_t g2, g4, g9
+#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \
+ (x) ^ gf_mulx(g4), g4 ^= g9, \
+ (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \
+ upr(g4, 2) ^ upr(g9, 1))
+#endif
+
+#if defined(FL4_SET)
+#define ls_box(x, c) four_tables(x, t_use(f, l), vf1, rf2, c)
+#elif defined(LS4_SET)
+#define ls_box(x, c) four_tables(x, t_use(l, s), vf1, rf2, c)
+#elif defined(FL1_SET)
+#define ls_box(x, c) one_table(x, upr, t_use(f, l), vf1, rf2, c)
+#elif defined(LS1_SET)
+#define ls_box(x, c) one_table(x, upr, t_use(l, s), vf1, rf2, c)
+#else
+#define ls_box(x, c) no_table(x, t_use(s, box), vf1, rf2, c)
+#endif
+
+#endif
+
+#if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)
+#define ISB_SET
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AESOPT_H */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab.h b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab.h
new file mode 100644
index 000000000000..33cdb6c6f9fe
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab.h
@@ -0,0 +1,165 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue Date: 20/12/2007
+ *
+ * This file contains the code for declaring the tables needed to implement
+ * AES. The file aesopt.h is assumed to be included before this header file.
+ * If there are no global variables, the definitions here can be used to put
+ * the AES tables in a structure so that a pointer can then be added to the
+ * AES context to pass them to the AES routines that need them. If this
+ * facility is used, the calling program has to ensure that this pointer is
+ * managed appropriately. In particular, the value of the t_dec(in, it) item
+ * in the table structure must be set to zero in order to ensure that the
+ * tables are initialised. In practice the three code sequences in aeskey.c
+ * that control the calls to aes_init() and the aes_init() routine itself will
+ * have to be changed for a specific implementation. If global variables are
+ * available it will generally be preferable to use them with the precomputed
+ * FIXED_TABLES option that uses static global tables.
+ *
+ * The following defines can be used to control the way the tables
+ * are defined, initialised and used in embedded environments that
+ * require special features for these purposes
+ *
+ * the 't_dec' construction is used to declare fixed table arrays
+ * the 't_set' construction is used to set fixed table values
+ * the 't_use' construction is used to access fixed table values
+ *
+ * 256 byte tables:
+ *
+ * t_xxx(s, box) => forward S box
+ * t_xxx(i, box) => inverse S box
+ *
+ * 256 32-bit word OR 4 x 256 32-bit word tables:
+ *
+ * t_xxx(f, n) => forward normal round
+ * t_xxx(f, l) => forward last round
+ * t_xxx(i, n) => inverse normal round
+ * t_xxx(i, l) => inverse last round
+ * t_xxx(l, s) => key schedule table
+ * t_xxx(i, m) => key schedule table
+ *
+ * Other variables and tables:
+ *
+ * t_xxx(r, c) => the rcon table
+ */
+
+/*
+ * OpenSolaris OS modifications
+ *
+ * 1. Added __cplusplus and _AESTAB_H header guards
+ * 2. Added header file sys/types.h
+ * 3. Removed code defined for _MSC_VER
+ * 4. Changed all variables to "static const"
+ * 5. Changed uint_8t and uint_32t to uint8_t and uint32_t
+ * 6. Cstyled and hdrchk code
+ */
+
+#ifndef _AESTAB_H
+#define _AESTAB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#define t_dec(m, n) t_##m##n
+#define t_set(m, n) t_##m##n
+#define t_use(m, n) t_##m##n
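+
+/*
+ * These simply paste tokens, e.g. t_dec(f, n) and t_use(f, n) both expand
+ * to the identifier t_fn, and t_dec(i, box) to t_ibox, so the d_1()/d_4()
+ * declarations below and the lookups in aesopt.h name the same objects.
+ */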
+
+#if defined(DO_TABLES) && defined(FIXED_TABLES)
+#define d_1(t, n, b, e) static const t n[256] = b(e)
+#define d_4(t, n, b, e, f, g, h) static const t n[4][256] = \
+ {b(e), b(f), b(g), b(h)}
+static const uint32_t t_dec(r, c)[RC_LENGTH] = rc_data(w0);
+#else
+#define d_1(t, n, b, e) static const t n[256]
+#define d_4(t, n, b, e, f, g, h) static const t n[4][256]
+static const uint32_t t_dec(r, c)[RC_LENGTH];
+#endif
+
+#if defined(SBX_SET)
+ d_1(uint8_t, t_dec(s, box), sb_data, h0);
+#endif
+#if defined(ISB_SET)
+ d_1(uint8_t, t_dec(i, box), isb_data, h0);
+#endif
+
+#if defined(FT1_SET)
+ d_1(uint32_t, t_dec(f, n), sb_data, u0);
+#endif
+#if defined(FT4_SET)
+ d_4(uint32_t, t_dec(f, n), sb_data, u0, u1, u2, u3);
+#endif
+
+#if defined(FL1_SET)
+ d_1(uint32_t, t_dec(f, l), sb_data, w0);
+#endif
+#if defined(FL4_SET)
+ d_4(uint32_t, t_dec(f, l), sb_data, w0, w1, w2, w3);
+#endif
+
+#if defined(IT1_SET)
+ d_1(uint32_t, t_dec(i, n), isb_data, v0);
+#endif
+#if defined(IT4_SET)
+ d_4(uint32_t, t_dec(i, n), isb_data, v0, v1, v2, v3);
+#endif
+
+#if defined(IL1_SET)
+ d_1(uint32_t, t_dec(i, l), isb_data, w0);
+#endif
+#if defined(IL4_SET)
+ d_4(uint32_t, t_dec(i, l), isb_data, w0, w1, w2, w3);
+#endif
+
+#if defined(LS1_SET)
+#if defined(FL1_SET)
+#undef LS1_SET
+#else
+ d_1(uint32_t, t_dec(l, s), sb_data, w0);
+#endif
+#endif
+
+#if defined(LS4_SET)
+#if defined(FL4_SET)
+#undef LS4_SET
+#else
+ d_4(uint32_t, t_dec(l, s), sb_data, w0, w1, w2, w3);
+#endif
+#endif
+
+#if defined(IM1_SET)
+ d_1(uint32_t, t_dec(i, m), mm_data, v0);
+#endif
+#if defined(IM4_SET)
+ d_4(uint32_t, t_dec(i, m), mm_data, v0, v1, v2, v3);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AESTAB_H */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h
new file mode 100644
index 000000000000..eb13f72b10d8
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h
@@ -0,0 +1,594 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _AESTAB2_H
+#define _AESTAB2_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * To create this file for OpenSolaris:
+ * 1. Compile and run tablegen.c, from aes-src-04-03-08.zip,
+ * after defining ASM_AMD64_C
+ * 2. mv aestab2.c aestab2.h
+ * 3. Add __cplusplus and _AESTAB2_H header guards
+ * 4. Add #include <aes_impl.h>
+ * 5. Change "uint_32t" to "uint32_t"
+ * 6. Change all variables to "static const"
+ * 7. Cstyle and hdrchk this file
+ */
+
+#include <aes/aes_impl.h>
+
+static const uint32_t t_rc[RC_LENGTH] =
+{
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080,
+ 0x0000001b, 0x00000036
+};
+
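+/*
+ * t_ls[i][b] is the AES S-box value of b shifted into byte lane i (for
+ * example t_ls[0][1] == 0x0000007c and t_ls[1][1] == 0x00007c00), so the
+ * four_tables() lookup in aesopt.h assembles the SubWord step of the key
+ * schedule one byte lane at a time.
+ */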
+static const uint32_t t_ls[4][256] =
+{
+ {
+ 0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
+ 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
+ 0x00000030, 0x00000001, 0x00000067, 0x0000002b,
+ 0x000000fe, 0x000000d7, 0x000000ab, 0x00000076,
+ 0x000000ca, 0x00000082, 0x000000c9, 0x0000007d,
+ 0x000000fa, 0x00000059, 0x00000047, 0x000000f0,
+ 0x000000ad, 0x000000d4, 0x000000a2, 0x000000af,
+ 0x0000009c, 0x000000a4, 0x00000072, 0x000000c0,
+ 0x000000b7, 0x000000fd, 0x00000093, 0x00000026,
+ 0x00000036, 0x0000003f, 0x000000f7, 0x000000cc,
+ 0x00000034, 0x000000a5, 0x000000e5, 0x000000f1,
+ 0x00000071, 0x000000d8, 0x00000031, 0x00000015,
+ 0x00000004, 0x000000c7, 0x00000023, 0x000000c3,
+ 0x00000018, 0x00000096, 0x00000005, 0x0000009a,
+ 0x00000007, 0x00000012, 0x00000080, 0x000000e2,
+ 0x000000eb, 0x00000027, 0x000000b2, 0x00000075,
+ 0x00000009, 0x00000083, 0x0000002c, 0x0000001a,
+ 0x0000001b, 0x0000006e, 0x0000005a, 0x000000a0,
+ 0x00000052, 0x0000003b, 0x000000d6, 0x000000b3,
+ 0x00000029, 0x000000e3, 0x0000002f, 0x00000084,
+ 0x00000053, 0x000000d1, 0x00000000, 0x000000ed,
+ 0x00000020, 0x000000fc, 0x000000b1, 0x0000005b,
+ 0x0000006a, 0x000000cb, 0x000000be, 0x00000039,
+ 0x0000004a, 0x0000004c, 0x00000058, 0x000000cf,
+ 0x000000d0, 0x000000ef, 0x000000aa, 0x000000fb,
+ 0x00000043, 0x0000004d, 0x00000033, 0x00000085,
+ 0x00000045, 0x000000f9, 0x00000002, 0x0000007f,
+ 0x00000050, 0x0000003c, 0x0000009f, 0x000000a8,
+ 0x00000051, 0x000000a3, 0x00000040, 0x0000008f,
+ 0x00000092, 0x0000009d, 0x00000038, 0x000000f5,
+ 0x000000bc, 0x000000b6, 0x000000da, 0x00000021,
+ 0x00000010, 0x000000ff, 0x000000f3, 0x000000d2,
+ 0x000000cd, 0x0000000c, 0x00000013, 0x000000ec,
+ 0x0000005f, 0x00000097, 0x00000044, 0x00000017,
+ 0x000000c4, 0x000000a7, 0x0000007e, 0x0000003d,
+ 0x00000064, 0x0000005d, 0x00000019, 0x00000073,
+ 0x00000060, 0x00000081, 0x0000004f, 0x000000dc,
+ 0x00000022, 0x0000002a, 0x00000090, 0x00000088,
+ 0x00000046, 0x000000ee, 0x000000b8, 0x00000014,
+ 0x000000de, 0x0000005e, 0x0000000b, 0x000000db,
+ 0x000000e0, 0x00000032, 0x0000003a, 0x0000000a,
+ 0x00000049, 0x00000006, 0x00000024, 0x0000005c,
+ 0x000000c2, 0x000000d3, 0x000000ac, 0x00000062,
+ 0x00000091, 0x00000095, 0x000000e4, 0x00000079,
+ 0x000000e7, 0x000000c8, 0x00000037, 0x0000006d,
+ 0x0000008d, 0x000000d5, 0x0000004e, 0x000000a9,
+ 0x0000006c, 0x00000056, 0x000000f4, 0x000000ea,
+ 0x00000065, 0x0000007a, 0x000000ae, 0x00000008,
+ 0x000000ba, 0x00000078, 0x00000025, 0x0000002e,
+ 0x0000001c, 0x000000a6, 0x000000b4, 0x000000c6,
+ 0x000000e8, 0x000000dd, 0x00000074, 0x0000001f,
+ 0x0000004b, 0x000000bd, 0x0000008b, 0x0000008a,
+ 0x00000070, 0x0000003e, 0x000000b5, 0x00000066,
+ 0x00000048, 0x00000003, 0x000000f6, 0x0000000e,
+ 0x00000061, 0x00000035, 0x00000057, 0x000000b9,
+ 0x00000086, 0x000000c1, 0x0000001d, 0x0000009e,
+ 0x000000e1, 0x000000f8, 0x00000098, 0x00000011,
+ 0x00000069, 0x000000d9, 0x0000008e, 0x00000094,
+ 0x0000009b, 0x0000001e, 0x00000087, 0x000000e9,
+ 0x000000ce, 0x00000055, 0x00000028, 0x000000df,
+ 0x0000008c, 0x000000a1, 0x00000089, 0x0000000d,
+ 0x000000bf, 0x000000e6, 0x00000042, 0x00000068,
+ 0x00000041, 0x00000099, 0x0000002d, 0x0000000f,
+ 0x000000b0, 0x00000054, 0x000000bb, 0x00000016
+ },
+ {
+ 0x00006300, 0x00007c00, 0x00007700, 0x00007b00,
+ 0x0000f200, 0x00006b00, 0x00006f00, 0x0000c500,
+ 0x00003000, 0x00000100, 0x00006700, 0x00002b00,
+ 0x0000fe00, 0x0000d700, 0x0000ab00, 0x00007600,
+ 0x0000ca00, 0x00008200, 0x0000c900, 0x00007d00,
+ 0x0000fa00, 0x00005900, 0x00004700, 0x0000f000,
+ 0x0000ad00, 0x0000d400, 0x0000a200, 0x0000af00,
+ 0x00009c00, 0x0000a400, 0x00007200, 0x0000c000,
+ 0x0000b700, 0x0000fd00, 0x00009300, 0x00002600,
+ 0x00003600, 0x00003f00, 0x0000f700, 0x0000cc00,
+ 0x00003400, 0x0000a500, 0x0000e500, 0x0000f100,
+ 0x00007100, 0x0000d800, 0x00003100, 0x00001500,
+ 0x00000400, 0x0000c700, 0x00002300, 0x0000c300,
+ 0x00001800, 0x00009600, 0x00000500, 0x00009a00,
+ 0x00000700, 0x00001200, 0x00008000, 0x0000e200,
+ 0x0000eb00, 0x00002700, 0x0000b200, 0x00007500,
+ 0x00000900, 0x00008300, 0x00002c00, 0x00001a00,
+ 0x00001b00, 0x00006e00, 0x00005a00, 0x0000a000,
+ 0x00005200, 0x00003b00, 0x0000d600, 0x0000b300,
+ 0x00002900, 0x0000e300, 0x00002f00, 0x00008400,
+ 0x00005300, 0x0000d100, 0x00000000, 0x0000ed00,
+ 0x00002000, 0x0000fc00, 0x0000b100, 0x00005b00,
+ 0x00006a00, 0x0000cb00, 0x0000be00, 0x00003900,
+ 0x00004a00, 0x00004c00, 0x00005800, 0x0000cf00,
+ 0x0000d000, 0x0000ef00, 0x0000aa00, 0x0000fb00,
+ 0x00004300, 0x00004d00, 0x00003300, 0x00008500,
+ 0x00004500, 0x0000f900, 0x00000200, 0x00007f00,
+ 0x00005000, 0x00003c00, 0x00009f00, 0x0000a800,
+ 0x00005100, 0x0000a300, 0x00004000, 0x00008f00,
+ 0x00009200, 0x00009d00, 0x00003800, 0x0000f500,
+ 0x0000bc00, 0x0000b600, 0x0000da00, 0x00002100,
+ 0x00001000, 0x0000ff00, 0x0000f300, 0x0000d200,
+ 0x0000cd00, 0x00000c00, 0x00001300, 0x0000ec00,
+ 0x00005f00, 0x00009700, 0x00004400, 0x00001700,
+ 0x0000c400, 0x0000a700, 0x00007e00, 0x00003d00,
+ 0x00006400, 0x00005d00, 0x00001900, 0x00007300,
+ 0x00006000, 0x00008100, 0x00004f00, 0x0000dc00,
+ 0x00002200, 0x00002a00, 0x00009000, 0x00008800,
+ 0x00004600, 0x0000ee00, 0x0000b800, 0x00001400,
+ 0x0000de00, 0x00005e00, 0x00000b00, 0x0000db00,
+ 0x0000e000, 0x00003200, 0x00003a00, 0x00000a00,
+ 0x00004900, 0x00000600, 0x00002400, 0x00005c00,
+ 0x0000c200, 0x0000d300, 0x0000ac00, 0x00006200,
+ 0x00009100, 0x00009500, 0x0000e400, 0x00007900,
+ 0x0000e700, 0x0000c800, 0x00003700, 0x00006d00,
+ 0x00008d00, 0x0000d500, 0x00004e00, 0x0000a900,
+ 0x00006c00, 0x00005600, 0x0000f400, 0x0000ea00,
+ 0x00006500, 0x00007a00, 0x0000ae00, 0x00000800,
+ 0x0000ba00, 0x00007800, 0x00002500, 0x00002e00,
+ 0x00001c00, 0x0000a600, 0x0000b400, 0x0000c600,
+ 0x0000e800, 0x0000dd00, 0x00007400, 0x00001f00,
+ 0x00004b00, 0x0000bd00, 0x00008b00, 0x00008a00,
+ 0x00007000, 0x00003e00, 0x0000b500, 0x00006600,
+ 0x00004800, 0x00000300, 0x0000f600, 0x00000e00,
+ 0x00006100, 0x00003500, 0x00005700, 0x0000b900,
+ 0x00008600, 0x0000c100, 0x00001d00, 0x00009e00,
+ 0x0000e100, 0x0000f800, 0x00009800, 0x00001100,
+ 0x00006900, 0x0000d900, 0x00008e00, 0x00009400,
+ 0x00009b00, 0x00001e00, 0x00008700, 0x0000e900,
+ 0x0000ce00, 0x00005500, 0x00002800, 0x0000df00,
+ 0x00008c00, 0x0000a100, 0x00008900, 0x00000d00,
+ 0x0000bf00, 0x0000e600, 0x00004200, 0x00006800,
+ 0x00004100, 0x00009900, 0x00002d00, 0x00000f00,
+ 0x0000b000, 0x00005400, 0x0000bb00, 0x00001600
+ },
+ {
+ 0x00630000, 0x007c0000, 0x00770000, 0x007b0000,
+ 0x00f20000, 0x006b0000, 0x006f0000, 0x00c50000,
+ 0x00300000, 0x00010000, 0x00670000, 0x002b0000,
+ 0x00fe0000, 0x00d70000, 0x00ab0000, 0x00760000,
+ 0x00ca0000, 0x00820000, 0x00c90000, 0x007d0000,
+ 0x00fa0000, 0x00590000, 0x00470000, 0x00f00000,
+ 0x00ad0000, 0x00d40000, 0x00a20000, 0x00af0000,
+ 0x009c0000, 0x00a40000, 0x00720000, 0x00c00000,
+ 0x00b70000, 0x00fd0000, 0x00930000, 0x00260000,
+ 0x00360000, 0x003f0000, 0x00f70000, 0x00cc0000,
+ 0x00340000, 0x00a50000, 0x00e50000, 0x00f10000,
+ 0x00710000, 0x00d80000, 0x00310000, 0x00150000,
+ 0x00040000, 0x00c70000, 0x00230000, 0x00c30000,
+ 0x00180000, 0x00960000, 0x00050000, 0x009a0000,
+ 0x00070000, 0x00120000, 0x00800000, 0x00e20000,
+ 0x00eb0000, 0x00270000, 0x00b20000, 0x00750000,
+ 0x00090000, 0x00830000, 0x002c0000, 0x001a0000,
+ 0x001b0000, 0x006e0000, 0x005a0000, 0x00a00000,
+ 0x00520000, 0x003b0000, 0x00d60000, 0x00b30000,
+ 0x00290000, 0x00e30000, 0x002f0000, 0x00840000,
+ 0x00530000, 0x00d10000, 0x00000000, 0x00ed0000,
+ 0x00200000, 0x00fc0000, 0x00b10000, 0x005b0000,
+ 0x006a0000, 0x00cb0000, 0x00be0000, 0x00390000,
+ 0x004a0000, 0x004c0000, 0x00580000, 0x00cf0000,
+ 0x00d00000, 0x00ef0000, 0x00aa0000, 0x00fb0000,
+ 0x00430000, 0x004d0000, 0x00330000, 0x00850000,
+ 0x00450000, 0x00f90000, 0x00020000, 0x007f0000,
+ 0x00500000, 0x003c0000, 0x009f0000, 0x00a80000,
+ 0x00510000, 0x00a30000, 0x00400000, 0x008f0000,
+ 0x00920000, 0x009d0000, 0x00380000, 0x00f50000,
+ 0x00bc0000, 0x00b60000, 0x00da0000, 0x00210000,
+ 0x00100000, 0x00ff0000, 0x00f30000, 0x00d20000,
+ 0x00cd0000, 0x000c0000, 0x00130000, 0x00ec0000,
+ 0x005f0000, 0x00970000, 0x00440000, 0x00170000,
+ 0x00c40000, 0x00a70000, 0x007e0000, 0x003d0000,
+ 0x00640000, 0x005d0000, 0x00190000, 0x00730000,
+ 0x00600000, 0x00810000, 0x004f0000, 0x00dc0000,
+ 0x00220000, 0x002a0000, 0x00900000, 0x00880000,
+ 0x00460000, 0x00ee0000, 0x00b80000, 0x00140000,
+ 0x00de0000, 0x005e0000, 0x000b0000, 0x00db0000,
+ 0x00e00000, 0x00320000, 0x003a0000, 0x000a0000,
+ 0x00490000, 0x00060000, 0x00240000, 0x005c0000,
+ 0x00c20000, 0x00d30000, 0x00ac0000, 0x00620000,
+ 0x00910000, 0x00950000, 0x00e40000, 0x00790000,
+ 0x00e70000, 0x00c80000, 0x00370000, 0x006d0000,
+ 0x008d0000, 0x00d50000, 0x004e0000, 0x00a90000,
+ 0x006c0000, 0x00560000, 0x00f40000, 0x00ea0000,
+ 0x00650000, 0x007a0000, 0x00ae0000, 0x00080000,
+ 0x00ba0000, 0x00780000, 0x00250000, 0x002e0000,
+ 0x001c0000, 0x00a60000, 0x00b40000, 0x00c60000,
+ 0x00e80000, 0x00dd0000, 0x00740000, 0x001f0000,
+ 0x004b0000, 0x00bd0000, 0x008b0000, 0x008a0000,
+ 0x00700000, 0x003e0000, 0x00b50000, 0x00660000,
+ 0x00480000, 0x00030000, 0x00f60000, 0x000e0000,
+ 0x00610000, 0x00350000, 0x00570000, 0x00b90000,
+ 0x00860000, 0x00c10000, 0x001d0000, 0x009e0000,
+ 0x00e10000, 0x00f80000, 0x00980000, 0x00110000,
+ 0x00690000, 0x00d90000, 0x008e0000, 0x00940000,
+ 0x009b0000, 0x001e0000, 0x00870000, 0x00e90000,
+ 0x00ce0000, 0x00550000, 0x00280000, 0x00df0000,
+ 0x008c0000, 0x00a10000, 0x00890000, 0x000d0000,
+ 0x00bf0000, 0x00e60000, 0x00420000, 0x00680000,
+ 0x00410000, 0x00990000, 0x002d0000, 0x000f0000,
+ 0x00b00000, 0x00540000, 0x00bb0000, 0x00160000
+ },
+ {
+ 0x63000000, 0x7c000000, 0x77000000, 0x7b000000,
+ 0xf2000000, 0x6b000000, 0x6f000000, 0xc5000000,
+ 0x30000000, 0x01000000, 0x67000000, 0x2b000000,
+ 0xfe000000, 0xd7000000, 0xab000000, 0x76000000,
+ 0xca000000, 0x82000000, 0xc9000000, 0x7d000000,
+ 0xfa000000, 0x59000000, 0x47000000, 0xf0000000,
+ 0xad000000, 0xd4000000, 0xa2000000, 0xaf000000,
+ 0x9c000000, 0xa4000000, 0x72000000, 0xc0000000,
+ 0xb7000000, 0xfd000000, 0x93000000, 0x26000000,
+ 0x36000000, 0x3f000000, 0xf7000000, 0xcc000000,
+ 0x34000000, 0xa5000000, 0xe5000000, 0xf1000000,
+ 0x71000000, 0xd8000000, 0x31000000, 0x15000000,
+ 0x04000000, 0xc7000000, 0x23000000, 0xc3000000,
+ 0x18000000, 0x96000000, 0x05000000, 0x9a000000,
+ 0x07000000, 0x12000000, 0x80000000, 0xe2000000,
+ 0xeb000000, 0x27000000, 0xb2000000, 0x75000000,
+ 0x09000000, 0x83000000, 0x2c000000, 0x1a000000,
+ 0x1b000000, 0x6e000000, 0x5a000000, 0xa0000000,
+ 0x52000000, 0x3b000000, 0xd6000000, 0xb3000000,
+ 0x29000000, 0xe3000000, 0x2f000000, 0x84000000,
+ 0x53000000, 0xd1000000, 0x00000000, 0xed000000,
+ 0x20000000, 0xfc000000, 0xb1000000, 0x5b000000,
+ 0x6a000000, 0xcb000000, 0xbe000000, 0x39000000,
+ 0x4a000000, 0x4c000000, 0x58000000, 0xcf000000,
+ 0xd0000000, 0xef000000, 0xaa000000, 0xfb000000,
+ 0x43000000, 0x4d000000, 0x33000000, 0x85000000,
+ 0x45000000, 0xf9000000, 0x02000000, 0x7f000000,
+ 0x50000000, 0x3c000000, 0x9f000000, 0xa8000000,
+ 0x51000000, 0xa3000000, 0x40000000, 0x8f000000,
+ 0x92000000, 0x9d000000, 0x38000000, 0xf5000000,
+ 0xbc000000, 0xb6000000, 0xda000000, 0x21000000,
+ 0x10000000, 0xff000000, 0xf3000000, 0xd2000000,
+ 0xcd000000, 0x0c000000, 0x13000000, 0xec000000,
+ 0x5f000000, 0x97000000, 0x44000000, 0x17000000,
+ 0xc4000000, 0xa7000000, 0x7e000000, 0x3d000000,
+ 0x64000000, 0x5d000000, 0x19000000, 0x73000000,
+ 0x60000000, 0x81000000, 0x4f000000, 0xdc000000,
+ 0x22000000, 0x2a000000, 0x90000000, 0x88000000,
+ 0x46000000, 0xee000000, 0xb8000000, 0x14000000,
+ 0xde000000, 0x5e000000, 0x0b000000, 0xdb000000,
+ 0xe0000000, 0x32000000, 0x3a000000, 0x0a000000,
+ 0x49000000, 0x06000000, 0x24000000, 0x5c000000,
+ 0xc2000000, 0xd3000000, 0xac000000, 0x62000000,
+ 0x91000000, 0x95000000, 0xe4000000, 0x79000000,
+ 0xe7000000, 0xc8000000, 0x37000000, 0x6d000000,
+ 0x8d000000, 0xd5000000, 0x4e000000, 0xa9000000,
+ 0x6c000000, 0x56000000, 0xf4000000, 0xea000000,
+ 0x65000000, 0x7a000000, 0xae000000, 0x08000000,
+ 0xba000000, 0x78000000, 0x25000000, 0x2e000000,
+ 0x1c000000, 0xa6000000, 0xb4000000, 0xc6000000,
+ 0xe8000000, 0xdd000000, 0x74000000, 0x1f000000,
+ 0x4b000000, 0xbd000000, 0x8b000000, 0x8a000000,
+ 0x70000000, 0x3e000000, 0xb5000000, 0x66000000,
+ 0x48000000, 0x03000000, 0xf6000000, 0x0e000000,
+ 0x61000000, 0x35000000, 0x57000000, 0xb9000000,
+ 0x86000000, 0xc1000000, 0x1d000000, 0x9e000000,
+ 0xe1000000, 0xf8000000, 0x98000000, 0x11000000,
+ 0x69000000, 0xd9000000, 0x8e000000, 0x94000000,
+ 0x9b000000, 0x1e000000, 0x87000000, 0xe9000000,
+ 0xce000000, 0x55000000, 0x28000000, 0xdf000000,
+ 0x8c000000, 0xa1000000, 0x89000000, 0x0d000000,
+ 0xbf000000, 0xe6000000, 0x42000000, 0x68000000,
+ 0x41000000, 0x99000000, 0x2d000000, 0x0f000000,
+ 0xb0000000, 0x54000000, 0xbb000000, 0x16000000
+ }
+};
+
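+/*
+ * t_im[i][b] packs the inverse MixColumns products {0e}.b, {09}.b, {0d}.b
+ * and {0b}.b into one word, rotated for byte lane i (for example
+ * t_im[0][1] == 0x0b0d090e, i.e. 0x0e in the low byte through 0x0b in the
+ * high byte).  XORing the four lookups, as inv_mcol() does in aesopt.h,
+ * applies InvMixColumns to a whole column when the decryption key schedule
+ * is derived from the encryption one.
+ */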
+static const uint32_t t_im[4][256] =
+{
+ {
+ 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
+ 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
+ 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
+ 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
+ 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
+ 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
+ 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
+ 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
+ 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
+ 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
+ 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
+ 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
+ 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
+ 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
+ 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
+ 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
+ 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
+ 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
+ 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
+ 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
+ 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
+ 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
+ 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
+ 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
+ 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
+ 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
+ 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
+ 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
+ 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
+ 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
+ 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
+ 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
+ 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
+ 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
+ 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
+ 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
+ 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
+ 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
+ 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
+ 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
+ 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
+ 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
+ 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
+ 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
+ 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
+ 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
+ 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
+ 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
+ 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
+ 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
+ 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
+ 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
+ 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
+ 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
+ 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
+ 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
+ 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
+ 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
+ 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
+ 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
+ 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
+ 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
+ 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
+ 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d
+ },
+ {
+ 0x00000000, 0x0d090e0b, 0x1a121c16, 0x171b121d,
+ 0x3424382c, 0x392d3627, 0x2e36243a, 0x233f2a31,
+ 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245,
+ 0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69,
+ 0xd090e0b0, 0xdd99eebb, 0xca82fca6, 0xc78bf2ad,
+ 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81,
+ 0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5,
+ 0x8cfca8c4, 0x81f5a6cf, 0x96eeb4d2, 0x9be7bad9,
+ 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966,
+ 0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a,
+ 0xd373ab23, 0xde7aa528, 0xc961b735, 0xc468b93e,
+ 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112,
+ 0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6,
+ 0x5f8f03e7, 0x52860dec, 0x459d1ff1, 0x489411fa,
+ 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e,
+ 0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2,
+ 0x6d76adf6, 0x607fa3fd, 0x7764b1e0, 0x7a6dbfeb,
+ 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7,
+ 0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3,
+ 0x311ae582, 0x3c13eb89, 0x2b08f994, 0x2601f79f,
+ 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b,
+ 0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777,
+ 0xd5ae3d1e, 0xd8a73315, 0xcfbc2108, 0xc2b52f03,
+ 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f,
+ 0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490,
+ 0xe2694ea1, 0xef6040aa, 0xf87b52b7, 0xf5725cbc,
+ 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8,
+ 0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4,
+ 0x06dd963d, 0x0bd49836, 0x1ccf8a2b, 0x11c68420,
+ 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c,
+ 0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478,
+ 0x5ab1de49, 0x57b8d042, 0x40a3c25f, 0x4daacc54,
+ 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea,
+ 0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6,
+ 0xb2a431af, 0xbfad3fa4, 0xa8b62db9, 0xa5bf23b2,
+ 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e,
+ 0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a,
+ 0x3e58996b, 0x33519760, 0x244a857d, 0x29438b76,
+ 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302,
+ 0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e,
+ 0x61d79a8c, 0x6cde9487, 0x7bc5869a, 0x76cc8891,
+ 0x55f3a2a0, 0x58faacab, 0x4fe1beb6, 0x42e8b0bd,
+ 0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9,
+ 0x3dbbd2f8, 0x30b2dcf3, 0x27a9ceee, 0x2aa0c0e5,
+ 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821,
+ 0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d,
+ 0xd90f0a64, 0xd406046f, 0xc31d1672, 0xce141879,
+ 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055,
+ 0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c,
+ 0x83bed42d, 0x8eb7da26, 0x99acc83b, 0x94a5c630,
+ 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44,
+ 0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668,
+ 0x670a0cb1, 0x6a0302ba, 0x7d1810a7, 0x70111eac,
+ 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680,
+ 0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4,
+ 0x3b6644c5, 0x366f4ace, 0x217458d3, 0x2c7d56d8,
+ 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567,
+ 0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b,
+ 0x64e94722, 0x69e04929, 0x7efb5b34, 0x73f2553f,
+ 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13,
+ 0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7,
+ 0xe815efe6, 0xe51ce1ed, 0xf207f3f0, 0xff0efdfb,
+ 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f,
+ 0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3
+ },
+ {
+ 0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17,
+ 0x24382c34, 0x2d362739, 0x36243a2e, 0x3f2a3123,
+ 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f,
+ 0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b,
+ 0x90e0b0d0, 0x99eebbdd, 0x82fca6ca, 0x8bf2adc7,
+ 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3,
+ 0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af,
+ 0xfca8c48c, 0xf5a6cf81, 0xeeb4d296, 0xe7bad99b,
+ 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac,
+ 0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98,
+ 0x73ab23d3, 0x7aa528de, 0x61b735c9, 0x68b93ec4,
+ 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0,
+ 0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c,
+ 0x8f03e75f, 0x860dec52, 0x9d1ff145, 0x9411fa48,
+ 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14,
+ 0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220,
+ 0x76adf66d, 0x7fa3fd60, 0x64b1e077, 0x6dbfeb7a,
+ 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e,
+ 0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312,
+ 0x1ae58231, 0x13eb893c, 0x08f9942b, 0x01f79f26,
+ 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa,
+ 0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e,
+ 0xae3d1ed5, 0xa73315d8, 0xbc2108cf, 0xb52f03c2,
+ 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6,
+ 0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1,
+ 0x694ea1e2, 0x6040aaef, 0x7b52b7f8, 0x725cbcf5,
+ 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9,
+ 0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d,
+ 0xdd963d06, 0xd498360b, 0xcf8a2b1c, 0xc6842011,
+ 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25,
+ 0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879,
+ 0xb1de495a, 0xb8d04257, 0xa3c25f40, 0xaacc544d,
+ 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd,
+ 0xc879dbee, 0xc177d0e3, 0xda65cdf4, 0xd36bc6f9,
+ 0xa431afb2, 0xad3fa4bf, 0xb62db9a8, 0xbf23b2a5,
+ 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91,
+ 0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d,
+ 0x58996b3e, 0x51976033, 0x4a857d24, 0x438b7629,
+ 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275,
+ 0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41,
+ 0xd79a8c61, 0xde94876c, 0xc5869a7b, 0xcc889176,
+ 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42,
+ 0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e,
+ 0xbbd2f83d, 0xb2dcf330, 0xa9ceee27, 0xa0c0e52a,
+ 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6,
+ 0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92,
+ 0x0f0a64d9, 0x06046fd4, 0x1d1672c3, 0x141879ce,
+ 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa,
+ 0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0,
+ 0xbed42d83, 0xb7da268e, 0xacc83b99, 0xa5c63094,
+ 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8,
+ 0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc,
+ 0x0a0cb167, 0x0302ba6a, 0x1810a77d, 0x111eac70,
+ 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044,
+ 0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418,
+ 0x6644c53b, 0x6f4ace36, 0x7458d321, 0x7d56d82c,
+ 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b,
+ 0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f,
+ 0xe9472264, 0xe0492969, 0xfb5b347e, 0xf2553f73,
+ 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347,
+ 0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb,
+ 0x15efe6e8, 0x1ce1ede5, 0x07f3f0f2, 0x0efdfbff,
+ 0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3,
+ 0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397
+ },
+ {
+ 0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b,
+ 0x382c3424, 0x3627392d, 0x243a2e36, 0x2a31233f,
+ 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53,
+ 0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77,
+ 0xe0b0d090, 0xeebbdd99, 0xfca6ca82, 0xf2adc78b,
+ 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af,
+ 0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3,
+ 0xa8c48cfc, 0xa6cf81f5, 0xb4d296ee, 0xbad99be7,
+ 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20,
+ 0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804,
+ 0xab23d373, 0xa528de7a, 0xb735c961, 0xb93ec468,
+ 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c,
+ 0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0,
+ 0x03e75f8f, 0x0dec5286, 0x1ff1459d, 0x11fa4894,
+ 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8,
+ 0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc,
+ 0xadf66d76, 0xa3fd607f, 0xb1e07764, 0xbfeb7a6d,
+ 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49,
+ 0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225,
+ 0xe582311a, 0xeb893c13, 0xf9942b08, 0xf79f2601,
+ 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd,
+ 0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9,
+ 0x3d1ed5ae, 0x3315d8a7, 0x2108cfbc, 0x2f03c2b5,
+ 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691,
+ 0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156,
+ 0x4ea1e269, 0x40aaef60, 0x52b7f87b, 0x5cbcf572,
+ 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e,
+ 0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a,
+ 0x963d06dd, 0x98360bd4, 0x8a2b1ccf, 0x842011c6,
+ 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2,
+ 0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e,
+ 0xde495ab1, 0xd04257b8, 0xc25f40a3, 0xcc544daa,
+ 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7,
+ 0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3,
+ 0x31afb2a4, 0x3fa4bfad, 0x2db9a8b6, 0x23b2a5bf,
+ 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b,
+ 0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67,
+ 0x996b3e58, 0x97603351, 0x857d244a, 0x8b762943,
+ 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f,
+ 0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b,
+ 0x9a8c61d7, 0x94876cde, 0x869a7bc5, 0x889176cc,
+ 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8,
+ 0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84,
+ 0xd2f83dbb, 0xdcf330b2, 0xceee27a9, 0xc0e52aa0,
+ 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c,
+ 0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278,
+ 0x0a64d90f, 0x046fd406, 0x1672c31d, 0x1879ce14,
+ 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30,
+ 0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081,
+ 0xd42d83be, 0xda268eb7, 0xc83b99ac, 0xc63094a5,
+ 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9,
+ 0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced,
+ 0x0cb1670a, 0x02ba6a03, 0x10a77d18, 0x1eac7011,
+ 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435,
+ 0x7ce90f42, 0x72e2024b, 0x60ff1550, 0x6ef41859,
+ 0x44c53b66, 0x4ace366f, 0x58d32174, 0x56d82c7d,
+ 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba,
+ 0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e,
+ 0x472264e9, 0x492969e0, 0x5b347efb, 0x553f73f2,
+ 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6,
+ 0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a,
+ 0xefe6e815, 0xe1ede51c, 0xf3f0f207, 0xfdfbff0e,
+ 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362,
+ 0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746
+ }
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AESTAB2_H */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
new file mode 100644
index 000000000000..0de1883dc81b
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams
@@ -0,0 +1,36 @@
+Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain copyright notices,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials
+ provided with the distribution.
+
+ * Neither the name of the CRYPTOGAMS nor the names of its
+ copyright holder and contributors may be used to endorse or
+ promote products derived from this software without specific
+ prior written permission.
+
+ALTERNATIVELY, provided that this notice is retained in full, this
+product may be distributed under the terms of the GNU General Public
+License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+those given above.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip
new file mode 100644
index 000000000000..6184759c8b74
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip
@@ -0,0 +1 @@
+PORTIONS OF GCM and GHASH FUNCTIONALITY
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
new file mode 100644
index 000000000000..49cc83d2ee29
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl
@@ -0,0 +1,177 @@
+
+ Apache License
+ Version 2.0, January 2004
+ https://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip
new file mode 100644
index 000000000000..6184759c8b74
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip
@@ -0,0 +1 @@
+PORTIONS OF GCM and GHASH FUNCTIONALITY
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
new file mode 100644
index 000000000000..ed9f660fce5b
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -0,0 +1,1245 @@
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+#
+# AES-NI-CTR+GHASH stitch.
+#
+# February 2013
+#
+# OpenSSL GCM implementation is organized in such way that its
+# performance is rather close to the sum of its streamed components,
+# in the context parallelized AES-NI CTR and modulo-scheduled
+# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation
+# was observed to perform significantly better than the sum of the
+# components on contemporary CPUs, the effort was deemed impossible to
+# justify. This module is based on combination of Intel submissions,
+# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
+# Locktyukhin of Intel Corp. who verified that it reduces shuffles
+# pressure with notable relative improvement, achieving 1.0 cycle per
+# byte processed with 128-bit key on Haswell processor, 0.74 - on
+# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
+# measurements for favourable packet size, one divisible by 96.
+# Applications using the EVP interface will observe a few percent
+# worse performance.]
+#
+# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
+#
+# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
+# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
+
+# Generated once from
+# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/aesni-gcm-x86_64.pl
+# and modified for ICP. Modifications are kept to a bare minimum to ease later
+# upstream merges.
+
+#if defined(__x86_64__) && defined(HAVE_AVX) && \
+ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+
+.extern gcm_avx_can_use_movbe
+
+.text
+
+#ifdef HAVE_MOVBE
+.type _aesni_ctr32_ghash_6x,@function
+.align 32
+_aesni_ctr32_ghash_6x:
+ vmovdqu 32(%r11),%xmm2
+ subq $6,%rdx
+ vpxor %xmm4,%xmm4,%xmm4
+ vmovdqu 0-128(%rcx),%xmm15
+ vpaddb %xmm2,%xmm1,%xmm10
+ vpaddb %xmm2,%xmm10,%xmm11
+ vpaddb %xmm2,%xmm11,%xmm12
+ vpaddb %xmm2,%xmm12,%xmm13
+ vpaddb %xmm2,%xmm13,%xmm14
+ vpxor %xmm15,%xmm1,%xmm9
+ vmovdqu %xmm4,16+8(%rsp)
+ jmp .Loop6x
+
+.align 32
+.Loop6x:
+ addl $100663296,%ebx
+ jc .Lhandle_ctr32
+ vmovdqu 0-32(%r9),%xmm3
+ vpaddb %xmm2,%xmm14,%xmm1
+ vpxor %xmm15,%xmm10,%xmm10
+ vpxor %xmm15,%xmm11,%xmm11
+
+.Lresume_ctr32:
+ vmovdqu %xmm1,(%r8)
+ vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
+ vpxor %xmm15,%xmm12,%xmm12
+ vmovups 16-128(%rcx),%xmm2
+ vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
+ xorq %r12,%r12
+ cmpq %r14,%r15
+
+ vaesenc %xmm2,%xmm9,%xmm9
+ vmovdqu 48+8(%rsp),%xmm0
+ vpxor %xmm15,%xmm13,%xmm13
+ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
+ vaesenc %xmm2,%xmm10,%xmm10
+ vpxor %xmm15,%xmm14,%xmm14
+ setnc %r12b
+ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
+ vaesenc %xmm2,%xmm11,%xmm11
+ vmovdqu 16-32(%r9),%xmm3
+ negq %r12
+ vaesenc %xmm2,%xmm12,%xmm12
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
+ vpxor %xmm4,%xmm8,%xmm8
+ vaesenc %xmm2,%xmm13,%xmm13
+ vpxor %xmm5,%xmm1,%xmm4
+ andq $0x60,%r12
+ vmovups 32-128(%rcx),%xmm15
+ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
+ vaesenc %xmm2,%xmm14,%xmm14
+
+ vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
+ leaq (%r14,%r12,1),%r14
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor 16+8(%rsp),%xmm8,%xmm8
+ vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
+ vmovdqu 64+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 88(%r14),%r13
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 80(%r14),%r12
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,32+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,40+8(%rsp)
+ vmovdqu 48-32(%r9),%xmm5
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 48-128(%rcx),%xmm15
+ vpxor %xmm1,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm2,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpxor %xmm3,%xmm7,%xmm7
+ vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
+ vaesenc %xmm15,%xmm11,%xmm11
+ vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
+ vmovdqu 80+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqu 64-32(%r9),%xmm1
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 64-128(%rcx),%xmm15
+ vpxor %xmm2,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm3,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 72(%r14),%r13
+ vpxor %xmm5,%xmm7,%xmm7
+ vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 64(%r14),%r12
+ vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
+ vmovdqu 96+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,48+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,56+8(%rsp)
+ vpxor %xmm2,%xmm4,%xmm4
+ vmovdqu 96-32(%r9),%xmm2
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 80-128(%rcx),%xmm15
+ vpxor %xmm3,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 56(%r14),%r13
+ vpxor %xmm1,%xmm7,%xmm7
+ vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
+ vpxor 112+8(%rsp),%xmm8,%xmm8
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 48(%r14),%r12
+ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,64+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,72+8(%rsp)
+ vpxor %xmm3,%xmm4,%xmm4
+ vmovdqu 112-32(%r9),%xmm3
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 96-128(%rcx),%xmm15
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm1,%xmm6,%xmm6
+ vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 40(%r14),%r13
+ vpxor %xmm2,%xmm7,%xmm7
+ vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 32(%r14),%r12
+ vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,80+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,88+8(%rsp)
+ vpxor %xmm5,%xmm6,%xmm6
+ vaesenc %xmm15,%xmm14,%xmm14
+ vpxor %xmm1,%xmm6,%xmm6
+
+ vmovups 112-128(%rcx),%xmm15
+ vpslldq $8,%xmm6,%xmm5
+ vpxor %xmm2,%xmm4,%xmm4
+ vmovdqu 16(%r11),%xmm3
+
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm8,%xmm7,%xmm7
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpxor %xmm5,%xmm4,%xmm4
+ movbeq 24(%r14),%r13
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 16(%r14),%r12
+ vpalignr $8,%xmm4,%xmm4,%xmm0
+ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
+ movq %r13,96+8(%rsp)
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r12,104+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ vmovups 128-128(%rcx),%xmm1
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups 144-128(%rcx),%xmm15
+ vaesenc %xmm1,%xmm10,%xmm10
+ vpsrldq $8,%xmm6,%xmm6
+ vaesenc %xmm1,%xmm11,%xmm11
+ vpxor %xmm6,%xmm7,%xmm7
+ vaesenc %xmm1,%xmm12,%xmm12
+ vpxor %xmm0,%xmm4,%xmm4
+ movbeq 8(%r14),%r13
+ vaesenc %xmm1,%xmm13,%xmm13
+ movbeq 0(%r14),%r12
+ vaesenc %xmm1,%xmm14,%xmm14
+ vmovups 160-128(%rcx),%xmm1
+ cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds.
+ jb .Lenc_tail
+
+ vaesenc %xmm15,%xmm9,%xmm9
+ vaesenc %xmm15,%xmm10,%xmm10
+ vaesenc %xmm15,%xmm11,%xmm11
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vaesenc %xmm1,%xmm9,%xmm9
+ vaesenc %xmm1,%xmm10,%xmm10
+ vaesenc %xmm1,%xmm11,%xmm11
+ vaesenc %xmm1,%xmm12,%xmm12
+ vaesenc %xmm1,%xmm13,%xmm13
+ vmovups 176-128(%rcx),%xmm15
+ vaesenc %xmm1,%xmm14,%xmm14
+ vmovups 192-128(%rcx),%xmm1
+ cmpl $14,%ebp // ICP does not zero key schedule.
+ jb .Lenc_tail
+
+ vaesenc %xmm15,%xmm9,%xmm9
+ vaesenc %xmm15,%xmm10,%xmm10
+ vaesenc %xmm15,%xmm11,%xmm11
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vaesenc %xmm1,%xmm9,%xmm9
+ vaesenc %xmm1,%xmm10,%xmm10
+ vaesenc %xmm1,%xmm11,%xmm11
+ vaesenc %xmm1,%xmm12,%xmm12
+ vaesenc %xmm1,%xmm13,%xmm13
+ vmovups 208-128(%rcx),%xmm15
+ vaesenc %xmm1,%xmm14,%xmm14
+ vmovups 224-128(%rcx),%xmm1
+ jmp .Lenc_tail
+
+.align 32
+.Lhandle_ctr32:
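+ // Slow counter path: the low byte of the 32-bit counter is about to
+ // wrap, so byte-swap the counter (shuffle mask presumably at (%r11)),
+ // bump all six copies with full dword additions, swap back to the
+ // big-endian wire format, and rejoin the fast path at .Lresume_ctr32.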
+ vmovdqu (%r11),%xmm0
+ vpshufb %xmm0,%xmm1,%xmm6
+ vmovdqu 48(%r11),%xmm5
+ vpaddd 64(%r11),%xmm6,%xmm10
+ vpaddd %xmm5,%xmm6,%xmm11
+ vmovdqu 0-32(%r9),%xmm3
+ vpaddd %xmm5,%xmm10,%xmm12
+ vpshufb %xmm0,%xmm10,%xmm10
+ vpaddd %xmm5,%xmm11,%xmm13
+ vpshufb %xmm0,%xmm11,%xmm11
+ vpxor %xmm15,%xmm10,%xmm10
+ vpaddd %xmm5,%xmm12,%xmm14
+ vpshufb %xmm0,%xmm12,%xmm12
+ vpxor %xmm15,%xmm11,%xmm11
+ vpaddd %xmm5,%xmm13,%xmm1
+ vpshufb %xmm0,%xmm13,%xmm13
+ vpshufb %xmm0,%xmm14,%xmm14
+ vpshufb %xmm0,%xmm1,%xmm1
+ jmp .Lresume_ctr32
+
+.align 32
+.Lenc_tail:
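+ // Final round: each plaintext block is XORed into the last round key so
+ // that vaesenclast completes AES(counter) and yields the ciphertext in
+ // one step, while the deferred GHASH reduction proceeds in parallel.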
+ vaesenc %xmm15,%xmm9,%xmm9
+ vmovdqu %xmm7,16+8(%rsp)
+ vpalignr $8,%xmm4,%xmm4,%xmm8
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
+ vpxor 0(%rdi),%xmm1,%xmm2
+ vaesenc %xmm15,%xmm11,%xmm11
+ vpxor 16(%rdi),%xmm1,%xmm0
+ vaesenc %xmm15,%xmm12,%xmm12
+ vpxor 32(%rdi),%xmm1,%xmm5
+ vaesenc %xmm15,%xmm13,%xmm13
+ vpxor 48(%rdi),%xmm1,%xmm6
+ vaesenc %xmm15,%xmm14,%xmm14
+ vpxor 64(%rdi),%xmm1,%xmm7
+ vpxor 80(%rdi),%xmm1,%xmm3
+ vmovdqu (%r8),%xmm1
+
+ vaesenclast %xmm2,%xmm9,%xmm9
+ vmovdqu 32(%r11),%xmm2
+ vaesenclast %xmm0,%xmm10,%xmm10
+ vpaddb %xmm2,%xmm1,%xmm0
+ movq %r13,112+8(%rsp)
+ leaq 96(%rdi),%rdi
+ vaesenclast %xmm5,%xmm11,%xmm11
+ vpaddb %xmm2,%xmm0,%xmm5
+ movq %r12,120+8(%rsp)
+ leaq 96(%rsi),%rsi
+ vmovdqu 0-128(%rcx),%xmm15
+ vaesenclast %xmm6,%xmm12,%xmm12
+ vpaddb %xmm2,%xmm5,%xmm6
+ vaesenclast %xmm7,%xmm13,%xmm13
+ vpaddb %xmm2,%xmm6,%xmm7
+ vaesenclast %xmm3,%xmm14,%xmm14
+ vpaddb %xmm2,%xmm7,%xmm3
+
+ addq $0x60,%r10
+ subq $0x6,%rdx
+ jc .L6x_done
+
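+ // At least six more blocks remain: store this pass's ciphertext and
+ // promote the pre-incremented counters into the working registers for
+ // the next .Loop6x pass (when the branch above is taken, the final six
+ // ciphertext blocks stay in %xmm9-%xmm14).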
+ vmovups %xmm9,-96(%rsi)
+ vpxor %xmm15,%xmm1,%xmm9
+ vmovups %xmm10,-80(%rsi)
+ vmovdqa %xmm0,%xmm10
+ vmovups %xmm11,-64(%rsi)
+ vmovdqa %xmm5,%xmm11
+ vmovups %xmm12,-48(%rsi)
+ vmovdqa %xmm6,%xmm12
+ vmovups %xmm13,-32(%rsi)
+ vmovdqa %xmm7,%xmm13
+ vmovups %xmm14,-16(%rsi)
+ vmovdqa %xmm3,%xmm14
+ vmovdqu 32+8(%rsp),%xmm7
+ jmp .Loop6x
+
+.L6x_done:
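+ // Fold the pending GHASH terms (one saved at 16+8(%rsp), one in %xmm4)
+ // into the accumulator in %xmm8 before returning.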
+ vpxor 16+8(%rsp),%xmm8,%xmm8
+ vpxor %xmm4,%xmm8,%xmm8
+
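+ // 0xf3,0xc3 encodes "rep ret".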
+ .byte 0xf3,0xc3
+.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
+#endif /* ifdef HAVE_MOVBE */
+
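+ // Same 6x interleaved CTR/GHASH loop as _aesni_ctr32_ghash_6x, but the
+ // byte-swapping movbeq loads of the GHASH input are replaced with
+ // movq + bswapq pairs so the routine also runs on CPUs without MOVBE.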
+.type _aesni_ctr32_ghash_no_movbe_6x,@function
+.align 32
+_aesni_ctr32_ghash_no_movbe_6x:
+ vmovdqu 32(%r11),%xmm2
+ subq $6,%rdx
+ vpxor %xmm4,%xmm4,%xmm4
+ vmovdqu 0-128(%rcx),%xmm15
+ vpaddb %xmm2,%xmm1,%xmm10
+ vpaddb %xmm2,%xmm10,%xmm11
+ vpaddb %xmm2,%xmm11,%xmm12
+ vpaddb %xmm2,%xmm12,%xmm13
+ vpaddb %xmm2,%xmm13,%xmm14
+ vpxor %xmm15,%xmm1,%xmm9
+ vmovdqu %xmm4,16+8(%rsp)
+ jmp .Loop6x_nmb
+
+.align 32
+.Loop6x_nmb:
+ addl $100663296,%ebx
+ jc .Lhandle_ctr32_nmb
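+ // 0x06000000 adds 6 to the counter's low byte, which sits in the top
+ // byte of %ebx; a carry means one of the next six single-byte counter
+ // increments would wrap, so take the slow path that carries across the
+ // full 32-bit counter word.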
+ vmovdqu 0-32(%r9),%xmm3
+ vpaddb %xmm2,%xmm14,%xmm1
+ vpxor %xmm15,%xmm10,%xmm10
+ vpxor %xmm15,%xmm11,%xmm11
+
+.Lresume_ctr32_nmb:
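+ // Store the updated counter block and start the fused pass: GHASH of
+ // the previous six blocks (each multiplied by its power of H, presumably
+ // tabled at (%r9)) is interleaved with the AES rounds for the six new
+ // counter blocks.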
+ vmovdqu %xmm1,(%r8)
+ vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
+ vpxor %xmm15,%xmm12,%xmm12
+ vmovups 16-128(%rcx),%xmm2
+ vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
+ xorq %r12,%r12
+ cmpq %r14,%r15
+
+ vaesenc %xmm2,%xmm9,%xmm9
+ vmovdqu 48+8(%rsp),%xmm0
+ vpxor %xmm15,%xmm13,%xmm13
+ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
+ vaesenc %xmm2,%xmm10,%xmm10
+ vpxor %xmm15,%xmm14,%xmm14
+ setnc %r12b
+ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
+ vaesenc %xmm2,%xmm11,%xmm11
+ vmovdqu 16-32(%r9),%xmm3
+ negq %r12
+ vaesenc %xmm2,%xmm12,%xmm12
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
+ vpxor %xmm4,%xmm8,%xmm8
+ vaesenc %xmm2,%xmm13,%xmm13
+ vpxor %xmm5,%xmm1,%xmm4
+ andq $0x60,%r12
+ vmovups 32-128(%rcx),%xmm15
+ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
+ vaesenc %xmm2,%xmm14,%xmm14
+
+ vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
+ leaq (%r14,%r12,1),%r14
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor 16+8(%rsp),%xmm8,%xmm8
+ vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
+ vmovdqu 64+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm10,%xmm10
+ movq 88(%r14),%r13
+ bswapq %r13
+ vaesenc %xmm15,%xmm11,%xmm11
+ movq 80(%r14),%r12
+ bswapq %r12
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,32+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,40+8(%rsp)
+ vmovdqu 48-32(%r9),%xmm5
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 48-128(%rcx),%xmm15
+ vpxor %xmm1,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm2,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpxor %xmm3,%xmm7,%xmm7