Diffstat (limited to 'sys/contrib/openzfs/module/icp')
-rw-r--r--  sys/contrib/openzfs/module/icp/Makefile.in | 101
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c | 27
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c | 11
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/blake3/blake3.c | 731
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c | 204
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c | 407
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h | 191
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/edonr/edonr.c | 778
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h | 216
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/cbc.c | 51
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/ccm.c | 110
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/ctr.c | 25
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/ecb.c | 17
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm.c | 373
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c | 5
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/modes.c | 59
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha1/sha1.c | 835
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha2/sha2.c | 956
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c | 313
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c | 562
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c | 282
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein.c | 136
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_block.c | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h | 4
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c | 24
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/skein/skein_port.h | 4
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_cipher.c | 795
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_ctxops.c | 14
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_digest.c | 491
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_mac.c | 418
-rw-r--r--  sys/contrib/openzfs/module/icp/api/kcf_miscapi.c | 127
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S | 2069
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S | 2406
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S | 2012
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S | 1570
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S | 2774
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S | 1827
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S | 2823
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S | 3064
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S | 1520
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S | 2727
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S | 1722
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S | 2973
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S | 48
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S | 66
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S | 1828
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S | 2594
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S | 2299
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S | 2037
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 113
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S | 10
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S | 70
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S | 1369
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S | 5104
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S | 2089
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S | 4011
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S | 2114
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_callprov.c | 1451
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c | 562
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c | 73
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c | 386
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_sched.c | 1638
-rw-r--r--  sys/contrib/openzfs/module/icp/illumos-crypto.c | 31
-rw-r--r--  sys/contrib/openzfs/module/icp/include/aes/aes_impl.h | 18
-rw-r--r--  sys/contrib/openzfs/module/icp/include/generic_impl.c | 233
-rw-r--r--  sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/include/modes/modes.h | 10
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1.h | 61
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h | 65
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h | 73
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h | 219
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h | 29
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h | 46
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/bitmap.h | 183
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h | 137
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h | 1119
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h | 1480
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h | 136
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h | 630
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h | 434
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h | 586
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h | 307
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h | 160
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h | 107
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/modctl.h | 477
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/modhash.h | 147
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h | 108
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/stack.h | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/trap.h | 2
-rw-r--r--  sys/contrib/openzfs/module/icp/io/aes.c | 340
-rw-r--r--  sys/contrib/openzfs/module/icp/io/edonr_mod.c | 63
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha1_mod.c | 1230
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha2_mod.c | 297
-rw-r--r--  sys/contrib/openzfs/module/icp/io/skein_mod.c | 234
-rw-r--r--  sys/contrib/openzfs/module/icp/os/modconf.c | 173
-rw-r--r--  sys/contrib/openzfs/module/icp/os/modhash.c | 927
-rw-r--r--  sys/contrib/openzfs/module/icp/spi/kcf_spi.c | 751
102 files changed, 49777 insertions, 24668 deletions
diff --git a/sys/contrib/openzfs/module/icp/Makefile.in b/sys/contrib/openzfs/module/icp/Makefile.in
deleted file mode 100644
index 858c5a610c26..000000000000
--- a/sys/contrib/openzfs/module/icp/Makefile.in
+++ /dev/null
@@ -1,101 +0,0 @@
-ifneq ($(KBUILD_EXTMOD),)
-src = @abs_srcdir@
-obj = @abs_builddir@
-icp_include = $(src)/include
-else
-icp_include = $(srctree)/$(src)/include
-endif
-
-MODULE := icp
-
-obj-$(CONFIG_ZFS) := $(MODULE).o
-
-asflags-y := -I$(icp_include)
-ccflags-y := -I$(icp_include)
-
-$(MODULE)-objs += illumos-crypto.o
-$(MODULE)-objs += api/kcf_cipher.o
-$(MODULE)-objs += api/kcf_digest.o
-$(MODULE)-objs += api/kcf_mac.o
-$(MODULE)-objs += api/kcf_miscapi.o
-$(MODULE)-objs += api/kcf_ctxops.o
-$(MODULE)-objs += core/kcf_callprov.o
-$(MODULE)-objs += core/kcf_prov_tabs.o
-$(MODULE)-objs += core/kcf_sched.o
-$(MODULE)-objs += core/kcf_mech_tabs.o
-$(MODULE)-objs += core/kcf_prov_lib.o
-$(MODULE)-objs += spi/kcf_spi.o
-$(MODULE)-objs += io/aes.o
-$(MODULE)-objs += io/edonr_mod.o
-$(MODULE)-objs += io/sha1_mod.o
-$(MODULE)-objs += io/sha2_mod.o
-$(MODULE)-objs += io/skein_mod.o
-$(MODULE)-objs += os/modhash.o
-$(MODULE)-objs += os/modconf.o
-$(MODULE)-objs += algs/modes/cbc.o
-$(MODULE)-objs += algs/modes/ccm.o
-$(MODULE)-objs += algs/modes/ctr.o
-$(MODULE)-objs += algs/modes/ecb.o
-$(MODULE)-objs += algs/modes/gcm_generic.o
-$(MODULE)-objs += algs/modes/gcm.o
-$(MODULE)-objs += algs/modes/modes.o
-$(MODULE)-objs += algs/aes/aes_impl_generic.o
-$(MODULE)-objs += algs/aes/aes_impl.o
-$(MODULE)-objs += algs/aes/aes_modes.o
-$(MODULE)-objs += algs/edonr/edonr.o
-$(MODULE)-objs += algs/sha1/sha1.o
-$(MODULE)-objs += algs/sha2/sha2.o
-$(MODULE)-objs += algs/skein/skein.o
-$(MODULE)-objs += algs/skein/skein_block.o
-$(MODULE)-objs += algs/skein/skein_iv.o
-
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aeskey.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_amd64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_aesni.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/gcm_pclmulqdq.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha1/sha1-x86_64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha256_impl.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha512_impl.o
-
-$(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o
-$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o
-$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o
-
-# Suppress objtool "can't find jump dest instruction at" warnings. They
-# are caused by the constants which are defined in the text section of the
-# assembly file using .byte instructions (e.g. bswap_mask). The objtool
-# utility tries to interpret them as opcodes and obviously fails doing so.
-OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
-OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
-# Suppress objtool "unsupported stack pointer realignment" warnings. We are
-# not using a DRAP register while aligning the stack to a 64 byte boundary.
-# See #6950 for the reasoning.
-OBJECT_FILES_NON_STANDARD_sha1-x86_64.o := y
-OBJECT_FILES_NON_STANDARD_sha256_impl.o := y
-OBJECT_FILES_NON_STANDARD_sha512_impl.o := y
-
-ICP_DIRS = \
- api \
- core \
- spi \
- io \
- os \
- algs \
- algs/aes \
- algs/edonr \
- algs/modes \
- algs/sha1 \
- algs/sha2 \
- algs/skein \
- asm-x86_64 \
- asm-x86_64/aes \
- asm-x86_64/modes \
- asm-x86_64/sha1 \
- asm-x86_64/sha2 \
- asm-i386 \
- asm-generic
-
-all:
- mkdir -p $(ICP_DIRS)
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c
index 037be0db60d7..9daa975226fe 100644
--- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -47,7 +47,7 @@ aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
union {
uint64_t ka64[4];
uint32_t ka32[8];
- } keyarr;
+ } keyarr;
switch (keyBits) {
case 128:
@@ -81,7 +81,7 @@ aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]);
}
} else {
- bcopy(cipherKey, keyarr.ka32, keysize);
+ memcpy(keyarr.ka32, cipherKey, keysize);
}
} else {
/* byte swap */
@@ -132,7 +132,7 @@ aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
} else
- bcopy(pt, &buffer, AES_BLOCK_LEN);
+ memcpy(&buffer, pt, AES_BLOCK_LEN);
ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, buffer, buffer);
@@ -143,7 +143,7 @@ aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
*(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
*(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
} else
- bcopy(&buffer, ct, AES_BLOCK_LEN);
+ memcpy(ct, &buffer, AES_BLOCK_LEN);
}
return (CRYPTO_SUCCESS);
}
@@ -179,7 +179,7 @@ aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
} else
- bcopy(ct, &buffer, AES_BLOCK_LEN);
+ memcpy(&buffer, ct, AES_BLOCK_LEN);
ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, buffer, buffer);
@@ -190,7 +190,7 @@ aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
*(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
*(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
} else
- bcopy(&buffer, pt, AES_BLOCK_LEN);
+ memcpy(pt, &buffer, AES_BLOCK_LEN);
}
return (CRYPTO_SUCCESS);
}
@@ -206,13 +206,12 @@ aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
* size Size of key schedule allocated, in bytes
* kmflag Flag passed to kmem_alloc(9F); ignored in userland.
*/
-/* ARGSUSED */
void *
aes_alloc_keysched(size_t *size, int kmflag)
{
aes_key_t *keysched;
- keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
+ keysched = kmem_alloc(sizeof (aes_key_t), kmflag);
if (keysched != NULL) {
*size = sizeof (aes_key_t);
return (keysched);
@@ -226,7 +225,7 @@ static aes_impl_ops_t aes_fastest_impl = {
};
/* All compiled in implementations */
-const aes_impl_ops_t *aes_all_impl[] = {
+static const aes_impl_ops_t *aes_all_impl[] = {
&aes_generic_impl,
#if defined(__x86_64)
&aes_x86_64_impl,
@@ -338,7 +337,7 @@ aes_impl_init(void)
}
static const struct {
- char *name;
+ const char *name;
uint32_t sel;
} aes_impl_opts[] = {
{ "cycle", IMPL_CYCLE },
@@ -425,13 +424,15 @@ icp_aes_impl_get(char *buffer, zfs_kernel_param_t *kp)
/* list mandatory options */
for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) {
fmt = (impl == aes_impl_opts[i].sel) ? "[%s] " : "%s ";
- cnt += sprintf(buffer + cnt, fmt, aes_impl_opts[i].name);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
+ aes_impl_opts[i].name);
}
/* list all supported implementations */
for (i = 0; i < aes_supp_impl_cnt; i++) {
fmt = (i == impl) ? "[%s] " : "%s ";
- cnt += sprintf(buffer + cnt, fmt, aes_supp_impl[i]->name);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
+ aes_supp_impl[i]->name);
}
return (cnt);
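The hunk above replaces unbounded sprintf() calls with kmem_scnprintf(), which bounds each write to the space remaining in the page-sized buffer. A minimal userland sketch of that accumulation pattern, assuming kmem_scnprintf() behaves like the kernel's scnprintf() (it returns the number of bytes actually written, never more than the remaining space); scnprintf_sketch() and the names below are illustrative only:

#include <stdarg.h>
#include <stdio.h>

static int
scnprintf_sketch(char *buf, size_t size, const char *fmt, ...)
{
	va_list ap;
	int n;

	if (size == 0)
		return (0);
	va_start(ap, fmt);
	n = vsnprintf(buf, size, fmt, ap);
	va_end(ap);
	if (n < 0)
		return (0);
	/* vsnprintf() reports intended length; clamp to what actually fit. */
	return ((size_t)n >= size ? (int)(size - 1) : n);
}

int
main(void)
{
	char buf[16];
	size_t cnt = 0;
	const char *names[] = { "cycle", "fastest", "generic" };

	for (size_t i = 0; i < 3; i++)
		cnt += scnprintf_sketch(buf + cnt, sizeof (buf) - cnt,
		    "%s ", names[i]);
	printf("%s\n", buf);	/* "cycle fastest g": truncated, no overflow */
	return (0);
}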
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c
index 4b5eefd71b17..61085214c77b 100644
--- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -26,15 +26,16 @@
#include <sys/simd.h>
#include <sys/types.h>
+#include <sys/asm_linkage.h>
/* These functions are used to execute AES-NI instructions: */
-extern int rijndael_key_setup_enc_intel(uint32_t rk[],
+extern ASMABI int rijndael_key_setup_enc_intel(uint32_t rk[],
const uint32_t cipherKey[], uint64_t keyBits);
-extern int rijndael_key_setup_dec_intel(uint32_t rk[],
+extern ASMABI int rijndael_key_setup_dec_intel(uint32_t rk[],
const uint32_t cipherKey[], uint64_t keyBits);
-extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
+extern ASMABI void aes_encrypt_intel(const uint32_t rk[], int Nr,
const uint32_t pt[4], uint32_t ct[4]);
-extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
+extern ASMABI void aes_decrypt_intel(const uint32_t rk[], int Nr,
const uint32_t ct[4], uint32_t pt[4]);
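The ASMABI tag added above pins the calling convention of the hand-written assembly entry points, so C callers pass arguments in the registers the .S files expect even on toolchains whose default x86-64 ABI differs. A hedged sketch of such an annotation; the real macro comes from the include/sys/asm_linkage.h change elsewhere in this diff, and the sysv_abi mapping below is an assumption, not the project's exact definition:

#include <stdint.h>

/* Assumed illustrative definition; see include/sys/asm_linkage.h. */
#if defined(__x86_64__) && defined(_WIN32)
#define	ASMABI	__attribute__((sysv_abi))
#else
#define	ASMABI
#endif

/* Prototype and hand-written assembly now agree on argument registers. */
extern ASMABI void aes_encrypt_intel(const uint32_t rk[], int Nr,
    const uint32_t pt[4], uint32_t ct[4]);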
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c
index 427c096c6ab3..ae13c0b85578 100644
--- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c
index 19f8fd5012cf..f4f206a00935 100644
--- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c
index 9e4b498fffcb..6a25496d050e 100644
--- a/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c
+++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
new file mode 100644
index 000000000000..0bab7a3a7593
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
@@ -0,0 +1,731 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/simd.h>
+#include <sys/zfs_context.h>
+#include <sys/blake3.h>
+
+#include "blake3_impl.h"
+
+/*
+ * We need 1056 bytes of stack for blake3_compress_subtree_wide(),
+ * so we define this pragma to keep gcc happy.
+ */
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
+
+/* internally used */
+typedef struct {
+ uint32_t input_cv[8];
+ uint64_t counter;
+ uint8_t block[BLAKE3_BLOCK_LEN];
+ uint8_t block_len;
+ uint8_t flags;
+} output_t;
+
+/* internal flags */
+enum blake3_flags {
+ CHUNK_START = 1 << 0,
+ CHUNK_END = 1 << 1,
+ PARENT = 1 << 2,
+ ROOT = 1 << 3,
+ KEYED_HASH = 1 << 4,
+ DERIVE_KEY_CONTEXT = 1 << 5,
+ DERIVE_KEY_MATERIAL = 1 << 6,
+};
+
+/* internal start */
+static void chunk_state_init(blake3_chunk_state_t *ctx,
+ const uint32_t key[8], uint8_t flags)
+{
+ memcpy(ctx->cv, key, BLAKE3_KEY_LEN);
+ ctx->chunk_counter = 0;
+ memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
+ ctx->buf_len = 0;
+ ctx->blocks_compressed = 0;
+ ctx->flags = flags;
+}
+
+static void chunk_state_reset(blake3_chunk_state_t *ctx,
+ const uint32_t key[8], uint64_t chunk_counter)
+{
+ memcpy(ctx->cv, key, BLAKE3_KEY_LEN);
+ ctx->chunk_counter = chunk_counter;
+ ctx->blocks_compressed = 0;
+ memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
+ ctx->buf_len = 0;
+}
+
+static size_t chunk_state_len(const blake3_chunk_state_t *ctx)
+{
+ return (BLAKE3_BLOCK_LEN * (size_t)ctx->blocks_compressed) +
+ ((size_t)ctx->buf_len);
+}
+
+static size_t chunk_state_fill_buf(blake3_chunk_state_t *ctx,
+ const uint8_t *input, size_t input_len)
+{
+ size_t take = BLAKE3_BLOCK_LEN - ((size_t)ctx->buf_len);
+ if (take > input_len) {
+ take = input_len;
+ }
+ uint8_t *dest = ctx->buf + ((size_t)ctx->buf_len);
+ memcpy(dest, input, take);
+ ctx->buf_len += (uint8_t)take;
+ return (take);
+}
+
+static uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state_t *ctx)
+{
+ if (ctx->blocks_compressed == 0) {
+ return (CHUNK_START);
+ } else {
+ return (0);
+ }
+}
+
+static output_t make_output(const uint32_t input_cv[8],
+ const uint8_t *block, uint8_t block_len,
+ uint64_t counter, uint8_t flags)
+{
+ output_t ret;
+ memcpy(ret.input_cv, input_cv, 32);
+ memcpy(ret.block, block, BLAKE3_BLOCK_LEN);
+ ret.block_len = block_len;
+ ret.counter = counter;
+ ret.flags = flags;
+ return (ret);
+}
+
+/*
+ * Chaining values within a given chunk (specifically the compress_in_place
+ * interface) are represented as words. This avoids unnecessary bytes<->words
+ * conversion overhead in the portable implementation. However, the hash_many
+ * interface handles both user input and parent node blocks, so it accepts
+ * bytes. For that reason, chaining values in the CV stack are represented as
+ * bytes.
+ */
+static void output_chaining_value(const blake3_ops_t *ops,
+ const output_t *ctx, uint8_t cv[32])
+{
+ uint32_t cv_words[8];
+ memcpy(cv_words, ctx->input_cv, 32);
+ ops->compress_in_place(cv_words, ctx->block, ctx->block_len,
+ ctx->counter, ctx->flags);
+ store_cv_words(cv, cv_words);
+}
+
+static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
+ uint64_t seek, uint8_t *out, size_t out_len)
+{
+ uint64_t output_block_counter = seek / 64;
+ size_t offset_within_block = seek % 64;
+ uint8_t wide_buf[64];
+ while (out_len > 0) {
+ ops->compress_xof(ctx->input_cv, ctx->block, ctx->block_len,
+ output_block_counter, ctx->flags | ROOT, wide_buf);
+ size_t available_bytes = 64 - offset_within_block;
+ size_t memcpy_len;
+ if (out_len > available_bytes) {
+ memcpy_len = available_bytes;
+ } else {
+ memcpy_len = out_len;
+ }
+ memcpy(out, wide_buf + offset_within_block, memcpy_len);
+ out += memcpy_len;
+ out_len -= memcpy_len;
+ output_block_counter += 1;
+ offset_within_block = 0;
+ }
+}
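/*
 * Illustration (not part of the original change): each compress_xof()
 * call above yields one 64-byte XOF block, so for seek = 100 and
 * out_len = 30:
 *
 *   output_block_counter = 100 / 64 = 1
 *   offset_within_block  = 100 % 64 = 36
 *
 * The first pass copies bytes 36..63 of block 1 (28 bytes); the second
 * resets the offset to 0 and copies bytes 0..1 of block 2 (2 bytes).
 */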
+
+static void chunk_state_update(const blake3_ops_t *ops,
+ blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
+{
+ if (ctx->buf_len > 0) {
+ size_t take = chunk_state_fill_buf(ctx, input, input_len);
+ input += take;
+ input_len -= take;
+ if (input_len > 0) {
+ ops->compress_in_place(ctx->cv, ctx->buf,
+ BLAKE3_BLOCK_LEN, ctx->chunk_counter,
+ ctx->flags|chunk_state_maybe_start_flag(ctx));
+ ctx->blocks_compressed += 1;
+ ctx->buf_len = 0;
+ memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
+ }
+ }
+
+ while (input_len > BLAKE3_BLOCK_LEN) {
+ ops->compress_in_place(ctx->cv, input, BLAKE3_BLOCK_LEN,
+ ctx->chunk_counter,
+ ctx->flags|chunk_state_maybe_start_flag(ctx));
+ ctx->blocks_compressed += 1;
+ input += BLAKE3_BLOCK_LEN;
+ input_len -= BLAKE3_BLOCK_LEN;
+ }
+
+ chunk_state_fill_buf(ctx, input, input_len);
+}
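/*
 * Illustration (not part of the original change): the strict
 * "input_len > BLAKE3_BLOCK_LEN" test above deliberately leaves the
 * final full block of a chunk in ctx->buf uncompressed, because its
 * flags (CHUNK_END, possibly ROOT) are unknown until more input
 * arrives or the hash is finalized. Feeding exactly 128 bytes into an
 * empty state compresses one block and leaves buf_len = 64.
 */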
+
+static output_t chunk_state_output(const blake3_chunk_state_t *ctx)
+{
+ uint8_t block_flags =
+ ctx->flags | chunk_state_maybe_start_flag(ctx) | CHUNK_END;
+ return (make_output(ctx->cv, ctx->buf, ctx->buf_len, ctx->chunk_counter,
+ block_flags));
+}
+
+static output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
+ const uint32_t key[8], uint8_t flags)
+{
+ return (make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT));
+}
+
+/*
+ * Given some input larger than one chunk, return the number of bytes that
+ * should go in the left subtree. This is the largest power-of-2 number of
+ * chunks that leaves at least 1 byte for the right subtree.
+ */
+static size_t left_len(size_t content_len)
+{
+ /*
+ * Subtract 1 to reserve at least one byte for the right side.
+ * content_len should always be greater than BLAKE3_CHUNK_LEN.
+ */
+ size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;
+ return (round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN);
+}
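/*
 * Runnable sketch (not part of the original change) of left_len() on
 * concrete inputs; pow2_floor() stands in for round_down_to_power_of_2()
 * from blake3_impl.h.
 */
#include <stdint.h>
#include <stdio.h>

#define	CHUNK	1024ULL		/* BLAKE3_CHUNK_LEN */

static uint64_t
pow2_floor(uint64_t x)
{
	uint64_t p = 1;

	while (p * 2 <= x)
		p *= 2;
	return (p);
}

static uint64_t
left_len_sketch(uint64_t content_len)
{
	uint64_t full_chunks = (content_len - 1) / CHUNK;

	return (pow2_floor(full_chunks) * CHUNK);
}

int
main(void)
{
	/* 3 chunks: left subtree takes 2 chunks (2048), right takes 1 */
	printf("%llu\n", (unsigned long long)left_len_sketch(3 * CHUNK));
	/* 4 chunks + 1 byte: left takes all 4 full chunks (4096) */
	printf("%llu\n", (unsigned long long)left_len_sketch(4 * CHUNK + 1));
	return (0);
}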
+
+/*
+ * Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time
+ * on a single thread. Write out the chunk chaining values and return the
+ * number of chunks hashed. These chunks are never the root and never empty;
+ * those cases use a different codepath.
+ */
+static size_t compress_chunks_parallel(const blake3_ops_t *ops,
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t *out)
+{
+ const uint8_t *chunks_array[MAX_SIMD_DEGREE];
+ size_t input_position = 0;
+ size_t chunks_array_len = 0;
+ while (input_len - input_position >= BLAKE3_CHUNK_LEN) {
+ chunks_array[chunks_array_len] = &input[input_position];
+ input_position += BLAKE3_CHUNK_LEN;
+ chunks_array_len += 1;
+ }
+
+ ops->hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN /
+ BLAKE3_BLOCK_LEN, key, chunk_counter, B_TRUE, flags, CHUNK_START,
+ CHUNK_END, out);
+
+ /*
+ * Hash the remaining partial chunk, if there is one. Note that the
+ * empty chunk (meaning the empty message) is a different codepath.
+ */
+ if (input_len > input_position) {
+ uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;
+ blake3_chunk_state_t chunk_state;
+ chunk_state_init(&chunk_state, key, flags);
+ chunk_state.chunk_counter = counter;
+ chunk_state_update(ops, &chunk_state, &input[input_position],
+ input_len - input_position);
+ output_t output = chunk_state_output(&chunk_state);
+ output_chaining_value(ops, &output, &out[chunks_array_len *
+ BLAKE3_OUT_LEN]);
+ return (chunks_array_len + 1);
+ } else {
+ return (chunks_array_len);
+ }
+}
+
+/*
+ * Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time
+ * on a single thread. Write out the parent chaining values and return the
+ * number of parents hashed. (If there's an odd input chaining value left over,
+ * return it as an additional output.) These parents are never the root and
+ * never empty; those cases use a different codepath.
+ */
+static size_t compress_parents_parallel(const blake3_ops_t *ops,
+ const uint8_t *child_chaining_values, size_t num_chaining_values,
+ const uint32_t key[8], uint8_t flags, uint8_t *out)
+{
+ const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2] = {0};
+ size_t parents_array_len = 0;
+
+ while (num_chaining_values - (2 * parents_array_len) >= 2) {
+ parents_array[parents_array_len] = &child_chaining_values[2 *
+ parents_array_len * BLAKE3_OUT_LEN];
+ parents_array_len += 1;
+ }
+
+ ops->hash_many(parents_array, parents_array_len, 1, key, 0, B_FALSE,
+ flags | PARENT, 0, 0, out);
+
+ /* If there's an odd child left over, it becomes an output. */
+ if (num_chaining_values > 2 * parents_array_len) {
+ memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],
+ &child_chaining_values[2 * parents_array_len *
+ BLAKE3_OUT_LEN], BLAKE3_OUT_LEN);
+ return (parents_array_len + 1);
+ } else {
+ return (parents_array_len);
+ }
+}
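/*
 * Illustration (not part of the original change): with
 * num_chaining_values = 5, the loop above pairs CVs 0-1 and 2-3 into
 * two parent nodes (parents_array_len = 2), and CV 4 passes through
 * unmodified, so the function returns 3 outputs.
 */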
+
+/*
+ * The wide helper function returns (writes out) an array of chaining values
+ * and returns the length of that array. The number of chaining values returned
+ * is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE, or fewer
+ * if the input is shorter than that many chunks. The reason for maintaining a
+ * wide array of chaining values going back up the tree is to allow the
+ * implementation to hash as many parents in parallel as possible.
+ *
+ * As a special case when the SIMD degree is 1, this function will still return
+ * at least 2 outputs. This guarantees that this function doesn't perform the
+ * root compression. (If it did, it would use the wrong flags, and also we
+ * wouldn't be able to implement extendable output.) Note that this function is
+ * not used when the whole input is only 1 chunk long; that's a different
+ * codepath.
+ *
+ * Why not just have the caller split the input on the first update(), instead
+ * of implementing this special rule? Because we don't want to limit SIMD or
+ * multi-threading parallelism for that update().
+ */
+static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t *out)
+{
+ /*
+ * Note that the single chunk case does *not* bump the SIMD degree up
+ * to 2 when it is 1. If this implementation adds multi-threading in
+ * the future, this gives us the option of multi-threading even the
+ * 2-chunk case, which can help performance on smaller platforms.
+ */
+ if (input_len <= (size_t)(ops->degree * BLAKE3_CHUNK_LEN)) {
+ return (compress_chunks_parallel(ops, input, input_len, key,
+ chunk_counter, flags, out));
+ }
+
+
+ /*
+ * With more than simd_degree chunks, we need to recurse. Start by
+ * dividing the input into left and right subtrees. (Note that this is
+ * only optimal as long as the SIMD degree is a power of 2. If we ever
+ * get a SIMD degree of 3 or something, we'll need a more complicated
+ * strategy.)
+ */
+ size_t left_input_len = left_len(input_len);
+ size_t right_input_len = input_len - left_input_len;
+ const uint8_t *right_input = &input[left_input_len];
+ uint64_t right_chunk_counter = chunk_counter +
+ (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
+
+ /*
+ * Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2
+ * to account for the special case of returning 2 outputs when the
+ * SIMD degree is 1.
+ */
+ uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+ size_t degree = ops->degree;
+ if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
+
+ /*
+ * The special case: We always use a degree of at least two,
+ * to make sure there are two outputs. Except, as noted above,
+ * at the chunk level, where we allow degree=1. (Note that the
+ * 1-chunk-input case is a different codepath.)
+ */
+ degree = 2;
+ }
+ uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
+
+ /*
+ * Recurse! If this implementation adds multi-threading support in the
+ * future, this is where it will go.
+ */
+ size_t left_n = blake3_compress_subtree_wide(ops, input, left_input_len,
+ key, chunk_counter, flags, cv_array);
+ size_t right_n = blake3_compress_subtree_wide(ops, right_input,
+ right_input_len, key, right_chunk_counter, flags, right_cvs);
+
+ /*
+ * The special case again. If simd_degree=1, then we'll have left_n=1
+ * and right_n=1. Rather than compressing them into a single output,
+ * return them directly, to make sure we always have at least two
+ * outputs.
+ */
+ if (left_n == 1) {
+ memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+ return (2);
+ }
+
+ /* Otherwise, do one layer of parent node compression. */
+ size_t num_chaining_values = left_n + right_n;
+ return (compress_parents_parallel(ops, cv_array,
+ num_chaining_values, key, flags, out));
+}
+
+/*
+ * Hash a subtree with compress_subtree_wide(), and then condense the resulting
+ * list of chaining values down to a single parent node. Don't compress that
+ * last parent node, however. Instead, return its message bytes (the
+ * concatenated chaining values of its children). This is necessary when the
+ * first call to update() supplies a complete subtree, because the topmost
+ * parent node of that subtree could end up being the root. It's also necessary
+ * for extended output in the general case.
+ *
+ * As with compress_subtree_wide(), this function is not used on inputs of 1
+ * chunk or less. That's a different codepath.
+ */
+static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
+{
+ uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+ size_t num_cvs = blake3_compress_subtree_wide(ops, input, input_len,
+ key, chunk_counter, flags, cv_array);
+
+ /*
+ * If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
+ * compress_subtree_wide() returns more than 2 chaining values. Condense
+ * them into 2 by forming parent nodes repeatedly.
+ */
+ uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
+ while (num_cvs > 2) {
+ num_cvs = compress_parents_parallel(ops, cv_array, num_cvs, key,
+ flags, out_array);
+ memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
+ }
+ memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+}
+
+static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
+ uint8_t flags)
+{
+ memcpy(ctx->key, key, BLAKE3_KEY_LEN);
+ chunk_state_init(&ctx->chunk, key, flags);
+ ctx->cv_stack_len = 0;
+ ctx->ops = blake3_get_ops();
+}
+
+/*
+ * As described in hasher_push_cv() below, we do "lazy merging", delaying
+ * merges until right before the next CV is about to be added. This is
+ * different from the reference implementation. Another difference is that we
+ * aren't always merging 1 chunk at a time. Instead, each CV might represent
+ * any power-of-two number of chunks, as long as the smaller-above-larger
+ * stack order is maintained. Instead of the "count the trailing 0-bits"
+ * algorithm described in the spec, we use a "count the total number of
+ * 1-bits" variant that doesn't require us to retain the subtree size of the
+ * CV on top of the stack. The principle is the same: each CV that should
+ * remain in the stack is represented by a 1-bit in the total number of chunks
+ * (or bytes) so far.
+ */
+static void hasher_merge_cv_stack(BLAKE3_CTX *ctx, uint64_t total_len)
+{
+ size_t post_merge_stack_len = (size_t)popcnt(total_len);
+ while (ctx->cv_stack_len > post_merge_stack_len) {
+ uint8_t *parent_node =
+ &ctx->cv_stack[(ctx->cv_stack_len - 2) * BLAKE3_OUT_LEN];
+ output_t output =
+ parent_output(parent_node, ctx->key, ctx->chunk.flags);
+ output_chaining_value(ctx->ops, &output, parent_node);
+ ctx->cv_stack_len -= 1;
+ }
+}
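/*
 * Illustration (not part of the original change): after N total chunks
 * the stack holds exactly popcnt(N) chaining values, one per 1-bit,
 * i.e. one per completed power-of-2 subtree:
 *
 *   N = 6 (binary 110)  -> 2 CVs: a 4-chunk and a 2-chunk subtree
 *   N = 7 (binary 111)  -> 3 CVs: subtrees of 4, 2 and 1 chunks
 *   N = 8 (binary 1000) -> 1 CV:  a single 8-chunk subtree
 *
 * The loop above merges the top two entries until the stack length
 * drops to that popcount.
 */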
+
+/*
+ * In reference_impl.rs, we merge the new CV with existing CVs from the stack
+ * before pushing it. We can do that because we know more input is coming, so
+ * we know none of the merges are root.
+ *
+ * This setting is different. We want to feed as much input as possible to
+ * compress_subtree_wide(), without setting aside anything for the chunk_state.
+ * If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
+ * as a single subtree, if at all possible.
+ *
+ * This leads to two problems:
+ * 1) This 64 KiB input might be the only call that ever gets made to update.
+ * In this case, the root node of the 64 KiB subtree would be the root node
+ * of the whole tree, and it would need to be ROOT finalized. We can't
+ * compress it until we know.
+ * 2) This 64 KiB input might complete a larger tree, whose root node is
+ * similarly going to be the the root of the whole tree. For example, maybe
+ * we have 196 KiB (that is, 128 + 64) hashed so far. We can't compress the
+ * node at the root of the 256 KiB subtree until we know how to finalize it.
+ *
+ * The second problem is solved with "lazy merging". That is, when we're about
+ * to add a CV to the stack, we don't merge it with anything first, as the
+ * reference impl does. Instead we do merges using the *previous* CV that was
+ * added, which is sitting on top of the stack, and we put the new CV
+ * (unmerged) on top of the stack afterwards. This guarantees that we never
+ * merge the root node until finalize().
+ *
+ * Solving the first problem requires an additional tool,
+ * compress_subtree_to_parent_node(). That function always returns the top
+ * *two* chaining values of the subtree it's compressing. We then do lazy
+ * merging with each of them separately, so that the second CV will always
+ * remain unmerged. (That also helps us support extendable output when we're
+ * hashing an input all-at-once.)
+ */
+static void hasher_push_cv(BLAKE3_CTX *ctx, uint8_t new_cv[BLAKE3_OUT_LEN],
+ uint64_t chunk_counter)
+{
+ hasher_merge_cv_stack(ctx, chunk_counter);
+ memcpy(&ctx->cv_stack[ctx->cv_stack_len * BLAKE3_OUT_LEN], new_cv,
+ BLAKE3_OUT_LEN);
+ ctx->cv_stack_len += 1;
+}
+
+void
+Blake3_Init(BLAKE3_CTX *ctx)
+{
+ hasher_init_base(ctx, BLAKE3_IV, 0);
+}
+
+void
+Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN])
+{
+ uint32_t key_words[8];
+ load_key_words(key, key_words);
+ hasher_init_base(ctx, key_words, KEYED_HASH);
+}
+
+static void
+Blake3_Update2(BLAKE3_CTX *ctx, const void *input, size_t input_len)
+{
+ /*
+ * Explicitly checking for zero avoids causing UB by passing a null
+ * pointer to memcpy. This comes up in practice with things like:
+ * std::vector<uint8_t> v;
+ * blake3_hasher_update(&hasher, v.data(), v.size());
+ */
+ if (input_len == 0) {
+ return;
+ }
+
+ const uint8_t *input_bytes = (const uint8_t *)input;
+
+ /*
+ * If we have some partial chunk bytes in the internal chunk_state, we
+ * need to finish that chunk first.
+ */
+ if (chunk_state_len(&ctx->chunk) > 0) {
+ size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&ctx->chunk);
+ if (take > input_len) {
+ take = input_len;
+ }
+ chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, take);
+ input_bytes += take;
+ input_len -= take;
+ /*
+ * If we've filled the current chunk and there's more coming,
+ * finalize this chunk and proceed. In this case we know it's
+ * not the root.
+ */
+ if (input_len > 0) {
+ output_t output = chunk_state_output(&ctx->chunk);
+ uint8_t chunk_cv[32];
+ output_chaining_value(ctx->ops, &output, chunk_cv);
+ hasher_push_cv(ctx, chunk_cv, ctx->chunk.chunk_counter);
+ chunk_state_reset(&ctx->chunk, ctx->key,
+ ctx->chunk.chunk_counter + 1);
+ } else {
+ return;
+ }
+ }
+
+ /*
+ * Now the chunk_state is clear, and we have more input. If there's
+ * more than a single chunk (so, definitely not the root chunk), hash
+ * the largest whole subtree we can, with the full benefits of SIMD
+ * (and maybe in the future, multi-threading) parallelism. Two
+ * restrictions:
+ * - The subtree has to be a power-of-2 number of chunks. Only
+ * subtrees along the right edge can be incomplete, and we don't know
+ * where the right edge is going to be until we get to finalize().
+ * - The subtree must evenly divide the total number of chunks up
+ * until this point (if total is not 0). If the current incomplete
+ * subtree is only waiting for 1 more chunk, we can't hash a subtree
+ * of 4 chunks. We have to complete the current subtree first.
+ * Because we might need to break up the input to form powers of 2, or
+ * to evenly divide what we already have, this part runs in a loop.
+ */
+ while (input_len > BLAKE3_CHUNK_LEN) {
+ size_t subtree_len = round_down_to_power_of_2(input_len);
+ uint64_t count_so_far =
+ ctx->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
+ /*
+ * Shrink the subtree_len until it evenly divides the count so
+ * far. We know that subtree_len itself is a power of 2, so we
+ * can use a bitmasking trick instead of an actual remainder
+ * operation. (Note that if the caller consistently passes
+ * power-of-2 inputs of the same size, as is hopefully
+ * typical, this loop condition will always fail, and
+ * subtree_len will always be the full length of the input.)
+ *
+ * An aside: We don't have to shrink subtree_len quite this
+ * much. For example, if count_so_far is 1, we could pass 2
+ * chunks to compress_subtree_to_parent_node. Since we'll get
+ * 2 CVs back, we'll still get the right answer in the end,
+ * and we might get to use 2-way SIMD parallelism. The problem
+ * with this optimization is that it gets us stuck always
+ * hashing 2 chunks. The total number of chunks will remain
+ * odd, and we'll never graduate to higher degrees of
+ * parallelism. See
+ * https://github.com/BLAKE3-team/BLAKE3/issues/69.
+ */
+ while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
+ subtree_len /= 2;
+ }
+ /*
+ * The shrunken subtree_len might now be 1 chunk long. If so,
+ * hash that one chunk by itself. Otherwise, compress the
+ * subtree into a pair of CVs.
+ */
+ uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
+ if (subtree_len <= BLAKE3_CHUNK_LEN) {
+ blake3_chunk_state_t chunk_state;
+ chunk_state_init(&chunk_state, ctx->key,
+ ctx->chunk.flags);
+ chunk_state.chunk_counter = ctx->chunk.chunk_counter;
+ chunk_state_update(ctx->ops, &chunk_state, input_bytes,
+ subtree_len);
+ output_t output = chunk_state_output(&chunk_state);
+ uint8_t cv[BLAKE3_OUT_LEN];
+ output_chaining_value(ctx->ops, &output, cv);
+ hasher_push_cv(ctx, cv, chunk_state.chunk_counter);
+ } else {
+ /*
+ * This is the high-performance happy path, though
+ * getting here depends on the caller giving us a long
+ * enough input.
+ */
+ uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
+ compress_subtree_to_parent_node(ctx->ops, input_bytes,
+ subtree_len, ctx->key, ctx->chunk.chunk_counter,
+ ctx->chunk.flags, cv_pair);
+ hasher_push_cv(ctx, cv_pair, ctx->chunk.chunk_counter);
+ hasher_push_cv(ctx, &cv_pair[BLAKE3_OUT_LEN],
+ ctx->chunk.chunk_counter + (subtree_chunks / 2));
+ }
+ ctx->chunk.chunk_counter += subtree_chunks;
+ input_bytes += subtree_len;
+ input_len -= subtree_len;
+ }
+
+ /*
+ * If there's any remaining input less than a full chunk, add it to
+ * the chunk state. In that case, also do a final merge loop to make
+ * sure the subtree stack doesn't contain any unmerged pairs. The
+ * remaining input means we know these merges are non-root. This merge
+ * loop isn't strictly necessary here, because hasher_push_cv
+ * already does its own merge loop, but it simplifies
+ * Blake3_FinalSeek below.
+ */
+ if (input_len > 0) {
+ chunk_state_update(ctx->ops, &ctx->chunk, input_bytes,
+ input_len);
+ hasher_merge_cv_stack(ctx, ctx->chunk.chunk_counter);
+ }
+}
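/*
 * Runnable sketch (not part of the original change) tracing the
 * subtree_len shrink loop above: with one chunk (1024 bytes) already
 * hashed and 4096 fresh bytes on hand, the candidate subtree must
 * shrink until it evenly divides the byte count so far.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t count_so_far = 1 * 1024;	/* chunk_counter * CHUNK_LEN */
	uint64_t subtree_len = 4096;	/* round_down_to_power_of_2() result */

	while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0)
		subtree_len /= 2;		/* 4096 -> 2048 -> 1024 */
	printf("%llu\n", (unsigned long long)subtree_len);	/* 1024 */
	return (0);
}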
+
+void
+Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t todo)
+{
+ size_t done = 0;
+ const uint8_t *data = input;
+ const size_t block_max = 1024 * 64;
+
+ /* feed at most 64 KiB per pass to keep the stack usage small */
+ while (todo != 0) {
+ size_t block = (todo >= block_max) ? block_max : todo;
+ Blake3_Update2(ctx, data + done, block);
+ done += block;
+ todo -= block;
+ }
+}
+
+void
+Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out)
+{
+ Blake3_FinalSeek(ctx, 0, out, BLAKE3_OUT_LEN);
+}
+
+void
+Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
+ size_t out_len)
+{
+ /*
+ * Explicitly checking for zero avoids causing UB by passing a null
+ * pointer to memcpy. This comes up in practice with things like:
+ * std::vector<uint8_t> v;
+ * blake3_hasher_finalize(&hasher, v.data(), v.size());
+ */
+ if (out_len == 0) {
+ return;
+ }
+ /* If the subtree stack is empty, then the current chunk is the root. */
+ if (ctx->cv_stack_len == 0) {
+ output_t output = chunk_state_output(&ctx->chunk);
+ output_root_bytes(ctx->ops, &output, seek, out, out_len);
+ return;
+ }
+ /*
+ * If there are any bytes in the chunk state, finalize that chunk and
+ * do a roll-up merge between that chunk hash and every subtree in the
+ * stack. In this case, the extra merge loop at the end of
+ * Blake3_Update2 guarantees that none of the subtrees in the
+ * stack need to be merged with each other first. Otherwise, if there
+ * are no bytes in the chunk state, then the top of the stack is a
+ * chunk hash, and we start the merge from that.
+ */
+ output_t output;
+ size_t cvs_remaining;
+ if (chunk_state_len(&ctx->chunk) > 0) {
+ cvs_remaining = ctx->cv_stack_len;
+ output = chunk_state_output(&ctx->chunk);
+ } else {
+ /* There are always at least 2 CVs in the stack in this case. */
+ cvs_remaining = ctx->cv_stack_len - 2;
+ output = parent_output(&ctx->cv_stack[cvs_remaining * 32],
+ ctx->key, ctx->chunk.flags);
+ }
+ while (cvs_remaining > 0) {
+ cvs_remaining -= 1;
+ uint8_t parent_block[BLAKE3_BLOCK_LEN];
+ memcpy(parent_block, &ctx->cv_stack[cvs_remaining * 32], 32);
+ output_chaining_value(ctx->ops, &output, &parent_block[32]);
+ output = parent_output(parent_block, ctx->key,
+ ctx->chunk.flags);
+ }
+ output_root_bytes(ctx->ops, &output, seek, out, out_len);
+}
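Taken together, Blake3_Init(), Blake3_Update() and Blake3_Final() above form the one-shot hashing API this file exports. A minimal userland-style consumer sketch, assuming the in-tree sys/blake3.h header (which provides BLAKE3_CTX and BLAKE3_OUT_LEN) is on the include path:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/blake3.h>

int
main(void)
{
	BLAKE3_CTX ctx;
	uint8_t digest[BLAKE3_OUT_LEN];
	const char *msg = "abc";

	Blake3_Init(&ctx);
	Blake3_Update(&ctx, msg, strlen(msg));
	Blake3_Final(&ctx, digest);

	for (int i = 0; i < BLAKE3_OUT_LEN; i++)
		printf("%02x", digest[i]);
	printf("\n");
	return (0);
}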
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
new file mode 100644
index 000000000000..fbe184969672
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
@@ -0,0 +1,204 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/simd.h>
+#include <sys/zfs_context.h>
+#include "blake3_impl.h"
+
+#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
+ uint32_t x, uint32_t y)
+{
+ state[a] = state[a] + state[b] + x;
+ state[d] = rotr32(state[d] ^ state[a], 16);
+ state[c] = state[c] + state[d];
+ state[b] = rotr32(state[b] ^ state[c], 12);
+ state[a] = state[a] + state[b] + y;
+ state[d] = rotr32(state[d] ^ state[a], 8);
+ state[c] = state[c] + state[d];
+ state[b] = rotr32(state[b] ^ state[c], 7);
+}
+
+static inline void round_fn(uint32_t state[16], const uint32_t *msg,
+ size_t round)
+{
+ /* Select the message schedule based on the round. */
+ const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round];
+
+ /* Mix the columns. */
+ g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
+ g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
+ g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
+ g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
+
+ /* Mix the rows. */
+ g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
+ g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
+ g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
+ g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
+}
+
+static inline void compress_pre(uint32_t state[16], const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags)
+{
+ uint32_t block_words[16];
+ block_words[0] = load32(block + 4 * 0);
+ block_words[1] = load32(block + 4 * 1);
+ block_words[2] = load32(block + 4 * 2);
+ block_words[3] = load32(block + 4 * 3);
+ block_words[4] = load32(block + 4 * 4);
+ block_words[5] = load32(block + 4 * 5);
+ block_words[6] = load32(block + 4 * 6);
+ block_words[7] = load32(block + 4 * 7);
+ block_words[8] = load32(block + 4 * 8);
+ block_words[9] = load32(block + 4 * 9);
+ block_words[10] = load32(block + 4 * 10);
+ block_words[11] = load32(block + 4 * 11);
+ block_words[12] = load32(block + 4 * 12);
+ block_words[13] = load32(block + 4 * 13);
+ block_words[14] = load32(block + 4 * 14);
+ block_words[15] = load32(block + 4 * 15);
+
+ state[0] = cv[0];
+ state[1] = cv[1];
+ state[2] = cv[2];
+ state[3] = cv[3];
+ state[4] = cv[4];
+ state[5] = cv[5];
+ state[6] = cv[6];
+ state[7] = cv[7];
+ state[8] = BLAKE3_IV[0];
+ state[9] = BLAKE3_IV[1];
+ state[10] = BLAKE3_IV[2];
+ state[11] = BLAKE3_IV[3];
+ state[12] = counter_low(counter);
+ state[13] = counter_high(counter);
+ state[14] = (uint32_t)block_len;
+ state[15] = (uint32_t)flags;
+
+ round_fn(state, &block_words[0], 0);
+ round_fn(state, &block_words[0], 1);
+ round_fn(state, &block_words[0], 2);
+ round_fn(state, &block_words[0], 3);
+ round_fn(state, &block_words[0], 4);
+ round_fn(state, &block_words[0], 5);
+ round_fn(state, &block_words[0], 6);
+}
+
+static inline void blake3_compress_in_place_generic(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags)
+{
+ uint32_t state[16];
+ compress_pre(state, cv, block, block_len, counter, flags);
+ cv[0] = state[0] ^ state[8];
+ cv[1] = state[1] ^ state[9];
+ cv[2] = state[2] ^ state[10];
+ cv[3] = state[3] ^ state[11];
+ cv[4] = state[4] ^ state[12];
+ cv[5] = state[5] ^ state[13];
+ cv[6] = state[6] ^ state[14];
+ cv[7] = state[7] ^ state[15];
+}
+
+static inline void hash_one_generic(const uint8_t *input, size_t blocks,
+ const uint32_t key[8], uint64_t counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
+{
+ uint32_t cv[8];
+ memcpy(cv, key, BLAKE3_KEY_LEN);
+ uint8_t block_flags = flags | flags_start;
+ while (blocks > 0) {
+ if (blocks == 1) {
+ block_flags |= flags_end;
+ }
+ blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN,
+ counter, block_flags);
+ input = &input[BLAKE3_BLOCK_LEN];
+ blocks -= 1;
+ block_flags = flags;
+ }
+ store_cv_words(out, cv);
+}
+
+static inline void blake3_compress_xof_generic(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64])
+{
+ uint32_t state[16];
+ compress_pre(state, cv, block, block_len, counter, flags);
+
+ store32(&out[0 * 4], state[0] ^ state[8]);
+ store32(&out[1 * 4], state[1] ^ state[9]);
+ store32(&out[2 * 4], state[2] ^ state[10]);
+ store32(&out[3 * 4], state[3] ^ state[11]);
+ store32(&out[4 * 4], state[4] ^ state[12]);
+ store32(&out[5 * 4], state[5] ^ state[13]);
+ store32(&out[6 * 4], state[6] ^ state[14]);
+ store32(&out[7 * 4], state[7] ^ state[15]);
+ store32(&out[8 * 4], state[8] ^ cv[0]);
+ store32(&out[9 * 4], state[9] ^ cv[1]);
+ store32(&out[10 * 4], state[10] ^ cv[2]);
+ store32(&out[11 * 4], state[11] ^ cv[3]);
+ store32(&out[12 * 4], state[12] ^ cv[4]);
+ store32(&out[13 * 4], state[13] ^ cv[5]);
+ store32(&out[14 * 4], state[14] ^ cv[6]);
+ store32(&out[15 * 4], state[15] ^ cv[7]);
+}
+
+static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter,
+ boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out)
+{
+ while (num_inputs > 0) {
+ hash_one_generic(inputs[0], blocks, key, counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += 1;
+ }
+ inputs += 1;
+ num_inputs -= 1;
+ out = &out[BLAKE3_OUT_LEN];
+ }
+}
+
+/* the generic implementation is always okay */
+static boolean_t blake3_is_supported(void)
+{
+ return (B_TRUE);
+}
+
+const blake3_ops_t blake3_generic_impl = {
+ .compress_in_place = blake3_compress_in_place_generic,
+ .compress_xof = blake3_compress_xof_generic,
+ .hash_many = blake3_hash_many_generic,
+ .is_supported = blake3_is_supported,
+ .degree = 4,
+ .name = "generic"
+};
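blake3_generic_impl above is one entry in a table of interchangeable providers: each blake3_ops_t self-describes through is_supported() and degree, and blake3.c binds one implementation per context via blake3_get_ops(). A hedged sketch of dispatching through the table; blake3_get_ops() is declared in blake3_impl.h, and the zeroed inputs are placeholders:

#include "blake3_impl.h"

static void
compress_one_block_sketch(void)
{
	const blake3_ops_t *ops = blake3_get_ops();
	uint32_t cv[8] = { 0 };			/* placeholder chaining value */
	uint8_t block[BLAKE3_BLOCK_LEN] = { 0 };	/* placeholder block */

	if (ops->is_supported())	/* B_TRUE for the generic fallback */
		ops->compress_in_place(cv, block, BLAKE3_BLOCK_LEN, 0, 0);
}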
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
new file mode 100644
index 000000000000..5684b4ff1a97
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
@@ -0,0 +1,407 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/simd.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_impl.h>
+#include <sys/blake3.h>
+
+#include "blake3_impl.h"
+
+#if !defined(OMIT_SIMD) && (defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)))
+#define USE_SIMD
+#endif
+
+#ifdef USE_SIMD
+extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
+
+static void blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
+
+static void blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+static boolean_t blake3_is_sse2_supported(void)
+{
+#if defined(__x86_64)
+ return (kfpu_allowed() && zfs_sse2_available());
+#elif defined(__PPC64__)
+ return (kfpu_allowed() && zfs_vsx_available());
+#else
+ return (kfpu_allowed());
+#endif
+}
+
+const blake3_ops_t blake3_sse2_impl = {
+ .compress_in_place = blake3_compress_in_place_sse2,
+ .compress_xof = blake3_compress_xof_sse2,
+ .hash_many = blake3_hash_many_sse2,
+ .is_supported = blake3_is_sse2_supported,
+ .degree = 4,
+ .name = "sse2"
+};
+#endif
+
+#ifdef USE_SIMD
+
+extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
+
+static void blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
+
+static void blake3_hash_many_sse41(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+static boolean_t blake3_is_sse41_supported(void)
+{
+#if defined(__x86_64)
+ return (kfpu_allowed() && zfs_sse4_1_available());
+#elif defined(__PPC64__)
+ return (kfpu_allowed() && zfs_vsx_available());
+#else
+ return (kfpu_allowed());
+#endif
+}
+
+const blake3_ops_t blake3_sse41_impl = {
+ .compress_in_place = blake3_compress_in_place_sse41,
+ .compress_xof = blake3_compress_xof_sse41,
+ .hash_many = blake3_hash_many_sse41,
+ .is_supported = blake3_is_sse41_supported,
+ .degree = 4,
+ .name = "sse41"
+};
+#endif
+
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+static boolean_t blake3_is_avx2_supported(void)
+{
+ return (kfpu_allowed() && zfs_sse4_1_available() &&
+ zfs_avx2_available());
+}
+
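+/* The AVX2 assembly provides only hash_many; compress_* reuse SSE4.1. */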
+const blake3_ops_t blake3_avx2_impl = {
+ .compress_in_place = blake3_compress_in_place_sse41,
+ .compress_xof = blake3_compress_xof_sse41,
+ .hash_many = blake3_hash_many_avx2,
+ .is_supported = blake3_is_avx2_supported,
+ .degree = 8,
+ .name = "avx2"
+};
+#endif
+
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
+
+static void blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
+
+static void blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
+
+static boolean_t blake3_is_avx512_supported(void)
+{
+ return (kfpu_allowed() && zfs_avx512f_available() &&
+ zfs_avx512vl_available());
+}
+
+const blake3_ops_t blake3_avx512_impl = {
+ .compress_in_place = blake3_compress_in_place_avx512,
+ .compress_xof = blake3_compress_xof_avx512,
+ .hash_many = blake3_hash_many_avx512,
+ .is_supported = blake3_is_avx512_supported,
+ .degree = 16,
+ .name = "avx512"
+};
+#endif
+
+extern const blake3_ops_t blake3_generic_impl;
+
+static const blake3_ops_t *const blake3_impls[] = {
+ &blake3_generic_impl,
+#ifdef USE_SIMD
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse2_impl,
+#endif
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse41_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+ &blake3_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+ &blake3_avx512_impl,
+#endif
+#endif
+};
+
+/* parameterize and instantiate the shared generic implementation template */
+#define IMPL_NAME "blake3"
+#define IMPL_OPS_T blake3_ops_t
+#define IMPL_ARRAY blake3_impls
+#define IMPL_GET_OPS blake3_get_ops
+#define ZFS_IMPL_OPS zfs_blake3_ops
+#include <generic_impl.c>
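
The five defines above parameterize generic_impl.c, a shared template that supplies the implementation registry, the user-selectable choice logic, and the blake3_get_ops() accessor declared in blake3_impl.h. A hedged sketch of how a caller might consume the selected ops; here `block` and `flags` stand in for state owned by the BLAKE3 core:

    const blake3_ops_t *ops = blake3_get_ops();
    uint32_t cv[8];
    uint8_t out[64];

    memcpy(cv, BLAKE3_IV, sizeof (cv));     /* start from the IV */
    ops->compress_xof(cv, block, BLAKE3_BLOCK_LEN, 0, flags, out);

ops->degree (4, 8, or 16 above) tells the core how many inputs hash_many can batch per call.
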
+
+#ifdef _KERNEL
+void **blake3_per_cpu_ctx;
+
+void
+blake3_per_cpu_ctx_init(void)
+{
+	/*
+	 * Allocate the pointer array that anchors one preallocated
+	 * BLAKE3 context per possible CPU.
+	 */
+ blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
+ for (int i = 0; i < max_ncpus; i++) {
+ blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
+ KM_SLEEP);
+ }
+}
+
+void
+blake3_per_cpu_ctx_fini(void)
+{
+ for (int i = 0; i < max_ncpus; i++) {
+ memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
+ kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
+ }
+ memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
+ kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
+}
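
These per-CPU contexts let checksum paths grab a ready-made BLAKE3_CTX instead of allocating or burning stack on every call. A sketch of the expected consumer, assuming the Blake3_* names from sys/blake3.h and the usual pin-to-CPU discipline (the real call sites live in the checksum code, not here):

    kpreempt_disable();     /* stay on this CPU while using its ctx */
    BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID];
    Blake3_Init(ctx);
    Blake3_Update(ctx, buf, size);
    Blake3_Final(ctx, digest);
    kpreempt_enable();
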
+
+#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
+
+#if defined(__linux__)
+
+static int
+blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
+{
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
+ char *fmt;
+ int cnt = 0;
+
+ /* cycling */
+ fmt = IMPL_FMT(impl, IMPL_CYCLE);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle");
+
+ /* list fastest */
+ fmt = IMPL_FMT(impl, IMPL_FASTEST);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
+
+ /* list all supported implementations */
+ generic_impl_init();
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+ fmt = IMPL_FMT(impl, i);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
+ blake3_impls[i]->name);
+ }
+
+ return (cnt);
+}
+
+static int
+blake3_param_set(const char *val, zfs_kernel_param_t *unused)
+{
+ (void) unused;
+ return (generic_impl_setname(val));
+}
+
+#elif defined(__FreeBSD__)
+
+#include <sys/sbuf.h>
+
+static int
+blake3_param(ZFS_MODULE_PARAM_ARGS)
+{
+ int err;
+
+ generic_impl_init();
+ if (req->newptr == NULL) {
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
+ const int init_buflen = 64;
+ const char *fmt;
+ struct sbuf *s;
+
+ s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
+
+ /* cycling */
+ fmt = IMPL_FMT(impl, IMPL_CYCLE);
+ (void) sbuf_printf(s, fmt, "cycle");
+
+ /* list fastest */
+ fmt = IMPL_FMT(impl, IMPL_FASTEST);
+ (void) sbuf_printf(s, fmt, "fastest");
+
+ /* list all supported implementations */
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+ fmt = IMPL_FMT(impl, i);
+ (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
+ }
+
+ err = sbuf_finish(s);
+ sbuf_delete(s);
+
+ return (err);
+ }
+
+ char buf[16];
+
+ err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+ if (err) {
+ return (err);
+ }
+
+ return (-generic_impl_setname(buf));
+}
+#endif
+
+#undef IMPL_FMT
+
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
+ blake3_param_set, blake3_param_get, ZMOD_RW, \
+ "Select BLAKE3 implementation.");
+#endif
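
For reference, the zfs_blake3_impl parameter accepts two virtual names in addition to the concrete implementations listed by the handlers above; the semantics sketched below are assumed from the shared generic_impl template:

    /*
     * "fastest"        - select the best-performing supported impl
     * "cycle"          - rotate implementations on use (testing aid)
     * "generic", "sse2", "sse41", "avx2", "avx512"
     *                  - force a specific implementation, if supported
     */
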
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h
new file mode 100644
index 000000000000..90d508fac08f
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h
@@ -0,0 +1,191 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#ifndef BLAKE3_IMPL_H
+#define BLAKE3_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/blake3.h>
+#include <sys/simd.h>
+#include <sys/asm_linkage.h>
+
+/*
+ * Methods used to define BLAKE3 assembler implementations
+ */
+typedef void (*blake3_compress_in_place_f)(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+
+typedef void (*blake3_compress_xof_f)(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+typedef boolean_t (*blake3_is_supported_f)(void);
+
+typedef struct {
+ blake3_compress_in_place_f compress_in_place;
+ blake3_compress_xof_f compress_xof;
+ blake3_hash_many_f hash_many;
+ blake3_is_supported_f is_supported;
+ int degree;
+ const char *name;
+} blake3_ops_t;
+
+/* return selected BLAKE3 implementation ops */
+extern const blake3_ops_t *blake3_get_ops(void);
+
+#if defined(__x86_64)
+#define MAX_SIMD_DEGREE 16
+#else
+#define MAX_SIMD_DEGREE 4
+#endif
+
+#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
+
+static const uint32_t BLAKE3_IV[8] = {
+ 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+ 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL};
+
+static const uint8_t BLAKE3_MSG_SCHEDULE[7][16] = {
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
+ {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
+ {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
+ {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
+ {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
+ {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
+};
+
+/* Find index of the highest set bit */
+static inline unsigned int highest_one(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+ return (63 ^ __builtin_clzll(x));
+#elif defined(_MSC_VER) && defined(IS_X86_64)
+ unsigned long index;
+ _BitScanReverse64(&index, x);
+ return (index);
+#elif defined(_MSC_VER) && defined(IS_X86_32)
+ if (x >> 32) {
+ unsigned long index;
+ _BitScanReverse(&index, x >> 32);
+ return (32 + index);
+ } else {
+ unsigned long index;
+ _BitScanReverse(&index, x);
+ return (index);
+ }
+#else
+ unsigned int c = 0;
+ if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
+ if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
+ if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
+ if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
+ if (x & 0x000000000000000cULL) { x >>= 2; c += 2; }
+ if (x & 0x0000000000000002ULL) { c += 1; }
+ return (c);
+#endif
+}
+
+/* Count the number of 1 bits. */
+static inline unsigned int popcnt(uint64_t x) {
+ unsigned int count = 0;
+
+ while (x != 0) {
+ count += 1;
+ x &= x - 1;
+ }
+
+ return (count);
+}
+
+/*
+ * Largest power of two less than or equal to x.
+ * As a special case, returns 1 when x is 0.
+ */
+static inline uint64_t round_down_to_power_of_2(uint64_t x) {
+ return (1ULL << highest_one(x | 1));
+}
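
Worked examples for the three bit helpers above (illustrative values, easy to verify by hand):

    /*
     * highest_one(0x10)            == 4  (bit 4 is the top set bit)
     * popcnt(0xF0)                 == 4  (four bits set)
     * round_down_to_power_of_2(12) == 8
     * round_down_to_power_of_2(0)  == 1  (the documented special case)
     */
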
+
+static inline uint32_t counter_low(uint64_t counter) {
+ return ((uint32_t)counter);
+}
+
+static inline uint32_t counter_high(uint64_t counter) {
+ return ((uint32_t)(counter >> 32));
+}
+
+static inline uint32_t load32(const void *src) {
+ const uint8_t *p = (const uint8_t *)src;
+ return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
+ ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
+}
+
+static inline void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
+ uint32_t key_words[8]) {
+ key_words[0] = load32(&key[0 * 4]);
+ key_words[1] = load32(&key[1 * 4]);
+ key_words[2] = load32(&key[2 * 4]);
+ key_words[3] = load32(&key[3 * 4]);
+ key_words[4] = load32(&key[4 * 4]);
+ key_words[5] = load32(&key[5 * 4]);
+ key_words[6] = load32(&key[6 * 4]);
+ key_words[7] = load32(&key[7 * 4]);
+}
+
+static inline void store32(void *dst, uint32_t w) {
+ uint8_t *p = (uint8_t *)dst;
+ p[0] = (uint8_t)(w >> 0);
+ p[1] = (uint8_t)(w >> 8);
+ p[2] = (uint8_t)(w >> 16);
+ p[3] = (uint8_t)(w >> 24);
+}
+
+static inline void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
+ store32(&bytes_out[0 * 4], cv_words[0]);
+ store32(&bytes_out[1 * 4], cv_words[1]);
+ store32(&bytes_out[2 * 4], cv_words[2]);
+ store32(&bytes_out[3 * 4], cv_words[3]);
+ store32(&bytes_out[4 * 4], cv_words[4]);
+ store32(&bytes_out[5 * 4], cv_words[5]);
+ store32(&bytes_out[6 * 4], cv_words[6]);
+ store32(&bytes_out[7 * 4], cv_words[7]);
+}
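
load32()/store32() pin the byte order to little-endian regardless of host endianness, matching the BLAKE3 specification. A quick round-trip check (illustrative; ASSERT3U is the usual ZFS debug macro):

    uint8_t buf[4];

    store32(buf, 0x11223344);   /* buf = { 0x44, 0x33, 0x22, 0x11 } */
    ASSERT3U(load32(buf), ==, 0x11223344);
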
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLAKE3_IMPL_H */
diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
index ee96e692ef00..d17a40cefcb8 100644
--- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
+++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
@@ -1,6 +1,4 @@
/*
- * IDI,NTNU
- *
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -19,72 +17,44 @@
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
- *
- * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
- * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
- *
- * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $
*/
+
/*
- * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ * Based on Edon-R implementation for SUPERCOP, based on NIST API.
+ * Copyright (c) 2009, 2010, Jørn Amundsen <jorn.amundsen@ntnu.no>
+ * Copyright (c) 2013 Saso Kiselkov, All rights reserved
+ * Copyright (c) 2023 Tino Reichardt <milky-zfs@mcmilk.de>
*/
-#include <sys/strings.h>
+#include <sys/zfs_context.h>
+#include <sys/string.h>
#include <sys/edonr.h>
-#include <sys/debug.h>
-
-/* big endian support, provides no-op's if run on little endian hosts */
-#include "edonr_byteorder.h"
-#define hashState224(x) ((x)->pipe->p256)
-#define hashState256(x) ((x)->pipe->p256)
-#define hashState384(x) ((x)->pipe->p512)
-#define hashState512(x) ((x)->pipe->p512)
-
-/* shift and rotate shortcuts */
-#define shl(x, n) ((x) << n)
-#define shr(x, n) ((x) >> n)
-
-#define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
-#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
-
-#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
-#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
-
-#if !defined(__C99_RESTRICT)
-#define restrict /* restrict */
+/*
+ * Q512() needs a 1196-byte stack frame on i386, which trips gcc's
+ * -Wframe-larger-than warning; disable that warning for this file.
+ */
+#if defined(__GNUC__) && defined(_ILP32)
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
#endif
-#define EDONR_VALID_HASHBITLEN(x) \
- ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224)
+/*
+ * Compiler-only memory barriers: they keep fewer intermediates live
+ * across the quasigroup rounds, which reduces the stack frame size.
+ */
+#define MEMORY_BARRIER asm volatile("" ::: "memory");
-/* EdonR224 initial double chaining pipe */
-static const uint32_t i224p2[16] = {
- 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful,
- 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful,
- 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful,
- 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful,
-};
+#if defined(_ZFS_BIG_ENDIAN)
+#define ld_swap64(s, d) (d = __builtin_bswap64(*(s)))
+#define st_swap64(s, d) (*(d) = __builtin_bswap64(s))
+#else
+#define ld_swap64(s, d) (d = *(s))
+#define st_swap64(s, d) (*(d) = s)
+#endif
-/* EdonR256 initial double chaining pipe */
-static const uint32_t i256p2[16] = {
- 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful,
- 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful,
- 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful,
- 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful,
-};
+#define hashState512(x) ((x)->pipe->p512)
-/* EdonR384 initial double chaining pipe */
-static const uint64_t i384p2[16] = {
- 0x0001020304050607ull, 0x08090a0b0c0d0e0full,
- 0x1011121314151617ull, 0x18191a1b1c1d1e1full,
- 0x2021222324252627ull, 0x28292a2b2c2d2e2full,
- 0x3031323334353637ull, 0x38393a3b3c3d3e3full,
- 0x4041424344454647ull, 0x48494a4b4c4d4e4full,
- 0x5051525354555657ull, 0x58595a5b5c5d5e5full,
- 0x6061626364656667ull, 0x68696a6b6c6d6e6full,
- 0x7071727374757677ull, 0x78797a7b7c7d7e7full
-};
+/* rotate shortcuts */
+#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
/* EdonR512 initial double chaining pipe */
static const uint64_t i512p2[16] = {
@@ -98,294 +68,66 @@ static const uint64_t i512p2[16] = {
0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull
};
-/*
- * First Latin Square
- * 0 7 1 3 2 4 6 5
- * 4 1 7 6 3 0 5 2
- * 7 0 4 2 5 3 1 6
- * 1 4 0 5 6 2 7 3
- * 2 3 6 7 1 5 0 4
- * 5 2 3 1 7 6 4 0
- * 3 6 5 0 4 7 2 1
- * 6 5 2 4 0 1 3 7
- */
-#define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \
-{ \
- uint32_t x04, x17, x23, x56, x07, x26; \
- x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \
- s0 = c + x07 + x2; \
- s1 = rotl32(x07 + x3, 4); \
- s2 = rotl32(x07 + x6, 8); \
- x23 = x2 + x3; \
- s5 = rotl32(x04 + x23 + x5, 22); \
- x56 = x5 + x6; \
- s6 = rotl32(x17 + x56 + x0, 24); \
- x26 = x23+x56; \
- s3 = rotl32(x26 + x7, 13); \
- s4 = rotl32(x26 + x1, 17); \
- s7 = rotl32(x26 + x4, 29); \
-}
-
-#define LS1_512(c, x0, x1, x2, x3, x4, x5, x6, x7) \
-{ \
- uint64_t x04, x17, x23, x56, x07, x26; \
- x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \
- s0 = c + x07 + x2; \
- s1 = rotl64(x07 + x3, 5); \
- s2 = rotl64(x07 + x6, 15); \
- x23 = x2 + x3; \
- s5 = rotl64(x04 + x23 + x5, 40); \
- x56 = x5 + x6; \
- s6 = rotl64(x17 + x56 + x0, 50); \
- x26 = x23+x56; \
- s3 = rotl64(x26 + x7, 22); \
- s4 = rotl64(x26 + x1, 31); \
- s7 = rotl64(x26 + x4, 59); \
-}
-
-/*
- * Second Orthogonal Latin Square
- * 0 4 2 3 1 6 5 7
- * 7 6 3 2 5 4 1 0
- * 5 3 1 6 0 2 7 4
- * 1 0 5 4 3 7 2 6
- * 2 1 0 7 4 5 6 3
- * 3 5 7 0 6 1 4 2
- * 4 7 6 1 2 0 3 5
- * 6 2 4 5 7 3 0 1
- */
-#define LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7) \
-{ \
- uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \
- y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \
- t0 = ~c + y05 + y7; \
- t2 = rotl32(y05 + y3, 9); \
- y34 = y3+y4, y04 = y01+y34; \
- t1 = rotl32(y04 + y6, 5); \
- t4 = rotl32(y04 + y5, 15); \
- y67 = y6+y7, y37 = y34+y67; \
- t3 = rotl32(y37 + y2, 11); \
- t7 = rotl32(y37 + y0, 27); \
- y27 = y25+y67; \
- t5 = rotl32(y27 + y4, 20); \
- t6 = rotl32(y27 + y1, 25); \
-}
-
-#define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \
-{ \
- uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \
- y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \
- t0 = ~c + y05 + y7; \
- t2 = rotl64(y05 + y3, 19); \
- y34 = y3+y4, y04 = y01+y34; \
- t1 = rotl64(y04 + y6, 10); \
- t4 = rotl64(y04 + y5, 36); \
- y67 = y6+y7, y37 = y34+y67; \
- t3 = rotl64(y37 + y2, 29); \
- t7 = rotl64(y37 + y0, 55); \
- y27 = y25+y67; \
- t5 = rotl64(y27 + y4, 44); \
- t6 = rotl64(y27 + y1, 48); \
+#define LS1_512(x0, x1, x2, x3, x4, x5, x6, x7) \
+{ \
+ MEMORY_BARRIER \
+ z1 = x0 + x4, z2 = x1 + x7; z5 = z1 + z2; \
+ s0 = 0xaaaaaaaaaaaaaaaaull + z5 + x2; \
+ s1 = rotl64(z5 + x3, 5); \
+ s2 = rotl64(z5 + x6, 15); z3 = x2 + x3; \
+ s5 = rotl64(z1 + z3 + x5, 40); z4 = x5 + x6; \
+ s6 = rotl64(z2 + z4 + x0, 50); z6 = z3 + z4; \
+ s3 = rotl64(z6 + x7, 22); \
+ s4 = rotl64(z6 + x1, 31); \
+ s7 = rotl64(z6 + x4, 59); \
}
-#define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \
-{ \
- uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \
- s04 = s0 ^ s4, t01 = t0 ^ t1; \
- r0 = (s04 ^ s1) + (t01 ^ t5); \
- t67 = t6 ^ t7; \
- r1 = (s04 ^ s7) + (t2 ^ t67); \
- s23 = s2 ^ s3; \
- r7 = (s23 ^ s5) + (t4 ^ t67); \
- t34 = t3 ^ t4; \
- r3 = (s23 ^ s4) + (t0 ^ t34); \
- s56 = s5 ^ s6; \
- r5 = (s3 ^ s56) + (t34 ^ t6); \
- t25 = t2 ^ t5; \
- r6 = (s2 ^ s56) + (t25 ^ t7); \
- s17 = s1 ^ s7; \
- r4 = (s0 ^ s17) + (t1 ^ t25); \
- r2 = (s17 ^ s6) + (t01 ^ t3); \
+#define LS2_512(y0, y1, y2, y3, y4, y5, y6, y7) \
+{ \
+ z1 = y0 + y1, z2 = y2 + y5; z6 = z1 + z2; \
+ t0 = ~0xaaaaaaaaaaaaaaaaull + z6 + y7; \
+ t2 = rotl64(z6 + y3, 19); \
+ z3 = y3 + y4, z5 = z1 + z3; \
+ t1 = rotl64(z5 + y6, 10); \
+ t4 = rotl64(z5 + y5, 36); \
+ z4 = y6 + y7, z8 = z3 + z4; \
+ t3 = rotl64(z8 + y2, 29); \
+ t7 = rotl64(z8 + y0, 55); z7 = z2 + z4; \
+ t5 = rotl64(z7 + y4, 44); \
+ t6 = rotl64(z7 + y1, 48); \
}
-#define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \
-{ \
- uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \
- s04 = s0 ^ s4, t01 = t0 ^ t1; \
- r0 = (s04 ^ s1) + (t01 ^ t5); \
- t67 = t6 ^ t7; \
- r1 = (s04 ^ s7) + (t2 ^ t67); \
- s23 = s2 ^ s3; \
- r7 = (s23 ^ s5) + (t4 ^ t67); \
- t34 = t3 ^ t4; \
- r3 = (s23 ^ s4) + (t0 ^ t34); \
- s56 = s5 ^ s6; \
- r5 = (s3 ^ s56) + (t34 ^ t6); \
- t25 = t2 ^ t5; \
- r6 = (s2 ^ s56) + (t25 ^ t7); \
- s17 = s1 ^ s7; \
- r4 = (s0 ^ s17) + (t1 ^ t25); \
- r2 = (s17 ^ s6) + (t01 ^ t3); \
-}
-
-static size_t
-Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
-{
- size_t bl;
-
- for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE;
- bl -= EdonR256_BLOCK_BITSIZE, data += 16) {
- uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
- t5, t6, t7;
- uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
- q5, q6, q7;
- const uint32_t defix = 0xaaaaaaaa;
-#if defined(MACHINE_IS_BIG_ENDIAN)
- uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
- swp9, swp10, swp11, swp12, swp13, swp14, swp15;
-#define d(j) swp ## j
-#define s32(j) ld_swap32((uint32_t *)data + j, swp ## j)
-#else
-#define d(j) data[j]
-#endif
-
- /* First row of quasigroup e-transformations */
-#if defined(MACHINE_IS_BIG_ENDIAN)
- s32(8);
- s32(9);
- s32(10);
- s32(11);
- s32(12);
- s32(13);
- s32(14);
- s32(15);
-#endif
- LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
- d(8));
-#if defined(MACHINE_IS_BIG_ENDIAN)
- s32(0);
- s32(1);
- s32(2);
- s32(3);
- s32(4);
- s32(5);
- s32(6);
- s32(7);
-#undef s32
-#endif
- LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
- quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
-
- LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
- d(15));
- quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
-
- /* Second row of quasigroup e-transformations */
- LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
- p[15]);
- LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
-
- LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
- quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
-
- /* Third row of quasigroup e-transformations */
- LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
- quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
-
- LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
- LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
-
- /* Fourth row of quasigroup e-transformations */
- LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
- LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
-
- LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
- quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
-
- /* Edon-R tweak on the original SHA-3 Edon-R submission. */
- p[0] ^= d(8) ^ p0;
- p[1] ^= d(9) ^ p1;
- p[2] ^= d(10) ^ p2;
- p[3] ^= d(11) ^ p3;
- p[4] ^= d(12) ^ p4;
- p[5] ^= d(13) ^ p5;
- p[6] ^= d(14) ^ p6;
- p[7] ^= d(15) ^ p7;
- p[8] ^= d(0) ^ q0;
- p[9] ^= d(1) ^ q1;
- p[10] ^= d(2) ^ q2;
- p[11] ^= d(3) ^ q3;
- p[12] ^= d(4) ^ q4;
- p[13] ^= d(5) ^ q5;
- p[14] ^= d(6) ^ q6;
- p[15] ^= d(7) ^ q7;
- }
-
-#undef d
- return (bitlen - bl);
+#define QEF_512(r0, r1, r2, r3, r4, r5, r6, r7) \
+{ \
+ z1 = s0 ^ s4, z5 = t0 ^ t1; \
+ r0 = (z1 ^ s1) + (z5 ^ t5); z8 = t6 ^ t7; \
+ r1 = (z1 ^ s7) + (t2 ^ z8); z3 = s2 ^ s3; \
+ r7 = (z3 ^ s5) + (t4 ^ z8); z7 = t3 ^ t4; \
+ r3 = (z3 ^ s4) + (t0 ^ z7); z4 = s5 ^ s6; \
+ r5 = (s3 ^ z4) + (z7 ^ t6); z6 = t2 ^ t5; \
+ r6 = (s2 ^ z4) + (z6 ^ t7); z2 = s1 ^ s7; \
+ r4 = (s0 ^ z2) + (t1 ^ z6); \
+ r2 = (z2 ^ s6) + (z5 ^ t3); \
}
-/*
- * Why is this #pragma here?
- *
- * Checksum functions like this one can go over the stack frame size check
- * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can
- * safely ignore the compiler error since we know that in OpenZFS, that
- * the function will be called from a worker thread that won't be using
- * much stack. The only function that goes over the 1k limit is Q512(),
- * which only goes over it by a hair (1248 bytes on ARM32).
- */
-#include <sys/isa_defs.h> /* for _ILP32 */
-#ifdef _ILP32 /* We're 32-bit, assume small stack frames */
-#pragma GCC diagnostic ignored "-Wframe-larger-than="
-#endif
-
-#if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
static inline size_t
-#else
-static size_t
-#endif
-Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p)
+Q512(size_t bitlen, const uint64_t *data, uint64_t *p)
{
size_t bl;
for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE;
bl -= EdonR512_BLOCK_BITSIZE, data += 16) {
- uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
- t5, t6, t7;
- uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
- q5, q6, q7;
- const uint64_t defix = 0xaaaaaaaaaaaaaaaaull;
-#if defined(MACHINE_IS_BIG_ENDIAN)
- uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
- swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+ uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+ uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
+ uint64_t s0, s1, s2, s3, s4, s5, s6, s7;
+ uint64_t t0, t1, t2, t3, t4, t5, t6, t7;
+ uint64_t z1, z2, z3, z4, z5, z6, z7, z8;
+
+#if defined(_ZFS_BIG_ENDIAN)
+ uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7,
+ swp8, swp9, swp10, swp11, swp12, swp13, swp14, swp15;
#define d(j) swp##j
#define s64(j) ld_swap64((uint64_t *)data+j, swp##j)
-#else
-#define d(j) data[j]
-#endif
-
- /* First row of quasigroup e-transformations */
-#if defined(MACHINE_IS_BIG_ENDIAN)
- s64(8);
- s64(9);
- s64(10);
- s64(11);
- s64(12);
- s64(13);
- s64(14);
- s64(15);
-#endif
- LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
- d(8));
-#if defined(MACHINE_IS_BIG_ENDIAN)
s64(0);
s64(1);
s64(2);
@@ -394,43 +136,53 @@ Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p)
s64(5);
s64(6);
s64(7);
-#undef s64
+ s64(8);
+ s64(9);
+ s64(10);
+ s64(11);
+ s64(12);
+ s64(13);
+ s64(14);
+ s64(15);
+#else
+#define d(j) data[j]
#endif
- LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
- quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
- LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
- d(15));
- quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+ /* First row of quasigroup e-transformations */
+ LS1_512(d(15), d(14), d(13), d(12), d(11), d(10), d(9), d(8));
+ LS2_512(d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+ QEF_512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(d(8), d(9), d(10), d(11), d(12), d(13), d(14), d(15));
+ QEF_512(q0, q1, q2, q3, q4, q5, q6, q7);
/* Second row of quasigroup e-transformations */
- LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
- p[15]);
- LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS1_512(p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
+ LS2_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ QEF_512(p0, p1, p2, p3, p4, p5, p6, p7);
- LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
- quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+ LS1_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(q0, q1, q2, q3, q4, q5, q6, q7);
+ QEF_512(q0, q1, q2, q3, q4, q5, q6, q7);
/* Third row of quasigroup e-transformations */
- LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
- quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS1_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+ QEF_512(p0, p1, p2, p3, p4, p5, p6, p7);
- LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
- LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+ LS1_512(q0, q1, q2, q3, q4, q5, q6, q7);
+ LS2_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ QEF_512(q0, q1, q2, q3, q4, q5, q6, q7);
/* Fourth row of quasigroup e-transformations */
- LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
- LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS1_512(d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+ LS2_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ QEF_512(p0, p1, p2, p3, p4, p5, p6, p7);
- LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
- LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
- quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+ LS1_512(p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(q0, q1, q2, q3, q4, q5, q6, q7);
+ QEF_512(q0, q1, q2, q3, q4, q5, q6, q7);
/* Edon-R tweak on the original SHA-3 Edon-R submission. */
p[0] ^= d(8) ^ p0;
@@ -451,289 +203,115 @@ Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p)
p[15] ^= d(7) ^ q7;
}
+#undef s64
#undef d
return (bitlen - bl);
}
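
Q512() absorbs only whole 1024-bit blocks and reports the number of bits consumed; the caller buffers whatever is left. An arithmetic example (illustrative):

    /*
     * databitlen = 2500 with EdonR512_BLOCK_BITSIZE = 1024:
     * two full blocks are absorbed, Q512() returns 2048, and the
     * caller keeps the remaining 452 bits in LastPart.
     */
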
void
-EdonRInit(EdonRState *state, size_t hashbitlen)
+EdonRInit(EdonRState *state)
{
- ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen));
- switch (hashbitlen) {
- case 224:
- state->hashbitlen = 224;
- state->bits_processed = 0;
- state->unprocessed_bits = 0;
- bcopy(i224p2, hashState224(state)->DoublePipe,
- 16 * sizeof (uint32_t));
- break;
-
- case 256:
- state->hashbitlen = 256;
- state->bits_processed = 0;
- state->unprocessed_bits = 0;
- bcopy(i256p2, hashState256(state)->DoublePipe,
- 16 * sizeof (uint32_t));
- break;
-
- case 384:
- state->hashbitlen = 384;
- state->bits_processed = 0;
- state->unprocessed_bits = 0;
- bcopy(i384p2, hashState384(state)->DoublePipe,
- 16 * sizeof (uint64_t));
- break;
-
- case 512:
- state->hashbitlen = 512;
- state->bits_processed = 0;
- state->unprocessed_bits = 0;
- bcopy(i512p2, hashState224(state)->DoublePipe,
- 16 * sizeof (uint64_t));
- break;
- }
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ memcpy(hashState512(state)->DoublePipe, i512p2, sizeof (i512p2));
}
-
void
EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen)
{
- uint32_t *data32;
uint64_t *data64;
-
size_t bits_processed;
- ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
- switch (state->hashbitlen) {
- case 224:
- case 256:
- if (state->unprocessed_bits > 0) {
- /* LastBytes = databitlen / 8 */
- int LastBytes = (int)databitlen >> 3;
-
- ASSERT(state->unprocessed_bits + databitlen <=
- EdonR256_BLOCK_SIZE * 8);
-
- bcopy(data, hashState256(state)->LastPart
- + (state->unprocessed_bits >> 3), LastBytes);
- state->unprocessed_bits += (int)databitlen;
- databitlen = state->unprocessed_bits;
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data32 = (uint32_t *)hashState256(state)->LastPart;
- } else
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data32 = (uint32_t *)data;
-
- bits_processed = Q256(databitlen, data32,
- hashState256(state)->DoublePipe);
- state->bits_processed += bits_processed;
- databitlen -= bits_processed;
- state->unprocessed_bits = (int)databitlen;
- if (databitlen > 0) {
- /* LastBytes = Ceil(databitlen / 8) */
- int LastBytes =
- ((~(((-(int)databitlen) >> 3) & 0x01ff)) +
- 1) & 0x01ff;
-
- data32 += bits_processed >> 5; /* byte size update */
- bcopy(data32, hashState256(state)->LastPart, LastBytes);
- }
- break;
-
- case 384:
- case 512:
- if (state->unprocessed_bits > 0) {
- /* LastBytes = databitlen / 8 */
- int LastBytes = (int)databitlen >> 3;
-
- ASSERT(state->unprocessed_bits + databitlen <=
- EdonR512_BLOCK_SIZE * 8);
-
- bcopy(data, hashState512(state)->LastPart
- + (state->unprocessed_bits >> 3), LastBytes);
- state->unprocessed_bits += (int)databitlen;
- databitlen = state->unprocessed_bits;
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data64 = (uint64_t *)hashState512(state)->LastPart;
- } else
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data64 = (uint64_t *)data;
-
- bits_processed = Q512(databitlen, data64,
- hashState512(state)->DoublePipe);
- state->bits_processed += bits_processed;
- databitlen -= bits_processed;
- state->unprocessed_bits = (int)databitlen;
- if (databitlen > 0) {
- /* LastBytes = Ceil(databitlen / 8) */
- int LastBytes =
- ((~(((-(int)databitlen) >> 3) & 0x03ff)) +
- 1) & 0x03ff;
-
- data64 += bits_processed >> 6; /* byte size update */
- bcopy(data64, hashState512(state)->LastPart, LastBytes);
- }
- break;
+ if (state->unprocessed_bits > 0) {
+ /* LastBytes = databitlen / 8 */
+ int LastBytes = (int)databitlen >> 3;
+
+ ASSERT(state->unprocessed_bits + databitlen <=
+ EdonR512_BLOCK_SIZE * 8);
+
+ memcpy(hashState512(state)->LastPart
+ + (state->unprocessed_bits >> 3), data, LastBytes);
+ state->unprocessed_bits += (int)databitlen;
+ databitlen = state->unprocessed_bits;
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState512(state)->LastPart;
+ } else
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)data;
+
+ bits_processed = Q512(databitlen, data64,
+ hashState512(state)->DoublePipe);
+ state->bits_processed += bits_processed;
+ databitlen -= bits_processed;
+ state->unprocessed_bits = (int)databitlen;
+ if (databitlen > 0) {
+ /* LastBytes = Ceil(databitlen / 8) */
+ int LastBytes = ((~(((-(int)databitlen) >> 3) & 0x03ff)) + 1) \
+ & 0x03ff;
+
+ data64 += bits_processed >> 6; /* byte size update */
+ memmove(hashState512(state)->LastPart, data64, LastBytes);
}
}
void
EdonRFinal(EdonRState *state, uint8_t *hashval)
{
- uint32_t *data32;
uint64_t *data64, num_bits;
-
size_t databitlen;
int LastByte, PadOnePosition;
num_bits = state->bits_processed + state->unprocessed_bits;
- ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
- switch (state->hashbitlen) {
- case 224:
- case 256:
- LastByte = (int)state->unprocessed_bits >> 3;
- PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
- hashState256(state)->LastPart[LastByte] =
- (hashState256(state)->LastPart[LastByte]
- & (0xff << (PadOnePosition + 1))) ^
- (0x01 << PadOnePosition);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data64 = (uint64_t *)hashState256(state)->LastPart;
-
- if (state->unprocessed_bits < 448) {
- (void) memset((hashState256(state)->LastPart) +
- LastByte + 1, 0x00,
- EdonR256_BLOCK_SIZE - LastByte - 9);
- databitlen = EdonR256_BLOCK_SIZE * 8;
-#if defined(MACHINE_IS_BIG_ENDIAN)
- st_swap64(num_bits, data64 + 7);
-#else
- data64[7] = num_bits;
-#endif
- } else {
- (void) memset((hashState256(state)->LastPart) +
- LastByte + 1, 0x00,
- EdonR256_BLOCK_SIZE * 2 - LastByte - 9);
- databitlen = EdonR256_BLOCK_SIZE * 16;
-#if defined(MACHINE_IS_BIG_ENDIAN)
- st_swap64(num_bits, data64 + 15);
-#else
- data64[15] = num_bits;
-#endif
- }
-
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data32 = (uint32_t *)hashState256(state)->LastPart;
- state->bits_processed += Q256(databitlen, data32,
- hashState256(state)->DoublePipe);
- break;
-
- case 384:
- case 512:
- LastByte = (int)state->unprocessed_bits >> 3;
- PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
- hashState512(state)->LastPart[LastByte] =
- (hashState512(state)->LastPart[LastByte]
- & (0xff << (PadOnePosition + 1))) ^
- (0x01 << PadOnePosition);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- data64 = (uint64_t *)hashState512(state)->LastPart;
-
- if (state->unprocessed_bits < 960) {
- (void) memset((hashState512(state)->LastPart) +
- LastByte + 1, 0x00,
- EdonR512_BLOCK_SIZE - LastByte - 9);
- databitlen = EdonR512_BLOCK_SIZE * 8;
-#if defined(MACHINE_IS_BIG_ENDIAN)
- st_swap64(num_bits, data64 + 15);
+ LastByte = (int)state->unprocessed_bits >> 3;
+ PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+ hashState512(state)->LastPart[LastByte] =
+ (hashState512(state)->LastPart[LastByte] \
+ & (0xff << (PadOnePosition + 1))) ^ (0x01 << PadOnePosition);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState512(state)->LastPart;
+
+ if (state->unprocessed_bits < 960) {
+ memset((hashState512(state)->LastPart) +
+ LastByte + 1, 0x00, EdonR512_BLOCK_SIZE - LastByte - 9);
+ databitlen = EdonR512_BLOCK_SIZE * 8;
+#if defined(_ZFS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 15);
#else
- data64[15] = num_bits;
+ data64[15] = num_bits;
#endif
- } else {
- (void) memset((hashState512(state)->LastPart) +
- LastByte + 1, 0x00,
- EdonR512_BLOCK_SIZE * 2 - LastByte - 9);
- databitlen = EdonR512_BLOCK_SIZE * 16;
-#if defined(MACHINE_IS_BIG_ENDIAN)
- st_swap64(num_bits, data64 + 31);
+ } else {
+ memset((hashState512(state)->LastPart) + LastByte + 1,
+ 0x00, EdonR512_BLOCK_SIZE * 2 - LastByte - 9);
+ databitlen = EdonR512_BLOCK_SIZE * 16;
+#if defined(_ZFS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 31);
#else
- data64[31] = num_bits;
+ data64[31] = num_bits;
#endif
- }
-
- state->bits_processed += Q512(databitlen, data64,
- hashState512(state)->DoublePipe);
- break;
}
- switch (state->hashbitlen) {
- case 224: {
-#if defined(MACHINE_IS_BIG_ENDIAN)
- uint32_t *d32 = (uint32_t *)hashval;
- uint32_t *s32 = hashState224(state)->DoublePipe + 9;
- int j;
+ state->bits_processed += Q512(databitlen, data64,
+ hashState512(state)->DoublePipe);
- for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++)
- st_swap32(s32[j], d32 + j);
-#else
- bcopy(hashState256(state)->DoublePipe + 9, hashval,
- EdonR224_DIGEST_SIZE);
-#endif
- break;
- }
- case 256: {
-#if defined(MACHINE_IS_BIG_ENDIAN)
- uint32_t *d32 = (uint32_t *)hashval;
- uint32_t *s32 = hashState224(state)->DoublePipe + 8;
- int j;
-
- for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++)
- st_swap32(s32[j], d32 + j);
-#else
- bcopy(hashState256(state)->DoublePipe + 8, hashval,
- EdonR256_DIGEST_SIZE);
-#endif
- break;
- }
- case 384: {
-#if defined(MACHINE_IS_BIG_ENDIAN)
- uint64_t *d64 = (uint64_t *)hashval;
- uint64_t *s64 = hashState384(state)->DoublePipe + 10;
- int j;
-
- for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++)
- st_swap64(s64[j], d64 + j);
-#else
- bcopy(hashState384(state)->DoublePipe + 10, hashval,
- EdonR384_DIGEST_SIZE);
-#endif
- break;
- }
- case 512: {
-#if defined(MACHINE_IS_BIG_ENDIAN)
- uint64_t *d64 = (uint64_t *)hashval;
- uint64_t *s64 = hashState512(state)->DoublePipe + 8;
- int j;
-
- for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++)
- st_swap64(s64[j], d64 + j);
+#if defined(_ZFS_BIG_ENDIAN)
+ data64 = (uint64_t *)hashval;
+ uint64_t *s64 = hashState512(state)->DoublePipe + 8;
+ int j;
+
+ for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++)
+ st_swap64(s64[j], data64 + j);
#else
- bcopy(hashState512(state)->DoublePipe + 8, hashval,
- EdonR512_DIGEST_SIZE);
+ memcpy(hashval, hashState512(state)->DoublePipe + 8,
+ EdonR512_DIGEST_SIZE);
#endif
- break;
- }
- }
}
-
void
-EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
- uint8_t *hashval)
+EdonRHash(const uint8_t *data, size_t databitlen, uint8_t *hashval)
{
EdonRState state;
- EdonRInit(&state, hashbitlen);
+ EdonRInit(&state);
EdonRUpdate(&state, data, databitlen);
EdonRFinal(&state, hashval);
}
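
With the digest size fixed at 512 bits, the public API reduces to the Init/Update/Final triple that EdonRHash() wraps above. A minimal caller sketch, assuming the unchanged declarations in sys/edonr.h; note that EdonRUpdate() takes a length in bits:

    uint8_t digest[EdonR512_DIGEST_SIZE];
    EdonRState state;

    EdonRInit(&state);
    EdonRUpdate(&state, data, data_len * 8);    /* bits, not bytes */
    EdonRFinal(&state, digest);
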
diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h b/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h
deleted file mode 100644
index 2b5d48287f26..000000000000
--- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * IDI,NTNU
- *
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
- *
- * C header file to determine compile machine byte order. Take care when cross
- * compiling.
- *
- * $Id: byteorder.h 517 2013-02-17 20:34:39Z joern $
- */
-/*
- * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
- */
-
-#ifndef _CRYPTO_EDONR_BYTEORDER_H
-#define _CRYPTO_EDONR_BYTEORDER_H
-
-#include <sys/sysmacros.h>
-#include <sys/param.h>
-
-#if defined(__BYTE_ORDER)
-#if (__BYTE_ORDER == __BIG_ENDIAN)
-#define MACHINE_IS_BIG_ENDIAN
-#elif (__BYTE_ORDER == __LITTLE_ENDIAN)
-#define MACHINE_IS_LITTLE_ENDIAN
-#endif
-#elif defined(BYTE_ORDER)
-#if (BYTE_ORDER == BIG_ENDIAN)
-#define MACHINE_IS_BIG_ENDIAN
-#elif (BYTE_ORDER == LITTLE_ENDIAN)
-#define MACHINE_IS_LITTLE_ENDIAN
-#endif
-#endif /* __BYTE_ORDER || BYTE_ORDER */
-
-#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
-#if defined(_ZFS_BIG_ENDIAN) || defined(_MIPSEB)
-#define MACHINE_IS_BIG_ENDIAN
-#endif
-#if defined(_ZFS_LITTLE_ENDIAN) || defined(_MIPSEL)
-#define MACHINE_IS_LITTLE_ENDIAN
-#endif
-#endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */
-
-#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
-#error unknown machine byte sex
-#endif
-
-#define BYTEORDER_INCLUDED
-
-#if defined(MACHINE_IS_BIG_ENDIAN)
-/*
- * Byte swapping macros for big endian architectures and compilers,
- * add as appropriate for other architectures and/or compilers.
- *
- * ld_swap64(src,dst) : uint64_t dst = *(src)
- * st_swap64(src,dst) : *(dst) = uint64_t src
- */
-
-#if defined(__PPC__) || defined(_ARCH_PPC)
-
-#if defined(__64BIT__)
-#if defined(_ARCH_PWR7)
-#define aix_ld_swap64(s64, d64)\
- __asm__("ldbrx %0,0,%1" : "=r"(d64) : "r"(s64))
-#define aix_st_swap64(s64, d64)\
- __asm__ volatile("stdbrx %1,0,%0" : : "r"(d64), "r"(s64))
-#else
-#define aix_ld_swap64(s64, d64) \
-{ \
- uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \
- \
- __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0;rldimi %1,%2,32,0"\
- : "+r"(s4), "=r"(d64), "=r"(h) : "b"(s64)); \
-}
-
-#define aix_st_swap64(s64, d64) \
-{ \
- uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \
- h = (s64) >> 32; \
- __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \
- : "+r"(s4) : "r"(s64), "r"(h), "b"(d64)); \
-}
-#endif /* 64BIT && PWR7 */
-#else
-#define aix_ld_swap64(s64, d64) \
-{ \
- uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
- __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0" \
- : "+r"(s4), "=r"(l), "=r"(h) : "b"(s64)); \
- d64 = ((uint64_t)h<<32) | l; \
-}
-
-#define aix_st_swap64(s64, d64) \
-{ \
- uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
- l = (s64) & 0xfffffffful, h = (s64) >> 32; \
- __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \
- : "+r"(s4) : "r"(l), "r"(h), "b"(d64)); \
-}
-#endif /* __64BIT__ */
-#define aix_ld_swap32(s32, d32)\
- __asm__("lwbrx %0,0,%1" : "=r"(d32) : "r"(s32))
-#define aix_st_swap32(s32, d32)\
- __asm__ volatile("stwbrx %1,0,%0" : : "r"(d32), "r"(s32))
-#define ld_swap32(s, d) aix_ld_swap32(s, d)
-#define st_swap32(s, d) aix_st_swap32(s, d)
-#define ld_swap64(s, d) aix_ld_swap64(s, d)
-#define st_swap64(s, d) aix_st_swap64(s, d)
-#endif /* __PPC__ || _ARCH_PPC */
-
-#if defined(__sparc)
-#if !defined(__arch64__) && !defined(__sparcv8) && defined(__sparcv9)
-#define __arch64__
-#endif
-#if defined(__GNUC__) || (defined(__SUNPRO_C) && __SUNPRO_C > 0x590)
-/* need Sun Studio C 5.10 and above for GNU inline assembly */
-#if defined(__arch64__)
-#define sparc_ld_swap64(s64, d64) \
- __asm__("ldxa [%1]0x88,%0" : "=r"(d64) : "r"(s64))
-#define sparc_st_swap64(s64, d64) \
- __asm__ volatile("stxa %0,[%1]0x88" : : "r"(s64), "r"(d64))
-#define st_swap64(s, d) sparc_st_swap64(s, d)
-#else
-#define sparc_ld_swap64(s64, d64) \
-{ \
- uint32_t *s4, h, l; \
- __asm__("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" \
- : "+r"(s4), "=r"(l), "=r"(h) : "r"(s64)); \
- d64 = ((uint64_t)h<<32) | l; \
-}
-#define sparc_st_swap64(s64, d64) \
-{ \
- uint32_t *s4, h, l; \
- l = (s64) & 0xfffffffful, h = (s64) >> 32; \
- __asm__ volatile("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"\
- : "+r"(s4) : "r"(l), "r"(h), "r"(d64)); \
-}
-#endif /* sparc64 */
-#define sparc_ld_swap32(s32, d32)\
- __asm__("lda [%1]0x88,%0" : "=r"(d32) : "r"(s32))
-#define sparc_st_swap32(s32, d32)\
- __asm__ volatile("sta %0,[%1]0x88" : : "r"(s32), "r"(d32))
-#define ld_swap32(s, d) sparc_ld_swap32(s, d)
-#define st_swap32(s, d) sparc_st_swap32(s, d)
-#define ld_swap64(s, d) sparc_ld_swap64(s, d)
-#define st_swap64(s, d) sparc_st_swap64(s, d)
-#endif /* GCC || Sun Studio C > 5.9 */
-#endif /* sparc */
-
-/* GCC fallback */
-#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap32)
-#define ld_swap32(s, d) (d = __builtin_bswap32(*(s)))
-#define st_swap32(s, d) (*(d) = __builtin_bswap32(s))
-#endif /* GCC4/PGIC && !swap32 */
-#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap64)
-#define ld_swap64(s, d) (d = __builtin_bswap64(*(s)))
-#define st_swap64(s, d) (*(d) = __builtin_bswap64(s))
-#endif /* GCC4/PGIC && !swap64 */
-
-/* generic fallback */
-#if !defined(ld_swap32)
-#define ld_swap32(s, d) \
- (d = (*(s) >> 24) | (*(s) >> 8 & 0xff00) | \
- (*(s) << 8 & 0xff0000) | (*(s) << 24))
-#define st_swap32(s, d) \
- (*(d) = ((s) >> 24) | ((s) >> 8 & 0xff00) | \
- ((s) << 8 & 0xff0000) | ((s) << 24))
-#endif
-#if !defined(ld_swap64)
-#define ld_swap64(s, d) \
- (d = (*(s) >> 56) | (*(s) >> 40 & 0xff00) | \
- (*(s) >> 24 & 0xff0000) | (*(s) >> 8 & 0xff000000) | \
- (*(s) & 0xff000000) << 8 | (*(s) & 0xff0000) << 24 | \
- (*(s) & 0xff00) << 40 | *(s) << 56)
-#define st_swap64(s, d) \
- (*(d) = ((s) >> 56) | ((s) >> 40 & 0xff00) | \
- ((s) >> 24 & 0xff0000) | ((s) >> 8 & 0xff000000) | \
- ((s) & 0xff000000) << 8 | ((s) & 0xff0000) << 24 | \
- ((s) & 0xff00) << 40 | (s) << 56)
-#endif
-
-#endif /* MACHINE_IS_BIG_ENDIAN */
-
-
-#if defined(MACHINE_IS_LITTLE_ENDIAN)
-/* replace swaps with simple assignments on little endian systems */
-#undef ld_swap32
-#undef st_swap32
-#define ld_swap32(s, d) (d = *(s))
-#define st_swap32(s, d) (*(d) = s)
-#undef ld_swap64
-#undef st_swap64
-#define ld_swap64(s, d) (d = *(s))
-#define st_swap64(s, d) (*(d) = s)
-#endif /* MACHINE_IS_LITTLE_ENDIAN */
-
-#endif /* _CRYPTO_EDONR_BYTEORDER_H */
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/cbc.c b/sys/contrib/openzfs/module/icp/algs/modes/cbc.c
index 85864f56dead..d0219fb24c49 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/cbc.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/cbc.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -51,8 +51,8 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
if (length + ctx->cbc_remainder_len < block_size) {
/* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ memcpy((uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ datap,
length);
ctx->cbc_remainder_len += length;
ctx->cbc_copy_to = datap;
@@ -70,8 +70,8 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
- [ctx->cbc_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->cbc_remainder)
+ [ctx->cbc_remainder_len], datap, need);
blockp = (uint8_t *)ctx->cbc_remainder;
} else {
@@ -91,10 +91,10 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
if (out_data_1_len == block_size) {
copy_block(lastp, out_data_1);
} else {
- bcopy(lastp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, lastp, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(lastp + out_data_1_len,
- out_data_2,
+ memcpy(out_data_2,
+ lastp + out_data_1_len,
block_size - out_data_1_len);
}
}
@@ -113,7 +113,7 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block. */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->cbc_remainder, remainder);
+ memcpy(ctx->cbc_remainder, datap, remainder);
ctx->cbc_remainder_len = remainder;
ctx->cbc_copy_to = datap;
goto out;
@@ -137,7 +137,6 @@ out:
#define OTHER(a, ctx) \
(((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock)
-/* ARGSUSED */
int
cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
crypto_data_t *out, size_t block_size,
@@ -158,8 +157,8 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
if (length + ctx->cbc_remainder_len < block_size) {
/* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ memcpy((uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ datap,
length);
ctx->cbc_remainder_len += length;
ctx->cbc_copy_to = datap;
@@ -177,8 +176,8 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
- [ctx->cbc_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->cbc_remainder)
+ [ctx->cbc_remainder_len], datap, need);
blockp = (uint8_t *)ctx->cbc_remainder;
} else {
@@ -204,9 +203,9 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
&out_data_1_len, &out_data_2, block_size);
- bcopy(blockp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, blockp, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(blockp + out_data_1_len, out_data_2,
+ memcpy(out_data_2, blockp + out_data_1_len,
block_size - out_data_1_len);
}
@@ -225,7 +224,7 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block. */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->cbc_remainder, remainder);
+ memcpy(ctx->cbc_remainder, datap, remainder);
ctx->cbc_remainder_len = remainder;
ctx->cbc_lastp = lastp;
ctx->cbc_copy_to = datap;
@@ -243,23 +242,15 @@ int
cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len,
size_t block_size, void (*copy_block)(uint8_t *, uint64_t *))
{
- /*
- * Copy IV into context.
- *
- * If cm_param == NULL then the IV comes from the
- * cd_miscdata field in the crypto_data structure.
- */
- if (param != NULL) {
- ASSERT(param_len == block_size);
- copy_block((uchar_t *)param, cbc_ctx->cbc_iv);
- }
+ /* Copy IV into context. */
+ ASSERT3P(param, !=, NULL);
+ ASSERT3U(param_len, ==, block_size);
+
+ copy_block((uchar_t *)param, cbc_ctx->cbc_iv);
- cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0];
- cbc_ctx->cbc_flags |= CBC_MODE;
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
void *
cbc_alloc_ctx(int kmflag)
{
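
The mechanical change running through this file and the ones that follow is the bcopy()-to-memcpy() conversion; both copy the same bytes, but they take their pointer arguments in opposite order, which is why every converted call site swaps its first two arguments:

    bcopy(src, dst, len);       /* BSD-style: source first */
    memcpy(dst, src, len);      /* ISO C: destination first */
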
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
index 5d6507c49db1..1371676d6e68 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -59,15 +59,14 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
if (length + ctx->ccm_remainder_len < block_size) {
/* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ memcpy((uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ datap,
length);
ctx->ccm_remainder_len += length;
ctx->ccm_copy_to = datap;
return (CRYPTO_SUCCESS);
}
- lastp = (uint8_t *)ctx->ccm_cb;
crypto_init_ptrs(out, &iov_or_mp, &offset);
mac_buf = (uint8_t *)ctx->ccm_mac_buf;
@@ -80,8 +79,8 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
- [ctx->ccm_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], datap, need);
blockp = (uint8_t *)ctx->ccm_remainder;
} else {
@@ -132,10 +131,10 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
if (out_data_1_len == block_size) {
copy_block(lastp, out_data_1);
} else {
- bcopy(lastp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, lastp, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(lastp + out_data_1_len,
- out_data_2,
+ memcpy(out_data_2,
+ lastp + out_data_1_len,
block_size - out_data_1_len);
}
}
@@ -154,7 +153,7 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block. */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->ccm_remainder, remainder);
+ memcpy(ctx->ccm_remainder, datap, remainder);
ctx->ccm_remainder_len = remainder;
ctx->ccm_copy_to = datap;
goto out;
@@ -190,7 +189,6 @@ calculate_ccm_mac(ccm_ctx_t *ctx, uint8_t *ccm_mac,
}
}
-/* ARGSUSED */
int
ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
@@ -225,10 +223,10 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
/* ccm_mac_input_buf is not used for encryption */
macp = (uint8_t *)ctx->ccm_mac_input_buf;
- bzero(macp, block_size);
+ memset(macp, 0, block_size);
/* copy remainder to temporary buffer */
- bcopy(ctx->ccm_remainder, macp, ctx->ccm_remainder_len);
+ memcpy(macp, ctx->ccm_remainder, ctx->ccm_remainder_len);
/* calculate the CBC MAC */
xor_block(macp, mac_buf);
@@ -255,33 +253,32 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
ctx->ccm_remainder_len + ctx->ccm_mac_len);
if (ctx->ccm_remainder_len > 0) {
-
/* copy temporary block to where it belongs */
if (out_data_2 == NULL) {
/* everything will fit in out_data_1 */
- bcopy(macp, out_data_1, ctx->ccm_remainder_len);
- bcopy(ccm_mac_p, out_data_1 + ctx->ccm_remainder_len,
+ memcpy(out_data_1, macp, ctx->ccm_remainder_len);
+ memcpy(out_data_1 + ctx->ccm_remainder_len, ccm_mac_p,
ctx->ccm_mac_len);
} else {
-
if (out_data_1_len < ctx->ccm_remainder_len) {
-
size_t data_2_len_used;
- bcopy(macp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, macp, out_data_1_len);
data_2_len_used = ctx->ccm_remainder_len
- out_data_1_len;
- bcopy((uint8_t *)macp + out_data_1_len,
- out_data_2, data_2_len_used);
- bcopy(ccm_mac_p, out_data_2 + data_2_len_used,
+ memcpy(out_data_2,
+ (uint8_t *)macp + out_data_1_len,
+ data_2_len_used);
+ memcpy(out_data_2 + data_2_len_used,
+ ccm_mac_p,
ctx->ccm_mac_len);
} else {
- bcopy(macp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, macp, out_data_1_len);
if (out_data_1_len == ctx->ccm_remainder_len) {
/* mac will be in out_data_2 */
- bcopy(ccm_mac_p, out_data_2,
+ memcpy(out_data_2, ccm_mac_p,
ctx->ccm_mac_len);
} else {
size_t len_not_used = out_data_1_len -
@@ -291,11 +288,11 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
* out_data_1, part of the mac will be
* in out_data_2
*/
- bcopy(ccm_mac_p,
- out_data_1 + ctx->ccm_remainder_len,
- len_not_used);
- bcopy(ccm_mac_p + len_not_used,
- out_data_2,
+ memcpy(out_data_1 +
+ ctx->ccm_remainder_len,
+ ccm_mac_p, len_not_used);
+ memcpy(out_data_2,
+ ccm_mac_p + len_not_used,
ctx->ccm_mac_len - len_not_used);
}
@@ -303,9 +300,9 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
}
} else {
/* copy block to where it belongs */
- bcopy(ccm_mac_p, out_data_1, out_data_1_len);
+ memcpy(out_data_1, ccm_mac_p, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(ccm_mac_p + out_data_1_len, out_data_2,
+ memcpy(out_data_2, ccm_mac_p + out_data_1_len,
block_size - out_data_1_len);
}
}
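Note the recurring shape of these hunks: crypto_get_ptrs() hands back up to two destination pointers, because an output block may straddle two iovecs of the crypto_data_t, and each block is split across them. A sketch of just the split copy, assuming that two-pointer contract:

    #include <stddef.h>
    #include <string.h>

    /*
     * Write one 'len'-byte block to a destination that may be split:
     * 'd1' receives the first 'd1_len' bytes, 'd2' (when non-NULL)
     * receives the rest.
     */
    static void
    split_copy(unsigned char *d1, size_t d1_len, unsigned char *d2,
        const unsigned char *src, size_t len)
    {
        memcpy(d1, src, d1_len);
        if (d2 != NULL)
            memcpy(d2, src + d1_len, len - d1_len);
    }

The modes.c hunk near the end of this diff hardens the same contract by setting *out_data_2 to NULL when no second iovec exists, so callers can rely on the NULL check above.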
@@ -342,7 +339,6 @@ ccm_decrypt_incomplete_block(ccm_ctx_t *ctx,
* returned to the caller. It will be returned when decrypt_final() is
* called if the MAC matches
*/
-/* ARGSUSED */
int
ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
crypto_data_t *out, size_t block_size,
@@ -350,6 +346,7 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
+ (void) out;
size_t remainder = length;
size_t need = 0;
uint8_t *datap = (uint8_t *)data;
@@ -373,7 +370,7 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
}
tmp = (uint8_t *)ctx->ccm_mac_input_buf;
- bcopy(datap, tmp + pm_len, length);
+ memcpy(tmp + pm_len, datap, length);
ctx->ccm_processed_mac_len += length;
return (CRYPTO_SUCCESS);
@@ -406,15 +403,15 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
mac_len = length - pt_part;
ctx->ccm_processed_mac_len = mac_len;
- bcopy(data + pt_part, ctx->ccm_mac_input_buf, mac_len);
+ memcpy(ctx->ccm_mac_input_buf, data + pt_part, mac_len);
if (pt_part + ctx->ccm_remainder_len < block_size) {
/*
* since this is last of the ciphertext, will
* just decrypt with it here
*/
- bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
- [ctx->ccm_remainder_len], pt_part);
+ memcpy(&((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], datap, pt_part);
ctx->ccm_remainder_len += pt_part;
ccm_decrypt_incomplete_block(ctx, encrypt_block);
ctx->ccm_processed_data_len += ctx->ccm_remainder_len;
@@ -425,9 +422,9 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
length = pt_part;
}
} else if (length + ctx->ccm_remainder_len < block_size) {
- /* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ /* accumulate bytes here and return */
+ memcpy((uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ datap,
length);
ctx->ccm_remainder_len += length;
ctx->ccm_copy_to = datap;
@@ -442,8 +439,8 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
- [ctx->ccm_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], datap, need);
blockp = (uint8_t *)ctx->ccm_remainder;
} else {
@@ -493,7 +490,7 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->ccm_remainder, remainder);
+ memcpy(ctx->ccm_remainder, datap, remainder);
ctx->ccm_remainder_len = remainder;
ctx->ccm_copy_to = datap;
if (ctx->ccm_processed_mac_len > 0) {
@@ -540,10 +537,9 @@ ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
macp = (uint8_t *)ctx->ccm_tmp;
while (mac_remain > 0) {
-
if (mac_remain < block_size) {
- bzero(macp, block_size);
- bcopy(pt, macp, mac_remain);
+ memset(macp, 0, block_size);
+ memcpy(macp, pt, mac_remain);
mac_remain = 0;
} else {
copy_block(pt, macp);
@@ -561,7 +557,7 @@ ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
calculate_ccm_mac((ccm_ctx_t *)ctx, ccm_mac_p, encrypt_block);
/* compare the input CCM MAC value with what we calculated */
- if (bcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) {
+ if (memcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) {
/* They don't match */
return (CRYPTO_INVALID_MAC);
} else {
@@ -655,13 +651,13 @@ ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize,
b0[0] = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1);
/* copy the nonce value into b0 */
- bcopy(nonce, &(b0[1]), nonceSize);
+ memcpy(&(b0[1]), nonce, nonceSize);
/* store the length of the payload into b0 */
- bzero(&(b0[1+nonceSize]), q);
+ memset(&(b0[1+nonceSize]), 0, q);
payloadSize = aes_ctx->ccm_data_len;
- limit = 8 < q ? 8 : q;
+ limit = MIN(8, q);
for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) {
b0[k] = (uint8_t)((payloadSize >> j) & 0xFF);
@@ -674,9 +670,9 @@ ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize,
cb[0] = 0x07 & (q-1); /* first byte */
/* copy the nonce value into the counter block */
- bcopy(nonce, &(cb[1]), nonceSize);
+ memcpy(&(cb[1]), nonce, nonceSize);
- bzero(&(cb[1+nonceSize]), q);
+ memset(&(cb[1+nonceSize]), 0, q);
/* Create the mask for the counter field based on the size of nonce */
q <<= 3;
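For reference, ccm_format_initial_blocks() follows NIST SP 800-38C: in the b0 flags byte, bit 6 is the Adata flag, bits 5..3 encode (t - 2) / 2 where t is the MAC length in bytes, and bits 2..0 encode q - 1, where q = 15 - nonceSize is the width of the length field that the MIN(8, q) loop fills big-endian. A worked check of the flags byte, assuming a 13-byte nonce, a 16-byte MAC, and associated data present:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        unsigned have_adata = 1;    /* associated data present */
        unsigned t = 16;            /* MAC length in bytes */
        unsigned q = 15 - 13;       /* 13-byte nonce -> 2-byte length field */
        uint8_t b0 = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1);

        assert(b0 == 0x79);         /* 0x40 | 0x38 | 0x01 */
        return (0);
    }

The counter block carries only q - 1 in its flags byte, and q <<= 3 converts the field width to bits for the counter mask whose construction falls outside this hunk.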
@@ -783,7 +779,7 @@ ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len,
/* The IV for CBC MAC for AES CCM mode is always zero */
ivp = (uint8_t *)ctx->ccm_tmp;
- bzero(ivp, block_size);
+ memset(ivp, 0, block_size);
xor_block(ivp, mac_buf);
@@ -801,14 +797,14 @@ ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len,
/* 1st block: it contains encoded associated data, and some data */
authp = (uint8_t *)ctx->ccm_tmp;
- bzero(authp, block_size);
- bcopy(encoded_a, authp, encoded_a_len);
+ memset(authp, 0, block_size);
+ memcpy(authp, encoded_a, encoded_a_len);
processed = block_size - encoded_a_len;
if (processed > auth_data_len) {
/* in case auth_data is very small */
processed = auth_data_len;
}
- bcopy(auth_data, authp+encoded_a_len, processed);
+ memcpy(authp+encoded_a_len, auth_data, processed);
/* xor with previous buffer */
xor_block(authp, mac_buf);
encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
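The two calls above are the entire CBC-MAC recurrence for CCM: each formatted block is XORed into the running MAC and the result re-encrypted in place, starting from the all-zero IV established just before. The same step in isolation, with 'encrypt' standing in for the mode's encrypt_block callback:

    /*
     * mac <- E_K(mac XOR block), one 16-byte block at a time; the
     * callback may encrypt in place, as the call above relies on.
     */
    static void
    cbc_mac_absorb(unsigned char mac[16], const unsigned char block[16],
        void (*encrypt)(unsigned char *out, const unsigned char *in))
    {
        for (int i = 0; i < 16; i++)
            mac[i] ^= block[i];
        encrypt(mac, mac);
    }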
@@ -824,8 +820,8 @@ ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len,
* There's not a block full of data, pad rest of
* buffer with zero
*/
- bzero(authp, block_size);
- bcopy(&(auth_data[processed]), authp, remainder);
+ memset(authp, 0, block_size);
+ memcpy(authp, &(auth_data[processed]), remainder);
datap = (uint8_t *)authp;
remainder = 0;
} else {
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
index 0188bdd395ff..db6b1c71d5cd 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -52,15 +52,14 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
if (length + ctx->ctr_remainder_len < block_size) {
/* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len,
+ memcpy((uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len,
+ datap,
length);
ctx->ctr_remainder_len += length;
ctx->ctr_copy_to = datap;
return (CRYPTO_SUCCESS);
}
- lastp = (uint8_t *)ctx->ctr_cb;
crypto_init_ptrs(out, &iov_or_mp, &offset);
do {
@@ -71,8 +70,8 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->ctr_remainder)
- [ctx->ctr_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->ctr_remainder)
+ [ctx->ctr_remainder_len], datap, need);
blockp = (uint8_t *)ctx->ctr_remainder;
} else {
@@ -114,9 +113,9 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
&out_data_1_len, &out_data_2, block_size);
/* copy block to where it belongs */
- bcopy(lastp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, lastp, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(lastp + out_data_1_len, out_data_2,
+ memcpy(out_data_2, lastp + out_data_1_len,
block_size - out_data_1_len);
}
/* update offset */
@@ -134,7 +133,7 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block. */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->ctr_remainder, remainder);
+ memcpy(ctx->ctr_remainder, datap, remainder);
ctx->ctr_remainder_len = remainder;
ctx->ctr_copy_to = datap;
goto out;
@@ -176,10 +175,11 @@ ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out,
crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
&out_data_1_len, &out_data_2, ctx->ctr_remainder_len);
- bcopy(p, out_data_1, out_data_1_len);
+ memcpy(out_data_1, p, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy((uint8_t *)p + out_data_1_len,
- out_data_2, ctx->ctr_remainder_len - out_data_1_len);
+ memcpy(out_data_2,
+ (uint8_t *)p + out_data_1_len,
+ ctx->ctr_remainder_len - out_data_1_len);
}
out->cd_offset += ctx->ctr_remainder_len;
ctx->ctr_remainder_len = 0;
@@ -214,7 +214,6 @@ ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb,
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
void *
ctr_alloc_ctx(int kmflag)
{
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ecb.c b/sys/contrib/openzfs/module/icp/algs/modes/ecb.c
index 025f5825cf04..e2d8e71c161c 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/ecb.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/ecb.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -49,8 +49,8 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length,
if (length + ctx->ecb_remainder_len < block_size) {
/* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len,
+ memcpy((uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len,
+ datap,
length);
ctx->ecb_remainder_len += length;
ctx->ecb_copy_to = datap;
@@ -68,8 +68,8 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->ecb_remainder)
- [ctx->ecb_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->ecb_remainder)
+ [ctx->ecb_remainder_len], datap, need);
blockp = (uint8_t *)ctx->ecb_remainder;
} else {
@@ -81,9 +81,9 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length,
&out_data_1_len, &out_data_2, block_size);
/* copy block to where it belongs */
- bcopy(lastp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, lastp, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(lastp + out_data_1_len, out_data_2,
+ memcpy(out_data_2, lastp + out_data_1_len,
block_size - out_data_1_len);
}
/* update offset */
@@ -101,7 +101,7 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block. */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->ecb_remainder, remainder);
+ memcpy(ctx->ecb_remainder, datap, remainder);
ctx->ecb_remainder_len = remainder;
ctx->ecb_copy_to = datap;
goto out;
@@ -114,7 +114,6 @@ out:
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
void *
ecb_alloc_ctx(int kmflag)
{
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
index 7332834cbe37..dd8db6f97460 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -23,6 +23,7 @@
*/
#include <sys/zfs_context.h>
+#include <sys/cmn_err.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
@@ -49,6 +50,11 @@
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;
+static inline int gcm_init_ctx_impl(boolean_t, gcm_ctx_t *, char *, size_t,
+ int (*)(const void *, const uint8_t *, uint8_t *),
+ void (*)(uint8_t *, uint8_t *),
+ void (*)(uint8_t *, uint8_t *));
+
#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
@@ -59,7 +65,7 @@ boolean_t gcm_avx_can_use_movbe = B_FALSE;
static boolean_t gcm_use_avx = B_FALSE;
#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx)
-extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);
static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
@@ -71,7 +77,7 @@ static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
-static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
+static int gcm_init_avx(gcm_ctx_t *, const uint8_t *, size_t, const uint8_t *,
size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */
@@ -108,8 +114,8 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
if (length + ctx->gcm_remainder_len < block_size) {
/* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
+ memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
+ datap,
length);
ctx->gcm_remainder_len += length;
if (ctx->gcm_copy_to == NULL) {
@@ -118,7 +124,6 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
return (CRYPTO_SUCCESS);
}
- lastp = (uint8_t *)ctx->gcm_cb;
crypto_init_ptrs(out, &iov_or_mp, &offset);
gops = gcm_impl_get_ops();
@@ -130,8 +135,8 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
if (need > remainder)
return (CRYPTO_DATA_LEN_RANGE);
- bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
- [ctx->gcm_remainder_len], need);
+ memcpy(&((uint8_t *)ctx->gcm_remainder)
+ [ctx->gcm_remainder_len], datap, need);
blockp = (uint8_t *)ctx->gcm_remainder;
} else {
@@ -162,10 +167,10 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
if (out_data_1_len == block_size) {
copy_block(lastp, out_data_1);
} else {
- bcopy(lastp, out_data_1, out_data_1_len);
+ memcpy(out_data_1, lastp, out_data_1_len);
if (out_data_2 != NULL) {
- bcopy(lastp + out_data_1_len,
- out_data_2,
+ memcpy(out_data_2,
+ lastp + out_data_1_len,
block_size - out_data_1_len);
}
}
@@ -187,7 +192,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
/* Incomplete last block. */
if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->gcm_remainder, remainder);
+ memcpy(ctx->gcm_remainder, datap, remainder);
ctx->gcm_remainder_len = remainder;
ctx->gcm_copy_to = datap;
goto out;
@@ -199,13 +204,13 @@ out:
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
+ (void) copy_block;
#ifdef CAN_USE_GCM_ASM
if (ctx->gcm_use_avx == B_TRUE)
return (gcm_encrypt_final_avx(ctx, out, block_size));
@@ -245,7 +250,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
(uint8_t *)ctx->gcm_tmp);
macp = (uint8_t *)ctx->gcm_remainder;
- bzero(macp + ctx->gcm_remainder_len,
+ memset(macp + ctx->gcm_remainder_len, 0,
block_size - ctx->gcm_remainder_len);
/* XOR with counter block */
@@ -309,8 +314,8 @@ gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
counterp = (uint8_t *)ctx->gcm_tmp;
/* authentication tag */
- bzero((uint8_t *)ctx->gcm_tmp, block_size);
- bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
+ memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
+ memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);
/* add ciphertext to the hash */
GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
@@ -324,7 +329,6 @@ gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
}
}
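For context, GHASH as invoked here is the polynomial hash over GF(2^128) from the GCM specification; zero-padding the final partial block, as the memset/memcpy pair above does, keeps the recurrence uniform. In the standard notation, with H = E_K(0^128) and X_i the 128-bit blocks of AAD, ciphertext, and the length block:

    Y_0 = 0^{128}, \qquad
    Y_i = (Y_{i-1} \oplus X_i) \cdot H \quad \text{in } \mathrm{GF}(2^{128})

The final Y is combined with the encrypted pre-counter block J0 to form the tag, which is what the xor_block(ctx->gcm_J0, ghash) in gcm_decrypt_final() below feeds into the memcmp() check.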
-/* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
crypto_data_t *out, size_t block_size,
@@ -332,6 +336,8 @@ gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
+ (void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
+ (void) xor_block;
size_t new_len;
uint8_t *new;
@@ -341,17 +347,23 @@ gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
*/
if (length > 0) {
new_len = ctx->gcm_pt_buf_len + length;
- new = vmem_alloc(new_len, ctx->gcm_kmflag);
+ new = vmem_alloc(new_len, KM_SLEEP);
if (new == NULL) {
vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
ctx->gcm_pt_buf = NULL;
return (CRYPTO_HOST_MEMORY);
}
- bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
- vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+
+ if (ctx->gcm_pt_buf != NULL) {
+ memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ } else {
+ ASSERT0(ctx->gcm_pt_buf_len);
+ }
+
ctx->gcm_pt_buf = new;
ctx->gcm_pt_buf_len = new_len;
- bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
+ memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
length);
ctx->gcm_processed_data_len += length;
}
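vmem_alloc() has no realloc counterpart, so the decrypt path grows its ciphertext staging buffer by hand: allocate the larger buffer, copy the old contents if there are any, free the old allocation, then append. The rewritten hunk also stops touching a NULL gcm_pt_buf on the first call, which is what the new ASSERT0() documents. A stripped-down userspace sketch of the same pattern over malloc/free:

    #include <stdlib.h>
    #include <string.h>

    /* Append 'len' bytes to (*buf, *buf_len); 0 on success, -1 on OOM. */
    static int
    grow_and_append(unsigned char **buf, size_t *buf_len,
        const unsigned char *data, size_t len)
    {
        size_t new_len = *buf_len + len;
        unsigned char *nbuf = malloc(new_len);

        if (nbuf == NULL)
            return (-1);
        if (*buf != NULL) {     /* first call may start from NULL */
            memcpy(nbuf, *buf, *buf_len);
            free(*buf);
        }
        memcpy(nbuf + *buf_len, data, len);
        *buf = nbuf;
        *buf_len = new_len;
        return (0);
    }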
@@ -390,7 +402,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
while (remainder > 0) {
/* Incomplete last block */
if (remainder < block_size) {
- bcopy(blockp, ctx->gcm_remainder, remainder);
+ memcpy(ctx->gcm_remainder, blockp, remainder);
ctx->gcm_remainder_len = remainder;
/*
* not expecting anymore ciphertext, just
@@ -431,7 +443,7 @@ out:
xor_block((uint8_t *)ctx->gcm_J0, ghash);
/* compare the input authentication tag with what we calculated */
- if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
/* They don't match */
return (CRYPTO_INVALID_MAC);
} else {
@@ -472,7 +484,7 @@ gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
}
static void
-gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
+gcm_format_initial_blocks(const uint8_t *iv, ulong_t iv_len,
gcm_ctx_t *ctx, size_t block_size,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
@@ -488,7 +500,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
ghash = (uint8_t *)ctx->gcm_ghash;
cb = (uint8_t *)ctx->gcm_cb;
if (iv_len == 12) {
- bcopy(iv, cb, 12);
+ memcpy(cb, iv, 12);
cb[12] = 0;
cb[13] = 0;
cb[14] = 0;
@@ -499,8 +511,8 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
/* GHASH the IV */
do {
if (remainder < block_size) {
- bzero(cb, block_size);
- bcopy(&(iv[processed]), cb, remainder);
+ memset(cb, 0, block_size);
+ memcpy(cb, &(iv[processed]), remainder);
datap = (uint8_t *)cb;
remainder = 0;
} else {
@@ -521,8 +533,8 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
}
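gcm_format_initial_blocks() implements the two standard ways of forming the GCM pre-counter block J0: a 96-bit IV is used directly with a 32-bit block counter of 1 appended, and any other IV length is run through GHASH in the do/while loop above. The common 12-byte case in isolation:

    #include <stdint.h>
    #include <string.h>

    /* J0 = IV || 0^31 || 1 for the common 96-bit-IV case. */
    static void
    gcm_j0_from_96bit_iv(uint8_t j0[16], const uint8_t iv[12])
    {
        memcpy(j0, iv, 12);
        j0[12] = 0;
        j0[13] = 0;
        j0[14] = 0;
        j0[15] = 1;     /* initial 32-bit counter, big-endian */
    }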
static int
-gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
- unsigned char *auth_data, size_t auth_data_len, size_t block_size,
+gcm_init(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
+ const uint8_t *auth_data, size_t auth_data_len, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
@@ -532,7 +544,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
size_t remainder, processed;
/* encrypt zero block to get subkey H */
- bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
+ memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
(uint8_t *)ctx->gcm_H);
@@ -542,8 +554,8 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
gops = gcm_impl_get_ops();
authp = (uint8_t *)ctx->gcm_tmp;
ghash = (uint8_t *)ctx->gcm_ghash;
- bzero(authp, block_size);
- bzero(ghash, block_size);
+ memset(authp, 0, block_size);
+ memset(ghash, 0, block_size);
processed = 0;
remainder = auth_data_len;
@@ -553,8 +565,15 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
* There's not a block full of data, pad rest of
* buffer with zero
*/
- bzero(authp, block_size);
- bcopy(&(auth_data[processed]), authp, remainder);
+
+ if (auth_data != NULL) {
+ memset(authp, 0, block_size);
+ memcpy(authp, &(auth_data[processed]),
+ remainder);
+ } else {
+ ASSERT0(remainder);
+ }
+
datap = (uint8_t *)authp;
remainder = 0;
} else {
@@ -574,8 +593,6 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
/*
* The following function is called at encrypt or decrypt init time
* for AES GCM mode.
- *
- * Init the GCM context struct. Handle the cycle and avx implementations here.
*/
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
@@ -583,31 +600,75 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
- int rv;
+ return (gcm_init_ctx_impl(B_FALSE, gcm_ctx, param, block_size,
+ encrypt_block, copy_block, xor_block));
+}
+
+/*
+ * The following function is called at encrypt or decrypt init time
+ * for AES GMAC mode.
+ */
+int
+gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ return (gcm_init_ctx_impl(B_TRUE, gcm_ctx, param, block_size,
+ encrypt_block, copy_block, xor_block));
+}
+
+/*
+ * Init the GCM context struct. Handle the cycle and avx implementations here.
+ * Initialization of a GMAC context differs slightly from a GCM context.
+ */
+static inline int
+gcm_init_ctx_impl(boolean_t gmac_mode, gcm_ctx_t *gcm_ctx, char *param,
+ size_t block_size, int (*encrypt_block)(const void *, const uint8_t *,
+ uint8_t *), void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
CK_AES_GCM_PARAMS *gcm_param;
+ int rv = CRYPTO_SUCCESS;
+ size_t tag_len, iv_len;
if (param != NULL) {
gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
- if ((rv = gcm_validate_args(gcm_param)) != 0) {
- return (rv);
- }
+ if (gmac_mode == B_FALSE) {
+ /* GCM mode. */
+ if ((rv = gcm_validate_args(gcm_param)) != 0) {
+ return (rv);
+ }
+ gcm_ctx->gcm_flags |= GCM_MODE;
- gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
- gcm_ctx->gcm_tag_len >>= 3;
+ size_t tbits = gcm_param->ulTagBits;
+ tag_len = CRYPTO_BITS2BYTES(tbits);
+ iv_len = gcm_param->ulIvLen;
+ } else {
+ /* GMAC mode. */
+ gcm_ctx->gcm_flags |= GMAC_MODE;
+ tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
+ iv_len = AES_GMAC_IV_LEN;
+ }
+ gcm_ctx->gcm_tag_len = tag_len;
gcm_ctx->gcm_processed_data_len = 0;
/* these values are in bits */
gcm_ctx->gcm_len_a_len_c[0]
= htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
-
- rv = CRYPTO_SUCCESS;
- gcm_ctx->gcm_flags |= GCM_MODE;
} else {
return (CRYPTO_MECHANISM_PARAM_INVALID);
}
+ const uint8_t *iv = (const uint8_t *)gcm_param->pIv;
+ const uint8_t *aad = (const uint8_t *)gcm_param->pAAD;
+ size_t aad_len = gcm_param->ulAADLen;
+
#ifdef CAN_USE_GCM_ASM
+ boolean_t needs_bswap =
+ ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap;
+
if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
} else {
@@ -616,96 +677,41 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
* non-avx contexts alternately.
*/
gcm_ctx->gcm_use_avx = gcm_toggle_avx();
- /*
- * We don't handle byte swapped key schedules in the avx
- * code path.
- */
- aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
- if (ks->ops->needs_byteswap == B_TRUE) {
+
+ /* The avx impl. doesn't handle byte swapped key schedules. */
+ if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) {
gcm_ctx->gcm_use_avx = B_FALSE;
}
- /* Use the MOVBE and the BSWAP variants alternately. */
- if (gcm_ctx->gcm_use_avx == B_TRUE &&
+ /*
+ * If this is a GCM context, use the MOVBE and the BSWAP
+ * variants alternately. GMAC context code paths do not
+ * use the MOVBE instruction.
+ */
+ if (gcm_ctx->gcm_use_avx == B_TRUE && gmac_mode == B_FALSE &&
zfs_movbe_available() == B_TRUE) {
(void) atomic_toggle_boolean_nv(
(volatile boolean_t *)&gcm_avx_can_use_movbe);
}
}
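The "cycle" implementation setting exercises both code paths by atomically flipping shared booleans per context, via gcm_toggle_avx() and the atomic_toggle_boolean_nv() call above. A rough userspace equivalent of that round-robin selection, written with C11 atomics and a counter instead of a boolean toggle:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_uint cycle_counter;

    /* Alternate strictly between two choices across all callers. */
    static bool
    pick_alternate(void)
    {
        /* Odd ticks pick the alternate path, even ticks the default. */
        return ((atomic_fetch_add(&cycle_counter, 1) & 1) != 0);
    }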
- /* Allocate Htab memory as needed. */
- if (gcm_ctx->gcm_use_avx == B_TRUE) {
- size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
-
- if (htab_len == 0) {
- return (CRYPTO_MECHANISM_PARAM_INVALID);
- }
- gcm_ctx->gcm_htab_len = htab_len;
- gcm_ctx->gcm_Htable =
- (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
-
- if (gcm_ctx->gcm_Htable == NULL) {
- return (CRYPTO_HOST_MEMORY);
- }
- }
- /* Avx and non avx context initialization differs from here on. */
- if (gcm_ctx->gcm_use_avx == B_FALSE) {
-#endif /* ifdef CAN_USE_GCM_ASM */
- if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
- gcm_param->pAAD, gcm_param->ulAADLen, block_size,
- encrypt_block, copy_block, xor_block) != 0) {
- rv = CRYPTO_MECHANISM_PARAM_INVALID;
- }
-#ifdef CAN_USE_GCM_ASM
- } else {
- if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
- gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
- rv = CRYPTO_MECHANISM_PARAM_INVALID;
- }
- }
-#endif /* ifdef CAN_USE_GCM_ASM */
-
- return (rv);
-}
-
-int
-gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
- int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *))
-{
- int rv;
- CK_AES_GMAC_PARAMS *gmac_param;
-
- if (param != NULL) {
- gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
-
- gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
- gcm_ctx->gcm_processed_data_len = 0;
-
- /* these values are in bits */
- gcm_ctx->gcm_len_a_len_c[0]
- = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
-
- rv = CRYPTO_SUCCESS;
- gcm_ctx->gcm_flags |= GMAC_MODE;
- } else {
- return (CRYPTO_MECHANISM_PARAM_INVALID);
- }
-
-#ifdef CAN_USE_GCM_ASM
/*
- * Handle the "cycle" implementation by creating avx and non avx
- * contexts alternately.
+ * We don't handle byte swapped key schedules in the avx code path,
+ * but they could still be created by the aes generic implementation.
+ * Make sure not to use them, since we'd corrupt data if we did.
*/
- if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
- gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
- } else {
- gcm_ctx->gcm_use_avx = gcm_toggle_avx();
- }
- /* We don't handle byte swapped key schedules in the avx code path. */
- aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
- if (ks->ops->needs_byteswap == B_TRUE) {
+ if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) {
gcm_ctx->gcm_use_avx = B_FALSE;
+
+ cmn_err_once(CE_WARN,
+ "ICP: Can't use the aes generic or cycle implementations "
+ "in combination with the gcm avx implementation!");
+ cmn_err_once(CE_WARN,
+ "ICP: Falling back to a compatible implementation, "
+ "aes-gcm performance will likely be degraded.");
+ cmn_err_once(CE_WARN,
+ "ICP: Choose at least the x86_64 aes implementation to "
+ "restore performance.");
}
+
/* Allocate Htab memory as needed. */
if (gcm_ctx->gcm_use_avx == B_TRUE) {
size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
@@ -715,25 +721,23 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
}
gcm_ctx->gcm_htab_len = htab_len;
gcm_ctx->gcm_Htable =
- (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
+ kmem_alloc(htab_len, KM_SLEEP);
if (gcm_ctx->gcm_Htable == NULL) {
return (CRYPTO_HOST_MEMORY);
}
}
-
/* Avx and non avx context initialization differs from here on. */
if (gcm_ctx->gcm_use_avx == B_FALSE) {
-#endif /* ifdef CAN_USE_GCM_ASM */
- if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
- gmac_param->pAAD, gmac_param->ulAADLen, block_size,
- encrypt_block, copy_block, xor_block) != 0) {
+#endif /* ifdef CAN_USE_GCM_ASM */
+ if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size,
+ encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) {
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
#ifdef CAN_USE_GCM_ASM
} else {
- if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
- gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
+ if (gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len,
+ block_size) != CRYPTO_SUCCESS) {
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
}
@@ -766,19 +770,13 @@ gmac_alloc_ctx(int kmflag)
return (gcm_ctx);
}
-void
-gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
-{
- ctx->gcm_kmflag = kmflag;
-}
-
/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
.name = "fastest"
};
/* All compiled in implementations */
-const gcm_impl_ops_t *gcm_all_impl[] = {
+static const gcm_impl_ops_t *gcm_all_impl[] = {
&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
&gcm_pclmulqdq_impl,
@@ -798,7 +796,7 @@ static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
* fallback to the fastest generic implementation.
*/
const gcm_impl_ops_t *
-gcm_impl_get_ops()
+gcm_impl_get_ops(void)
{
if (!kfpu_allowed())
return (&gcm_generic_impl);
@@ -899,7 +897,7 @@ gcm_impl_init(void)
}
static const struct {
- char *name;
+ const char *name;
uint32_t sel;
} gcm_impl_opts[] = {
{ "cycle", IMPL_CYCLE },
@@ -1013,13 +1011,15 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
}
#endif
fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
- cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
+ gcm_impl_opts[i].name);
}
/* list all supported implementations */
for (i = 0; i < gcm_supp_impl_cnt; i++) {
fmt = (i == impl) ? "[%s] " : "%s ";
- cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
+ cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
+ gcm_supp_impl[i]->name);
}
return (cnt);
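kmem_scnprintf() follows the Linux scnprintf() contract: it returns the number of bytes actually written rather than the would-be length, so the cnt cursor can never run past the buffer the way the unbounded sprintf() calls could. A userspace sketch of the same bounded-append loop built on standard snprintf(), which returns the would-be length and therefore needs explicit clamping:

    #include <stdio.h>

    #define OUT_SIZE 4096   /* stands in for PAGE_SIZE */

    /* Append each name to 'buf' without ever writing past OUT_SIZE. */
    static size_t
    append_names(char *buf, const char *names[], size_t n)
    {
        size_t cnt = 0;

        for (size_t i = 0; i < n && cnt + 1 < OUT_SIZE; i++) {
            int r = snprintf(buf + cnt, OUT_SIZE - cnt, "%s ", names[i]);

            if (r < 0)
                break;
            /* snprintf reports the would-be length: clamp it. */
            cnt += ((size_t)r < OUT_SIZE - cnt) ?
                (size_t)r : OUT_SIZE - cnt - 1;
        }
        return (cnt);
    }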
@@ -1045,9 +1045,6 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#define GCM_AVX_MAX_CHUNK_SIZE \
(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
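Both chunk-size expressions use the usual integer idiom for rounding down to a multiple of m: divide by m, then multiply back. A quick check, assuming for illustration a hypothetical 48-byte minimum decrypt unit:

    #include <assert.h>

    #define ROUND_DOWN(x, m)    (((x) / (m)) * (m))

    int
    main(void)
    {
        /* 128 KiB rounded down to a multiple of 48. */
        assert(ROUND_DOWN(128 * 1024, 48) == 131040);
        return (0);
    }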
-/* Get the chunk size module parameter. */
-#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
-
/* Clear the FPU registers since they hold sensitive internal state. */
#define clear_fpu_regs() clear_fpu_regs_avx()
#define GHASH_AVX(ctx, in, len) \
@@ -1056,6 +1053,9 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
+/* Get the chunk size module parameter. */
+#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
+
/*
* Module parameter: number of bytes to process at once while owning the FPU.
* Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
@@ -1064,19 +1064,19 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
static uint32_t gcm_avx_chunk_size =
((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
-extern void clear_fpu_regs_avx(void);
-extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
-extern void aes_encrypt_intel(const uint32_t rk[], int nr,
+extern void ASMABI clear_fpu_regs_avx(void);
+extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
+extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
const uint32_t pt[4], uint32_t ct[4]);
-extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
-extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
+extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
+extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
const uint8_t *in, size_t len);
-extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
+extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
const void *, uint64_t *, uint64_t *);
-extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
+extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
const void *, uint64_t *, uint64_t *);
static inline boolean_t
@@ -1118,24 +1118,6 @@ gcm_simd_get_htab_size(boolean_t simd_mode)
}
}
-/*
- * Clear sensitive data in the context.
- *
- * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
- * ctx->gcm_Htable contain the hash sub key which protects authentication.
- *
- * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
- * a known plaintext attack, they consists of the IV and the first and last
- * counter respectively. If they should be cleared is debatable.
- */
-static inline void
-gcm_clear_ctx(gcm_ctx_t *ctx)
-{
- bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
- bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
- bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
- bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
-}
/* Increment the GCM counter block by n. */
static inline void
@@ -1171,6 +1153,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
int rv = CRYPTO_SUCCESS;
ASSERT(block_size == GCM_BLOCK_LEN);
+ ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
+ B_FALSE);
/*
* If the last call left an incomplete block, try to fill
* it first.
@@ -1179,8 +1163,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
need = block_size - ctx->gcm_remainder_len;
if (length < need) {
/* Accumulate bytes here and return. */
- bcopy(datap, (uint8_t *)ctx->gcm_remainder +
- ctx->gcm_remainder_len, length);
+ memcpy((uint8_t *)ctx->gcm_remainder +
+ ctx->gcm_remainder_len, datap, length);
ctx->gcm_remainder_len += length;
if (ctx->gcm_copy_to == NULL) {
@@ -1189,8 +1173,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
return (CRYPTO_SUCCESS);
} else {
/* Complete incomplete block. */
- bcopy(datap, (uint8_t *)ctx->gcm_remainder +
- ctx->gcm_remainder_len, need);
+ memcpy((uint8_t *)ctx->gcm_remainder +
+ ctx->gcm_remainder_len, datap, need);
ctx->gcm_copy_to = NULL;
}
@@ -1198,7 +1182,7 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
/* Allocate a buffer to encrypt to if there is enough input. */
if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
- ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
+ ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
if (ct_buf == NULL) {
return (CRYPTO_HOST_MEMORY);
}
@@ -1268,7 +1252,7 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
while (bleft > 0) {
if (bleft < block_size) {
- bcopy(datap, ctx->gcm_remainder, bleft);
+ memcpy(ctx->gcm_remainder, datap, bleft);
ctx->gcm_remainder_len = bleft;
ctx->gcm_copy_to = datap;
goto out;
@@ -1315,6 +1299,8 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
int rv;
ASSERT(block_size == GCM_BLOCK_LEN);
+ ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
+ B_FALSE);
if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
return (CRYPTO_DATA_LEN_RANGE);
@@ -1327,7 +1313,7 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
- bzero(remainder + rem_len, block_size - rem_len);
+ memset(remainder + rem_len, 0, block_size - rem_len);
for (int i = 0; i < rem_len; i++) {
remainder[i] ^= tmp[i];
}
@@ -1358,8 +1344,6 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
return (rv);
out->cd_offset += ctx->gcm_tag_len;
- /* Clear sensitive data in the context before returning. */
- gcm_clear_ctx(ctx);
return (CRYPTO_SUCCESS);
}
@@ -1372,6 +1356,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
ASSERT3U(block_size, ==, 16);
+ ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
+ B_FALSE);
size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
@@ -1423,8 +1409,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
if (bleft < block_size) {
uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
- bzero(lastb, block_size);
- bcopy(datap, lastb, bleft);
+ memset(lastb, 0, block_size);
+ memcpy(lastb, datap, bleft);
/* The GCM processing. */
GHASH_AVX(ctx, lastb, block_size);
aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
@@ -1460,7 +1446,7 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
kfpu_end();
/* Compare the input authentication tag with what we calculated. */
- if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
/* They don't match. */
return (CRYPTO_INVALID_MAC);
}
@@ -1469,7 +1455,6 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
return (rv);
}
out->cd_offset += pt_len;
- gcm_clear_ctx(ctx);
return (CRYPTO_SUCCESS);
}
@@ -1478,22 +1463,24 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
* initial counter block.
*/
static int
-gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
- unsigned char *auth_data, size_t auth_data_len, size_t block_size)
+gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
+ const uint8_t *auth_data, size_t auth_data_len, size_t block_size)
{
uint8_t *cb = (uint8_t *)ctx->gcm_cb;
uint64_t *H = ctx->gcm_H;
const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
- uint8_t *datap = auth_data;
+ const uint8_t *datap = auth_data;
size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
size_t bleft;
ASSERT(block_size == GCM_BLOCK_LEN);
+ ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
+ B_FALSE);
/* Init H (encrypt zero block) and create the initial counter block. */
- bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
- bzero(H, sizeof (ctx->gcm_H));
+ memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
+ memset(H, 0, sizeof (ctx->gcm_H));
kfpu_begin();
aes_encrypt_intel(keysched, aes_rounds,
(const uint32_t *)H, (uint32_t *)H);
@@ -1501,13 +1488,13 @@ gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
gcm_init_htab_avx(ctx->gcm_Htable, H);
if (iv_len == 12) {
- bcopy(iv, cb, 12);
+ memcpy(cb, iv, 12);
cb[12] = 0;
cb[13] = 0;
cb[14] = 0;
cb[15] = 1;
/* We need the ICB later. */
- bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
+ memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
} else {
/*
* Most consumers use 12 byte IVs, so it's OK to use the
@@ -1545,8 +1532,8 @@ gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
/* Zero pad and hash incomplete last block. */
uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
- bzero(authp, block_size);
- bcopy(datap, authp, incomp);
+ memset(authp, 0, block_size);
+ memcpy(authp, datap, incomp);
GHASH_AVX(ctx, authp, block_size);
}
}
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c
index 16b57998a92f..84e26d09cdcf 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c
index 05920115ce86..737d2e47ecb7 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -26,9 +26,10 @@
#include <sys/types.h>
#include <sys/simd.h>
+#include <sys/asm_linkage.h>
/* These functions are used to execute pclmulqdq based assembly methods */
-extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
+extern void ASMABI gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
#include <modes/gcm_impl.h>
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
index 59743c7d6829..6f6649b3b58b 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/modes.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -106,8 +106,10 @@ crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
} else {
/* one block spans two iovecs */
*out_data_1_len = iov_len - offset;
- if (vec_idx == zfs_uio_iovcnt(uio))
+ if (vec_idx == zfs_uio_iovcnt(uio)) {
+ *out_data_2 = NULL;
return;
+ }
vec_idx++;
zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
*out_data_2 = (uint8_t *)iov_base;
@@ -148,18 +150,47 @@ crypto_free_mode_ctx(void *ctx)
case GCM_MODE:
case GMAC_MODE:
- if (((gcm_ctx_t *)ctx)->gcm_pt_buf != NULL)
- vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf,
- ((gcm_ctx_t *)ctx)->gcm_pt_buf_len);
-
-#ifdef CAN_USE_GCM_ASM
- if (((gcm_ctx_t *)ctx)->gcm_Htable != NULL) {
- gcm_ctx_t *gcm_ctx = (gcm_ctx_t *)ctx;
- bzero(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len);
- kmem_free(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len);
- }
-#endif
-
+ gcm_clear_ctx((gcm_ctx_t *)ctx);
kmem_free(ctx, sizeof (gcm_ctx_t));
}
}
+
+static void *
+explicit_memset(void *s, int c, size_t n)
+{
+ memset(s, c, n);
+ __asm__ __volatile__("" :: "r"(s) : "memory");
+ return (s);
+}
+
+/*
+ * Clear sensitive data in the context and free allocated memory.
+ *
+ * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
+ * ctx->gcm_Htable contain the hash sub key which protects authentication.
+ * ctx->gcm_pt_buf contains the plaintext result of decryption.
+ *
+ * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
+ * a known plaintext attack, they consist of the IV and the first and last
+ * counter respectively. Whether they should be cleared is debatable.
+ */
+void
+gcm_clear_ctx(gcm_ctx_t *ctx)
+{
+ explicit_memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder));
+ explicit_memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
+#if defined(CAN_USE_GCM_ASM)
+ if (ctx->gcm_use_avx == B_TRUE) {
+ ASSERT3P(ctx->gcm_Htable, !=, NULL);
+ memset(ctx->gcm_Htable, 0, ctx->gcm_htab_len);
+ kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
+ }
+#endif
+ if (ctx->gcm_pt_buf != NULL) {
+ memset(ctx->gcm_pt_buf, 0, ctx->gcm_pt_buf_len);
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ }
+ /* Optional: clearing J0 and tmp is debatable, see the comment above. */
+ explicit_memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
+ explicit_memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp));
+}
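The explicit_memset() introduced above exists because a plain memset() of memory that is never read again is a dead store the compiler may legally delete; the empty asm statement with a "memory" clobber makes the pointer look observed, so the clearing survives optimization. The same idiom in isolation (C11 Annex K memset_s() or C23 memset_explicit() serve the same purpose where available):

    #include <string.h>

    /* Zero 'n' bytes and keep the store from being optimized away. */
    static void
    secure_bzero(void *p, size_t n)
    {
        memset(p, 0, n);
        /* Pretend 'p' escapes; the compiler must keep the memset. */
        __asm__ __volatile__("" : : "r"(p) : "memory");
    }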
diff --git a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c b/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c
deleted file mode 100644
index da34222c8fc3..000000000000
--- a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c
+++ /dev/null
@@ -1,835 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * The basic framework for this code came from the reference
- * implementation for MD5. That implementation is Copyright (C)
- * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * NOTE: Cleaned-up and optimized, version of SHA1, based on the FIPS 180-1
- * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
- * Not as fast as one would like -- further optimizations are encouraged
- * and appreciated.
- */
-
-#include <sys/zfs_context.h>
-#include <sha1/sha1.h>
-#include <sha1/sha1_consts.h>
-
-#ifdef _LITTLE_ENDIAN
-#include <sys/byteorder.h>
-#define HAVE_HTONL
-#endif
-
-#define _RESTRICT_KYWD
-
-static void Encode(uint8_t *, const uint32_t *, size_t);
-
-#if defined(__sparc)
-
-#define SHA1_TRANSFORM(ctx, in) \
- SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
- (ctx)->state[3], (ctx)->state[4], (ctx), (in))
-
-static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
- SHA1_CTX *, const uint8_t *);
-
-#elif defined(__amd64)
-
-#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
-#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
- (in), (num))
-
-void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
-
-#else
-
-#define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
-
-static void SHA1Transform(SHA1_CTX *, const uint8_t *);
-
-#endif
-
-
-static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
-
-/*
- * F, G, and H are the basic SHA1 functions.
- */
-#define F(b, c, d) (((b) & (c)) | ((~b) & (d)))
-#define G(b, c, d) ((b) ^ (c) ^ (d))
-#define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d)))
-
-/*
- * SHA1Init()
- *
- * purpose: initializes the sha1 context and begins and sha1 digest operation
- * input: SHA1_CTX * : the context to initializes.
- * output: void
- */
-
-void
-SHA1Init(SHA1_CTX *ctx)
-{
- ctx->count[0] = ctx->count[1] = 0;
-
- /*
- * load magic initialization constants. Tell lint
- * that these constants are unsigned by using U.
- */
-
- ctx->state[0] = 0x67452301U;
- ctx->state[1] = 0xefcdab89U;
- ctx->state[2] = 0x98badcfeU;
- ctx->state[3] = 0x10325476U;
- ctx->state[4] = 0xc3d2e1f0U;
-}
-
-void
-SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
-{
- uint32_t i, buf_index, buf_len;
- const uint8_t *input = inptr;
-#if defined(__amd64)
- uint32_t block_count;
-#endif /* __amd64 */
-
- /* check for noop */
- if (input_len == 0)
- return;
-
- /* compute number of bytes mod 64 */
- buf_index = (ctx->count[1] >> 3) & 0x3F;
-
- /* update number of bits */
- if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
- ctx->count[0]++;
-
- ctx->count[0] += (input_len >> 29);
-
- buf_len = 64 - buf_index;
-
- /* transform as many times as possible */
- i = 0;
- if (input_len >= buf_len) {
-
- /*
- * general optimization:
- *
- * only do initial bcopy() and SHA1Transform() if
- * buf_index != 0. if buf_index == 0, we're just
- * wasting our time doing the bcopy() since there
- * wasn't any data left over from a previous call to
- * SHA1Update().
- */
-
- if (buf_index) {
- bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
- SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
- i = buf_len;
- }
-
-#if !defined(__amd64)
- for (; i + 63 < input_len; i += 64)
- SHA1_TRANSFORM(ctx, &input[i]);
-#else
- block_count = (input_len - i) >> 6;
- if (block_count > 0) {
- SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
- i += block_count << 6;
- }
-#endif /* !__amd64 */
-
- /*
- * general optimization:
- *
- * if i and input_len are the same, return now instead
- * of calling bcopy(), since the bcopy() in this case
- * will be an expensive nop.
- */
-
- if (input_len == i)
- return;
-
- buf_index = 0;
- }
-
- /* buffer remaining input */
- bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
-}
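One detail of the deleted SHA1Update() worth recording: the 64-bit message bit count lives in two 32-bit words, and overflow of the low word is detected by the wrapped sum coming out smaller than the addend. The same idiom in isolation:

    #include <assert.h>
    #include <stdint.h>

    /* Add 'nbytes' worth of bits to a 64-bit count split across two words. */
    static void
    add_bits(uint32_t cnt[2], uint32_t nbytes)
    {
        uint32_t bits = nbytes << 3;    /* low 32 bits of nbytes * 8 */

        cnt[1] += bits;
        if (cnt[1] < bits)              /* wrapped: carry into the high word */
            cnt[0]++;
        cnt[0] += nbytes >> 29;         /* high bits of nbytes * 8 */
    }

    int
    main(void)
    {
        uint32_t c[2] = { 0, 0xFFFFFFF8 };

        add_bits(c, 2);                 /* 16 more bits wraps the low word */
        assert(c[0] == 1 && c[1] == 8);
        return (0);
    }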
-
-/*
- * SHA1Final()
- *
- * purpose: ends an sha1 digest operation, finalizing the message digest and
- * zeroing the context.
- * input: uchar_t * : A buffer to store the digest.
- * : The function actually uses void* because many
- * : callers pass things other than uchar_t here.
- * SHA1_CTX * : the context to finalize, save, and zero
- * output: void
- */
-
-void
-SHA1Final(void *digest, SHA1_CTX *ctx)
-{
- uint8_t bitcount_be[sizeof (ctx->count)];
- uint32_t index = (ctx->count[1] >> 3) & 0x3f;
-
- /* store bit count, big endian */
- Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
-
- /* pad out to 56 mod 64 */
- SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
-
- /* append length (before padding) */
- SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
-
- /* store state in digest */
- Encode(digest, ctx->state, sizeof (ctx->state));
-
- /* zeroize sensitive information */
- bzero(ctx, sizeof (*ctx));
-}
-
-
-#if !defined(__amd64)
-
-typedef uint32_t sha1word;
-
-/*
- * sparc optimization:
- *
- * on the sparc, we can load big endian 32-bit data easily. note that
- * special care must be taken to ensure the address is 32-bit aligned.
- * in the interest of speed, we don't check to make sure, since
- * careful programming can guarantee this for us.
- */
-
-#if defined(_ZFS_BIG_ENDIAN)
-#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
-
-#elif defined(HAVE_HTONL)
-#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
-
-#else
-#define LOAD_BIG_32(addr) BE_32(*((uint32_t *)(addr)))
-#endif /* _BIG_ENDIAN */
-
-/*
- * SHA1Transform()
- */
-#if defined(W_ARRAY)
-#define W(n) w[n]
-#else /* !defined(W_ARRAY) */
-#define W(n) w_ ## n
-#endif /* !defined(W_ARRAY) */
-
-/*
- * ROTATE_LEFT rotates x left n bits.
- */
-
-#if defined(__GNUC__) && defined(_LP64)
-static __inline__ uint64_t
-ROTATE_LEFT(uint64_t value, uint32_t n)
-{
- uint32_t t32;
-
- t32 = (uint32_t)value;
- return ((t32 << n) | (t32 >> (32 - n)));
-}
-
-#else
-
-#define ROTATE_LEFT(x, n) \
- (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
-
-#endif
-
-#if defined(__sparc)
-
-
-/*
- * sparc register window optimization:
- *
- * `a', `b', `c', `d', and `e' are passed into SHA1Transform
- * explicitly since it increases the number of registers available to
- * the compiler. under this scheme, these variables can be held in
- * %i0 - %i4, which leaves more local and out registers available.
- *
- * purpose: sha1 transformation -- updates the digest based on `block'
- * input: uint32_t : bytes 1 - 4 of the digest
- * uint32_t : bytes 5 - 8 of the digest
- * uint32_t : bytes 9 - 12 of the digest
- * uint32_t : bytes 12 - 16 of the digest
- * uint32_t : bytes 16 - 20 of the digest
- * SHA1_CTX * : the context to update
- * uint8_t [64]: the block to use to update the digest
- * output: void
- */
-
-
-void
-SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
- SHA1_CTX *ctx, const uint8_t blk[64])
-{
- /*
- * sparc optimization:
- *
- * while it is somewhat counter-intuitive, on sparc, it is
- * more efficient to place all the constants used in this
- * function in an array and load the values out of the array
- * than to manually load the constants. this is because
- * setting a register to a 32-bit value takes two ops in most
- * cases: a `sethi' and an `or', but loading a 32-bit value
- * from memory only takes one `ld' (or `lduw' on v9). while
- * this increases memory usage, the compiler can find enough
- * other things to do while waiting to keep the pipeline does
- * not stall. additionally, it is likely that many of these
- * constants are cached so that later accesses do not even go
- * out to the bus.
- *
- * this array is declared `static' to keep the compiler from
- * having to bcopy() this array onto the stack frame of
- * SHA1Transform() each time it is called -- which is
- * unacceptably expensive.
- *
- * the `const' is to ensure that callers are good citizens and
- * do not try to munge the array. since these routines are
- * going to be called from inside multithreaded kernelland,
- * this is a good safety check. -- `sha1_consts' will end up in
- * .rodata.
- *
- * unfortunately, loading from an array in this manner hurts
- * performance under Intel. So, there is a macro,
- * SHA1_CONST(), used in SHA1Transform(), that either expands to
- * a reference to this array, or to the actual constant,
- * depending on what platform this code is compiled for.
- */
-
-
- static const uint32_t sha1_consts[] = {
- SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
- };
-
-
- /*
- * general optimization:
- *
- * use individual integers instead of using an array. this is a
- * win, although the amount it wins by seems to vary quite a bit.
- */
-
-
- uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
- uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
-
-
- /*
- * sparc optimization:
- *
- * if `block' is already aligned on a 4-byte boundary, use
- * LOAD_BIG_32() directly. otherwise, bcopy() into a
- * buffer that *is* aligned on a 4-byte boundary and then do
- * the LOAD_BIG_32() on that buffer. benchmarks have shown
- * that using the bcopy() is better than loading the bytes
- * individually and doing the endian-swap by hand.
- *
- * even though it's quite tempting to assign to do:
- *
- * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
- *
- * and only have one set of LOAD_BIG_32()'s, the compiler
- * *does not* like that, so please resist the urge.
- */
-
-
- if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
- bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
- w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
- w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
- w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
- w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
- w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
- w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
- w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
- w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
- w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
- w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
- w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
- w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
- w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
- w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
- w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
- w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
- } else {
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_15 = LOAD_BIG_32(blk + 60);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_14 = LOAD_BIG_32(blk + 56);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_13 = LOAD_BIG_32(blk + 52);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_12 = LOAD_BIG_32(blk + 48);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_11 = LOAD_BIG_32(blk + 44);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_10 = LOAD_BIG_32(blk + 40);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_9 = LOAD_BIG_32(blk + 36);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_8 = LOAD_BIG_32(blk + 32);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_7 = LOAD_BIG_32(blk + 28);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_6 = LOAD_BIG_32(blk + 24);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_5 = LOAD_BIG_32(blk + 20);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_4 = LOAD_BIG_32(blk + 16);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_3 = LOAD_BIG_32(blk + 12);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_2 = LOAD_BIG_32(blk + 8);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_1 = LOAD_BIG_32(blk + 4);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_0 = LOAD_BIG_32(blk + 0);
- }
-#else /* !defined(__sparc) */
-
-void /* CSTYLED */
-SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
-{
- /* CSTYLED */
- sha1word a = ctx->state[0];
- sha1word b = ctx->state[1];
- sha1word c = ctx->state[2];
- sha1word d = ctx->state[3];
- sha1word e = ctx->state[4];
-
-#if defined(W_ARRAY)
- sha1word w[16];
-#else /* !defined(W_ARRAY) */
- sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
- sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
-#endif /* !defined(W_ARRAY) */
-
- W(0) = LOAD_BIG_32((void *)(blk + 0));
- W(1) = LOAD_BIG_32((void *)(blk + 4));
- W(2) = LOAD_BIG_32((void *)(blk + 8));
- W(3) = LOAD_BIG_32((void *)(blk + 12));
- W(4) = LOAD_BIG_32((void *)(blk + 16));
- W(5) = LOAD_BIG_32((void *)(blk + 20));
- W(6) = LOAD_BIG_32((void *)(blk + 24));
- W(7) = LOAD_BIG_32((void *)(blk + 28));
- W(8) = LOAD_BIG_32((void *)(blk + 32));
- W(9) = LOAD_BIG_32((void *)(blk + 36));
- W(10) = LOAD_BIG_32((void *)(blk + 40));
- W(11) = LOAD_BIG_32((void *)(blk + 44));
- W(12) = LOAD_BIG_32((void *)(blk + 48));
- W(13) = LOAD_BIG_32((void *)(blk + 52));
- W(14) = LOAD_BIG_32((void *)(blk + 56));
- W(15) = LOAD_BIG_32((void *)(blk + 60));
-
-#endif /* !defined(__sparc) */
-
- /*
- * general optimization:
- *
- * even though this approach is described in the standard as
- * being slower algorithmically, it is 30-40% faster than the
- * "faster" version under SPARC, because this version has more
- * of the constraints specified at compile-time and uses fewer
- * variables (and therefore has better register utilization)
- * than its "speedier" brother. (i've tried both, trust me)
- *
- * for either method given in the spec, there is an "assignment"
- * phase where the following takes place:
- *
- * tmp = (main_computation);
- * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
- *
- * we can make the algorithm go faster by not doing this work,
- * but just pretending that `d' is now `e', etc. this works
- * really well and obviates the need for a temporary variable.
- * however, we still explicitly perform the rotate action,
- * since it is cheaper on SPARC to do it once than to have to
- * do it over and over again.
- */
-
- /* round 1 */
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
- b = ROTATE_LEFT(b, 30);
-
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
- a = ROTATE_LEFT(a, 30);
-
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
- e = ROTATE_LEFT(e, 30);
-
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
- d = ROTATE_LEFT(d, 30);
-
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
- c = ROTATE_LEFT(c, 30);
-
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
- b = ROTATE_LEFT(b, 30);
-
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
- a = ROTATE_LEFT(a, 30);
-
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
- e = ROTATE_LEFT(e, 30);
-
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
- d = ROTATE_LEFT(d, 30);
-
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
- c = ROTATE_LEFT(c, 30);
-
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
- b = ROTATE_LEFT(b, 30);
-
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
- a = ROTATE_LEFT(a, 30);
-
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
- e = ROTATE_LEFT(e, 30);
-
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
- d = ROTATE_LEFT(d, 30);
-
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
- c = ROTATE_LEFT(c, 30);
-
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
- b = ROTATE_LEFT(b, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
- a = ROTATE_LEFT(a, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 17 */
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
- e = ROTATE_LEFT(e, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
- d = ROTATE_LEFT(d, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
- c = ROTATE_LEFT(c, 30);
-
- /* round 2 */
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- /* round 3 */
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 42 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- /* round 4 */
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
- c = ROTATE_LEFT(c, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 67 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
- c = ROTATE_LEFT(c, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
- c = ROTATE_LEFT(c, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */
-
- ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
- SHA1_CONST(3);
- ctx->state[1] += b;
- ctx->state[2] += ROTATE_LEFT(c, 30);
- ctx->state[3] += d;
- ctx->state[4] += e;
-
- /* zeroize sensitive information */
- W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
- W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
-}
-#endif /* !__amd64 */
-
-
-/*
- * Encode()
- *
- * purpose: to convert a list of numbers from host byte order to big endian
- * input: uint8_t * : place to store the converted big endian numbers
- * uint32_t * : place to get numbers to convert from
- * size_t : the length of the input in bytes
- * output: void
- */
-
-static void
-Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
- size_t len)
-{
- size_t i, j;
-
-#if defined(__sparc)
- if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
- for (i = 0, j = 0; j < len; i++, j += 4) {
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- *((uint32_t *)(output + j)) = input[i];
- }
- } else {
-#endif /* little endian -- will work on big endian, but slowly */
-
- for (i = 0, j = 0; j < len; i++, j += 4) {
- output[j] = (input[i] >> 24) & 0xff;
- output[j + 1] = (input[i] >> 16) & 0xff;
- output[j + 2] = (input[i] >> 8) & 0xff;
- output[j + 3] = input[i] & 0xff;
- }
-#if defined(__sparc)
- }
-#endif
-}
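The W(i) macro above resolves to an array slot when W_ARRAY is defined and
to one of the discrete w_0..w_15 variables otherwise; either way, the
unrolled rounds realize SHA-1's 80-entry message schedule in a rotating
16-word window. A minimal loop-form sketch of the same expansion, assuming
only the ROTATE_LEFT macro from this file:

	/* Illustrative only; the unrolled rounds above do this in place. */
	for (int t = 16; t < 80; t++) {
		w[t & 15] = ROTATE_LEFT(w[(t - 3) & 15] ^ w[(t - 8) & 15] ^
		    w[(t - 14) & 15] ^ w[(t - 16) & 15], 1);
	}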
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c
deleted file mode 100644
index 75f6a3c1af4b..000000000000
--- a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c
+++ /dev/null
@@ -1,956 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright 2013 Saso Kiselkov. All rights reserved.
- */
-
-/*
- * The basic framework for this code came from the reference
- * implementation for MD5. That implementation is Copyright (C)
- * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
- * standard, available at
- * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
- * Not as fast as one would like -- further optimizations are encouraged
- * and appreciated.
- */
-
-#include <sys/zfs_context.h>
-#define _SHA2_IMPL
-#include <sys/sha2.h>
-#include <sha2/sha2_consts.h>
-
-#define _RESTRICT_KYWD
-
-#ifdef _ZFS_LITTLE_ENDIAN
-#include <sys/byteorder.h>
-#define HAVE_HTONL
-#endif
-#include <sys/isa_defs.h> /* for _ILP32 */
-
-static void Encode(uint8_t *, uint32_t *, size_t);
-static void Encode64(uint8_t *, uint64_t *, size_t);
-
-/* userspace only supports the generic version */
-#if defined(__amd64) && defined(_KERNEL)
-#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
-#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
-
-void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
-void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
-
-#else
-static void SHA256Transform(SHA2_CTX *, const uint8_t *);
-static void SHA512Transform(SHA2_CTX *, const uint8_t *);
-#endif /* __amd64 && _KERNEL */
-
-static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
-
-/*
- * The low-level checksum routines use a lot of stack space. On systems where
- * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
- * barriers to reduce stack frame size. This can reduce the SHA512Transform()
- * stack frame usage from 3k to <1k on ARM32, for example.
- */
-#if defined(_ILP32) || defined(__powerpc) /* small stack */
-#define SMALL_STACK_MEMORY_BARRIER asm volatile("": : :"memory");
-#else
-#define SMALL_STACK_MEMORY_BARRIER
-#endif
-
-/* Ch and Maj are the basic SHA2 functions. */
-#define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d)))
-#define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
-
-/* Rotates x right n bits. */
-#define ROTR(x, n) \
- (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
-
-/* Shift x right n bits */
-#define SHR(x, n) ((x) >> (n))
-
-/* SHA256 Functions */
-#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
-#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
-#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
-#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
-
-#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
- T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \
- d += T1; \
- T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \
- h = T1 + T2
-
-/* SHA384/512 Functions */
-#define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
-#define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
-#define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
-#define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
-#define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \
- T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \
- d += T1; \
- T2 = BIGSIGMA0(a) + Maj(a, b, c); \
- h = T1 + T2; \
- SMALL_STACK_MEMORY_BARRIER;
-
-/*
- * sparc optimization:
- *
- * on the sparc, we can load big endian 32-bit data easily. note that
- * special care must be taken to ensure the address is 32-bit aligned.
- * in the interest of speed, we don't check to make sure, since
- * careful programming can guarantee this for us.
- */
-
-#if defined(_ZFS_BIG_ENDIAN)
-#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
-#define LOAD_BIG_64(addr) (*(uint64_t *)(addr))
-
-#elif defined(HAVE_HTONL)
-#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
-#define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
-
-#else
-/* little endian -- will work on big endian, but slowly */
-#define LOAD_BIG_32(addr) \
- (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
-#define LOAD_BIG_64(addr) \
- (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \
- ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
- ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
- ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
-#endif /* _BIG_ENDIAN */
-
-
-#if !defined(__amd64) || !defined(_KERNEL)
-/* SHA256 Transform */
-
-static void
-SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
-{
- uint32_t a = ctx->state.s32[0];
- uint32_t b = ctx->state.s32[1];
- uint32_t c = ctx->state.s32[2];
- uint32_t d = ctx->state.s32[3];
- uint32_t e = ctx->state.s32[4];
- uint32_t f = ctx->state.s32[5];
- uint32_t g = ctx->state.s32[6];
- uint32_t h = ctx->state.s32[7];
-
- uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
- uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
- uint32_t T1, T2;
-
-#if defined(__sparc)
- static const uint32_t sha256_consts[] = {
- SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
- SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
- SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
- SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
- SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
- SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
- SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
- SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
- SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
- SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
- SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
- SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
- SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
- SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
- SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
- SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
- SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
- SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
- SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
- SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
- SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
- SHA256_CONST_63
- };
-#endif /* __sparc */
-
- if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
- bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
- blk = (uint8_t *)ctx->buf_un.buf32;
- }
-
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w0 = LOAD_BIG_32(blk + 4 * 0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w1 = LOAD_BIG_32(blk + 4 * 1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w2 = LOAD_BIG_32(blk + 4 * 2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w3 = LOAD_BIG_32(blk + 4 * 3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w4 = LOAD_BIG_32(blk + 4 * 4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w5 = LOAD_BIG_32(blk + 4 * 5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w6 = LOAD_BIG_32(blk + 4 * 6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w7 = LOAD_BIG_32(blk + 4 * 7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w8 = LOAD_BIG_32(blk + 4 * 8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w9 = LOAD_BIG_32(blk + 4 * 9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w10 = LOAD_BIG_32(blk + 4 * 10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w11 = LOAD_BIG_32(blk + 4 * 11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w12 = LOAD_BIG_32(blk + 4 * 12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w13 = LOAD_BIG_32(blk + 4 * 13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w14 = LOAD_BIG_32(blk + 4 * 14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w15 = LOAD_BIG_32(blk + 4 * 15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
- w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
- SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
- w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
- SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
- w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
- SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
- w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
- SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
- w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
- SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
- w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
- SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
- w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
- SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
- w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
- SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
- w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
- SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
- w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
- SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
- w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
- SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
- w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
- SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
- w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
- SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
- w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
- SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
- w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
- SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
- w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
- SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
- w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
- SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
- w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
- SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
- w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
- SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
- w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
- SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
- w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
- SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
- w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
- SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
- w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
- SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
- w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
- SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
- w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
- SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
- w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
- SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
- w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
- SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
- w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
- SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
- w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
- SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
- w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
- SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
- w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
- SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
- w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
- SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
- w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
- SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
- w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
- SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
- w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
- SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
- w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
- SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
- w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
- SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
- w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
- SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
- w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
- SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
- w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
- SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
- w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
- SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
- w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
- SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
- w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
- SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
- w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
- SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
- w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
- SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
- w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
- SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
- w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
- SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
- w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
- SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
- ctx->state.s32[0] += a;
- ctx->state.s32[1] += b;
- ctx->state.s32[2] += c;
- ctx->state.s32[3] += d;
- ctx->state.s32[4] += e;
- ctx->state.s32[5] += f;
- ctx->state.s32[6] += g;
- ctx->state.s32[7] += h;
-}
-
-
-/* SHA384 and SHA512 Transform */
-
-static void
-SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
-{
-
- uint64_t a = ctx->state.s64[0];
- uint64_t b = ctx->state.s64[1];
- uint64_t c = ctx->state.s64[2];
- uint64_t d = ctx->state.s64[3];
- uint64_t e = ctx->state.s64[4];
- uint64_t f = ctx->state.s64[5];
- uint64_t g = ctx->state.s64[6];
- uint64_t h = ctx->state.s64[7];
-
- uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
- uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
- uint64_t T1, T2;
-
-#if defined(__sparc)
- static const uint64_t sha512_consts[] = {
- SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
- SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
- SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
- SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
- SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
- SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
- SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
- SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
- SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
- SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
- SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
- SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
- SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
- SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
- SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
- SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
- SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
- SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
- SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
- SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
- SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
- SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
- SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
- SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
- SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
- SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
- SHA512_CONST_78, SHA512_CONST_79
- };
-#endif /* __sparc */
-
-
- if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? */
- bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64));
- blk = (uint8_t *)ctx->buf_un.buf64;
- }
-
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w0 = LOAD_BIG_64(blk + 8 * 0);
- SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w1 = LOAD_BIG_64(blk + 8 * 1);
- SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w2 = LOAD_BIG_64(blk + 8 * 2);
- SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w3 = LOAD_BIG_64(blk + 8 * 3);
- SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w4 = LOAD_BIG_64(blk + 8 * 4);
- SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w5 = LOAD_BIG_64(blk + 8 * 5);
- SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w6 = LOAD_BIG_64(blk + 8 * 6);
- SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w7 = LOAD_BIG_64(blk + 8 * 7);
- SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w8 = LOAD_BIG_64(blk + 8 * 8);
- SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w9 = LOAD_BIG_64(blk + 8 * 9);
- SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w10 = LOAD_BIG_64(blk + 8 * 10);
- SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w11 = LOAD_BIG_64(blk + 8 * 11);
- SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w12 = LOAD_BIG_64(blk + 8 * 12);
- SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w13 = LOAD_BIG_64(blk + 8 * 13);
- SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w14 = LOAD_BIG_64(blk + 8 * 14);
- SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w15 = LOAD_BIG_64(blk + 8 * 15);
- SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
- w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
- SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
- w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
- SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
- w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
- SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
- w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
- SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
- w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
- SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
- w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
- SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
- w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
- SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
- w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
- SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
- w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
- SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
- w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
- SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
- w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
- SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
- w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
- SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
- w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
- SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
- w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
- SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
- w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
- SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
- w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
- SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
- w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
- SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
- w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
- SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
- w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
- SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
- w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
- SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
- w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
- SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
- w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
- SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
- w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
- SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
- w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
- SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
- w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
- SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
- w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
- SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
- w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
- SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
- w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
- SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
- w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
- SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
- w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
- SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
- w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
- SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
- w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
- SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
- w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
- SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
- w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
- SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
- w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
- SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
- w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
- SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
- w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
- SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
- w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
- SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
- w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
- SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
- w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
- SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
- w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
- SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
- w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
- SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
- w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
- SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
- w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
- SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
- w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
- SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
- w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
- SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
- w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
- SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
- w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
- SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
- w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
- SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
- w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
- SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
- w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
- SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
- w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
- SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
- w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
- SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
- w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
- SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
- w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
- SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
- w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
- SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
- w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
- SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
- w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
- SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
- w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
- SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
- w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
- SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
- w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
- SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
- w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
- SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
- w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
- SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
- w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
- SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
-
- ctx->state.s64[0] += a;
- ctx->state.s64[1] += b;
- ctx->state.s64[2] += c;
- ctx->state.s64[3] += d;
- ctx->state.s64[4] += e;
- ctx->state.s64[5] += f;
- ctx->state.s64[6] += g;
- ctx->state.s64[7] += h;
-
-}
-#endif /* !__amd64 || !_KERNEL */
-
-
-/*
- * Encode()
- *
- * purpose: to convert a list of numbers from host byte order to big endian
- * input: uint8_t * : place to store the converted big endian numbers
- * uint32_t * : place to get numbers to convert from
- * size_t : the length of the input in bytes
- * output: void
- */
-
-static void
-Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
- size_t len)
-{
- size_t i, j;
-
-#if defined(__sparc)
- if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
- for (i = 0, j = 0; j < len; i++, j += 4) {
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- *((uint32_t *)(output + j)) = input[i];
- }
- } else {
-#endif /* little endian -- will work on big endian, but slowly */
- for (i = 0, j = 0; j < len; i++, j += 4) {
- output[j] = (input[i] >> 24) & 0xff;
- output[j + 1] = (input[i] >> 16) & 0xff;
- output[j + 2] = (input[i] >> 8) & 0xff;
- output[j + 3] = input[i] & 0xff;
- }
-#if defined(__sparc)
- }
-#endif
-}
-
-static void
-Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
- size_t len)
-{
- size_t i, j;
-
-#if defined(__sparc)
- if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
- for (i = 0, j = 0; j < len; i++, j += 8) {
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- *((uint64_t *)(output + j)) = input[i];
- }
- } else {
-#endif /* little endian -- will work on big endian, but slowly */
- for (i = 0, j = 0; j < len; i++, j += 8) {
-
- output[j] = (input[i] >> 56) & 0xff;
- output[j + 1] = (input[i] >> 48) & 0xff;
- output[j + 2] = (input[i] >> 40) & 0xff;
- output[j + 3] = (input[i] >> 32) & 0xff;
- output[j + 4] = (input[i] >> 24) & 0xff;
- output[j + 5] = (input[i] >> 16) & 0xff;
- output[j + 6] = (input[i] >> 8) & 0xff;
- output[j + 7] = input[i] & 0xff;
- }
-#if defined(__sparc)
- }
-#endif
-}
-
-
-void
-SHA2Init(uint64_t mech, SHA2_CTX *ctx)
-{
-
- switch (mech) {
- case SHA256_MECH_INFO_TYPE:
- case SHA256_HMAC_MECH_INFO_TYPE:
- case SHA256_HMAC_GEN_MECH_INFO_TYPE:
- ctx->state.s32[0] = 0x6a09e667U;
- ctx->state.s32[1] = 0xbb67ae85U;
- ctx->state.s32[2] = 0x3c6ef372U;
- ctx->state.s32[3] = 0xa54ff53aU;
- ctx->state.s32[4] = 0x510e527fU;
- ctx->state.s32[5] = 0x9b05688cU;
- ctx->state.s32[6] = 0x1f83d9abU;
- ctx->state.s32[7] = 0x5be0cd19U;
- break;
- case SHA384_MECH_INFO_TYPE:
- case SHA384_HMAC_MECH_INFO_TYPE:
- case SHA384_HMAC_GEN_MECH_INFO_TYPE:
- ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
- ctx->state.s64[1] = 0x629a292a367cd507ULL;
- ctx->state.s64[2] = 0x9159015a3070dd17ULL;
- ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
- ctx->state.s64[4] = 0x67332667ffc00b31ULL;
- ctx->state.s64[5] = 0x8eb44a8768581511ULL;
- ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
- ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
- break;
- case SHA512_MECH_INFO_TYPE:
- case SHA512_HMAC_MECH_INFO_TYPE:
- case SHA512_HMAC_GEN_MECH_INFO_TYPE:
- ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
- ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
- ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
- ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
- ctx->state.s64[4] = 0x510e527fade682d1ULL;
- ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
- ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
- ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
- break;
- case SHA512_224_MECH_INFO_TYPE:
- ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
- ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
- ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
- ctx->state.s64[3] = 0x679DD514582F9FCFULL;
- ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
- ctx->state.s64[5] = 0x77E36F7304C48942ULL;
- ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
- ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
- break;
- case SHA512_256_MECH_INFO_TYPE:
- ctx->state.s64[0] = 0x22312194FC2BF72CULL;
- ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
- ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
- ctx->state.s64[3] = 0x963877195940EABDULL;
- ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
- ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
- ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
- ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
- break;
-#ifdef _KERNEL
- default:
- cmn_err(CE_PANIC,
- "sha2_init: failed to find a supported algorithm: 0x%x",
- (uint32_t)mech);
-
-#endif /* _KERNEL */
- }
-
- ctx->algotype = (uint32_t)mech;
- ctx->count.c64[0] = ctx->count.c64[1] = 0;
-}
-
-#ifndef _KERNEL
-
-// #pragma inline(SHA256Init, SHA384Init, SHA512Init)
-void
-SHA256Init(SHA256_CTX *ctx)
-{
- SHA2Init(SHA256, ctx);
-}
-
-void
-SHA384Init(SHA384_CTX *ctx)
-{
- SHA2Init(SHA384, ctx);
-}
-
-void
-SHA512Init(SHA512_CTX *ctx)
-{
- SHA2Init(SHA512, ctx);
-}
-
-#endif /* !_KERNEL */
-
-/*
- * SHA2Update()
- *
- * purpose: continues an sha2 digest operation, using the message block
- * to update the context.
- * input: SHA2_CTX * : the context to update
- * void * : the message block
- * size_t : the length of the message block, in bytes
- * output: void
- */
-
-void
-SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
-{
- uint32_t i, buf_index, buf_len, buf_limit;
- const uint8_t *input = inptr;
- uint32_t algotype = ctx->algotype;
-
- /* check for noop */
- if (input_len == 0)
- return;
-
- if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
- buf_limit = 64;
-
- /* compute number of bytes mod 64 */
- buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
-
- /* update number of bits */
- if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
- ctx->count.c32[0]++;
-
- ctx->count.c32[0] += (input_len >> 29);
-
- } else {
- buf_limit = 128;
-
- /* compute number of bytes mod 128 */
- buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
-
- /* update number of bits */
- if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
- ctx->count.c64[0]++;
-
- ctx->count.c64[0] += (input_len >> 29);
- }
-
- buf_len = buf_limit - buf_index;
-
- /* transform as many times as possible */
- i = 0;
- if (input_len >= buf_len) {
-
- /*
- * general optimization:
- *
- * only do initial bcopy() and SHA2Transform() if
- * buf_index != 0. if buf_index == 0, we're just
- * wasting our time doing the bcopy() since there
- * wasn't any data left over from a previous call to
- * SHA2Update().
- */
- if (buf_index) {
- bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
- if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
- SHA256Transform(ctx, ctx->buf_un.buf8);
- else
- SHA512Transform(ctx, ctx->buf_un.buf8);
-
- i = buf_len;
- }
-
-#if !defined(__amd64) || !defined(_KERNEL)
- if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
- for (; i + buf_limit - 1 < input_len; i += buf_limit) {
- SHA256Transform(ctx, &input[i]);
- }
- } else {
- for (; i + buf_limit - 1 < input_len; i += buf_limit) {
- SHA512Transform(ctx, &input[i]);
- }
- }
-
-#else
- uint32_t block_count;
- if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
- block_count = (input_len - i) >> 6;
- if (block_count > 0) {
- SHA256TransformBlocks(ctx, &input[i],
- block_count);
- i += block_count << 6;
- }
- } else {
- block_count = (input_len - i) >> 7;
- if (block_count > 0) {
- SHA512TransformBlocks(ctx, &input[i],
- block_count);
- i += block_count << 7;
- }
- }
-#endif /* !__amd64 || !_KERNEL */
-
- /*
- * general optimization:
- *
- * if i and input_len are the same, return now instead
- * of calling bcopy(), since the bcopy() in this case
- * will be an expensive noop.
- */
-
- if (input_len == i)
- return;
-
- buf_index = 0;
- }
-
- /* buffer remaining input */
- bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
-}
-
-
-/*
- * SHA2Final()
- *
- * purpose: ends an sha2 digest operation, finalizing the message digest and
- * zeroing the context.
- * input: uchar_t * : a buffer to store the digest
- * : The function actually uses void* because many
- * : callers pass things other than uchar_t here.
- * SHA2_CTX * : the context to finalize, save, and zero
- * output: void
- */
-
-void
-SHA2Final(void *digest, SHA2_CTX *ctx)
-{
- uint8_t bitcount_be[sizeof (ctx->count.c32)];
- uint8_t bitcount_be64[sizeof (ctx->count.c64)];
- uint32_t index;
- uint32_t algotype = ctx->algotype;
-
- if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
- index = (ctx->count.c32[1] >> 3) & 0x3f;
- Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
- SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
- SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
- Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
- } else {
- index = (ctx->count.c64[1] >> 3) & 0x7f;
- Encode64(bitcount_be64, ctx->count.c64,
- sizeof (bitcount_be64));
- SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
- SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
- if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
- ctx->state.s64[6] = ctx->state.s64[7] = 0;
- Encode64(digest, ctx->state.s64,
- sizeof (uint64_t) * 6);
- } else if (algotype == SHA512_224_MECH_INFO_TYPE) {
- uint8_t last[sizeof (uint64_t)];
- /*
- * Since SHA-512/224 doesn't align well to 64-bit
- * boundaries, we must do the encoding in three steps:
- * 1) encode the three 64-bit words that fit neatly
- * 2) encode the last 64-bit word to a temp buffer
- * 3) chop out the lower 32-bits from the temp buffer
- * and append them to the digest
- */
- Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
- Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
- bcopy(last, (uint8_t *)digest + 24, 4);
- } else if (algotype == SHA512_256_MECH_INFO_TYPE) {
- Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
- } else {
- Encode64(digest, ctx->state.s64,
- sizeof (ctx->state.s64));
- }
- }
-
- /* zeroize sensitive information */
- bzero(ctx, sizeof (*ctx));
-}
-
-#ifdef _KERNEL
-EXPORT_SYMBOL(SHA2Init);
-EXPORT_SYMBOL(SHA2Update);
-EXPORT_SYMBOL(SHA2Final);
-#endif
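Taken together, SHA2Init(), SHA2Update() and SHA2Final() were the whole
public surface of this (now deleted) file. A minimal usage sketch, assuming
a buffer `buf' of `buflen' bytes and the digest-length constants from
<sys/sha2.h>:

	uint8_t digest[SHA512_DIGEST_LENGTH];
	SHA2_CTX ctx;

	SHA2Init(SHA512_MECH_INFO_TYPE, &ctx);
	SHA2Update(&ctx, buf, buflen);	/* may be called repeatedly */
	SHA2Final(digest, &ctx);	/* emits the digest, zeroizes ctx */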
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
new file mode 100644
index 000000000000..0f24319511d7
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c
@@ -0,0 +1,313 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/simd.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_impl.h>
+#include <sys/sha2.h>
+
+#include <sha2/sha2_impl.h>
+#include <sys/asm_linkage.h>
+
+#define TF(E, N) \
+ extern void ASMABI E(uint32_t s[8], const void *, size_t); \
+ static inline void N(uint32_t s[8], const void *d, size_t b) { \
+ kfpu_begin(); E(s, d, b); kfpu_end(); \
+}
+
+/* some implementation is always okay */
+static inline boolean_t sha2_is_supported(void)
+{
+ return (B_TRUE);
+}
+
+#if defined(__x86_64)
+
+/* Users of ASMABI require all calls to be made from wrappers */
+extern void ASMABI
+zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t);
+
+static inline void
+tf_sha256_transform_x64(uint32_t s[8], const void *d, size_t b)
+{
+ zfs_sha256_transform_x64(s, d, b);
+}
+
+const sha256_ops_t sha256_x64_impl = {
+ .is_supported = sha2_is_supported,
+ .transform = tf_sha256_transform_x64,
+ .name = "x64"
+};
+
+#if defined(HAVE_SSSE3)
+static boolean_t sha2_have_ssse3(void)
+{
+ return (kfpu_allowed() && zfs_ssse3_available());
+}
+
+TF(zfs_sha256_transform_ssse3, tf_sha256_ssse3);
+const sha256_ops_t sha256_ssse3_impl = {
+ .is_supported = sha2_have_ssse3,
+ .transform = tf_sha256_ssse3,
+ .name = "ssse3"
+};
+#endif
+
+#if defined(HAVE_AVX)
+static boolean_t sha2_have_avx(void)
+{
+ return (kfpu_allowed() && zfs_avx_available());
+}
+
+TF(zfs_sha256_transform_avx, tf_sha256_avx);
+const sha256_ops_t sha256_avx_impl = {
+ .is_supported = sha2_have_avx,
+ .transform = tf_sha256_avx,
+ .name = "avx"
+};
+#endif
+
+#if defined(HAVE_AVX2)
+static boolean_t sha2_have_avx2(void)
+{
+ return (kfpu_allowed() && zfs_avx2_available());
+}
+
+TF(zfs_sha256_transform_avx2, tf_sha256_avx2);
+const sha256_ops_t sha256_avx2_impl = {
+ .is_supported = sha2_have_avx2,
+ .transform = tf_sha256_avx2,
+ .name = "avx2"
+};
+#endif
+
+#if defined(HAVE_SSE4_1)
+static boolean_t sha2_have_shani(void)
+{
+ return (kfpu_allowed() && zfs_sse4_1_available() && \
+ zfs_shani_available());
+}
+
+TF(zfs_sha256_transform_shani, tf_sha256_shani);
+const sha256_ops_t sha256_shani_impl = {
+ .is_supported = sha2_have_shani,
+ .transform = tf_sha256_shani,
+ .name = "shani"
+};
+#endif
+
+#elif defined(__aarch64__) || defined(__arm__)
+extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t);
+const sha256_ops_t sha256_armv7_impl = {
+ .is_supported = sha2_is_supported,
+ .transform = zfs_sha256_block_armv7,
+ .name = "armv7"
+};
+
+#if __ARM_ARCH > 6
+static boolean_t sha256_have_neon(void)
+{
+ return (kfpu_allowed() && zfs_neon_available());
+}
+
+static boolean_t sha256_have_armv8ce(void)
+{
+ return (kfpu_allowed() && zfs_sha256_available());
+}
+
+TF(zfs_sha256_block_neon, tf_sha256_neon);
+const sha256_ops_t sha256_neon_impl = {
+ .is_supported = sha256_have_neon,
+ .transform = tf_sha256_neon,
+ .name = "neon"
+};
+
+TF(zfs_sha256_block_armv8, tf_sha256_armv8ce);
+const sha256_ops_t sha256_armv8_impl = {
+ .is_supported = sha256_have_armv8ce,
+ .transform = tf_sha256_armv8ce,
+ .name = "armv8-ce"
+};
+#endif
+
+#elif defined(__PPC64__)
+static boolean_t sha256_have_isa207(void)
+{
+ return (kfpu_allowed() && zfs_isa207_available());
+}
+
+TF(zfs_sha256_ppc, tf_sha256_ppc);
+const sha256_ops_t sha256_ppc_impl = {
+ .is_supported = sha2_is_supported,
+ .transform = tf_sha256_ppc,
+ .name = "ppc"
+};
+
+TF(zfs_sha256_power8, tf_sha256_power8);
+const sha256_ops_t sha256_power8_impl = {
+ .is_supported = sha256_have_isa207,
+ .transform = tf_sha256_power8,
+ .name = "power8"
+};
+#endif /* __PPC64__ */
+
+/* the two generic ones */
+extern const sha256_ops_t sha256_generic_impl;
+
+/* array with all sha256 implementations */
+static const sha256_ops_t *const sha256_impls[] = {
+ &sha256_generic_impl,
+#if defined(__x86_64)
+ &sha256_x64_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSSE3)
+ &sha256_ssse3_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX)
+ &sha256_avx_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX2)
+ &sha256_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSE4_1)
+ &sha256_shani_impl,
+#endif
+#if defined(__aarch64__) || defined(__arm__)
+ &sha256_armv7_impl,
+#if __ARM_ARCH > 6
+ &sha256_neon_impl,
+ &sha256_armv8_impl,
+#endif
+#endif
+#if defined(__PPC64__)
+ &sha256_ppc_impl,
+ &sha256_power8_impl,
+#endif /* __PPC64__ */
+};
+
+/* use the generic implementation functions */
+#define IMPL_NAME "sha256"
+#define IMPL_OPS_T sha256_ops_t
+#define IMPL_ARRAY sha256_impls
+#define IMPL_GET_OPS sha256_get_ops
+#define ZFS_IMPL_OPS zfs_sha256_ops
+#include <generic_impl.c>
+
+#ifdef _KERNEL
+
+#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
+
+#if defined(__linux__)
+
+static int
+sha256_param_get(char *buffer, zfs_kernel_param_t *unused)
+{
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
+ char *fmt;
+ int cnt = 0;
+
+ /* cycling */
+ fmt = IMPL_FMT(impl, IMPL_CYCLE);
+ cnt += sprintf(buffer + cnt, fmt, "cycle");
+
+ /* list fastest */
+ fmt = IMPL_FMT(impl, IMPL_FASTEST);
+ cnt += sprintf(buffer + cnt, fmt, "fastest");
+
+ /* list all supported implementations */
+ generic_impl_init();
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+ fmt = IMPL_FMT(impl, i);
+ cnt += sprintf(buffer + cnt, fmt,
+ generic_supp_impls[i]->name);
+ }
+
+ return (cnt);
+}
+
+static int
+sha256_param_set(const char *val, zfs_kernel_param_t *unused)
+{
+ (void) unused;
+ return (generic_impl_setname(val));
+}
+
+#elif defined(__FreeBSD__)
+
+#include <sys/sbuf.h>
+
+static int
+sha256_param(ZFS_MODULE_PARAM_ARGS)
+{
+ int err;
+
+ generic_impl_init();
+ if (req->newptr == NULL) {
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
+ const int init_buflen = 64;
+ const char *fmt;
+ struct sbuf *s;
+
+ s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
+
+ /* cycling */
+ fmt = IMPL_FMT(impl, IMPL_CYCLE);
+ (void) sbuf_printf(s, fmt, "cycle");
+
+ /* list fastest */
+ fmt = IMPL_FMT(impl, IMPL_FASTEST);
+ (void) sbuf_printf(s, fmt, "fastest");
+
+ /* list all supported implementations */
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+ fmt = IMPL_FMT(impl, i);
+ (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
+ }
+
+ err = sbuf_finish(s);
+ sbuf_delete(s);
+
+ return (err);
+ }
+
+ char buf[16];
+
+ err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+ if (err) {
+ return (err);
+ }
+
+ return (-generic_impl_setname(buf));
+}
+#endif
+
+#undef IMPL_FMT
+
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, sha256_impl,
+ sha256_param_set, sha256_param_get, ZMOD_RW, \
+ "Select SHA256 implementation.");
+#endif
+
+#undef TF
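For reference, the TF() wrapper macro defined near the top of this file
expands, for one of the x86 cases, to roughly the following (a sketch;
ASMABI and the kfpu_*() calls are exactly those used above):

	extern void ASMABI zfs_sha256_transform_avx2(uint32_t s[8],
	    const void *, size_t);
	static inline void
	tf_sha256_avx2(uint32_t s[8], const void *d, size_t b)
	{
		kfpu_begin();	/* enter the kernel-FPU region for SIMD */
		zfs_sha256_transform_avx2(s, d, b);
		kfpu_end();
	}

On Linux, the implementation choice is then exposed through the
zfs_sha256_impl module parameter declared at the end of the file.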
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c
new file mode 100644
index 000000000000..60d7ad9a1dfa
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c
@@ -0,0 +1,562 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on public domain code in cppcrypto 0.10.
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zfs_impl.h>
+#include <sys/sha2.h>
+
+#include <sha2/sha2_impl.h>
+
+/*
+ * On i386, gcc emits this for sha512_generic():
+ * error: the frame size of 1040 bytes is larger than 1024
+ */
+#if defined(__GNUC__) && defined(_ILP32)
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
+
+/* SHA256 */
+static const uint32_t SHA256_K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define Maj(x, y, z) (((y) & (z)) | (((y) | (z)) & (x)))
+
+#define rotr32(x, n) (((x) >> n) | ((x) << (32 - n)))
+#define sum0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22))
+#define sum1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25))
+#define sigma0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3))
+#define sigma1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10))
+
+#define WU(j) (W[j & 15] += sigma1(W[(j + 14) & 15]) \
+ + W[(j + 9) & 15] + sigma0(W[(j + 1) & 15]))
+
+#define COMPRESS(i, j, K) \
+ T1 = h + sum1(e) + Ch(e, f, g) + K[i + j] + (i? WU(j): W[j]); \
+ T2 = sum0(a) + Maj(a, b, c); \
+ h = g, g = f, f = e, e = d + T1; \
+ d = c, c = b, b = a, a = T1 + T2;
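
The two macros above implement the FIPS 180-4 message schedule with a 16-word ring buffer instead of the textbook 64-entry W array: round t only reads W[t-2], W[t-7], W[t-15], and W[t-16], so every index can be reduced mod 16 and each slot overwritten in place (the SHA-512 loop below reuses the same structure over 80 rounds). A sketch of the equivalence, reusing the sigma macros defined above; schedule_equivalence() is illustrative only:

    static void
    schedule_equivalence(uint32_t W64[64], uint32_t W16[16])
    {
        /* both arrays start with the same 16 message words */
        for (int t = 16; t < 64; t++) {
            W64[t] = sigma1(W64[t - 2]) + W64[t - 7] +
                sigma0(W64[t - 15]) + W64[t - 16];
            /* WU(j) with j = t: +14, +9, +1 are -2, -7, -15 mod 16,
             * and W16[t & 15] still holds W[t - 16] at this point */
            W16[t & 15] += sigma1(W16[(t + 14) & 15]) +
                W16[(t + 9) & 15] + sigma0(W16[(t + 1) & 15]);
        }
    }
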
+
+static void sha256_generic(uint32_t state[8], const void *data, size_t num_blks)
+{
+ uint64_t blk;
+
+ for (blk = 0; blk < num_blks; blk++) {
+ uint32_t W[16];
+ uint32_t a, b, c, d, e, f, g, h;
+ uint32_t T1, T2;
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ W[i] = BE_32( \
+ (((const uint32_t *)(data))[blk * 16 + i]));
+ }
+
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ f = state[5];
+ g = state[6];
+ h = state[7];
+
+ for (i = 0; i <= 63; i += 16) {
+ COMPRESS(i, 0, SHA256_K);
+ COMPRESS(i, 1, SHA256_K);
+ COMPRESS(i, 2, SHA256_K);
+ COMPRESS(i, 3, SHA256_K);
+ COMPRESS(i, 4, SHA256_K);
+ COMPRESS(i, 5, SHA256_K);
+ COMPRESS(i, 6, SHA256_K);
+ COMPRESS(i, 7, SHA256_K);
+ COMPRESS(i, 8, SHA256_K);
+ COMPRESS(i, 9, SHA256_K);
+ COMPRESS(i, 10, SHA256_K);
+ COMPRESS(i, 11, SHA256_K);
+ COMPRESS(i, 12, SHA256_K);
+ COMPRESS(i, 13, SHA256_K);
+ COMPRESS(i, 14, SHA256_K);
+ COMPRESS(i, 15, SHA256_K);
+ }
+
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
+ }
+}
+
+#undef sum0
+#undef sum1
+#undef sigma0
+#undef sigma1
+
+#define rotr64(x, n) (((x) >> n) | ((x) << (64 - n)))
+#define sum0(x) (rotr64((x), 28) ^ rotr64((x), 34) ^ rotr64((x), 39))
+#define sum1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotr64((x), 41))
+#define sigma0(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7))
+#define sigma1(x) (rotr64((x), 19) ^ rotr64((x), 61) ^ ((x) >> 6))
+
+/* SHA512 */
+static const uint64_t SHA512_K[80] = {
+ 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f,
+ 0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019,
+ 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242,
+ 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
+ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235,
+ 0xc19bf174cf692694, 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
+ 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, 0x2de92c6f592b0275,
+ 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
+ 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f,
+ 0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
+ 0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc,
+ 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
+ 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6,
+ 0x92722c851482353b, 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
+ 0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218,
+ 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
+ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99,
+ 0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
+ 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc,
+ 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
+ 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915,
+ 0xc67178f2e372532b, 0xca273eceea26619c, 0xd186b8c721c0c207,
+ 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba,
+ 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
+ 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc,
+ 0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
+ 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+};
+
+static void sha512_generic(uint64_t state[8], const void *data, size_t num_blks)
+{
+ uint64_t blk;
+
+ for (blk = 0; blk < num_blks; blk++) {
+ uint64_t W[16];
+ uint64_t a, b, c, d, e, f, g, h;
+ uint64_t T1, T2;
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ W[i] = BE_64( \
+ (((const uint64_t *)(data))[blk * 16 + i]));
+ }
+
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ f = state[5];
+ g = state[6];
+ h = state[7];
+
+ for (i = 0; i <= 79; i += 16) {
+ COMPRESS(i, 0, SHA512_K);
+ COMPRESS(i, 1, SHA512_K);
+ COMPRESS(i, 2, SHA512_K);
+ COMPRESS(i, 3, SHA512_K);
+ COMPRESS(i, 4, SHA512_K);
+ COMPRESS(i, 5, SHA512_K);
+ COMPRESS(i, 6, SHA512_K);
+ COMPRESS(i, 7, SHA512_K);
+ COMPRESS(i, 8, SHA512_K);
+ COMPRESS(i, 9, SHA512_K);
+ COMPRESS(i, 10, SHA512_K);
+ COMPRESS(i, 11, SHA512_K);
+ COMPRESS(i, 12, SHA512_K);
+ COMPRESS(i, 13, SHA512_K);
+ COMPRESS(i, 14, SHA512_K);
+ COMPRESS(i, 15, SHA512_K);
+ }
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
+ }
+}
+
+static void sha256_update(sha256_ctx *ctx, const uint8_t *data, size_t len)
+{
+ uint64_t pos = ctx->count[0];
+ uint64_t total = ctx->count[1];
+ uint8_t *m = ctx->wbuf;
+ const sha256_ops_t *ops = ctx->ops;
+
+ if (pos && pos + len >= 64) {
+ memcpy(m + pos, data, 64 - pos);
+ ops->transform(ctx->state, m, 1);
+ len -= 64 - pos;
+ total += (64 - pos) * 8;
+ data += 64 - pos;
+ pos = 0;
+ }
+
+ if (len >= 64) {
+ uint32_t blocks = len / 64;
+ uint32_t bytes = blocks * 64;
+ ops->transform(ctx->state, data, blocks);
+ len -= bytes;
+ total += (bytes) * 8;
+ data += bytes;
+ }
+ memcpy(m + pos, data, len);
+
+ pos += len;
+ total += len * 8;
+ ctx->count[0] = pos;
+ ctx->count[1] = total;
+}
+
+static void sha512_update(sha512_ctx *ctx, const uint8_t *data, size_t len)
+{
+ uint64_t pos = ctx->count[0];
+ uint64_t total = ctx->count[1];
+ uint8_t *m = ctx->wbuf;
+ const sha512_ops_t *ops = ctx->ops;
+
+ if (pos && pos + len >= 128) {
+ memcpy(m + pos, data, 128 - pos);
+ ops->transform(ctx->state, m, 1);
+ len -= 128 - pos;
+ total += (128 - pos) * 8;
+ data += 128 - pos;
+ pos = 0;
+ }
+
+ if (len >= 128) {
+ uint64_t blocks = len / 128;
+ uint64_t bytes = blocks * 128;
+ ops->transform(ctx->state, data, blocks);
+ len -= bytes;
+ total += (bytes) * 8;
+ data += bytes;
+ }
+ memcpy(m + pos, data, len);
+
+ pos += len;
+ total += len * 8;
+ ctx->count[0] = pos;
+ ctx->count[1] = total;
+}
+
+static void sha256_final(sha256_ctx *ctx, uint8_t *result, int bits)
+{
+ uint64_t mlen, pos = ctx->count[0];
+ uint8_t *m = ctx->wbuf;
+ uint32_t *R = (uint32_t *)result;
+ const sha256_ops_t *ops = ctx->ops;
+
+ m[pos++] = 0x80;
+ if (pos > 56) {
+ memset(m + pos, 0, 64 - pos);
+ ops->transform(ctx->state, m, 1);
+ pos = 0;
+ }
+
+ memset(m + pos, 0, 64 - pos);
+ mlen = BE_64(ctx->count[1]);
+ memcpy(m + (64 - 8), &mlen, 64 / 8);
+ ops->transform(ctx->state, m, 1);
+
+ switch (bits) {
+ case 224: /* 28 bytes - currently unused */
+ R[0] = BE_32(ctx->state[0]);
+ R[1] = BE_32(ctx->state[1]);
+ R[2] = BE_32(ctx->state[2]);
+ R[3] = BE_32(ctx->state[3]);
+ R[4] = BE_32(ctx->state[4]);
+ R[5] = BE_32(ctx->state[5]);
+ R[6] = BE_32(ctx->state[6]);
+ break;
+ case 256: /* 32 */
+ R[0] = BE_32(ctx->state[0]);
+ R[1] = BE_32(ctx->state[1]);
+ R[2] = BE_32(ctx->state[2]);
+ R[3] = BE_32(ctx->state[3]);
+ R[4] = BE_32(ctx->state[4]);
+ R[5] = BE_32(ctx->state[5]);
+ R[6] = BE_32(ctx->state[6]);
+ R[7] = BE_32(ctx->state[7]);
+ break;
+ }
+
+ memset(ctx, 0, sizeof (*ctx));
+}
+
+static void sha512_final(sha512_ctx *ctx, uint8_t *result, int bits)
+{
+ uint64_t mlen, pos = ctx->count[0];
+ uint8_t *m = ctx->wbuf, *r;
+ uint64_t *R = (uint64_t *)result;
+ const sha512_ops_t *ops = ctx->ops;
+
+ m[pos++] = 0x80;
+ if (pos > 112) {
+ memset(m + pos, 0, 128 - pos);
+ ops->transform(ctx->state, m, 1);
+ pos = 0;
+ }
+
+ memset(m + pos, 0, 128 - pos);
+ mlen = BE_64(ctx->count[1]);
+ memcpy(m + (128 - 8), &mlen, 64 / 8);
+ ops->transform(ctx->state, m, 1);
+
+ switch (bits) {
+ case 224: /* 28 bytes => 3.5 x 8 */
+ r = result + 24;
+ R[0] = BE_64(ctx->state[0]);
+ R[1] = BE_64(ctx->state[1]);
+ R[2] = BE_64(ctx->state[2]);
+ /* last 4 bytes are special here */
+ *r++ = (uint8_t)(ctx->state[3] >> 56);
+ *r++ = (uint8_t)(ctx->state[3] >> 48);
+ *r++ = (uint8_t)(ctx->state[3] >> 40);
+ *r++ = (uint8_t)(ctx->state[3] >> 32);
+ break;
+ case 256: /* 32 */
+ R[0] = BE_64(ctx->state[0]);
+ R[1] = BE_64(ctx->state[1]);
+ R[2] = BE_64(ctx->state[2]);
+ R[3] = BE_64(ctx->state[3]);
+ break;
+ case 384: /* 48 */
+ R[0] = BE_64(ctx->state[0]);
+ R[1] = BE_64(ctx->state[1]);
+ R[2] = BE_64(ctx->state[2]);
+ R[3] = BE_64(ctx->state[3]);
+ R[4] = BE_64(ctx->state[4]);
+ R[5] = BE_64(ctx->state[5]);
+ break;
+ case 512: /* 64 */
+ R[0] = BE_64(ctx->state[0]);
+ R[1] = BE_64(ctx->state[1]);
+ R[2] = BE_64(ctx->state[2]);
+ R[3] = BE_64(ctx->state[3]);
+ R[4] = BE_64(ctx->state[4]);
+ R[5] = BE_64(ctx->state[5]);
+ R[6] = BE_64(ctx->state[6]);
+ R[7] = BE_64(ctx->state[7]);
+ break;
+ }
+
+ memset(ctx, 0, sizeof (*ctx));
+}
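
The unrolled switch above stores the state big-endian and truncates it per digest size; SHA-512/224 is the awkward case because 28 bytes is three full 64-bit words plus the high half of state[3], hence the byte-wise tail. A hedged generalization of the same truncation, for illustration only (store_be_truncated() is hypothetical):

    static void
    store_be_truncated(uint8_t *out, const uint64_t state[8], size_t bytes)
    {
        /* emit the most significant byte of each word first */
        for (size_t i = 0; i < bytes; i++)
            out[i] = (uint8_t)(state[i / 8] >> (56 - 8 * (i % 8)));
    }

With bytes = 28 this reproduces the 224-bit case, including the special last four bytes of state[3].
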
+
+/* SHA2 Init function */
+void
+SHA2Init(int algotype, SHA2_CTX *ctx)
+{
+ sha256_ctx *ctx256 = &ctx->sha256;
+ sha512_ctx *ctx512 = &ctx->sha512;
+
+ ASSERT3S(algotype, >=, SHA256_MECH_INFO_TYPE);
+ ASSERT3S(algotype, <=, SHA512_256_MECH_INFO_TYPE);
+
+ memset(ctx, 0, sizeof (*ctx));
+ ctx->algotype = algotype;
+ switch (ctx->algotype) {
+ case SHA256_MECH_INFO_TYPE:
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ ctx256->state[0] = 0x6a09e667;
+ ctx256->state[1] = 0xbb67ae85;
+ ctx256->state[2] = 0x3c6ef372;
+ ctx256->state[3] = 0xa54ff53a;
+ ctx256->state[4] = 0x510e527f;
+ ctx256->state[5] = 0x9b05688c;
+ ctx256->state[6] = 0x1f83d9ab;
+ ctx256->state[7] = 0x5be0cd19;
+ ctx256->count[0] = 0;
+ ctx256->ops = sha256_get_ops();
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ ctx512->state[0] = 0xcbbb9d5dc1059ed8ULL;
+ ctx512->state[1] = 0x629a292a367cd507ULL;
+ ctx512->state[2] = 0x9159015a3070dd17ULL;
+ ctx512->state[3] = 0x152fecd8f70e5939ULL;
+ ctx512->state[4] = 0x67332667ffc00b31ULL;
+ ctx512->state[5] = 0x8eb44a8768581511ULL;
+ ctx512->state[6] = 0xdb0c2e0d64f98fa7ULL;
+ ctx512->state[7] = 0x47b5481dbefa4fa4ULL;
+ ctx512->count[0] = 0;
+ ctx512->count[1] = 0;
+ ctx512->ops = sha512_get_ops();
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ ctx512->state[0] = 0x6a09e667f3bcc908ULL;
+ ctx512->state[1] = 0xbb67ae8584caa73bULL;
+ ctx512->state[2] = 0x3c6ef372fe94f82bULL;
+ ctx512->state[3] = 0xa54ff53a5f1d36f1ULL;
+ ctx512->state[4] = 0x510e527fade682d1ULL;
+ ctx512->state[5] = 0x9b05688c2b3e6c1fULL;
+ ctx512->state[6] = 0x1f83d9abfb41bd6bULL;
+ ctx512->state[7] = 0x5be0cd19137e2179ULL;
+ ctx512->count[0] = 0;
+ ctx512->count[1] = 0;
+ ctx512->ops = sha512_get_ops();
+ break;
+ case SHA512_224_MECH_INFO_TYPE:
+ ctx512->state[0] = 0x8c3d37c819544da2ULL;
+ ctx512->state[1] = 0x73e1996689dcd4d6ULL;
+ ctx512->state[2] = 0x1dfab7ae32ff9c82ULL;
+ ctx512->state[3] = 0x679dd514582f9fcfULL;
+ ctx512->state[4] = 0x0f6d2b697bd44da8ULL;
+ ctx512->state[5] = 0x77e36f7304c48942ULL;
+ ctx512->state[6] = 0x3f9d85a86a1d36c8ULL;
+ ctx512->state[7] = 0x1112e6ad91d692a1ULL;
+ ctx512->count[0] = 0;
+ ctx512->count[1] = 0;
+ ctx512->ops = sha512_get_ops();
+ break;
+ case SHA512_256_MECH_INFO_TYPE:
+ ctx512->state[0] = 0x22312194fc2bf72cULL;
+ ctx512->state[1] = 0x9f555fa3c84c64c2ULL;
+ ctx512->state[2] = 0x2393b86b6f53b151ULL;
+ ctx512->state[3] = 0x963877195940eabdULL;
+ ctx512->state[4] = 0x96283ee2a88effe3ULL;
+ ctx512->state[5] = 0xbe5e1e2553863992ULL;
+ ctx512->state[6] = 0x2b0199fc2c85b8aaULL;
+ ctx512->state[7] = 0x0eb72ddc81c52ca2ULL;
+ ctx512->count[0] = 0;
+ ctx512->count[1] = 0;
+ ctx512->ops = sha512_get_ops();
+ break;
+ }
+}
+
+/* SHA2 Update function */
+void
+SHA2Update(SHA2_CTX *ctx, const void *data, size_t len)
+{
+ /* check for zero input length */
+ if (len == 0)
+ return;
+
+ ASSERT3P(data, !=, NULL);
+
+ switch (ctx->algotype) {
+ case SHA256_MECH_INFO_TYPE:
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha256_update(&ctx->sha256, data, len);
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ sha512_update(&ctx->sha512, data, len);
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha512_update(&ctx->sha512, data, len);
+ break;
+ case SHA512_224_MECH_INFO_TYPE:
+ sha512_update(&ctx->sha512, data, len);
+ break;
+ case SHA512_256_MECH_INFO_TYPE:
+ sha512_update(&ctx->sha512, data, len);
+ break;
+ }
+}
+
+/* SHA2Final function */
+void
+SHA2Final(void *digest, SHA2_CTX *ctx)
+{
+ switch (ctx->algotype) {
+ case SHA256_MECH_INFO_TYPE:
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha256_final(&ctx->sha256, digest, 256);
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ sha512_final(&ctx->sha512, digest, 384);
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha512_final(&ctx->sha512, digest, 512);
+ break;
+ case SHA512_224_MECH_INFO_TYPE:
+ sha512_final(&ctx->sha512, digest, 224);
+ break;
+ case SHA512_256_MECH_INFO_TYPE:
+ sha512_final(&ctx->sha512, digest, 256);
+ break;
+ }
+}
+
+/* the generic implementation is always okay */
+static boolean_t sha2_is_supported(void)
+{
+ return (B_TRUE);
+}
+
+const sha256_ops_t sha256_generic_impl = {
+ .name = "generic",
+ .transform = sha256_generic,
+ .is_supported = sha2_is_supported
+};
+
+const sha512_ops_t sha512_generic_impl = {
+ .name = "generic",
+ .transform = sha512_generic,
+ .is_supported = sha2_is_supported
+};
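
Taken together, SHA2Init(), SHA2Update(), and SHA2Final() form the streaming API of this file. A usage sketch, assuming a kernel caller and the mechanism constants from sys/sha2.h; splitting the input across several SHA2Update() calls yields the same digest as one call, and the wrapper name is hypothetical:

    #include <sys/sha2.h>

    /* one-shot SHA-256 over a buffer (sketch) */
    static void
    sha256_digest(const void *buf, size_t len, uint8_t digest[32])
    {
        SHA2_CTX ctx;

        SHA2Init(SHA256_MECH_INFO_TYPE, &ctx);
        SHA2Update(&ctx, buf, len);    /* may be repeated per chunk */
        SHA2Final(digest, &ctx);       /* also zeroes the context */
    }
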
diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
new file mode 100644
index 000000000000..6291fbd77e36
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c
@@ -0,0 +1,282 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#include <sys/simd.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_impl.h>
+#include <sys/sha2.h>
+
+#include <sha2/sha2_impl.h>
+#include <sys/asm_linkage.h>
+
+#define TF(E, N) \
+ extern void ASMABI E(uint64_t s[8], const void *, size_t); \
+ static inline void N(uint64_t s[8], const void *d, size_t b) { \
+ kfpu_begin(); E(s, d, b); kfpu_end(); \
+}
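
The TF() macro declares the ASMABI assembly transform and generates an inline wrapper so that every call is bracketed by kfpu_begin()/kfpu_end(), which makes FPU/SIMD state usable inside the kernel and restores it afterwards. One instantiation, written out:

    /* TF(zfs_sha512_transform_avx, tf_sha512_avx) expands to: */
    extern void ASMABI zfs_sha512_transform_avx(uint64_t s[8],
        const void *, size_t);
    static inline void
    tf_sha512_avx(uint64_t s[8], const void *d, size_t b)
    {
        kfpu_begin();              /* save/enable kernel FPU state */
        zfs_sha512_transform_avx(s, d, b);
        kfpu_end();                /* restore it again */
    }
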
+
+/* implementations needing no ISA extensions are always supported */
+static inline boolean_t sha2_is_supported(void)
+{
+ return (B_TRUE);
+}
+
+#if defined(__x86_64)
+
+/* Users of ASMABI require all calls to be from wrappers */
+extern void ASMABI
+zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t);
+
+static inline void
+tf_sha512_transform_x64(uint64_t s[8], const void *d, size_t b)
+{
+ zfs_sha512_transform_x64(s, d, b);
+}
+const sha512_ops_t sha512_x64_impl = {
+ .is_supported = sha2_is_supported,
+ .transform = tf_sha512_transform_x64,
+ .name = "x64"
+};
+
+#if defined(HAVE_AVX)
+static boolean_t sha2_have_avx(void)
+{
+ return (kfpu_allowed() && zfs_avx_available());
+}
+
+TF(zfs_sha512_transform_avx, tf_sha512_avx);
+const sha512_ops_t sha512_avx_impl = {
+ .is_supported = sha2_have_avx,
+ .transform = tf_sha512_avx,
+ .name = "avx"
+};
+#endif
+
+#if defined(HAVE_AVX2)
+static boolean_t sha2_have_avx2(void)
+{
+ return (kfpu_allowed() && zfs_avx2_available());
+}
+
+TF(zfs_sha512_transform_avx2, tf_sha512_avx2);
+const sha512_ops_t sha512_avx2_impl = {
+ .is_supported = sha2_have_avx2,
+ .transform = tf_sha512_avx2,
+ .name = "avx2"
+};
+#endif
+
+#elif defined(__aarch64__) || defined(__arm__)
+extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t);
+const sha512_ops_t sha512_armv7_impl = {
+ .is_supported = sha2_is_supported,
+ .transform = zfs_sha512_block_armv7,
+ .name = "armv7"
+};
+
+#if defined(__aarch64__)
+static boolean_t sha512_have_armv8ce(void)
+{
+ return (kfpu_allowed() && zfs_sha512_available());
+}
+
+TF(zfs_sha512_block_armv8, tf_sha512_armv8ce);
+const sha512_ops_t sha512_armv8_impl = {
+ .is_supported = sha512_have_armv8ce,
+ .transform = tf_sha512_armv8ce,
+ .name = "armv8-ce"
+};
+#endif
+
+#if defined(__arm__) && __ARM_ARCH > 6
+static boolean_t sha512_have_neon(void)
+{
+ return (kfpu_allowed() && zfs_neon_available());
+}
+
+TF(zfs_sha512_block_neon, tf_sha512_neon);
+const sha512_ops_t sha512_neon_impl = {
+ .is_supported = sha512_have_neon,
+ .transform = tf_sha512_neon,
+ .name = "neon"
+};
+#endif
+
+#elif defined(__PPC64__)
+TF(zfs_sha512_ppc, tf_sha512_ppc);
+const sha512_ops_t sha512_ppc_impl = {
+ .is_supported = sha2_is_supported,
+ .transform = tf_sha512_ppc,
+ .name = "ppc"
+};
+
+static boolean_t sha512_have_isa207(void)
+{
+ return (kfpu_allowed() && zfs_isa207_available());
+}
+
+TF(zfs_sha512_power8, tf_sha512_power8);
+const sha512_ops_t sha512_power8_impl = {
+ .is_supported = sha512_have_isa207,
+ .transform = tf_sha512_power8,
+ .name = "power8"
+};
+#endif /* __PPC64__ */
+
+/* the generic implementation, defined in sha2_generic.c */
+extern const sha512_ops_t sha512_generic_impl;
+
+/* array with all sha512 implementations */
+static const sha512_ops_t *const sha512_impls[] = {
+ &sha512_generic_impl,
+#if defined(__x86_64)
+ &sha512_x64_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX)
+ &sha512_avx_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX2)
+ &sha512_avx2_impl,
+#endif
+#if defined(__aarch64__) || defined(__arm__)
+ &sha512_armv7_impl,
+#if defined(__aarch64__)
+ &sha512_armv8_impl,
+#endif
+#if defined(__arm__) && __ARM_ARCH > 6
+ &sha512_neon_impl,
+#endif
+#endif
+#if defined(__PPC64__)
+ &sha512_ppc_impl,
+ &sha512_power8_impl,
+#endif /* __PPC64__ */
+};
+
+/* use the generic implementation functions */
+#define IMPL_NAME "sha512"
+#define IMPL_OPS_T sha512_ops_t
+#define IMPL_ARRAY sha512_impls
+#define IMPL_GET_OPS sha512_get_ops
+#define ZFS_IMPL_OPS zfs_sha512_ops
+#include <generic_impl.c>
+
+#ifdef _KERNEL
+
+#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
+
+#if defined(__linux__)
+
+static int
+sha512_param_get(char *buffer, zfs_kernel_param_t *unused)
+{
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
+ char *fmt;
+ int cnt = 0;
+
+ /* cycling */
+ fmt = IMPL_FMT(impl, IMPL_CYCLE);
+ cnt += sprintf(buffer + cnt, fmt, "cycle");
+
+ /* list fastest */
+ fmt = IMPL_FMT(impl, IMPL_FASTEST);
+ cnt += sprintf(buffer + cnt, fmt, "fastest");
+
+ /* list all supported implementations */
+ generic_impl_init();
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+ fmt = IMPL_FMT(impl, i);
+ cnt += sprintf(buffer + cnt, fmt,
+ generic_supp_impls[i]->name);
+ }
+
+ return (cnt);
+}
+
+static int
+sha512_param_set(const char *val, zfs_kernel_param_t *unused)
+{
+ (void) unused;
+ return (generic_impl_setname(val));
+}
+
+#elif defined(__FreeBSD__)
+
+#include <sys/sbuf.h>
+
+static int
+sha512_param(ZFS_MODULE_PARAM_ARGS)
+{
+ int err;
+
+ generic_impl_init();
+ if (req->newptr == NULL) {
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
+ const int init_buflen = 64;
+ const char *fmt;
+ struct sbuf *s;
+
+ s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
+
+ /* cycling */
+ fmt = IMPL_FMT(impl, IMPL_CYCLE);
+ (void) sbuf_printf(s, fmt, "cycle");
+
+ /* list fastest */
+ fmt = IMPL_FMT(impl, IMPL_FASTEST);
+ (void) sbuf_printf(s, fmt, "fastest");
+
+ /* list all supported implementations */
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+ fmt = IMPL_FMT(impl, i);
+ (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
+ }
+
+ err = sbuf_finish(s);
+ sbuf_delete(s);
+
+ return (err);
+ }
+
+ /* a new value was written to the module parameter */
+ char buf[16];
+
+ err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+ if (err) {
+ return (err);
+ }
+
+ return (-generic_impl_setname(buf));
+}
+#endif
+
+#undef IMPL_FMT
+
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, sha512_impl,
+ sha512_param_set, sha512_param_get, ZMOD_RW,
+ "Select SHA512 implementation.");
+#endif
+
+#undef TF
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein.c b/sys/contrib/openzfs/module/icp/algs/skein/skein.c
index 83fe84260307..41ed2dd44e9e 100644
--- a/sys/contrib/openzfs/module/icp/algs/skein/skein.c
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein.c
@@ -26,16 +26,16 @@ Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
switch (hashBitLen) { /* use pre-computed values, where available */
#ifndef SKEIN_NO_PRECOMP
case 256:
- bcopy(SKEIN_256_IV_256, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_256_IV_256, sizeof (ctx->X));
break;
case 224:
- bcopy(SKEIN_256_IV_224, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_256_IV_224, sizeof (ctx->X));
break;
case 160:
- bcopy(SKEIN_256_IV_160, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_256_IV_160, sizeof (ctx->X));
break;
case 128:
- bcopy(SKEIN_256_IV_128, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_256_IV_128, sizeof (ctx->X));
break;
#endif
default:
@@ -53,11 +53,11 @@ Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
cfg.w[1] = Skein_Swap64(hashBitLen);
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
/* zero pad config block */
- bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+ memset(&cfg.w[3], 0, sizeof (cfg) - 3 * sizeof (cfg.w[0]));
/* compute the initial chaining values from config block */
/* zero the chaining variables */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
break;
}
@@ -91,7 +91,7 @@ Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
/* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) { /* is there a key? */
/* no key: use all zeroes as key for config block */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
} else { /* here to pre-process a key */
Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
@@ -101,13 +101,13 @@ Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
/* set tweaks: T0 = 0; T1 = KEY type */
Skein_Start_New_Type(ctx, KEY);
/* zero the initial chaining variables */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
/* hash the key */
(void) Skein_256_Update(ctx, key, keyBytes);
/* put result into cfg.b[] */
(void) Skein_256_Final_Pad(ctx, cfg.b);
/* copy over into ctx->X[] */
- bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+ memcpy(ctx->X, cfg.b, sizeof (cfg.b));
#if SKEIN_NEED_SWAP
{
uint_t i;
@@ -124,7 +124,7 @@ Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx, CFG_FINAL);
- bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ memset(&cfg.w, 0, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
/* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
@@ -161,7 +161,7 @@ Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
if (n) {
/* check on our logic here */
Skein_assert(n < msgByteCnt);
- bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ memcpy(&ctx->b[ctx->h.bCnt], msg, n);
msgByteCnt -= n;
msg += n;
ctx->h.bCnt += n;
@@ -189,7 +189,7 @@ Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
/* copy any remaining source message data bytes into b[] */
if (msgByteCnt) {
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
- bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt);
ctx->h.bCnt += msgByteCnt;
}
@@ -209,7 +209,7 @@ Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
/* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
- bzero(&ctx->b[ctx->h.bCnt],
+ memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
/* process the final block */
@@ -221,13 +221,12 @@ Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
/* run Threefish in "counter mode" to generate output */
/* zero out b[], so it can hold the counter */
- bzero(ctx->b, sizeof (ctx->b));
+ memset(ctx->b, 0, sizeof (ctx->b));
/* keep a local copy of counter mode "key" */
- bcopy(ctx->X, X, sizeof (X));
+ memcpy(X, ctx->X, sizeof (X));
for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
/* build the counter block */
- uint64_t tmp = Skein_Swap64((uint64_t)i);
- bcopy(&tmp, ctx->b, sizeof (tmp));
+ *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i);
Skein_Start_New_Type(ctx, OUT_FINAL);
/* run "counter mode" */
Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
@@ -240,7 +239,7 @@ Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
Skein_Show_Final(256, &ctx->h, n,
hashVal + i * SKEIN_256_BLOCK_BYTES);
/* restore the counter mode key for next time */
- bcopy(X, ctx->X, sizeof (X));
+ memcpy(ctx->X, X, sizeof (X));
}
return (SKEIN_SUCCESS);
}
@@ -262,16 +261,16 @@ Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
switch (hashBitLen) { /* use pre-computed values, where available */
#ifndef SKEIN_NO_PRECOMP
case 512:
- bcopy(SKEIN_512_IV_512, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_512_IV_512, sizeof (ctx->X));
break;
case 384:
- bcopy(SKEIN_512_IV_384, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_512_IV_384, sizeof (ctx->X));
break;
case 256:
- bcopy(SKEIN_512_IV_256, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_512_IV_256, sizeof (ctx->X));
break;
case 224:
- bcopy(SKEIN_512_IV_224, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN_512_IV_224, sizeof (ctx->X));
break;
#endif
default:
@@ -289,11 +288,11 @@ Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
cfg.w[1] = Skein_Swap64(hashBitLen);
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
/* zero pad config block */
- bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+ memset(&cfg.w[3], 0, sizeof (cfg) - 3 * sizeof (cfg.w[0]));
/* compute the initial chaining values from config block */
/* zero the chaining variables */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
break;
}
@@ -328,7 +327,7 @@ Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
/* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) { /* is there a key? */
/* no key: use all zeroes as key for config block */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
} else { /* here to pre-process a key */
Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
@@ -338,12 +337,12 @@ Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
/* set tweaks: T0 = 0; T1 = KEY type */
Skein_Start_New_Type(ctx, KEY);
/* zero the initial chaining variables */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
(void) Skein_512_Update(ctx, key, keyBytes); /* hash the key */
/* put result into cfg.b[] */
(void) Skein_512_Final_Pad(ctx, cfg.b);
/* copy over into ctx->X[] */
- bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+ memcpy(ctx->X, cfg.b, sizeof (cfg.b));
#if SKEIN_NEED_SWAP
{
uint_t i;
@@ -360,7 +359,7 @@ Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx, CFG_FINAL);
- bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ memset(&cfg.w, 0, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
/* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
@@ -397,7 +396,7 @@ Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
if (n) {
/* check on our logic here */
Skein_assert(n < msgByteCnt);
- bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ memcpy(&ctx->b[ctx->h.bCnt], msg, n);
msgByteCnt -= n;
msg += n;
ctx->h.bCnt += n;
@@ -425,7 +424,7 @@ Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
/* copy any remaining source message data bytes into b[] */
if (msgByteCnt) {
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
- bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt);
ctx->h.bCnt += msgByteCnt;
}
@@ -445,7 +444,7 @@ Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
/* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
- bzero(&ctx->b[ctx->h.bCnt],
+ memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
/* process the final block */
@@ -457,13 +456,12 @@ Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
/* run Threefish in "counter mode" to generate output */
/* zero out b[], so it can hold the counter */
- bzero(ctx->b, sizeof (ctx->b));
+ memset(ctx->b, 0, sizeof (ctx->b));
/* keep a local copy of counter mode "key" */
- bcopy(ctx->X, X, sizeof (X));
+ memcpy(X, ctx->X, sizeof (X));
for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
/* build the counter block */
- uint64_t tmp = Skein_Swap64((uint64_t)i);
- bcopy(&tmp, ctx->b, sizeof (tmp));
+ *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i);
Skein_Start_New_Type(ctx, OUT_FINAL);
/* run "counter mode" */
Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
@@ -476,7 +474,7 @@ Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
Skein_Show_Final(512, &ctx->h, n,
hashVal + i * SKEIN_512_BLOCK_BYTES);
/* restore the counter mode key for next time */
- bcopy(X, ctx->X, sizeof (X));
+ memcpy(ctx->X, X, sizeof (X));
}
return (SKEIN_SUCCESS);
}
@@ -498,13 +496,13 @@ Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
switch (hashBitLen) { /* use pre-computed values, where available */
#ifndef SKEIN_NO_PRECOMP
case 512:
- bcopy(SKEIN1024_IV_512, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN1024_IV_512, sizeof (ctx->X));
break;
case 384:
- bcopy(SKEIN1024_IV_384, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN1024_IV_384, sizeof (ctx->X));
break;
case 1024:
- bcopy(SKEIN1024_IV_1024, ctx->X, sizeof (ctx->X));
+ memcpy(ctx->X, SKEIN1024_IV_1024, sizeof (ctx->X));
break;
#endif
default:
@@ -522,11 +520,11 @@ Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
cfg.w[1] = Skein_Swap64(hashBitLen);
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
/* zero pad config block */
- bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+ memset(&cfg.w[3], 0, sizeof (cfg) - 3 * sizeof (cfg.w[0]));
/* compute the initial chaining values from config block */
/* zero the chaining variables */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
break;
}
@@ -561,7 +559,7 @@ Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
/* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) { /* is there a key? */
/* no key: use all zeroes as key for config block */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
} else { /* here to pre-process a key */
Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
/* do a mini-Init right here */
@@ -570,12 +568,12 @@ Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
/* set tweaks: T0 = 0; T1 = KEY type */
Skein_Start_New_Type(ctx, KEY);
/* zero the initial chaining variables */
- bzero(ctx->X, sizeof (ctx->X));
+ memset(ctx->X, 0, sizeof (ctx->X));
(void) Skein1024_Update(ctx, key, keyBytes); /* hash the key */
/* put result into cfg.b[] */
(void) Skein1024_Final_Pad(ctx, cfg.b);
/* copy over into ctx->X[] */
- bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+ memcpy(ctx->X, cfg.b, sizeof (cfg.b));
#if SKEIN_NEED_SWAP
{
uint_t i;
@@ -592,7 +590,7 @@ Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx, CFG_FINAL);
- bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ memset(&cfg.w, 0, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
/* hash result length in bits */
cfg.w[1] = Skein_Swap64(hashBitLen);
@@ -630,7 +628,7 @@ Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
if (n) {
/* check on our logic here */
Skein_assert(n < msgByteCnt);
- bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ memcpy(&ctx->b[ctx->h.bCnt], msg, n);
msgByteCnt -= n;
msg += n;
ctx->h.bCnt += n;
@@ -658,7 +656,7 @@ Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
/* copy any remaining source message data bytes into b[] */
if (msgByteCnt) {
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
- bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt);
ctx->h.bCnt += msgByteCnt;
}
@@ -678,7 +676,7 @@ Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
/* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
- bzero(&ctx->b[ctx->h.bCnt],
+ memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
/* process the final block */
@@ -690,13 +688,12 @@ Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
/* run Threefish in "counter mode" to generate output */
/* zero out b[], so it can hold the counter */
- bzero(ctx->b, sizeof (ctx->b));
+ memset(ctx->b, 0, sizeof (ctx->b));
/* keep a local copy of counter mode "key" */
- bcopy(ctx->X, X, sizeof (X));
+ memcpy(X, ctx->X, sizeof (X));
for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
/* build the counter block */
- uint64_t tmp = Skein_Swap64((uint64_t)i);
- bcopy(&tmp, ctx->b, sizeof (tmp));
+ *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i);
Skein_Start_New_Type(ctx, OUT_FINAL);
/* run "counter mode" */
Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
@@ -709,7 +706,7 @@ Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
Skein_Show_Final(1024, &ctx->h, n,
hashVal + i * SKEIN1024_BLOCK_BYTES);
/* restore the counter mode key for next time */
- bcopy(X, ctx->X, sizeof (X));
+ memcpy(ctx->X, X, sizeof (X));
}
return (SKEIN_SUCCESS);
}
@@ -727,7 +724,7 @@ Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
/* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
- bzero(&ctx->b[ctx->h.bCnt],
+ memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
/* process the final block */
Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
@@ -748,7 +745,7 @@ Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
/* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
- bzero(&ctx->b[ctx->h.bCnt],
+ memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
/* process the final block */
Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
@@ -770,7 +767,7 @@ Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
/* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
- bzero(&ctx->b[ctx->h.bCnt],
+ memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
/* process the final block */
Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
@@ -798,13 +795,12 @@ Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
/* run Threefish in "counter mode" to generate output */
/* zero out b[], so it can hold the counter */
- bzero(ctx->b, sizeof (ctx->b));
+ memset(ctx->b, 0, sizeof (ctx->b));
/* keep a local copy of counter mode "key" */
- bcopy(ctx->X, X, sizeof (X));
+ memcpy(X, ctx->X, sizeof (X));
for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
/* build the counter block */
- uint64_t tmp = Skein_Swap64((uint64_t)i);
- bcopy(&tmp, ctx->b, sizeof (tmp));
+ *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i);
Skein_Start_New_Type(ctx, OUT_FINAL);
/* run "counter mode" */
Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
@@ -817,7 +813,7 @@ Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
Skein_Show_Final(256, &ctx->h, n,
hashVal + i * SKEIN_256_BLOCK_BYTES);
/* restore the counter mode key for next time */
- bcopy(X, ctx->X, sizeof (X));
+ memcpy(ctx->X, X, sizeof (X));
}
return (SKEIN_SUCCESS);
}
@@ -838,13 +834,12 @@ Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
/* run Threefish in "counter mode" to generate output */
/* zero out b[], so it can hold the counter */
- bzero(ctx->b, sizeof (ctx->b));
+ memset(ctx->b, 0, sizeof (ctx->b));
/* keep a local copy of counter mode "key" */
- bcopy(ctx->X, X, sizeof (X));
+ memcpy(X, ctx->X, sizeof (X));
for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
/* build the counter block */
- uint64_t tmp = Skein_Swap64((uint64_t)i);
- bcopy(&tmp, ctx->b, sizeof (tmp));
+ *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i);
Skein_Start_New_Type(ctx, OUT_FINAL);
/* run "counter mode" */
Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
@@ -857,7 +852,7 @@ Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
Skein_Show_Final(256, &ctx->h, n,
hashVal + i * SKEIN_512_BLOCK_BYTES);
/* restore the counter mode key for next time */
- bcopy(X, ctx->X, sizeof (X));
+ memcpy(ctx->X, X, sizeof (X));
}
return (SKEIN_SUCCESS);
}
@@ -878,13 +873,12 @@ Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
/* run Threefish in "counter mode" to generate output */
/* zero out b[], so it can hold the counter */
- bzero(ctx->b, sizeof (ctx->b));
+ memset(ctx->b, 0, sizeof (ctx->b));
/* keep a local copy of counter mode "key" */
- bcopy(ctx->X, X, sizeof (X));
+ memcpy(X, ctx->X, sizeof (X));
for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
/* build the counter block */
- uint64_t tmp = Skein_Swap64((uint64_t)i);
- bcopy(&tmp, ctx->b, sizeof (tmp));
+ *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i);
Skein_Start_New_Type(ctx, OUT_FINAL);
/* run "counter mode" */
Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
@@ -897,7 +891,7 @@ Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
Skein_Show_Final(256, &ctx->h, n,
hashVal + i * SKEIN1024_BLOCK_BYTES);
/* restore the counter mode key for next time */
- bcopy(X, ctx->X, sizeof (X));
+ memcpy(ctx->X, X, sizeof (X));
}
return (SKEIN_SUCCESS);
}
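
The Final/Output routines above all share one pattern: Threefish is run in counter mode, hashing a little-endian 64-bit block index with the saved chaining state as the "key", and that state is restored after each output block. Note that the patch replaces the bcopy() of a temporary with a direct 64-bit store into ctx->b; this assumes ctx->b is sufficiently aligned for such a store, which the context layout provides since b[] follows the uint64_t X[] array. A condensed sketch of the pattern, sized for Skein-256 with byte order simplified to native; counter_mode_output() and process_block() are stand-ins, the latter for Skein_256_Process_Block():

    #include <stdint.h>
    #include <string.h>

    static void
    counter_mode_output(uint64_t X[4], uint8_t *out, size_t byte_cnt,
        void (*process_block)(uint64_t X[4], const uint8_t blk[32]))
    {
        uint64_t key[4];
        uint8_t blk[32];

        memcpy(key, X, sizeof (key));    /* counter-mode "key" */
        for (uint64_t i = 0; i * 32 < byte_cnt; i++) {
            memset(blk, 0, sizeof (blk));
            memcpy(blk, &i, sizeof (i));    /* the counter block */
            process_block(X, blk);          /* X now holds output */
            size_t n = byte_cnt - i * 32;
            memcpy(out + i * 32, X, n > 32 ? 32 : n);
            memcpy(X, key, sizeof (key));   /* restore for next block */
        }
    }
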
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c b/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c
index 7ba165a48511..3ad52da5f6a3 100644
--- a/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c
@@ -30,7 +30,9 @@
* the #pragma here to ignore the warning.
*/
#if defined(_ILP32) || defined(__powerpc) /* Assume small stack */
+#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
/*
* We're running on 32-bit, don't unroll loops to save stack frame space
*
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h b/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h
index 2f6307fa7b55..eff19ce83f81 100644
--- a/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h
@@ -25,7 +25,7 @@
#define _SKEIN_IMPL_H_
#include <sys/skein.h>
-#include <sys/strings.h>
+#include <sys/string.h>
#include "skein_impl.h"
#include "skein_port.h"
@@ -263,8 +263,6 @@ extern const uint64_t SKEIN_256_IV_128[];
extern const uint64_t SKEIN_256_IV_160[];
extern const uint64_t SKEIN_256_IV_224[];
extern const uint64_t SKEIN_256_IV_256[];
-extern const uint64_t SKEIN_512_IV_128[];
-extern const uint64_t SKEIN_512_IV_160[];
extern const uint64_t SKEIN_512_IV_224[];
extern const uint64_t SKEIN_512_IV_256[];
extern const uint64_t SKEIN_512_IV_384[];
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c b/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c
index 140d38f76547..84cefe4546ca 100644
--- a/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c
@@ -52,30 +52,6 @@ const uint64_t SKEIN_256_IV_256[] = {
MK_64(0x6A54E920, 0xFDE8DA69)
};
-/* blkSize = 512 bits. hashSize = 128 bits */
-const uint64_t SKEIN_512_IV_128[] = {
- MK_64(0xA8BC7BF3, 0x6FBF9F52),
- MK_64(0x1E9872CE, 0xBD1AF0AA),
- MK_64(0x309B1790, 0xB32190D3),
- MK_64(0xBCFBB854, 0x3F94805C),
- MK_64(0x0DA61BCD, 0x6E31B11B),
- MK_64(0x1A18EBEA, 0xD46A32E3),
- MK_64(0xA2CC5B18, 0xCE84AA82),
- MK_64(0x6982AB28, 0x9D46982D)
-};
-
-/* blkSize = 512 bits. hashSize = 160 bits */
-const uint64_t SKEIN_512_IV_160[] = {
- MK_64(0x28B81A2A, 0xE013BD91),
- MK_64(0xC2F11668, 0xB5BDF78F),
- MK_64(0x1760D8F3, 0xF6A56F12),
- MK_64(0x4FB74758, 0x8239904F),
- MK_64(0x21EDE07F, 0x7EAF5056),
- MK_64(0xD908922E, 0x63ED70B8),
- MK_64(0xB8EC76FF, 0xECCB52FA),
- MK_64(0x01A47BB8, 0xA3F27A6E)
-};
-
/* blkSize = 512 bits. hashSize = 224 bits */
const uint64_t SKEIN_512_IV_224[] = {
MK_64(0xCCD06162, 0x48677224),
diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h b/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h
index ce4353082552..96d1266d019e 100644
--- a/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h
+++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h
@@ -50,9 +50,9 @@
#else
/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
#define SKEIN_NEED_SWAP (0)
-#define Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt)
+#define Skein_Put64_LSB_First(dst08, src64, bCnt) memcpy(dst08, src64, bCnt)
#define Skein_Get64_LSB_First(dst64, src08, wCnt) \
- bcopy(src08, dst64, 8 * (wCnt))
+ memcpy(dst64, src08, 8 * (wCnt))
#endif
#endif /* ifndef SKEIN_NEED_SWAP */
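
On little-endian targets the two macros above reduce to plain memcpy(); big-endian builds take the SKEIN_NEED_SWAP path earlier in this header instead. For illustration, a portable byte-wise fallback for the load direction might look like the sketch below (get64_lsb_first() is hypothetical, not the header's actual swapping variant):

    #include <stdint.h>
    #include <stddef.h>

    /* assemble little-endian words regardless of host byte order */
    static void
    get64_lsb_first(uint64_t *dst, const uint8_t *src, size_t wcnt)
    {
        for (size_t n = 0; n < wcnt; n++, src += 8)
            dst[n] = (uint64_t)src[0] |
                ((uint64_t)src[1] << 8) | ((uint64_t)src[2] << 16) |
                ((uint64_t)src[3] << 24) | ((uint64_t)src[4] << 32) |
                ((uint64_t)src[5] << 40) | ((uint64_t)src[6] << 48) |
                ((uint64_t)src[7] << 56);
    }
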
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_cipher.c b/sys/contrib/openzfs/module/icp/api/kcf_cipher.c
index d6aa48147edb..4bea46807197 100644
--- a/sys/contrib/openzfs/module/icp/api/kcf_cipher.c
+++ b/sys/contrib/openzfs/module/icp/api/kcf_cipher.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -34,253 +34,11 @@
* Encryption and decryption routines.
*/
-/*
- * The following are the possible returned values common to all the routines
- * below. The applicability of some of these return values depends on the
- * presence of the arguments.
- *
- * CRYPTO_SUCCESS: The operation completed successfully.
- * CRYPTO_QUEUED: A request was submitted successfully. The callback
- * routine will be called when the operation is done.
- * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or
- * CRYPTO_INVALID_MECH for problems with the 'mech'.
- * CRYPTO_INVALID_DATA for bogus 'data'
- * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
- * CRYPTO_INVALID_CONTEXT: Not a valid context.
- * CRYPTO_BUSY: Cannot process the request now. Schedule a
- * crypto_bufcall(), or try later.
- * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is
- * capable of a function or a mechanism.
- * CRYPTO_INVALID_KEY: bogus 'key' argument.
- * CRYPTO_INVALID_PLAINTEXT: bogus 'plaintext' argument.
- * CRYPTO_INVALID_CIPHERTEXT: bogus 'ciphertext' argument.
- */
/*
- * crypto_cipher_init_prov()
+ * crypto_encrypt()
*
* Arguments:
- *
- * pd: provider descriptor
- * sid: session id
- * mech: crypto_mechanism_t pointer.
- * mech_type is a valid value previously returned by
- * crypto_mech2id();
- * When the mech's parameter is not NULL, its definition depends
- * on the standard definition of the mechanism.
- * key: pointer to a crypto_key_t structure.
- * tmpl: a crypto_ctx_template_t, opaque template of a context of an
- * encryption or decryption with the 'mech' using 'key'.
- * 'tmpl' is created by a previous call to
- * crypto_create_ctx_template().
- * ctxp: Pointer to a crypto_context_t.
- * func: CRYPTO_FG_ENCRYPT or CRYPTO_FG_DECRYPT.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * This is a common function invoked internally by both
- * crypto_encrypt_init() and crypto_decrypt_init().
- * Asynchronously submits a request for, or synchronously performs the
- * initialization of an encryption or a decryption operation.
- * When possible and applicable, will internally use the pre-expanded key
- * schedule from the context template, tmpl.
- * When complete and successful, 'ctxp' will contain a crypto_context_t
- * valid for later calls to encrypt_update() and encrypt_final(), or
- * decrypt_update() and decrypt_final().
- * The caller should hold a reference on the specified provider
- * descriptor before calling this function.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-static int
-crypto_cipher_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_spi_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq, crypto_func_group_t func)
-{
- int error;
- crypto_ctx_t *ctx;
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- if (func == CRYPTO_FG_ENCRYPT) {
- error = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_ENCRYPT);
- } else {
- error = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_DECRYPT);
- }
-
- if (error != CRYPTO_SUCCESS)
- return (error);
- }
-
- /* Allocate and initialize the canonical context */
- if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
- return (CRYPTO_HOST_MEMORY);
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
-
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
-
- if (func == CRYPTO_FG_ENCRYPT)
- error = KCF_PROV_ENCRYPT_INIT(real_provider, ctx,
- &lmech, key, tmpl, KCF_SWFP_RHNDL(crq));
- else {
- ASSERT(func == CRYPTO_FG_DECRYPT);
-
- error = KCF_PROV_DECRYPT_INIT(real_provider, ctx,
- &lmech, key, tmpl, KCF_SWFP_RHNDL(crq));
- }
- KCF_PROV_INCRSTATS(pd, error);
-
- goto done;
- }
-
- /* Check if context sharing is possible */
- if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
- key->ck_format == CRYPTO_KEY_RAW &&
- KCF_CAN_SHARE_OPSTATE(pd, mech->cm_type)) {
- kcf_context_t *tctxp = (kcf_context_t *)ctx;
- kcf_provider_desc_t *tpd = NULL;
- crypto_mech_info_t *sinfo;
-
- if ((kcf_get_sw_prov(mech->cm_type, &tpd, &tctxp->kc_mech,
- B_FALSE) == CRYPTO_SUCCESS)) {
- int tlen;
-
- sinfo = &(KCF_TO_PROV_MECHINFO(tpd, mech->cm_type));
- /*
- * key->ck_length from the consumer is always in bits.
- * We convert it to be in the same unit registered by
- * the provider in order to do a comparison.
- */
- if (sinfo->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES)
- tlen = key->ck_length >> 3;
- else
- tlen = key->ck_length;
- /*
- * Check if the software provider can support context
- * sharing and support this key length.
- */
- if ((sinfo->cm_mech_flags & CRYPTO_CAN_SHARE_OPSTATE) &&
- (tlen >= sinfo->cm_min_key_length) &&
- (tlen <= sinfo->cm_max_key_length)) {
- ctx->cc_flags = CRYPTO_INIT_OPSTATE;
- tctxp->kc_sw_prov_desc = tpd;
- } else
- KCF_PROV_REFRELE(tpd);
- }
- }
-
- if (func == CRYPTO_FG_ENCRYPT) {
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid,
- mech, key, NULL, NULL, tmpl);
- } else {
- ASSERT(func == CRYPTO_FG_DECRYPT);
- KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid,
- mech, key, NULL, NULL, tmpl);
- }
-
- error = kcf_submit_request(real_provider, ctx, crq, &params,
- B_FALSE);
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
-done:
- if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED))
- *ctxp = (crypto_context_t)ctx;
- else {
- /* Release the hold done in kcf_new_ctx(). */
- KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
- }
-
- return (error);
-}
-
-/*
- * Same as crypto_cipher_init_prov(), but relies on the scheduler to pick
- * an appropriate provider. See crypto_cipher_init_prov() comments for more
- * details.
- */
-static int
-crypto_cipher_init(crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq, crypto_func_group_t func)
-{
- int error;
- kcf_mech_entry_t *me;
- kcf_provider_desc_t *pd;
- kcf_ctx_template_t *ctx_tmpl;
- crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
- kcf_prov_tried_t *list = NULL;
-
-retry:
- /* pd is returned held */
- if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
- list, func, CHECK_RESTRICT(crq), 0)) == NULL) {
- if (list != NULL)
- kcf_free_triedlist(list);
- return (error);
- }
-
- /*
- * For SW providers, check the validity of the context template
- * It is very rare that the generation number mis-matches, so
- * is acceptable to fail here, and let the consumer recover by
- * freeing this tmpl and create a new one for the key and new SW
- * provider
- */
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
- if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (CRYPTO_OLD_CTX_TEMPLATE);
- } else {
- spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- }
- }
-
- error = crypto_cipher_init_prov(pd, pd->pd_sid, mech, key,
- spi_ctx_tmpl, ctxp, crq, func);
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
- /* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
- goto retry;
- }
-
- if (list != NULL)
- kcf_free_triedlist(list);
-
- KCF_PROV_REFRELE(pd);
- return (error);
-}
-
-/*
- * crypto_encrypt_prov()
- *
- * Arguments:
- * pd: provider descriptor
* sid: session id
* mech: crypto_mechanism_t pointer.
* mech_type is a valid value previously returned by
@@ -294,7 +52,6 @@ retry:
* tmpl: a crypto_ctx_template_t, opaque template of a context of an
* encryption with the 'mech' using 'key'. 'tmpl' is created by
* a previous call to crypto_create_ctx_template().
- * cr: crypto_call_req_t calling conditions and call back info.
*
* Description:
* Asynchronously submits a request for, or synchronously performs a
@@ -302,57 +59,17 @@ retry:
* the key 'key'.
* When complete and successful, 'ciphertext' will contain the encrypted
* message.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
+ * Relies on the KCF scheduler to pick a provider.
*
* Returns:
* See comment in the beginning of the file.
*/
int
-crypto_encrypt_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_data_t *plaintext, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
- crypto_call_req_t *crq)
-{
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
- int error;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- error = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_ENCRYPT_ATOMIC);
-
- if (error != CRYPTO_SUCCESS)
- return (error);
- }
-
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
- plaintext, ciphertext, tmpl);
-
- error = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- return (error);
-}
-
-/*
- * Same as crypto_encrypt_prov(), but relies on the scheduler to pick
- * a provider. See crypto_encrypt_prov() for more details.
- */
-int
crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
- crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
- crypto_call_req_t *crq)
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext)
{
int error;
kcf_mech_entry_t *me;
- kcf_req_params_t params;
kcf_provider_desc_t *pd;
kcf_ctx_template_t *ctx_tmpl;
crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
@@ -361,52 +78,23 @@ crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
retry:
/* pd is returned held */
if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
- list, CRYPTO_FG_ENCRYPT_ATOMIC, CHECK_RESTRICT(crq),
- plaintext->cd_length)) == NULL) {
+ list, CRYPTO_FG_ENCRYPT_ATOMIC)) == NULL) {
if (list != NULL)
kcf_free_triedlist(list);
return (error);
}
- /*
- * For SW providers, check the validity of the context template
- * It is very rare that the generation number mis-matches, so
- * is acceptable to fail here, and let the consumer recover by
- * freeing this tmpl and create a new one for the key and new SW
- * provider
- */
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
- if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (CRYPTO_OLD_CTX_TEMPLATE);
- } else {
- spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- }
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
+	if ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+ crypto_mechanism_t lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+ error = KCF_PROV_ENCRYPT_ATOMIC(pd, &lmech, key,
+ plaintext, ciphertext, spi_ctx_tmpl);
- error = KCF_PROV_ENCRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key,
- plaintext, ciphertext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid,
- mech, key, plaintext, ciphertext, spi_ctx_tmpl);
- error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
- }
-
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
+ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) {
/* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL)
goto retry;
}
@@ -418,147 +106,6 @@ retry:
}
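
The goto-based retry above is all the scheduling that remains: try the first capable provider, and on a recoverable error record it on a tried-list and ask for the next one. The same control flow restated as a loop, for clarity; kcf_try_encrypt is a hypothetical name used only for this sketch, not a function added by the patch:

static int
kcf_try_encrypt(crypto_mechanism_t *mech, crypto_key_t *key,
    crypto_data_t *plaintext, crypto_data_t *ciphertext,
    crypto_spi_ctx_template_t spi_ctx_tmpl)
{
	kcf_prov_tried_t *list = NULL;
	kcf_provider_desc_t *pd;
	kcf_mech_entry_t *me;
	int error;

	for (;;) {
		/* pd is returned held; providers already on 'list' are skipped */
		if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
		    list, CRYPTO_FG_ENCRYPT_ATOMIC)) == NULL)
			break;

		crypto_mechanism_t lmech = *mech;
		KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
		error = KCF_PROV_ENCRYPT_ATOMIC(pd, &lmech, key,
		    plaintext, ciphertext, spi_ctx_tmpl);

		if (error == CRYPTO_SUCCESS || !IS_RECOVERABLE(error) ||
		    kcf_insert_triedlist(&list, pd, KM_SLEEP) == NULL) {
			/* done, a hard failure, or no memory to record pd */
			KCF_PROV_REFRELE(pd);
			break;
		}
		/* pd's hold moved to 'list'; retry with the next provider */
	}
	if (list != NULL)
		kcf_free_triedlist(list);
	return (error);
}
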
/*
- * crypto_encrypt_init_prov()
- *
- * Calls crypto_cipher_init_prov() to initialize an encryption operation.
- */
-int
-crypto_encrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq)
-{
- return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq,
- CRYPTO_FG_ENCRYPT));
-}
-
-/*
- * crypto_encrypt_init()
- *
- * Calls crypto_cipher_init() to initialize an encryption operation
- */
-int
-crypto_encrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq)
-{
- return (crypto_cipher_init(mech, key, tmpl, ctxp, crq,
- CRYPTO_FG_ENCRYPT));
-}
-
-/*
- * crypto_encrypt_update()
- *
- * Arguments:
- * context: A crypto_context_t initialized by encrypt_init().
- * plaintext: The message part to be encrypted
- * ciphertext: Storage for the encrypted message part.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs a
- * part of an encryption operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_encrypt_update(crypto_context_t context, crypto_data_t *plaintext,
- crypto_data_t *ciphertext, crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext,
- ciphertext, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- return (error);
- }
-
- /* Check if we should use a software provider for small jobs */
- if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) {
- if (plaintext->cd_length < kcf_ctx->kc_mech->me_threshold &&
- kcf_ctx->kc_sw_prov_desc != NULL &&
- KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) {
- pd = kcf_ctx->kc_sw_prov_desc;
- }
- }
-
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE,
- ctx->cc_session, NULL, NULL, plaintext, ciphertext, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
-
- return (error);
-}
-
-/*
- * crypto_encrypt_final()
- *
- * Arguments:
- * context: A crypto_context_t initialized by encrypt_init().
- * ciphertext: Storage for the last part of encrypted message
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs the
- * final part of an encryption operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext,
- crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_FINAL,
- ctx->cc_session, NULL, NULL, NULL, ciphertext, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
-/*
* crypto_decrypt_prov()
*
* Arguments:
@@ -576,7 +123,6 @@ crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext,
* tmpl: a crypto_ctx_template_t, opaque template of a context of an
* encryption with the 'mech' using 'key'. 'tmpl' is created by
* a previous call to crypto_create_ctx_template().
- * cr: crypto_call_req_t calling conditions and call back info.
*
* Description:
* Asynchronously submits a request for, or synchronously performs a
@@ -584,58 +130,17 @@ crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext,
* the key 'key'.
* When complete and successful, 'plaintext' will contain the decrypted
* message.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
+ * Relies on the KCF scheduler to choose a provider.
*
* Returns:
* See comment in the beginning of the file.
*/
int
-crypto_decrypt_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_data_t *ciphertext, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
- crypto_call_req_t *crq)
-{
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
- int rv;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- rv = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_DECRYPT_ATOMIC);
-
- if (rv != CRYPTO_SUCCESS)
- return (rv);
- }
-
- KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
- ciphertext, plaintext, tmpl);
-
- rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- return (rv);
-}
-
-/*
- * Same as crypto_decrypt_prov(), but relies on the KCF scheduler to
- * choose a provider. See crypto_decrypt_prov() comments for more
- * information.
- */
-int
crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext,
- crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
- crypto_call_req_t *crq)
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext)
{
int error;
kcf_mech_entry_t *me;
- kcf_req_params_t params;
kcf_provider_desc_t *pd;
kcf_ctx_template_t *ctx_tmpl;
crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
@@ -644,52 +149,24 @@ crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext,
retry:
/* pd is returned held */
if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
- list, CRYPTO_FG_DECRYPT_ATOMIC, CHECK_RESTRICT(crq),
- ciphertext->cd_length)) == NULL) {
+ list, CRYPTO_FG_DECRYPT_ATOMIC)) == NULL) {
if (list != NULL)
kcf_free_triedlist(list);
return (error);
}
- /*
- * For SW providers, check the validity of the context template
- * It is very rare that the generation number mis-matches, so
- * is acceptable to fail here, and let the consumer recover by
- * freeing this tmpl and create a new one for the key and new SW
- * provider
- */
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
- if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (CRYPTO_OLD_CTX_TEMPLATE);
- } else {
- spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- }
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
+	if ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+ crypto_mechanism_t lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
- error = KCF_PROV_DECRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key,
- ciphertext, plaintext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid,
- mech, key, ciphertext, plaintext, spi_ctx_tmpl);
- error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
- }
+ error = KCF_PROV_DECRYPT_ATOMIC(pd, &lmech, key,
+ ciphertext, plaintext, spi_ctx_tmpl);
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
+ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) {
/* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL)
goto retry;
}
@@ -700,231 +177,7 @@ retry:
return (error);
}
-/*
- * crypto_decrypt_init_prov()
- *
- * Calls crypto_cipher_init_prov() to initialize a decryption operation
- */
-int
-crypto_decrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq)
-{
- return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq,
- CRYPTO_FG_DECRYPT));
-}
-
-/*
- * crypto_decrypt_init()
- *
- * Calls crypto_cipher_init() to initialize a decryption operation
- */
-int
-crypto_decrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq)
-{
- return (crypto_cipher_init(mech, key, tmpl, ctxp, crq,
- CRYPTO_FG_DECRYPT));
-}
-
-/*
- * crypto_decrypt_update()
- *
- * Arguments:
- * context: A crypto_context_t initialized by decrypt_init().
- * ciphertext: The message part to be decrypted
- * plaintext: Storage for the decrypted message part.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs a
- * part of an decryption operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_decrypt_update(crypto_context_t context, crypto_data_t *ciphertext,
- crypto_data_t *plaintext, crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext,
- plaintext, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- return (error);
- }
-
- /* Check if we should use a software provider for small jobs */
- if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) {
- if (ciphertext->cd_length < kcf_ctx->kc_mech->me_threshold &&
- kcf_ctx->kc_sw_prov_desc != NULL &&
- KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) {
- pd = kcf_ctx->kc_sw_prov_desc;
- }
- }
-
- KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE,
- ctx->cc_session, NULL, NULL, ciphertext, plaintext, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
-
- return (error);
-}
-
-/*
- * crypto_decrypt_final()
- *
- * Arguments:
- * context: A crypto_context_t initialized by decrypt_init().
- * plaintext: Storage for the last part of the decrypted message
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs the
- * final part of a decryption operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_decrypt_final(crypto_context_t context, crypto_data_t *plaintext,
- crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext,
- NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_FINAL,
- ctx->cc_session, NULL, NULL, NULL, plaintext, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
-/*
- * See comments for crypto_encrypt_update().
- */
-int
-crypto_encrypt_single(crypto_context_t context, crypto_data_t *plaintext,
- crypto_data_t *ciphertext, crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_ENCRYPT(pd, ctx, plaintext,
- ciphertext, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
- NULL, NULL, plaintext, ciphertext, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
-/*
- * See comments for crypto_decrypt_update().
- */
-int
-crypto_decrypt_single(crypto_context_t context, crypto_data_t *ciphertext,
- crypto_data_t *plaintext, crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DECRYPT(pd, ctx, ciphertext,
- plaintext, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
- NULL, NULL, ciphertext, plaintext, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
#if defined(_KERNEL)
-EXPORT_SYMBOL(crypto_encrypt_prov);
EXPORT_SYMBOL(crypto_encrypt);
-EXPORT_SYMBOL(crypto_encrypt_init_prov);
-EXPORT_SYMBOL(crypto_encrypt_init);
-EXPORT_SYMBOL(crypto_encrypt_update);
-EXPORT_SYMBOL(crypto_encrypt_final);
-EXPORT_SYMBOL(crypto_decrypt_prov);
EXPORT_SYMBOL(crypto_decrypt);
-EXPORT_SYMBOL(crypto_decrypt_init_prov);
-EXPORT_SYMBOL(crypto_decrypt_init);
-EXPORT_SYMBOL(crypto_decrypt_update);
-EXPORT_SYMBOL(crypto_decrypt_final);
-EXPORT_SYMBOL(crypto_encrypt_single);
-EXPORT_SYMBOL(crypto_decrypt_single);
#endif
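
Taken together, the surviving cipher entry points are two plain blocking calls with no completion callback to wire up. A minimal consumer-side sketch against the new prototypes; roundtrip() and its locals are illustrative only and assume pre-filled crypto_data_t buffers:

static int
roundtrip(crypto_mechanism_t *mech, crypto_key_t *key,
    crypto_ctx_template_t tmpl, crypto_data_t *pt, crypto_data_t *ct)
{
	int rv;

	/* Encrypt pt into ct; the call completes before returning. */
	rv = crypto_encrypt(mech, pt, key, tmpl, ct);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	/* No CRYPTO_QUEUED case left to handle on either call. */
	return (crypto_decrypt(mech, ct, key, tmpl, pt));
}
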
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c
index 21b0977d3634..b8cd67ea7f67 100644
--- a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c
+++ b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -48,7 +48,6 @@
* ptmpl: a storage for the opaque crypto_ctx_template_t, allocated and
* initialized by the software provider this routine is
* dispatched to.
- * kmflag: KM_SLEEP/KM_NOSLEEP mem. alloc. flag.
*
* Description:
* Redirects the call to the software provider of the specified
@@ -69,7 +68,7 @@
*/
int
crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t *ptmpl, int kmflag)
+ crypto_ctx_template_t *ptmpl)
{
int error;
kcf_mech_entry_t *me;
@@ -89,8 +88,8 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key,
if (error != CRYPTO_SUCCESS)
return (error);
- if ((ctx_tmpl = (kcf_ctx_template_t *)kmem_alloc(
- sizeof (kcf_ctx_template_t), kmflag)) == NULL) {
+ if ((ctx_tmpl = kmem_alloc(
+ sizeof (kcf_ctx_template_t), KM_SLEEP)) == NULL) {
KCF_PROV_REFRELE(pd);
return (CRYPTO_HOST_MEMORY);
}
@@ -101,10 +100,9 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key,
prov_mech.cm_param_len = mech->cm_param_len;
error = KCF_PROV_CREATE_CTX_TEMPLATE(pd, &prov_mech, key,
- &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size), KCF_RHNDL(kmflag));
+ &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size));
if (error == CRYPTO_SUCCESS) {
- ctx_tmpl->ct_generation = me->me_gen_swprov;
*ptmpl = ctx_tmpl;
} else {
kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
@@ -140,7 +138,7 @@ crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
ASSERT(ctx_tmpl->ct_prov_tmpl != NULL);
- bzero(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
+ memset(ctx_tmpl->ct_prov_tmpl, 0, ctx_tmpl->ct_size);
kmem_free(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
}
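
With the kmflag argument gone, template creation always sleeps for memory, and the create/destroy pair reduces to the shape below. The fragment assumes mech and key are already initialized by the caller; it is a sketch, not code from the patch:

	crypto_ctx_template_t tmpl = NULL;

	if (crypto_create_ctx_template(&mech, &key, &tmpl) == CRYPTO_SUCCESS) {
		/* reuse tmpl across many operations with the same key ... */
		crypto_destroy_ctx_template(tmpl);	/* zeroes, then frees */
	}
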
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_digest.c b/sys/contrib/openzfs/module/icp/api/kcf_digest.c
deleted file mode 100644
index aa68d69bc162..000000000000
--- a/sys/contrib/openzfs/module/icp/api/kcf_digest.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/crypto/common.h>
-#include <sys/crypto/impl.h>
-#include <sys/crypto/api.h>
-#include <sys/crypto/spi.h>
-#include <sys/crypto/sched_impl.h>
-
-/*
- * Message digest routines
- */
-
-/*
- * The following are the possible returned values common to all the routines
- * below. The applicability of some of these return values depends on the
- * presence of the arguments.
- *
- * CRYPTO_SUCCESS: The operation completed successfully.
- * CRYPTO_QUEUED: A request was submitted successfully. The callback
- * routine will be called when the operation is done.
- * CRYPTO_MECHANISM_INVALID or CRYPTO_INVALID_MECH_PARAM
- * for problems with the 'mech'.
- * CRYPTO_INVALID_DATA for bogus 'data'
- * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
- * CRYPTO_INVALID_CONTEXT: Not a valid context.
- * CRYPTO_BUSY: Cannot process the request now. Schedule a
- * crypto_bufcall(), or try later.
- * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED:
- * No provider is capable of a function or a mechanism.
- */
-
-
-/*
- * crypto_digest_prov()
- *
- * Arguments:
- * pd: pointer to the descriptor of the provider to use for this
- * operation.
- * sid: provider session id.
- * mech: crypto_mechanism_t pointer.
- * mech_type is a valid value previously returned by
- * crypto_mech2id();
- * When the mech's parameter is not NULL, its definition depends
- * on the standard definition of the mechanism.
- * data: The message to be digested.
- * digest: Storage for the digest. The length needed depends on the
- * mechanism.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs the
- * digesting operation of 'data' on the specified
- * provider with the specified session.
- * When complete and successful, 'digest' will contain the digest value.
- * The caller should hold a reference on the specified provider
- * descriptor before calling this function.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_digest_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_data_t *data, crypto_data_t *digest,
- crypto_call_req_t *crq)
-{
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
- int rv;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- rv = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq),
- pd, &real_provider, CRYPTO_FG_DIGEST_ATOMIC);
-
- if (rv != CRYPTO_SUCCESS)
- return (rv);
- }
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, NULL,
- data, digest);
-
- /* no crypto context to carry between multiple parts. */
- rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- return (rv);
-}
-
-
-/*
- * Same as crypto_digest_prov(), but relies on the KCF scheduler to
- * choose a provider. See crypto_digest_prov() comments for more information.
- */
-int
-crypto_digest(crypto_mechanism_t *mech, crypto_data_t *data,
- crypto_data_t *digest, crypto_call_req_t *crq)
-{
- int error;
- kcf_provider_desc_t *pd;
- kcf_req_params_t params;
- kcf_prov_tried_t *list = NULL;
-
-retry:
- /* The pd is returned held */
- if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error, list,
- CRYPTO_FG_DIGEST_ATOMIC, CHECK_RESTRICT(crq),
- data->cd_length)) == NULL) {
- if (list != NULL)
- kcf_free_triedlist(list);
- return (error);
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
-
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
- error = KCF_PROV_DIGEST_ATOMIC(pd, pd->pd_sid, &lmech, data,
- digest, KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
- (data->cd_length > pd->pd_hash_limit)) {
- error = CRYPTO_BUFFER_TOO_BIG;
- } else {
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC,
- pd->pd_sid, mech, NULL, data, digest);
-
- /* no crypto context to carry between multiple parts. */
- error = kcf_submit_request(pd, NULL, crq, &params,
- B_FALSE);
- }
- }
-
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
- /* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
- goto retry;
- }
-
- if (list != NULL)
- kcf_free_triedlist(list);
-
- KCF_PROV_REFRELE(pd);
- return (error);
-}
-
-/*
- * crypto_digest_init_prov()
- *
- * pd: pointer to the descriptor of the provider to use for this
- * operation.
- * sid: provider session id.
- * mech: crypto_mechanism_t pointer.
- * mech_type is a valid value previously returned by
- * crypto_mech2id();
- * When the mech's parameter is not NULL, its definition depends
- * on the standard definition of the mechanism.
- * ctxp: Pointer to a crypto_context_t.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs the
- * initialization of a message digest operation on the specified
- * provider with the specified session.
- * When complete and successful, 'ctxp' will contain a crypto_context_t
- * valid for later calls to digest_update() and digest_final().
- * The caller should hold a reference on the specified provider
- * descriptor before calling this function.
- */
-int
-crypto_digest_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_context_t *ctxp, crypto_call_req_t *crq)
-{
- int error;
- crypto_ctx_t *ctx;
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- error = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_DIGEST);
-
- if (error != CRYPTO_SUCCESS)
- return (error);
- }
-
- /* Allocate and initialize the canonical context */
- if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
- return (CRYPTO_HOST_MEMORY);
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
-
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
- error = KCF_PROV_DIGEST_INIT(real_provider, ctx, &lmech,
- KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_INIT, sid,
- mech, NULL, NULL, NULL);
- error = kcf_submit_request(real_provider, ctx, crq, &params,
- B_FALSE);
- }
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED))
- *ctxp = (crypto_context_t)ctx;
- else {
- /* Release the hold done in kcf_new_ctx(). */
- KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
- }
-
- return (error);
-}
-
-/*
- * Same as crypto_digest_init_prov(), but relies on the KCF scheduler
- * to choose a provider. See crypto_digest_init_prov() comments for
- * more information.
- */
-int
-crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp,
- crypto_call_req_t *crq)
-{
- int error;
- kcf_provider_desc_t *pd;
- kcf_prov_tried_t *list = NULL;
-
-retry:
- /* The pd is returned held */
- if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error,
- list, CRYPTO_FG_DIGEST, CHECK_RESTRICT(crq), 0)) == NULL) {
- if (list != NULL)
- kcf_free_triedlist(list);
- return (error);
- }
-
- if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) {
- /*
- * The hardware provider has limited digest support.
- * So, we fallback early here to using a software provider.
- *
- * XXX - need to enhance to do the fallback later in
- * crypto_digest_update() if the size of accumulated input data
- * exceeds the maximum size digestable by hardware provider.
- */
- error = CRYPTO_BUFFER_TOO_BIG;
- } else {
- error = crypto_digest_init_prov(pd, pd->pd_sid,
- mech, ctxp, crq);
- }
-
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
- /* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
- goto retry;
- }
-
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (error);
-}
-
-/*
- * crypto_digest_update()
- *
- * Arguments:
- * context: A crypto_context_t initialized by digest_init().
- * data: The part of message to be digested.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs a
- * part of a message digest operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_digest_update(crypto_context_t context, crypto_data_t *data,
- crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DIGEST_UPDATE(pd, ctx, data, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_UPDATE,
- ctx->cc_session, NULL, NULL, data, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- return (error);
-}
-
-/*
- * crypto_digest_final()
- *
- * Arguments:
- * context: A crypto_context_t initialized by digest_init().
- * digest: The storage for the digest.
- * cr: crypto_call_req_t calling conditions and call back info.
- *
- * Description:
- * Asynchronously submits a request for, or synchronously performs the
- * final part of a message digest operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
- * Returns:
- * See comment in the beginning of the file.
- */
-int
-crypto_digest_final(crypto_context_t context, crypto_data_t *digest,
- crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DIGEST_FINAL(pd, ctx, digest, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_FINAL,
- ctx->cc_session, NULL, NULL, NULL, digest);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
-/*
- * Performs a digest update on the specified key. Note that there is
- * no k-API crypto_digest_key() equivalent of this function.
- */
-int
-crypto_digest_key_prov(crypto_context_t context, crypto_key_t *key,
- crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DIGEST_KEY(pd, ctx, key, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_DIGEST_KEY,
- ctx->cc_session, NULL, key, NULL, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- return (error);
-}
-
-/*
- * See comments for crypto_digest_update() and crypto_digest_final().
- */
-int
-crypto_digest_single(crypto_context_t context, crypto_data_t *data,
- crypto_data_t *digest, crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_DIGEST(pd, ctx, data, digest, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
- NULL, NULL, data, digest);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
-#if defined(_KERNEL)
-EXPORT_SYMBOL(crypto_digest_prov);
-EXPORT_SYMBOL(crypto_digest);
-EXPORT_SYMBOL(crypto_digest_init_prov);
-EXPORT_SYMBOL(crypto_digest_init);
-EXPORT_SYMBOL(crypto_digest_update);
-EXPORT_SYMBOL(crypto_digest_final);
-EXPORT_SYMBOL(crypto_digest_key_prov);
-EXPORT_SYMBOL(crypto_digest_single);
-#endif
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_mac.c b/sys/contrib/openzfs/module/icp/api/kcf_mac.c
index a7722d8f914c..287467e68350 100644
--- a/sys/contrib/openzfs/module/icp/api/kcf_mac.c
+++ b/sys/contrib/openzfs/module/icp/api/kcf_mac.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -40,15 +40,12 @@
* presence of the arguments.
*
* CRYPTO_SUCCESS: The operation completed successfully.
- * CRYPTO_QUEUED: A request was submitted successfully. The callback
- * routine will be called when the operation is done.
* CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or
* CRYPTO_INVALID_MECH for problems with the 'mech'.
* CRYPTO_INVALID_DATA for bogus 'data'
* CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
* CRYPTO_INVALID_CONTEXT: Not a valid context.
- * CRYPTO_BUSY: Cannot process the request now. Schedule a
- * crypto_bufcall(), or try later.
+ * CRYPTO_BUSY: Cannot process the request now. Try later.
* CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is
* capable of a function or a mechanism.
* CRYPTO_INVALID_KEY: bogus 'key' argument.
@@ -70,7 +67,6 @@
* tmpl: a crypto_ctx_template_t, opaque template of a context of a
* MAC with the 'mech' using 'key'. 'tmpl' is created by
* a previous call to crypto_create_ctx_template().
- * cr: crypto_call_req_t calling conditions and call back info.
*
* Description:
* Asynchronously submits a request for, or synchronously performs a
@@ -79,55 +75,17 @@
* the specified session id.
* When complete and successful, 'mac' will contain the message
* authentication code.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'crq'.
+ * Relies on the KCF scheduler to choose a provider.
*
* Returns:
* See comment in the beginning of the file.
*/
int
-crypto_mac_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq)
-{
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
- int rv;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- rv = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_MAC_ATOMIC);
-
- if (rv != CRYPTO_SUCCESS)
- return (rv);
- }
-
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
- data, mac, tmpl);
- rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- return (rv);
-}
-
-/*
- * Same as crypto_mac_prov(), but relies on the KCF scheduler to choose
- * a provider. See crypto_mac() comments for more information.
- */
-int
crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
- crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
- crypto_call_req_t *crq)
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac)
{
int error;
kcf_mech_entry_t *me;
- kcf_req_params_t params;
kcf_provider_desc_t *pd;
kcf_ctx_template_t *ctx_tmpl;
crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
@@ -136,187 +94,23 @@ crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
retry:
/* The pd is returned held */
if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
- list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq),
- data->cd_length)) == NULL) {
+ list, CRYPTO_FG_MAC_ATOMIC)) == NULL) {
if (list != NULL)
kcf_free_triedlist(list);
return (error);
}
- /*
- * For SW providers, check the validity of the context template
- * It is very rare that the generation number mis-matches, so
- * is acceptable to fail here, and let the consumer recover by
- * freeing this tmpl and create a new one for the key and new SW
- * provider
- */
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
- if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (CRYPTO_OLD_CTX_TEMPLATE);
- } else {
- spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- }
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
-
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
-
- error = KCF_PROV_MAC_ATOMIC(pd, pd->pd_sid, &lmech, key, data,
- mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
- (data->cd_length > pd->pd_hash_limit)) {
- /*
- * XXX - We need a check to see if this is indeed
- * a HMAC. So far, all kernel clients use
- * this interface only for HMAC. So, this is fine
- * for now.
- */
- error = CRYPTO_BUFFER_TOO_BIG;
- } else {
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_ATOMIC,
- pd->pd_sid, mech, key, data, mac, spi_ctx_tmpl);
+	if ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- error = kcf_submit_request(pd, NULL, crq, &params,
- KCF_ISDUALREQ(crq));
- }
- }
+ crypto_mechanism_t lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+ error = KCF_PROV_MAC_ATOMIC(pd, &lmech, key, data,
+ mac, spi_ctx_tmpl);
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
+ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) {
/* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
- goto retry;
- }
-
- if (list != NULL)
- kcf_free_triedlist(list);
-
- KCF_PROV_REFRELE(pd);
- return (error);
-}
-
-/*
- * Single part operation to compute the MAC corresponding to the specified
- * 'data' and to verify that it matches the MAC specified by 'mac'.
- * The other arguments are the same as the function crypto_mac_prov().
- */
-int
-crypto_mac_verify_prov(crypto_provider_t provider, crypto_session_id_t sid,
- crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq)
-{
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
- kcf_provider_desc_t *real_provider = pd;
- int rv;
-
- ASSERT(KCF_PROV_REFHELD(pd));
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- rv = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_MAC_ATOMIC);
-
- if (rv != CRYPTO_SUCCESS)
- return (rv);
- }
-
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_MAC_VERIFY_ATOMIC, sid, mech,
- key, data, mac, tmpl);
- rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- return (rv);
-}
-
-/*
- * Same as crypto_mac_verify_prov(), but relies on the KCF scheduler to choose
- * a provider. See crypto_mac_verify_prov() comments for more information.
- */
-int
-crypto_mac_verify(crypto_mechanism_t *mech, crypto_data_t *data,
- crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
- crypto_call_req_t *crq)
-{
- int error;
- kcf_mech_entry_t *me;
- kcf_req_params_t params;
- kcf_provider_desc_t *pd;
- kcf_ctx_template_t *ctx_tmpl;
- crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
- kcf_prov_tried_t *list = NULL;
-
-retry:
- /* The pd is returned held */
- if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
- list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq),
- data->cd_length)) == NULL) {
- if (list != NULL)
- kcf_free_triedlist(list);
- return (error);
- }
-
- /*
- * For SW providers, check the validity of the context template
- * It is very rare that the generation number mis-matches, so
- * is acceptable to fail here, and let the consumer recover by
- * freeing this tmpl and create a new one for the key and new SW
- * provider
- */
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
- if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (CRYPTO_OLD_CTX_TEMPLATE);
- } else {
- spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- }
- }
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
-
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
-
- error = KCF_PROV_MAC_VERIFY_ATOMIC(pd, pd->pd_sid, &lmech, key,
- data, mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
- (data->cd_length > pd->pd_hash_limit)) {
- /* see comments in crypto_mac() */
- error = CRYPTO_BUFFER_TOO_BIG;
- } else {
- KCF_WRAP_MAC_OPS_PARAMS(&params,
- KCF_OP_MAC_VERIFY_ATOMIC, pd->pd_sid, mech,
- key, data, mac, spi_ctx_tmpl);
-
- error = kcf_submit_request(pd, NULL, crq, &params,
- KCF_ISDUALREQ(crq));
- }
- }
-
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
- /* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL)
goto retry;
}
@@ -333,7 +127,6 @@ retry:
* Arguments:
* pd: pointer to the descriptor of the provider to use for this
* operation.
- * sid: provider session id.
* mech: crypto_mechanism_t pointer.
* mech_type is a valid value previously returned by
* crypto_mech2id();
@@ -344,7 +137,6 @@ retry:
* MAC with the 'mech' using 'key'. 'tmpl' is created by
* a previous call to crypto_create_ctx_template().
* ctxp: Pointer to a crypto_context_t.
- * cr: crypto_call_req_t calling conditions and call back info.
*
* Description:
* Asynchronously submits a request for, or synchronously performs the
@@ -357,61 +149,29 @@ retry:
* The caller should hold a reference on the specified provider
* descriptor before calling this function.
*
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
- *
* Returns:
* See comment in the beginning of the file.
*/
-int
-crypto_mac_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+static int
+crypto_mac_init_prov(kcf_provider_desc_t *pd,
crypto_mechanism_t *mech, crypto_key_t *key, crypto_spi_ctx_template_t tmpl,
- crypto_context_t *ctxp, crypto_call_req_t *crq)
+ crypto_context_t *ctxp)
{
int rv;
crypto_ctx_t *ctx;
- kcf_req_params_t params;
- kcf_provider_desc_t *pd = provider;
kcf_provider_desc_t *real_provider = pd;
ASSERT(KCF_PROV_REFHELD(pd));
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- rv = kcf_get_hardware_provider(mech->cm_type,
- CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
- &real_provider, CRYPTO_FG_MAC);
-
- if (rv != CRYPTO_SUCCESS)
- return (rv);
- }
-
/* Allocate and initialize the canonical context */
- if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
+ if ((ctx = kcf_new_ctx(real_provider)) == NULL)
return (CRYPTO_HOST_MEMORY);
- }
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(crq, pd)) {
- crypto_mechanism_t lmech;
+ crypto_mechanism_t lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+ rv = KCF_PROV_MAC_INIT(real_provider, ctx, &lmech, key, tmpl);
- lmech = *mech;
- KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
- rv = KCF_PROV_MAC_INIT(real_provider, ctx, &lmech, key, tmpl,
- KCF_SWFP_RHNDL(crq));
- KCF_PROV_INCRSTATS(pd, rv);
- } else {
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_INIT, sid, mech, key,
- NULL, NULL, tmpl);
- rv = kcf_submit_request(real_provider, ctx, crq, &params,
- B_FALSE);
- }
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
- KCF_PROV_REFRELE(real_provider);
-
- if ((rv == CRYPTO_SUCCESS) || (rv == CRYPTO_QUEUED))
+ if (rv == CRYPTO_SUCCESS)
*ctxp = (crypto_context_t)ctx;
else {
/* Release the hold done in kcf_new_ctx(). */
@@ -428,8 +188,7 @@ crypto_mac_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
*/
int
crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
- crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
- crypto_call_req_t *crq)
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp)
{
int error;
kcf_mech_entry_t *me;
@@ -441,51 +200,27 @@ crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
retry:
/* The pd is returned held */
if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
- list, CRYPTO_FG_MAC, CHECK_RESTRICT(crq), 0)) == NULL) {
+ list, CRYPTO_FG_MAC)) == NULL) {
if (list != NULL)
kcf_free_triedlist(list);
return (error);
}
/*
-	 * For SW providers, check the validity of the context template
-	 * It is very rare that the generation number mis-matches, so
-	 * is acceptable to fail here, and let the consumer recover by
-	 * freeing this tmpl and create a new one for the key and new SW
-	 * provider
+	 * Use the provider-private part of the context template, if the
+	 * caller supplied one. The old generation-number validity check
+	 * is gone along with the SW/HW provider split.
*/
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
- if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
- if (list != NULL)
- kcf_free_triedlist(list);
- KCF_PROV_REFRELE(pd);
- return (CRYPTO_OLD_CTX_TEMPLATE);
- } else {
- spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- }
- }
+	if ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
- if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) {
- /*
- * The hardware provider has limited HMAC support.
- * So, we fallback early here to using a software provider.
- *
- * XXX - need to enhance to do the fallback later in
- * crypto_mac_update() if the size of accumulated input data
- * exceeds the maximum size digestable by hardware provider.
- */
- error = CRYPTO_BUFFER_TOO_BIG;
- } else {
- error = crypto_mac_init_prov(pd, pd->pd_sid, mech, key,
- spi_ctx_tmpl, ctxp, crq);
- }
- if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
- IS_RECOVERABLE(error)) {
+ error = crypto_mac_init_prov(pd, mech, key,
+ spi_ctx_tmpl, ctxp);
+ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) {
/* Add pd to the linked list of providers tried. */
- if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL)
goto retry;
}
@@ -502,27 +237,19 @@ retry:
* Arguments:
* context: A crypto_context_t initialized by mac_init().
* data: The message part to be MAC'ed
- * cr: crypto_call_req_t calling conditions and call back info.
*
* Description:
- * Asynchronously submits a request for, or synchronously performs a
- * part of a MAC operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
+ * Synchronously performs a part of a MAC operation.
*
* Returns:
* See comment in the beginning of the file.
*/
int
-crypto_mac_update(crypto_context_t context, crypto_data_t *data,
- crypto_call_req_t *cr)
+crypto_mac_update(crypto_context_t context, crypto_data_t *data)
{
crypto_ctx_t *ctx = (crypto_ctx_t *)context;
kcf_context_t *kcf_ctx;
kcf_provider_desc_t *pd;
- kcf_req_params_t params;
- int rv;
if ((ctx == NULL) ||
((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
@@ -530,19 +257,7 @@ crypto_mac_update(crypto_context_t context, crypto_data_t *data,
return (CRYPTO_INVALID_CONTEXT);
}
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- rv = KCF_PROV_MAC_UPDATE(pd, ctx, data, NULL);
- KCF_PROV_INCRSTATS(pd, rv);
- } else {
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_UPDATE,
- ctx->cc_session, NULL, NULL, data, NULL, NULL);
- rv = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- return (rv);
+ return (KCF_PROV_MAC_UPDATE(pd, ctx, data));
}
/*
@@ -551,27 +266,19 @@ crypto_mac_update(crypto_context_t context, crypto_data_t *data,
* Arguments:
* context: A crypto_context_t initialized by mac_init().
* mac: Storage for the message authentication code.
- * cr: crypto_call_req_t calling conditions and call back info.
*
* Description:
- * Asynchronously submits a request for, or synchronously performs a
- * part of a message authentication operation.
- *
- * Context:
- * Process or interrupt, according to the semantics dictated by the 'cr'.
+ * Synchronously performs a part of a message authentication operation.
*
* Returns:
* See comment in the beginning of the file.
*/
int
-crypto_mac_final(crypto_context_t context, crypto_data_t *mac,
- crypto_call_req_t *cr)
+crypto_mac_final(crypto_context_t context, crypto_data_t *mac)
{
crypto_ctx_t *ctx = (crypto_ctx_t *)context;
kcf_context_t *kcf_ctx;
kcf_provider_desc_t *pd;
- kcf_req_params_t params;
- int rv;
if ((ctx == NULL) ||
((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
@@ -579,67 +286,16 @@ crypto_mac_final(crypto_context_t context, crypto_data_t *mac,
return (CRYPTO_INVALID_CONTEXT);
}
- ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- rv = KCF_PROV_MAC_FINAL(pd, ctx, mac, NULL);
- KCF_PROV_INCRSTATS(pd, rv);
- } else {
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_FINAL,
- ctx->cc_session, NULL, NULL, NULL, mac, NULL);
- rv = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
+ int rv = KCF_PROV_MAC_FINAL(pd, ctx, mac);
/* Release the hold done in kcf_new_ctx() during init step. */
KCF_CONTEXT_COND_RELEASE(rv, kcf_ctx);
return (rv);
}
-/*
- * See comments for crypto_mac_update() and crypto_mac_final().
- */
-int
-crypto_mac_single(crypto_context_t context, crypto_data_t *data,
- crypto_data_t *mac, crypto_call_req_t *cr)
-{
- crypto_ctx_t *ctx = (crypto_ctx_t *)context;
- kcf_context_t *kcf_ctx;
- kcf_provider_desc_t *pd;
- int error;
- kcf_req_params_t params;
-
-
- if ((ctx == NULL) ||
- ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
- ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
- return (CRYPTO_INVALID_CONTEXT);
- }
-
-
- /* The fast path for SW providers. */
- if (CHECK_FASTPATH(cr, pd)) {
- error = KCF_PROV_MAC(pd, ctx, data, mac, NULL);
- KCF_PROV_INCRSTATS(pd, error);
- } else {
- KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
- NULL, NULL, data, mac, NULL);
- error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
- }
-
- /* Release the hold done in kcf_new_ctx() during init step. */
- KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
- return (error);
-}
-
#if defined(_KERNEL)
-EXPORT_SYMBOL(crypto_mac_prov);
EXPORT_SYMBOL(crypto_mac);
-EXPORT_SYMBOL(crypto_mac_verify_prov);
-EXPORT_SYMBOL(crypto_mac_verify);
-EXPORT_SYMBOL(crypto_mac_init_prov);
EXPORT_SYMBOL(crypto_mac_init);
EXPORT_SYMBOL(crypto_mac_update);
EXPORT_SYMBOL(crypto_mac_final);
-EXPORT_SYMBOL(crypto_mac_single);
#endif
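
The retained multi-part MAC interface is now synchronous end to end. A sketch of the init/update/final sequence under the new signatures; mech, key, tmpl, chunk and mac are assumed to be set up by the caller:

	crypto_context_t ctx;
	int rv;

	if ((rv = crypto_mac_init(&mech, &key, tmpl, &ctx)) != CRYPTO_SUCCESS)
		return (rv);
	if ((rv = crypto_mac_update(ctx, &chunk)) == CRYPTO_SUCCESS)
		rv = crypto_mac_final(ctx, &mac);	/* final drops the ctx hold */
	return (rv);

The sketch leans on crypto_mac_final() to release the context; a production caller also needs a release path for ctx when an update fails before final runs.
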
diff --git a/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c b/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c
deleted file mode 100644
index c0f415b264a7..000000000000
--- a/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/crypto/common.h>
-#include <sys/crypto/api.h>
-#include <sys/crypto/impl.h>
-#include <sys/crypto/sched_impl.h>
-
-/*
- * All event subscribers are put on a list. kcf_notify_list_lock
- * protects changes to this list.
- *
- * The following locking order is maintained in the code - The
- * global kcf_notify_list_lock followed by the individual lock
- * in a kcf_ntfy_elem structure (kn_lock).
- */
-kmutex_t ntfy_list_lock;
-kcondvar_t ntfy_list_cv; /* cv the service thread waits on */
-static kcf_ntfy_elem_t *ntfy_list_head;
-
-/*
- * crypto_mech2id()
- *
- * Arguments:
- * . mechname: A null-terminated string identifying the mechanism name.
- *
- * Description:
- * Walks the mechanisms tables, looking for an entry that matches the
- * mechname. Once it find it, it builds the 64-bit mech_type and returns
- * it. If there are no hardware or software providers for the mechanism,
- * but there is an unloaded software provider, this routine will attempt
- * to load it.
- *
- * Context:
- * Process and interruption.
- *
- * Returns:
- * The unique mechanism identified by 'mechname', if found.
- * CRYPTO_MECH_INVALID otherwise.
- */
-crypto_mech_type_t
-crypto_mech2id(char *mechname)
-{
- return (crypto_mech2id_common(mechname, B_TRUE));
-}
-
-/*
- * We walk the notification list and do the callbacks.
- */
-void
-kcf_walk_ntfylist(uint32_t event, void *event_arg)
-{
- kcf_ntfy_elem_t *nep;
- int nelem = 0;
-
- mutex_enter(&ntfy_list_lock);
-
- /*
- * Count how many clients are on the notification list. We need
- * this count to ensure that clients which joined the list after we
- * have started this walk, are not wrongly notified.
- */
- for (nep = ntfy_list_head; nep != NULL; nep = nep->kn_next)
- nelem++;
-
- for (nep = ntfy_list_head; (nep != NULL && nelem); nep = nep->kn_next) {
- nelem--;
-
- /*
- * Check if this client is interested in the
- * event.
- */
- if (!(nep->kn_event_mask & event))
- continue;
-
- mutex_enter(&nep->kn_lock);
- nep->kn_state = NTFY_RUNNING;
- mutex_exit(&nep->kn_lock);
- mutex_exit(&ntfy_list_lock);
-
-		/*
-		 * We invoke the callback routine with no locks held. Another
-		 * client could have joined the list meanwhile; this is fine,
-		 * as we maintain nelem as stated above. The NULL check in the
-		 * for loop guards against shrinkage. Also, any caller of
-		 * crypto_unnotify_events() at this point does a cv_wait()
-		 * until kn_state changes back to NTFY_WAITING, so nep is
-		 * assured to stay valid.
-		 */
- (*nep->kn_func)(event, event_arg);
-
- mutex_enter(&nep->kn_lock);
- nep->kn_state = NTFY_WAITING;
- cv_broadcast(&nep->kn_cv);
- mutex_exit(&nep->kn_lock);
-
- mutex_enter(&ntfy_list_lock);
- }
-
- mutex_exit(&ntfy_list_lock);
-}
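The count-then-walk idiom above is worth isolating: snapshotting the list length before dropping the lock guarantees that entries appended mid-walk are not visited. A standalone sketch of the same pattern, with hypothetical elem_t/head/visit() names:

/* Sketch only: visit at most the elements present when the walk began. */
int snapshot = 0;
for (elem_t *e = head; e != NULL; e = e->next)
	snapshot++;				/* length before the walk */
for (elem_t *e = head; e != NULL && snapshot > 0; e = e->next) {
	snapshot--;
	visit(e);	/* elements added after the count exceed the snapshot */
}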
-
-#if defined(_KERNEL)
-EXPORT_SYMBOL(crypto_mech2id);
-#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S
new file mode 100644
index 000000000000..b0af629066ea
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S
@@ -0,0 +1,2069 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
+ * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is assembly converted from SSE2 to ARMv8-A.
+ * Tool used for the conversion: SIMDe, https://github.com/simd-everywhere/simde
+ *
+ * It should work on FreeBSD, Linux and macOS;
+ * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
+ */
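For orientation, the three symbols defined in this file implement the upstream BLAKE3 dispatch interface. Assuming the signatures match upstream BLAKE3 v1.3.1 (parameter names illustrative), the corresponding C prototypes would be:

void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
    const uint8_t block[64], uint8_t block_len,
    uint64_t counter, uint8_t flags);

void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
    const uint8_t block[64], uint8_t block_len,
    uint64_t counter, uint8_t flags, uint8_t out[64]);

void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out);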
+
+#if defined(__aarch64__)
+
+/* .cfi_negate_ra_state is unknown to older binutils; keep gcc <= 9 happy */
+#if !defined(LD_VERSION) || LD_VERSION >= 233010000
+#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
+#else
+#define CFI_NEGATE_RA_STATE
+#endif
+
+ .text
+ .section .note.gnu.property,"a",@note
+ .p2align 3
+ .word 4
+ .word 16
+ .word 5
+ .asciz "GNU"
+ .word 3221225472
+ .word 4
+ .word 3
+ .word 0
+.Lsec_end0:
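Decoding the raw .word directives above (a reading of the constants, not part of the patch): they form an ELF NT_GNU_PROPERTY_TYPE_0 note, where 3221225472 is 0xC0000000, i.e. GNU_PROPERTY_AARCH64_FEATURE_1_AND, and the value 3 sets the BTI and PAC feature bits, matching the "hint" pointer-authentication instructions in the prologues and epilogues below. A C view of the layout (field names ours):

/* Sketch: the note emitted above, as a C struct. */
struct gnu_property_note {
	uint32_t n_namesz;	/* 4, length of "GNU\0"                 */
	uint32_t n_descsz;	/* 16                                   */
	uint32_t n_type;	/* 5 = NT_GNU_PROPERTY_TYPE_0           */
	char     n_name[4];	/* "GNU"                                */
	uint32_t pr_type;	/* 0xC0000000 = AARCH64_FEATURE_1_AND   */
	uint32_t pr_datasz;	/* 4                                    */
	uint32_t pr_data;	/* 3 = BTI (1) | PAC (2)                */
	uint32_t pr_pad;	/* 0                                    */
};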
+ .text
+ .globl zfs_blake3_compress_in_place_sse2
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse2,@function
+zfs_blake3_compress_in_place_sse2:
+ .cfi_startproc
+ hint #25
+ CFI_NEGATE_RA_STATE
+ sub sp, sp, #96
+ stp x29, x30, [sp, #64]
+ add x29, sp, #64
+ str x19, [sp, #80]
+ .cfi_def_cfa w29, 32
+ .cfi_offset w19, -16
+ .cfi_offset w30, -24
+ .cfi_offset w29, -32
+ mov x19, x0
+ mov w5, w4
+ mov x4, x3
+ mov w3, w2
+ mov x2, x1
+ mov x0, sp
+ mov x1, x19
+ bl compress_pre
+ ldp q0, q1, [sp]
+ ldp q2, q3, [sp, #32]
+ eor v0.16b, v2.16b, v0.16b
+ eor v1.16b, v3.16b, v1.16b
+ ldp x29, x30, [sp, #64]
+ stp q0, q1, [x19]
+ ldr x19, [sp, #80]
+ add sp, sp, #96
+ hint #29
+ ret
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI1_0:
+ .xword -4942790177982912921
+ .xword -6534734903820487822
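Read as 32-bit little-endian words, the two .xword constants above decode to 0x6A09E667, 0xBB67AE85, 0x3C6EF372 and 0xA54FF53A: the first four BLAKE3 IV words (the same constants as the SHA-256 IV), which the compression function loads as one row of its 4x4 state. In C:

/* First half of the BLAKE3 IV; identical to SHA-256 IV words 0-3. */
static const uint32_t blake3_iv_0_3[4] = {
	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
};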
+ .text
+ .p2align 2
+ .type compress_pre,@function
+compress_pre:
+ .cfi_startproc
+ hint #34
+ fmov s1, w3
+ movi d0, #0x0000ff000000ff
+ ldr q2, [x1]
+ fmov d3, x4
+ adrp x8, .LCPI1_0
+ mov v1.s[1], w5
+ str q2, [x0]
+ ldr q4, [x8, :lo12:.LCPI1_0]
+ add x8, x2, #32
+ ldr q5, [x1, #16]
+ and v0.8b, v1.8b, v0.8b
+ stp q5, q4, [x0, #16]
+ mov v3.d[1], v0.d[0]
+ str q3, [x0, #48]
+ ldp q0, q6, [x2]
+ uzp1 v1.4s, v0.4s, v6.4s
+ uzp2 v0.4s, v0.4s, v6.4s
+ add v2.4s, v2.4s, v1.4s
+ uzp1 v18.4s, v1.4s, v1.4s
+ add v2.4s, v2.4s, v5.4s
+ eor v3.16b, v2.16b, v3.16b
+ add v2.4s, v2.4s, v0.4s
+ rev32 v3.8h, v3.8h
+ add v4.4s, v3.4s, v4.4s
+ eor v5.16b, v4.16b, v5.16b
+ ushr v6.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v6.16b
+ add v2.4s, v2.4s, v5.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v6.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v6.16b
+ ld2 { v6.4s, v7.4s }, [x8]
+ add v4.4s, v3.4s, v4.4s
+ ext v3.16b, v3.16b, v3.16b, #8
+ add v2.4s, v2.4s, v6.4s
+ eor v5.16b, v4.16b, v5.16b
+ ext v4.16b, v4.16b, v4.16b, #4
+ ext v6.16b, v6.16b, v6.16b, #12
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v16.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v16.16b
+ ext v16.16b, v7.16b, v7.16b, #12
+ add v2.4s, v2.4s, v5.4s
+ mov v7.16b, v16.16b
+ eor v3.16b, v3.16b, v2.16b
+ add v2.4s, v2.4s, v16.4s
+ mov v7.s[1], v6.s[2]
+ rev32 v3.8h, v3.8h
+ add v4.4s, v4.4s, v3.4s
+ eor v5.16b, v4.16b, v5.16b
+ ushr v17.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v17.16b
+ add v2.4s, v2.4s, v5.4s
+ eor v3.16b, v2.16b, v3.16b
+ ushr v17.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v17.16b
+ ext v17.16b, v18.16b, v1.16b, #8
+ add v4.4s, v3.4s, v4.4s
+ uzp2 v17.4s, v17.4s, v0.4s
+ ext v3.16b, v3.16b, v3.16b, #8
+ eor v5.16b, v4.16b, v5.16b
+ add v2.4s, v2.4s, v17.4s
+ ext v4.16b, v4.16b, v4.16b, #12
+ ushr v18.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v5.16b, v5.16b, v18.16b
+ ext v18.16b, v1.16b, v1.16b, #12
+ add v2.4s, v2.4s, v5.4s
+ ext v1.16b, v1.16b, v18.16b, #12
+ zip1 v18.2d, v16.2d, v0.2d
+ zip2 v0.4s, v0.4s, v16.4s
+ eor v3.16b, v3.16b, v2.16b
+ rev64 v1.4s, v1.4s
+ mov v18.s[3], v6.s[3]
+ zip1 v16.4s, v0.4s, v6.4s
+ rev32 v3.8h, v3.8h
+ trn2 v1.4s, v1.4s, v7.4s
+ zip1 v0.4s, v6.4s, v0.4s
+ add v4.4s, v4.4s, v3.4s
+ add v2.4s, v2.4s, v1.4s
+ ext v6.16b, v0.16b, v16.16b, #8
+ eor v5.16b, v4.16b, v5.16b
+ ushr v7.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v7.16b
+ add v7.4s, v2.4s, v5.4s
+ eor v2.16b, v7.16b, v3.16b
+ ext v7.16b, v7.16b, v7.16b, #12
+ ushr v3.4s, v2.4s, #8
+ shl v2.4s, v2.4s, #24
+ orr v3.16b, v2.16b, v3.16b
+ ext v2.16b, v18.16b, v18.16b, #12
+ add v4.4s, v3.4s, v4.4s
+ uzp1 v2.4s, v18.4s, v2.4s
+ ext v3.16b, v3.16b, v3.16b, #8
+ eor v5.16b, v4.16b, v5.16b
+ add v7.4s, v7.4s, v2.4s
+ ext v4.16b, v4.16b, v4.16b, #4
+ ushr v18.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v18.16b
+ add v7.4s, v7.4s, v5.4s
+ eor v3.16b, v3.16b, v7.16b
+ add v7.4s, v7.4s, v6.4s
+ rev32 v3.8h, v3.8h
+ add v4.4s, v4.4s, v3.4s
+ eor v5.16b, v4.16b, v5.16b
+ ushr v0.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v0.16b, v5.16b, v0.16b
+ add v5.4s, v7.4s, v0.4s
+ ext v7.16b, v17.16b, v17.16b, #4
+ eor v3.16b, v5.16b, v3.16b
+ uzp1 v17.4s, v7.4s, v7.4s
+ ushr v16.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v16.16b
+ ext v16.16b, v17.16b, v7.16b, #8
+ add v4.4s, v3.4s, v4.4s
+ uzp2 v16.4s, v16.4s, v1.4s
+ ext v3.16b, v3.16b, v3.16b, #8
+ eor v0.16b, v4.16b, v0.16b
+ add v5.4s, v5.4s, v16.4s
+ ext v4.16b, v4.16b, v4.16b, #12
+ ushr v17.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v0.16b, v0.16b, v17.16b
+ ext v17.16b, v7.16b, v7.16b, #12
+ add v5.4s, v5.4s, v0.4s
+ ext v7.16b, v7.16b, v17.16b, #12
+ mov v17.16b, v6.16b
+ eor v3.16b, v3.16b, v5.16b
+ rev64 v7.4s, v7.4s
+ mov v17.s[1], v2.s[2]
+ rev32 v3.8h, v3.8h
+ add v4.4s, v4.4s, v3.4s
+ eor v18.16b, v4.16b, v0.16b
+ trn2 v0.4s, v7.4s, v17.4s
+ ushr v7.4s, v18.4s, #12
+ shl v17.4s, v18.4s, #20
+ add v5.4s, v5.4s, v0.4s
+ zip1 v18.2d, v6.2d, v1.2d
+ zip2 v1.4s, v1.4s, v6.4s
+ orr v7.16b, v17.16b, v7.16b
+ mov v18.s[3], v2.s[3]
+ zip1 v6.4s, v1.4s, v2.4s
+ add v5.4s, v5.4s, v7.4s
+ zip1 v1.4s, v2.4s, v1.4s
+ eor v3.16b, v5.16b, v3.16b
+ ext v5.16b, v5.16b, v5.16b, #12
+ ext v6.16b, v1.16b, v6.16b, #8
+ ushr v17.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v17.16b, v3.16b, v17.16b
+ ext v3.16b, v18.16b, v18.16b, #12
+ add v4.4s, v17.4s, v4.4s
+ uzp1 v3.4s, v18.4s, v3.4s
+ ext v17.16b, v17.16b, v17.16b, #8
+ eor v7.16b, v4.16b, v7.16b
+ add v5.4s, v5.4s, v3.4s
+ ext v4.16b, v4.16b, v4.16b, #4
+ ushr v18.4s, v7.4s, #7
+ shl v7.4s, v7.4s, #25
+ orr v7.16b, v7.16b, v18.16b
+ add v5.4s, v5.4s, v7.4s
+ eor v17.16b, v17.16b, v5.16b
+ add v5.4s, v5.4s, v6.4s
+ rev32 v17.8h, v17.8h
+ add v4.4s, v4.4s, v17.4s
+ eor v2.16b, v4.16b, v7.16b
+ ext v7.16b, v16.16b, v16.16b, #4
+ ushr v1.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v1.16b, v2.16b, v1.16b
+ add v2.4s, v5.4s, v1.4s
+ eor v5.16b, v2.16b, v17.16b
+ uzp1 v17.4s, v7.4s, v7.4s
+ ushr v16.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ orr v5.16b, v5.16b, v16.16b
+ ext v16.16b, v17.16b, v7.16b, #8
+ add v4.4s, v5.4s, v4.4s
+ uzp2 v16.4s, v16.4s, v0.4s
+ ext v5.16b, v5.16b, v5.16b, #8
+ eor v1.16b, v4.16b, v1.16b
+ add v2.4s, v2.4s, v16.4s
+ ext v4.16b, v4.16b, v4.16b, #12
+ ushr v17.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v1.16b, v1.16b, v17.16b
+ ext v17.16b, v7.16b, v7.16b, #12
+ add v2.4s, v2.4s, v1.4s
+ ext v7.16b, v7.16b, v17.16b, #12
+ mov v17.16b, v6.16b
+ eor v5.16b, v5.16b, v2.16b
+ rev64 v7.4s, v7.4s
+ mov v17.s[1], v3.s[2]
+ rev32 v5.8h, v5.8h
+ add v4.4s, v4.4s, v5.4s
+ eor v18.16b, v4.16b, v1.16b
+ trn2 v1.4s, v7.4s, v17.4s
+ ushr v7.4s, v18.4s, #12
+ shl v17.4s, v18.4s, #20
+ add v2.4s, v2.4s, v1.4s
+ zip1 v18.2d, v6.2d, v0.2d
+ zip2 v0.4s, v0.4s, v6.4s
+ orr v7.16b, v17.16b, v7.16b
+ mov v18.s[3], v3.s[3]
+ add v2.4s, v2.4s, v7.4s
+ eor v5.16b, v2.16b, v5.16b
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v17.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ orr v5.16b, v5.16b, v17.16b
+ add v17.4s, v5.4s, v4.4s
+ ext v4.16b, v18.16b, v18.16b, #12
+ ext v5.16b, v5.16b, v5.16b, #8
+ eor v7.16b, v17.16b, v7.16b
+ uzp1 v4.4s, v18.4s, v4.4s
+ ext v17.16b, v17.16b, v17.16b, #4
+ ushr v18.4s, v7.4s, #7
+ shl v7.4s, v7.4s, #25
+ add v2.4s, v2.4s, v4.4s
+ orr v7.16b, v7.16b, v18.16b
+ add v2.4s, v2.4s, v7.4s
+ eor v5.16b, v5.16b, v2.16b
+ rev32 v5.8h, v5.8h
+ add v6.4s, v17.4s, v5.4s
+ zip1 v17.4s, v0.4s, v3.4s
+ zip1 v0.4s, v3.4s, v0.4s
+ eor v3.16b, v6.16b, v7.16b
+ ext v0.16b, v0.16b, v17.16b, #8
+ ushr v7.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v2.4s, v2.4s, v0.4s
+ orr v3.16b, v3.16b, v7.16b
+ ext v7.16b, v16.16b, v16.16b, #4
+ add v2.4s, v2.4s, v3.4s
+ uzp1 v17.4s, v7.4s, v7.4s
+ eor v5.16b, v2.16b, v5.16b
+ ushr v16.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ orr v5.16b, v5.16b, v16.16b
+ ext v16.16b, v17.16b, v7.16b, #8
+ add v6.4s, v5.4s, v6.4s
+ uzp2 v16.4s, v16.4s, v1.4s
+ ext v5.16b, v5.16b, v5.16b, #8
+ eor v3.16b, v6.16b, v3.16b
+ add v2.4s, v2.4s, v16.4s
+ ext v6.16b, v6.16b, v6.16b, #12
+ ushr v17.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ ext v2.16b, v2.16b, v2.16b, #4
+ orr v3.16b, v3.16b, v17.16b
+ add v17.4s, v2.4s, v3.4s
+ eor v2.16b, v5.16b, v17.16b
+ ext v5.16b, v7.16b, v7.16b, #12
+ rev32 v18.8h, v2.8h
+ ext v2.16b, v7.16b, v5.16b, #12
+ mov v5.16b, v0.16b
+ add v6.4s, v6.4s, v18.4s
+ rev64 v2.4s, v2.4s
+ mov v5.s[1], v4.s[2]
+ eor v3.16b, v6.16b, v3.16b
+ trn2 v2.4s, v2.4s, v5.4s
+ ushr v5.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v7.4s, v17.4s, v2.4s
+ orr v3.16b, v3.16b, v5.16b
+ add v5.4s, v7.4s, v3.4s
+ eor v7.16b, v5.16b, v18.16b
+ zip1 v18.2d, v0.2d, v1.2d
+ ext v5.16b, v5.16b, v5.16b, #12
+ zip2 v0.4s, v1.4s, v0.4s
+ ushr v17.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ mov v18.s[3], v4.s[3]
+ orr v7.16b, v7.16b, v17.16b
+ ext v17.16b, v18.16b, v18.16b, #12
+ add v6.4s, v7.4s, v6.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v19.16b, v6.16b, v3.16b
+ uzp1 v3.4s, v18.4s, v17.4s
+ ext v6.16b, v6.16b, v6.16b, #4
+ ushr v17.4s, v19.4s, #7
+ shl v18.4s, v19.4s, #25
+ add v5.4s, v5.4s, v3.4s
+ orr v17.16b, v18.16b, v17.16b
+ add v5.4s, v5.4s, v17.4s
+ eor v7.16b, v7.16b, v5.16b
+ rev32 v7.8h, v7.8h
+ add v1.4s, v6.4s, v7.4s
+ zip1 v6.4s, v0.4s, v4.4s
+ zip1 v0.4s, v4.4s, v0.4s
+ eor v4.16b, v1.16b, v17.16b
+ ext v6.16b, v0.16b, v6.16b, #8
+ ushr v0.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ add v5.4s, v5.4s, v6.4s
+ zip1 v20.2d, v6.2d, v2.2d
+ orr v0.16b, v4.16b, v0.16b
+ mov v20.s[3], v3.s[3]
+ add v4.4s, v5.4s, v0.4s
+ eor v5.16b, v4.16b, v7.16b
+ ext v7.16b, v16.16b, v16.16b, #4
+ ushr v16.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ uzp1 v17.4s, v7.4s, v7.4s
+ orr v5.16b, v5.16b, v16.16b
+ ext v16.16b, v17.16b, v7.16b, #8
+ add v1.4s, v5.4s, v1.4s
+ uzp2 v16.4s, v16.4s, v2.4s
+ zip2 v2.4s, v2.4s, v6.4s
+ eor v0.16b, v1.16b, v0.16b
+ add v4.4s, v4.4s, v16.4s
+ ext v1.16b, v1.16b, v1.16b, #12
+ ext v16.16b, v16.16b, v16.16b, #4
+ ushr v17.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v17.16b, v0.16b, v17.16b
+ ext v0.16b, v5.16b, v5.16b, #8
+ ext v5.16b, v7.16b, v7.16b, #12
+ add v4.4s, v4.4s, v17.4s
+ eor v0.16b, v0.16b, v4.16b
+ rev32 v18.8h, v0.8h
+ ext v0.16b, v7.16b, v5.16b, #12
+ mov v5.16b, v6.16b
+ add v7.4s, v1.4s, v18.4s
+ rev64 v1.4s, v0.4s
+ mov v5.s[1], v3.s[2]
+ eor v17.16b, v7.16b, v17.16b
+ trn2 v1.4s, v1.4s, v5.4s
+ ushr v19.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ add v4.4s, v4.4s, v1.4s
+ orr v17.16b, v17.16b, v19.16b
+ add v19.4s, v4.4s, v17.4s
+ eor v4.16b, v19.16b, v18.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ ushr v18.4s, v4.4s, #8
+ shl v4.4s, v4.4s, #24
+ orr v18.16b, v4.16b, v18.16b
+ ext v4.16b, v20.16b, v20.16b, #12
+ add v7.4s, v18.4s, v7.4s
+ uzp1 v4.4s, v20.4s, v4.4s
+ ext v18.16b, v18.16b, v18.16b, #8
+ eor v17.16b, v7.16b, v17.16b
+ add v19.4s, v19.4s, v4.4s
+ ext v7.16b, v7.16b, v7.16b, #4
+ ushr v20.4s, v17.4s, #7
+ shl v17.4s, v17.4s, #25
+ orr v17.16b, v17.16b, v20.16b
+ add v19.4s, v19.4s, v17.4s
+ eor v18.16b, v18.16b, v19.16b
+ rev32 v18.8h, v18.8h
+ add v6.4s, v7.4s, v18.4s
+ zip1 v7.4s, v2.4s, v3.4s
+ zip1 v2.4s, v3.4s, v2.4s
+ eor v3.16b, v6.16b, v17.16b
+ ext v2.16b, v2.16b, v7.16b, #8
+ ushr v7.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v17.4s, v19.4s, v2.4s
+ zip1 v1.2d, v2.2d, v1.2d
+ zip2 v0.4s, v0.4s, v2.4s
+ orr v3.16b, v3.16b, v7.16b
+ mov v1.s[3], v4.s[3]
+ add v7.4s, v17.4s, v3.4s
+ eor v17.16b, v7.16b, v18.16b
+ ext v7.16b, v7.16b, v7.16b, #4
+ ushr v18.4s, v17.4s, #8
+ shl v17.4s, v17.4s, #24
+ orr v17.16b, v17.16b, v18.16b
+ ext v18.16b, v16.16b, v16.16b, #8
+ add v6.4s, v17.4s, v6.4s
+ uzp2 v5.4s, v18.4s, v5.4s
+ eor v3.16b, v6.16b, v3.16b
+ ext v5.16b, v5.16b, v18.16b, #4
+ ext v6.16b, v6.16b, v6.16b, #12
+ ushr v18.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ add v5.4s, v7.4s, v5.4s
+ ext v7.16b, v17.16b, v17.16b, #8
+ ext v17.16b, v16.16b, v16.16b, #12
+ orr v3.16b, v3.16b, v18.16b
+ ext v16.16b, v16.16b, v17.16b, #12
+ add v5.4s, v3.4s, v5.4s
+ mov v17.16b, v2.16b
+ rev64 v16.4s, v16.4s
+ eor v7.16b, v7.16b, v5.16b
+ mov v17.s[1], v4.s[2]
+ rev32 v7.8h, v7.8h
+ trn2 v16.4s, v16.4s, v17.4s
+ add v6.4s, v6.4s, v7.4s
+ add v5.4s, v5.4s, v16.4s
+ eor v3.16b, v6.16b, v3.16b
+ ushr v17.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ orr v3.16b, v3.16b, v17.16b
+ add v5.4s, v5.4s, v3.4s
+ eor v7.16b, v5.16b, v7.16b
+ ext v5.16b, v5.16b, v5.16b, #12
+ ushr v16.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ orr v7.16b, v7.16b, v16.16b
+ ext v16.16b, v1.16b, v1.16b, #12
+ add v6.4s, v7.4s, v6.4s
+ uzp1 v1.4s, v1.4s, v16.4s
+ eor v3.16b, v6.16b, v3.16b
+ add v1.4s, v5.4s, v1.4s
+ ext v5.16b, v7.16b, v7.16b, #8
+ ext v6.16b, v6.16b, v6.16b, #4
+ ushr v16.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ orr v3.16b, v3.16b, v16.16b
+ add v1.4s, v1.4s, v3.4s
+ eor v5.16b, v5.16b, v1.16b
+ rev32 v5.8h, v5.8h
+ add v2.4s, v6.4s, v5.4s
+ zip1 v6.4s, v0.4s, v4.4s
+ zip1 v0.4s, v4.4s, v0.4s
+ eor v3.16b, v2.16b, v3.16b
+ ext v0.16b, v0.16b, v6.16b, #8
+ ushr v4.4s, v3.4s, #12
+ shl v3.4s, v3.4s, #20
+ add v0.4s, v1.4s, v0.4s
+ orr v1.16b, v3.16b, v4.16b
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v0.16b, v5.16b
+ ext v0.16b, v0.16b, v0.16b, #4
+ ushr v4.4s, v3.4s, #8
+ shl v3.4s, v3.4s, #24
+ orr v3.16b, v3.16b, v4.16b
+ add v2.4s, v3.4s, v2.4s
+ ext v3.16b, v3.16b, v3.16b, #8
+ eor v1.16b, v2.16b, v1.16b
+ ext v2.16b, v2.16b, v2.16b, #12
+ ushr v4.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ stp q2, q3, [x0, #32]
+ orr v1.16b, v1.16b, v4.16b
+ stp q0, q1, [x0]
+ ret
+.Lfunc_end1:
+ .size compress_pre, .Lfunc_end1-compress_pre
+ .cfi_endproc
+
+ .globl zfs_blake3_compress_xof_sse2
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse2,@function
+zfs_blake3_compress_xof_sse2:
+ .cfi_startproc
+ hint #25
+ CFI_NEGATE_RA_STATE
+ sub sp, sp, #96
+ stp x29, x30, [sp, #64]
+ add x29, sp, #64
+ stp x20, x19, [sp, #80]
+ .cfi_def_cfa w29, 32
+ .cfi_offset w19, -8
+ .cfi_offset w20, -16
+ .cfi_offset w30, -24
+ .cfi_offset w29, -32
+ mov x20, x0
+ mov x19, x5
+ mov w5, w4
+ mov x4, x3
+ mov w3, w2
+ mov x2, x1
+ mov x0, sp
+ mov x1, x20
+ bl compress_pre
+ ldp q0, q1, [sp]
+ ldp q2, q3, [sp, #32]
+ eor v0.16b, v2.16b, v0.16b
+ eor v1.16b, v3.16b, v1.16b
+ ldp x29, x30, [sp, #64]
+ stp q0, q1, [x19]
+ ldr q0, [x20]
+ eor v0.16b, v0.16b, v2.16b
+ str q0, [x19, #32]
+ ldr q0, [x20, #16]
+ eor v0.16b, v0.16b, v3.16b
+ str q0, [x19, #48]
+ ldp x20, x19, [sp, #80]
+ add sp, sp, #96
+ hint #29
+ ret
+.Lfunc_end2:
+ .size zfs_blake3_compress_xof_sse2, .Lfunc_end2-zfs_blake3_compress_xof_sse2
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI3_0:
+ .word 0
+ .word 1
+ .word 2
+ .word 3
+ .text
+ .globl zfs_blake3_hash_many_sse2
+ .p2align 2
+ .type zfs_blake3_hash_many_sse2,@function
+zfs_blake3_hash_many_sse2:
+ .cfi_startproc
+ hint #25
+ CFI_NEGATE_RA_STATE
+ stp d15, d14, [sp, #-160]!
+ stp d13, d12, [sp, #16]
+ stp d11, d10, [sp, #32]
+ stp d9, d8, [sp, #48]
+ stp x29, x30, [sp, #64]
+ add x29, sp, #64
+ stp x28, x27, [sp, #80]
+ stp x26, x25, [sp, #96]
+ stp x24, x23, [sp, #112]
+ stp x22, x21, [sp, #128]
+ stp x20, x19, [sp, #144]
+ sub sp, sp, #464
+ .cfi_def_cfa w29, 96
+ .cfi_offset w19, -8
+ .cfi_offset w20, -16
+ .cfi_offset w21, -24
+ .cfi_offset w22, -32
+ .cfi_offset w23, -40
+ .cfi_offset w24, -48
+ .cfi_offset w25, -56
+ .cfi_offset w26, -64
+ .cfi_offset w27, -72
+ .cfi_offset w28, -80
+ .cfi_offset w30, -88
+ .cfi_offset w29, -96
+ .cfi_offset b8, -104
+ .cfi_offset b9, -112
+ .cfi_offset b10, -120
+ .cfi_offset b11, -128
+ .cfi_offset b12, -136
+ .cfi_offset b13, -144
+ .cfi_offset b14, -152
+ .cfi_offset b15, -160
+ mov w19, w6
+ mov x20, x4
+ mov x24, x1
+ ldr x26, [x29, #104]
+ ldrb w27, [x29, #96]
+ cmp x1, #4
+ str x3, [sp, #40]
+ b.lo .LBB3_6
+ adrp x8, .LCPI3_0
+ sbfx w9, w5, #0, #1
+ mov w10, #44677
+ mov w11, #62322
+ movk w10, #47975, lsl #16
+ movk w11, #15470, lsl #16
+ ldr q0, [x8, :lo12:.LCPI3_0]
+ dup v1.4s, w9
+ mov w9, #58983
+ orr w8, w7, w19
+ movk w9, #27145, lsl #16
+ and v0.16b, v1.16b, v0.16b
+ dup v1.4s, w11
+ movi v24.4s, #64
+ dup v2.4s, w9
+ mov w9, #62778
+ movk w9, #42319, lsl #16
+ str q0, [sp, #16]
+ orr v0.4s, #128, lsl #24
+ stp q2, q1, [sp, #48]
+ str q0, [sp]
+ dup v0.4s, w10
+ str q0, [sp, #80]
+ b .LBB3_3
+.LBB3_2:
+ zip1 v0.4s, v12.4s, v31.4s
+ add x10, x20, #4
+ zip1 v1.4s, v29.4s, v30.4s
+ tst w5, #0x1
+ zip1 v2.4s, v28.4s, v23.4s
+ csel x20, x10, x20, ne
+ zip1 v3.4s, v13.4s, v25.4s
+ add x0, x0, #32
+ zip2 v6.4s, v12.4s, v31.4s
+ sub x24, x24, #4
+ zip1 v4.2d, v0.2d, v1.2d
+ cmp x24, #3
+ zip2 v7.4s, v29.4s, v30.4s
+ zip1 v5.2d, v2.2d, v3.2d
+ zip2 v0.2d, v0.2d, v1.2d
+ zip2 v1.2d, v2.2d, v3.2d
+ zip2 v2.4s, v28.4s, v23.4s
+ zip2 v3.4s, v13.4s, v25.4s
+ stp q4, q5, [x26]
+ zip2 v4.2d, v6.2d, v7.2d
+ stp q0, q1, [x26, #32]
+ zip1 v0.2d, v6.2d, v7.2d
+ zip1 v1.2d, v2.2d, v3.2d
+ zip2 v2.2d, v2.2d, v3.2d
+ stp q0, q1, [x26, #64]
+ stp q4, q2, [x26, #96]
+ add x26, x26, #128
+ b.ls .LBB3_6
+.LBB3_3:
+ ldr x14, [sp, #40]
+ mov x10, x14
+ add x11, x14, #8
+ add x12, x14, #12
+ add x13, x14, #16
+ ld1r { v12.4s }, [x10], #4
+ ld1r { v29.4s }, [x11]
+ add x11, x14, #20
+ ld1r { v30.4s }, [x12]
+ add x12, x14, #24
+ ld1r { v28.4s }, [x13]
+ ld1r { v23.4s }, [x11]
+ add x11, x14, #28
+ ld1r { v13.4s }, [x12]
+ ld1r { v31.4s }, [x10]
+ ld1r { v25.4s }, [x11]
+ cbz x2, .LBB3_2
+ ldr q1, [sp, #16]
+ dup v0.4s, w20
+ lsr x12, x20, #32
+ mov x10, xzr
+ ldp x13, x14, [x0, #16]
+ add v1.4s, v0.4s, v1.4s
+ mov x15, x2
+ movi v0.4s, #128, lsl #24
+ mov w4, w8
+ str q1, [sp, #112]
+ eor v0.16b, v1.16b, v0.16b
+ ldr q1, [sp]
+ cmgt v0.4s, v1.4s, v0.4s
+ dup v1.4s, w12
+ ldp x11, x12, [x0]
+ sub v0.4s, v1.4s, v0.4s
+ str q0, [sp, #96]
+.LBB3_5:
+ add x17, x11, x10
+ add x21, x12, x10
+ add x16, x13, x10
+ add x6, x14, x10
+ subs x15, x15, #1
+ add x10, x10, #64
+ ldp q0, q1, [x17]
+ csel w3, w27, wzr, eq
+ orr w3, w3, w4
+ mov w4, w19
+ and w3, w3, #0xff
+ ldp q3, q6, [x21]
+ dup v2.4s, w3
+ zip1 v21.4s, v0.4s, v3.4s
+ zip2 v19.4s, v0.4s, v3.4s
+ ldp q5, q7, [x16]
+ zip1 v17.4s, v1.4s, v6.4s
+ zip2 v22.4s, v1.4s, v6.4s
+ ldp q16, q18, [x6]
+ zip1 v4.4s, v5.4s, v16.4s
+ zip2 v0.4s, v5.4s, v16.4s
+ ldp q26, q27, [x17, #32]
+ zip1 v1.4s, v7.4s, v18.4s
+ zip2 v3.4s, v7.4s, v18.4s
+ zip2 v20.2d, v19.2d, v0.2d
+ mov v19.d[1], v0.d[0]
+ dup v18.4s, w9
+ ldp q8, q9, [x21, #32]
+ stur q19, [x29, #-208]
+ zip2 v7.4s, v26.4s, v8.4s
+ zip1 v10.4s, v26.4s, v8.4s
+ ldp q11, q5, [x16, #32]
+ zip2 v26.2d, v17.2d, v1.2d
+ stp q7, q26, [sp, #192]
+ mov v17.d[1], v1.d[0]
+ add v1.4s, v23.4s, v31.4s
+ ldp q16, q6, [x6, #32]
+ stur q17, [x29, #-256]
+ add v1.4s, v1.4s, v19.4s
+ zip1 v8.4s, v11.4s, v16.4s
+ zip2 v7.4s, v11.4s, v16.4s
+ zip1 v11.4s, v27.4s, v9.4s
+ zip2 v9.4s, v27.4s, v9.4s
+ zip2 v27.2d, v21.2d, v4.2d
+ mov v21.d[1], v4.d[0]
+ str q7, [sp, #224]
+ add v4.4s, v28.4s, v12.4s
+ zip1 v15.4s, v5.4s, v6.4s
+ zip2 v14.4s, v5.4s, v6.4s
+ stur q27, [x29, #-192]
+ zip2 v16.2d, v22.2d, v3.2d
+ stp q20, q21, [x29, #-240]
+ add v0.4s, v4.4s, v21.4s
+ ldp q6, q4, [sp, #96]
+ mov v22.d[1], v3.d[0]
+ add v5.4s, v25.4s, v30.4s
+ add v3.4s, v13.4s, v29.4s
+ eor v6.16b, v1.16b, v6.16b
+ add v1.4s, v1.4s, v20.4s
+ str q22, [sp, #256]
+ eor v4.16b, v0.16b, v4.16b
+ add v5.4s, v5.4s, v22.4s
+ add v3.4s, v3.4s, v17.4s
+ ldr q17, [sp, #48]
+ rev32 v6.8h, v6.8h
+ rev32 v4.8h, v4.8h
+ eor v2.16b, v5.16b, v2.16b
+ eor v7.16b, v3.16b, v24.16b
+ add v0.4s, v0.4s, v27.4s
+ add v21.4s, v4.4s, v17.4s
+ rev32 v31.8h, v2.8h
+ ldr q2, [sp, #80]
+ rev32 v7.8h, v7.8h
+ mov v27.16b, v16.16b
+ eor v17.16b, v21.16b, v28.16b
+ add v29.4s, v6.4s, v2.4s
+ ldr q2, [sp, #64]
+ add v24.4s, v31.4s, v18.4s
+ str q27, [sp, #176]
+ ushr v19.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ add v30.4s, v7.4s, v2.4s
+ eor v18.16b, v29.16b, v23.16b
+ orr v12.16b, v17.16b, v19.16b
+ eor v17.16b, v30.16b, v13.16b
+ eor v19.16b, v24.16b, v25.16b
+ ushr v23.4s, v18.4s, #12
+ shl v18.4s, v18.4s, #20
+ ushr v25.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ ushr v28.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ orr v13.16b, v18.16b, v23.16b
+ orr v25.16b, v17.16b, v25.16b
+ orr v2.16b, v19.16b, v28.16b
+ add v28.4s, v0.4s, v12.4s
+ add v0.4s, v3.4s, v26.4s
+ add v18.4s, v1.4s, v13.4s
+ add v3.4s, v5.4s, v16.4s
+ eor v1.16b, v28.16b, v4.16b
+ add v17.4s, v0.4s, v25.4s
+ eor v0.16b, v18.16b, v6.16b
+ add v19.4s, v3.4s, v2.4s
+ ushr v16.4s, v1.4s, #8
+ shl v3.4s, v1.4s, #24
+ eor v4.16b, v17.16b, v7.16b
+ ushr v6.4s, v0.4s, #8
+ shl v1.4s, v0.4s, #24
+ eor v5.16b, v19.16b, v31.16b
+ ushr v23.4s, v4.4s, #8
+ shl v4.4s, v4.4s, #24
+ orr v7.16b, v3.16b, v16.16b
+ orr v6.16b, v1.16b, v6.16b
+ ushr v31.4s, v5.4s, #8
+ shl v0.4s, v5.4s, #24
+ orr v5.16b, v4.16b, v23.16b
+ add v4.4s, v7.4s, v21.4s
+ ldr q21, [sp, #192]
+ add v3.4s, v6.4s, v29.4s
+ orr v31.16b, v0.16b, v31.16b
+ add v23.4s, v5.4s, v30.4s
+ eor v0.16b, v4.16b, v12.16b
+ eor v1.16b, v3.16b, v13.16b
+ add v16.4s, v31.4s, v24.4s
+ eor v20.16b, v23.16b, v25.16b
+ ushr v24.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v29.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ushr v30.4s, v20.4s, #7
+ shl v20.4s, v20.4s, #25
+ orr v25.16b, v0.16b, v24.16b
+ orr v0.16b, v1.16b, v29.16b
+ mov v29.16b, v10.16b
+ orr v1.16b, v20.16b, v30.16b
+ mov v20.16b, v10.16b
+ mov v24.16b, v21.16b
+ ldr q20, [sp, #224]
+ mov v29.d[1], v8.d[0]
+ mov v13.16b, v9.16b
+ zip2 v30.2d, v10.2d, v8.2d
+ zip2 v8.2d, v21.2d, v20.2d
+ mov v26.16b, v11.16b
+ mov v24.d[1], v20.d[0]
+ add v20.4s, v28.4s, v29.4s
+ mov v13.d[1], v14.d[0]
+ str q8, [sp, #128]
+ eor v2.16b, v16.16b, v2.16b
+ mov v26.d[1], v15.d[0]
+ str q24, [sp, #192]
+ add v20.4s, v20.4s, v0.4s
+ add v19.4s, v19.4s, v13.4s
+ ushr v12.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ zip2 v10.2d, v9.2d, v14.2d
+ add v18.4s, v18.4s, v24.4s
+ add v17.4s, v17.4s, v26.4s
+ mov v14.16b, v26.16b
+ eor v26.16b, v20.16b, v31.16b
+ stp q10, q30, [sp, #224]
+ add v19.4s, v19.4s, v25.4s
+ orr v2.16b, v2.16b, v12.16b
+ add v18.4s, v18.4s, v1.4s
+ rev32 v26.8h, v26.8h
+ eor v5.16b, v19.16b, v5.16b
+ add v17.4s, v17.4s, v2.4s
+ eor v7.16b, v18.16b, v7.16b
+ add v23.4s, v23.4s, v26.4s
+ rev32 v5.8h, v5.8h
+ eor v6.16b, v17.16b, v6.16b
+ rev32 v7.8h, v7.8h
+ eor v0.16b, v23.16b, v0.16b
+ add v3.4s, v3.4s, v5.4s
+ rev32 v6.8h, v6.8h
+ add v16.4s, v16.4s, v7.4s
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v4.4s, v6.4s
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ add v20.4s, v20.4s, v30.4s
+ zip2 v21.2d, v11.2d, v15.2d
+ ushr v11.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v31.16b
+ add v19.4s, v19.4s, v10.4s
+ add v20.4s, v20.4s, v0.4s
+ orr v1.16b, v1.16b, v11.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v18.4s, v18.4s, v8.4s
+ add v19.4s, v19.4s, v25.4s
+ eor v26.16b, v20.16b, v26.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v21.4s
+ add v18.4s, v18.4s, v1.4s
+ eor v5.16b, v19.16b, v5.16b
+ ushr v31.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v17.4s, v17.4s, v2.4s
+ ushr v11.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ eor v7.16b, v18.16b, v7.16b
+ orr v26.16b, v26.16b, v31.16b
+ eor v6.16b, v17.16b, v6.16b
+ orr v5.16b, v5.16b, v11.16b
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ add v23.4s, v26.4s, v23.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ orr v7.16b, v7.16b, v31.16b
+ add v3.4s, v5.4s, v3.4s
+ eor v0.16b, v23.16b, v0.16b
+ ldp q28, q12, [x29, #-256]
+ orr v6.16b, v6.16b, v11.16b
+ add v16.4s, v7.4s, v16.4s
+ eor v25.16b, v3.16b, v25.16b
+ ushr v31.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v4.4s, v6.4s, v4.4s
+ ushr v11.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ add v18.4s, v18.4s, v12.4s
+ mov v15.16b, v29.16b
+ ldur q29, [x29, #-208]
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ str q15, [sp, #160]
+ add v20.4s, v20.4s, v29.4s
+ add v18.4s, v18.4s, v0.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v27.4s
+ eor v6.16b, v6.16b, v18.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v28.4s
+ eor v7.16b, v7.16b, v20.16b
+ add v17.4s, v17.4s, v1.4s
+ rev32 v6.8h, v6.8h
+ add v19.4s, v19.4s, v2.4s
+ rev32 v7.8h, v7.8h
+ eor v5.16b, v17.16b, v5.16b
+ add v3.4s, v3.4s, v6.4s
+ eor v26.16b, v19.16b, v26.16b
+ add v4.4s, v4.4s, v7.4s
+ rev32 v5.8h, v5.8h
+ eor v0.16b, v3.16b, v0.16b
+ rev32 v26.8h, v26.8h
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v23.4s, v5.4s
+ ushr v11.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v16.4s, v16.4s, v26.4s
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v18.4s, v18.4s, v24.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v20.4s, v20.4s, v22.4s
+ add v18.4s, v18.4s, v0.4s
+ mov v9.16b, v30.16b
+ mov v30.16b, v21.16b
+ ldur q21, [x29, #-224]
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ str q30, [sp, #144]
+ add v17.4s, v17.4s, v21.4s
+ ldur q21, [x29, #-192]
+ eor v6.16b, v18.16b, v6.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v30.4s
+ eor v7.16b, v20.16b, v7.16b
+ add v17.4s, v17.4s, v1.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ add v19.4s, v19.4s, v2.4s
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ eor v5.16b, v17.16b, v5.16b
+ orr v6.16b, v6.16b, v11.16b
+ eor v26.16b, v19.16b, v26.16b
+ orr v7.16b, v7.16b, v31.16b
+ ushr v31.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ add v3.4s, v6.4s, v3.4s
+ ushr v11.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v4.4s, v7.4s, v4.4s
+ orr v5.16b, v5.16b, v31.16b
+ eor v0.16b, v3.16b, v0.16b
+ orr v26.16b, v26.16b, v11.16b
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v5.4s, v23.4s
+ ushr v11.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v16.4s, v26.4s, v16.4s
+ ushr v31.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v20.4s, v20.4s, v21.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v0.4s
+ add v19.4s, v19.4s, v10.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v18.4s, v18.4s, v14.4s
+ eor v26.16b, v20.16b, v26.16b
+ add v19.4s, v19.4s, v25.4s
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v9.4s
+ ldr q9, [sp, #208]
+ add v18.4s, v18.4s, v1.4s
+ rev32 v26.8h, v26.8h
+ eor v5.16b, v19.16b, v5.16b
+ add v17.4s, v17.4s, v2.4s
+ eor v7.16b, v18.16b, v7.16b
+ add v23.4s, v23.4s, v26.4s
+ rev32 v5.8h, v5.8h
+ eor v6.16b, v17.16b, v6.16b
+ rev32 v7.8h, v7.8h
+ eor v0.16b, v23.16b, v0.16b
+ add v3.4s, v3.4s, v5.4s
+ rev32 v6.8h, v6.8h
+ add v16.4s, v16.4s, v7.4s
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v4.4s, v6.4s
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ add v20.4s, v20.4s, v8.4s
+ ushr v11.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v31.16b
+ add v19.4s, v19.4s, v15.4s
+ add v20.4s, v20.4s, v0.4s
+ orr v1.16b, v1.16b, v11.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v18.4s, v18.4s, v9.4s
+ add v19.4s, v19.4s, v25.4s
+ eor v26.16b, v20.16b, v26.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ add v18.4s, v18.4s, v1.4s
+ eor v5.16b, v19.16b, v5.16b
+ ushr v31.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v17.4s, v17.4s, v2.4s
+ ushr v11.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ eor v7.16b, v18.16b, v7.16b
+ orr v26.16b, v26.16b, v31.16b
+ eor v6.16b, v17.16b, v6.16b
+ orr v5.16b, v5.16b, v11.16b
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ add v23.4s, v26.4s, v23.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ orr v7.16b, v7.16b, v31.16b
+ add v3.4s, v5.4s, v3.4s
+ eor v0.16b, v23.16b, v0.16b
+ orr v6.16b, v6.16b, v11.16b
+ add v16.4s, v7.4s, v16.4s
+ eor v25.16b, v3.16b, v25.16b
+ ushr v31.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v4.4s, v6.4s, v4.4s
+ ushr v11.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ add v18.4s, v18.4s, v24.4s
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v12.4s
+ add v18.4s, v18.4s, v0.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v30.4s
+ eor v6.16b, v6.16b, v18.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v27.4s
+ eor v7.16b, v7.16b, v20.16b
+ add v17.4s, v17.4s, v1.4s
+ rev32 v6.8h, v6.8h
+ add v19.4s, v19.4s, v2.4s
+ rev32 v7.8h, v7.8h
+ eor v5.16b, v17.16b, v5.16b
+ add v3.4s, v3.4s, v6.4s
+ eor v26.16b, v19.16b, v26.16b
+ add v4.4s, v4.4s, v7.4s
+ rev32 v5.8h, v5.8h
+ eor v0.16b, v3.16b, v0.16b
+ rev32 v26.8h, v26.8h
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v23.4s, v5.4s
+ ushr v11.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v16.4s, v16.4s, v26.4s
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v18.4s, v18.4s, v14.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v20.4s, v20.4s, v28.4s
+ add v18.4s, v18.4s, v0.4s
+ mov v10.16b, v13.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v29.4s
+ eor v6.16b, v18.16b, v6.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v10.4s
+ eor v7.16b, v20.16b, v7.16b
+ add v17.4s, v17.4s, v1.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ add v19.4s, v19.4s, v2.4s
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ eor v5.16b, v17.16b, v5.16b
+ orr v6.16b, v6.16b, v11.16b
+ eor v26.16b, v19.16b, v26.16b
+ orr v7.16b, v7.16b, v31.16b
+ ushr v31.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ add v3.4s, v6.4s, v3.4s
+ ushr v11.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v4.4s, v7.4s, v4.4s
+ orr v5.16b, v5.16b, v31.16b
+ eor v0.16b, v3.16b, v0.16b
+ mov v22.16b, v8.16b
+ ldp q8, q28, [sp, #240]
+ orr v26.16b, v26.16b, v11.16b
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v5.4s, v23.4s
+ ushr v11.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v16.4s, v26.4s, v16.4s
+ ushr v31.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v20.4s, v20.4s, v28.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v0.4s
+ add v19.4s, v19.4s, v15.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v18.4s, v18.4s, v8.4s
+ eor v26.16b, v20.16b, v26.16b
+ add v19.4s, v19.4s, v25.4s
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v22.4s
+ ldur q22, [x29, #-256]
+ add v18.4s, v18.4s, v1.4s
+ rev32 v26.8h, v26.8h
+ eor v5.16b, v19.16b, v5.16b
+ add v17.4s, v17.4s, v2.4s
+ eor v7.16b, v18.16b, v7.16b
+ add v23.4s, v23.4s, v26.4s
+ rev32 v5.8h, v5.8h
+ eor v6.16b, v17.16b, v6.16b
+ rev32 v7.8h, v7.8h
+ eor v0.16b, v23.16b, v0.16b
+ add v3.4s, v3.4s, v5.4s
+ rev32 v6.8h, v6.8h
+ add v16.4s, v16.4s, v7.4s
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v4.4s, v6.4s
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ add v20.4s, v20.4s, v9.4s
+ mov v13.16b, v12.16b
+ mov v12.16b, v27.16b
+ mov v27.16b, v9.16b
+ ldur q9, [x29, #-192]
+ mov v21.16b, v15.16b
+ ldr q15, [sp, #224]
+ ushr v11.4s, v1.4s, #12
+ ldur q21, [x29, #-224]
+ shl v1.4s, v1.4s, #20
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v31.16b
+ add v19.4s, v19.4s, v9.4s
+ add v20.4s, v20.4s, v0.4s
+ orr v1.16b, v1.16b, v11.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v18.4s, v18.4s, v21.4s
+ add v19.4s, v19.4s, v25.4s
+ eor v26.16b, v20.16b, v26.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v15.4s
+ add v18.4s, v18.4s, v1.4s
+ eor v5.16b, v19.16b, v5.16b
+ ushr v31.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v17.4s, v17.4s, v2.4s
+ ushr v11.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ eor v7.16b, v18.16b, v7.16b
+ orr v26.16b, v26.16b, v31.16b
+ eor v6.16b, v17.16b, v6.16b
+ orr v5.16b, v5.16b, v11.16b
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ add v23.4s, v26.4s, v23.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ orr v7.16b, v7.16b, v31.16b
+ add v3.4s, v5.4s, v3.4s
+ eor v0.16b, v23.16b, v0.16b
+ orr v6.16b, v6.16b, v11.16b
+ add v16.4s, v7.4s, v16.4s
+ eor v25.16b, v3.16b, v25.16b
+ ushr v31.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v4.4s, v6.4s, v4.4s
+ ushr v11.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ add v18.4s, v18.4s, v14.4s
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v24.4s
+ add v18.4s, v18.4s, v0.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v10.4s
+ eor v6.16b, v6.16b, v18.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v30.4s
+ eor v7.16b, v7.16b, v20.16b
+ add v17.4s, v17.4s, v1.4s
+ rev32 v6.8h, v6.8h
+ add v19.4s, v19.4s, v2.4s
+ rev32 v7.8h, v7.8h
+ eor v5.16b, v17.16b, v5.16b
+ add v3.4s, v3.4s, v6.4s
+ eor v26.16b, v19.16b, v26.16b
+ add v4.4s, v4.4s, v7.4s
+ rev32 v5.8h, v5.8h
+ eor v0.16b, v3.16b, v0.16b
+ rev32 v26.8h, v26.8h
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v23.4s, v5.4s
+ ushr v11.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v16.4s, v16.4s, v26.4s
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v18.4s, v18.4s, v8.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v20.4s, v20.4s, v12.4s
+ add v18.4s, v18.4s, v0.4s
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v13.4s
+ ldr q13, [sp, #160]
+ eor v6.16b, v18.16b, v6.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v15.4s
+ eor v7.16b, v20.16b, v7.16b
+ add v17.4s, v17.4s, v1.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ add v19.4s, v19.4s, v2.4s
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ eor v5.16b, v17.16b, v5.16b
+ orr v6.16b, v6.16b, v11.16b
+ eor v26.16b, v19.16b, v26.16b
+ orr v7.16b, v7.16b, v31.16b
+ ushr v31.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ add v3.4s, v6.4s, v3.4s
+ ushr v11.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v4.4s, v7.4s, v4.4s
+ orr v5.16b, v5.16b, v31.16b
+ eor v0.16b, v3.16b, v0.16b
+ orr v26.16b, v26.16b, v11.16b
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v5.4s, v23.4s
+ ushr v11.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v16.4s, v26.4s, v16.4s
+ ushr v31.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v20.4s, v20.4s, v22.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v0.4s
+ add v19.4s, v19.4s, v9.4s
+ mov v29.16b, v14.16b
+ ldr q14, [sp, #128]
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v18.4s, v18.4s, v14.4s
+ eor v26.16b, v20.16b, v26.16b
+ add v19.4s, v19.4s, v25.4s
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v27.4s
+ add v18.4s, v18.4s, v1.4s
+ rev32 v26.8h, v26.8h
+ eor v5.16b, v19.16b, v5.16b
+ add v17.4s, v17.4s, v2.4s
+ eor v7.16b, v18.16b, v7.16b
+ add v23.4s, v23.4s, v26.4s
+ rev32 v5.8h, v5.8h
+ eor v6.16b, v17.16b, v6.16b
+ rev32 v7.8h, v7.8h
+ eor v0.16b, v23.16b, v0.16b
+ add v3.4s, v3.4s, v5.4s
+ rev32 v6.8h, v6.8h
+ add v16.4s, v16.4s, v7.4s
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v4.4s, v6.4s
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ add v20.4s, v20.4s, v21.4s
+ ushr v11.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v31.16b
+ add v19.4s, v19.4s, v28.4s
+ add v20.4s, v20.4s, v0.4s
+ mov v12.16b, v27.16b
+ ldur q27, [x29, #-208]
+ orr v1.16b, v1.16b, v11.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v18.4s, v18.4s, v27.4s
+ add v19.4s, v19.4s, v25.4s
+ eor v26.16b, v20.16b, v26.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v13.4s
+ add v18.4s, v18.4s, v1.4s
+ eor v5.16b, v19.16b, v5.16b
+ ushr v31.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v17.4s, v17.4s, v2.4s
+ ushr v11.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ eor v7.16b, v18.16b, v7.16b
+ orr v26.16b, v26.16b, v31.16b
+ eor v6.16b, v17.16b, v6.16b
+ orr v5.16b, v5.16b, v11.16b
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ add v23.4s, v26.4s, v23.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ orr v7.16b, v7.16b, v31.16b
+ add v3.4s, v5.4s, v3.4s
+ eor v0.16b, v23.16b, v0.16b
+ orr v6.16b, v6.16b, v11.16b
+ add v16.4s, v7.4s, v16.4s
+ eor v25.16b, v3.16b, v25.16b
+ ushr v31.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v4.4s, v6.4s, v4.4s
+ ushr v11.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ add v18.4s, v18.4s, v8.4s
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v29.4s
+ add v18.4s, v18.4s, v0.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v15.4s
+ eor v6.16b, v6.16b, v18.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v10.4s
+ eor v7.16b, v7.16b, v20.16b
+ add v17.4s, v17.4s, v1.4s
+ rev32 v6.8h, v6.8h
+ add v19.4s, v19.4s, v2.4s
+ rev32 v7.8h, v7.8h
+ eor v5.16b, v17.16b, v5.16b
+ add v3.4s, v3.4s, v6.4s
+ eor v26.16b, v19.16b, v26.16b
+ add v4.4s, v4.4s, v7.4s
+ rev32 v5.8h, v5.8h
+ eor v0.16b, v3.16b, v0.16b
+ rev32 v26.8h, v26.8h
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v23.4s, v5.4s
+ ushr v11.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v16.4s, v16.4s, v26.4s
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v18.4s, v18.4s, v14.4s
+ mov v30.16b, v29.16b
+ mov v29.16b, v15.16b
+ ldr q15, [sp, #144]
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v20.4s, v20.4s, v15.4s
+ add v18.4s, v18.4s, v0.4s
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v24.4s
+ eor v6.16b, v18.16b, v6.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v13.4s
+ eor v7.16b, v20.16b, v7.16b
+ add v17.4s, v17.4s, v1.4s
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ add v19.4s, v19.4s, v2.4s
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ eor v5.16b, v17.16b, v5.16b
+ orr v6.16b, v6.16b, v11.16b
+ eor v26.16b, v19.16b, v26.16b
+ orr v7.16b, v7.16b, v31.16b
+ ushr v31.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ add v3.4s, v6.4s, v3.4s
+ ushr v11.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v4.4s, v7.4s, v4.4s
+ orr v5.16b, v5.16b, v31.16b
+ eor v0.16b, v3.16b, v0.16b
+ orr v26.16b, v26.16b, v11.16b
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v5.4s, v23.4s
+ ushr v11.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ mov v9.16b, v28.16b
+ mov v28.16b, v10.16b
+ ldr q10, [sp, #176]
+ add v16.4s, v26.4s, v16.4s
+ ushr v31.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v23.16b, v1.16b
+ orr v0.16b, v0.16b, v11.16b
+ add v20.4s, v20.4s, v10.4s
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v0.4s
+ add v19.4s, v19.4s, v9.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v18.4s, v18.4s, v12.4s
+ eor v26.16b, v20.16b, v26.16b
+ add v19.4s, v19.4s, v25.4s
+ orr v2.16b, v2.16b, v11.16b
+ add v17.4s, v17.4s, v21.4s
+ add v18.4s, v18.4s, v1.4s
+ rev32 v26.8h, v26.8h
+ eor v5.16b, v19.16b, v5.16b
+ add v17.4s, v17.4s, v2.4s
+ eor v7.16b, v18.16b, v7.16b
+ add v23.4s, v23.4s, v26.4s
+ rev32 v5.8h, v5.8h
+ eor v6.16b, v17.16b, v6.16b
+ rev32 v7.8h, v7.8h
+ eor v0.16b, v23.16b, v0.16b
+ add v3.4s, v3.4s, v5.4s
+ rev32 v6.8h, v6.8h
+ add v16.4s, v16.4s, v7.4s
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v4.4s, v6.4s
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ ushr v11.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ eor v2.16b, v4.16b, v2.16b
+ add v20.4s, v20.4s, v27.4s
+ orr v25.16b, v25.16b, v31.16b
+ add v19.4s, v19.4s, v22.4s
+ mov v9.16b, v22.16b
+ ldur q22, [x29, #-240]
+ orr v1.16b, v1.16b, v11.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v20.4s, v20.4s, v0.4s
+ add v18.4s, v18.4s, v22.4s
+ add v19.4s, v19.4s, v25.4s
+ mov v24.16b, v21.16b
+ ldur q21, [x29, #-192]
+ orr v2.16b, v2.16b, v11.16b
+ eor v26.16b, v20.16b, v26.16b
+ add v17.4s, v17.4s, v21.4s
+ add v18.4s, v18.4s, v1.4s
+ eor v5.16b, v19.16b, v5.16b
+ ushr v31.4s, v26.4s, #8
+ add v17.4s, v17.4s, v2.4s
+ shl v26.4s, v26.4s, #24
+ ushr v11.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ eor v7.16b, v18.16b, v7.16b
+ orr v26.16b, v26.16b, v31.16b
+ eor v6.16b, v17.16b, v6.16b
+ orr v5.16b, v5.16b, v11.16b
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ add v23.4s, v26.4s, v23.4s
+ orr v7.16b, v7.16b, v31.16b
+ add v3.4s, v5.4s, v3.4s
+ orr v6.16b, v6.16b, v11.16b
+ eor v0.16b, v23.16b, v0.16b
+ add v16.4s, v7.4s, v16.4s
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v6.4s, v4.4s
+ ushr v31.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v11.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v16.16b, v1.16b
+ orr v0.16b, v0.16b, v31.16b
+ eor v2.16b, v4.16b, v2.16b
+ orr v25.16b, v25.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v8.4s
+ add v18.4s, v18.4s, v14.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v13.4s
+ add v18.4s, v18.4s, v0.4s
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v29.4s
+ eor v7.16b, v7.16b, v20.16b
+ add v17.4s, v17.4s, v1.4s
+ eor v6.16b, v6.16b, v18.16b
+ add v19.4s, v19.4s, v2.4s
+ rev32 v7.8h, v7.8h
+ eor v5.16b, v17.16b, v5.16b
+ rev32 v6.8h, v6.8h
+ eor v26.16b, v19.16b, v26.16b
+ add v4.4s, v4.4s, v7.4s
+ rev32 v5.8h, v5.8h
+ add v3.4s, v3.4s, v6.4s
+ rev32 v26.8h, v26.8h
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v23.4s, v5.4s
+ eor v0.16b, v3.16b, v0.16b
+ add v16.4s, v16.4s, v26.4s
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ ushr v11.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v1.16b, v23.16b, v1.16b
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ orr v0.16b, v0.16b, v11.16b
+ ushr v31.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v20.4s, v20.4s, v28.4s
+ add v18.4s, v18.4s, v12.4s
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ orr v1.16b, v1.16b, v31.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v30.4s
+ add v18.4s, v18.4s, v0.4s
+ orr v2.16b, v2.16b, v11.16b
+ add v19.4s, v19.4s, v21.4s
+ eor v7.16b, v20.16b, v7.16b
+ add v17.4s, v17.4s, v1.4s
+ eor v6.16b, v18.16b, v6.16b
+ add v19.4s, v19.4s, v2.4s
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ eor v5.16b, v17.16b, v5.16b
+ orr v7.16b, v7.16b, v31.16b
+ eor v26.16b, v19.16b, v26.16b
+ orr v6.16b, v6.16b, v11.16b
+ ushr v31.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ ushr v11.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ add v4.4s, v7.4s, v4.4s
+ orr v5.16b, v5.16b, v31.16b
+ add v3.4s, v6.4s, v3.4s
+ orr v26.16b, v26.16b, v11.16b
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v5.4s, v23.4s
+ eor v0.16b, v3.16b, v0.16b
+ add v16.4s, v26.4s, v16.4s
+ ushr v31.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ ushr v11.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v1.16b, v23.16b, v1.16b
+ orr v25.16b, v25.16b, v31.16b
+ eor v2.16b, v16.16b, v2.16b
+ orr v0.16b, v0.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v20.4s, v20.4s, v15.4s
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v1.16b, v1.16b, v31.16b
+ add v18.4s, v18.4s, v24.4s
+ add v20.4s, v20.4s, v0.4s
+ add v19.4s, v19.4s, v9.4s
+ mov v8.16b, v13.16b
+ ldur q13, [x29, #-208]
+ orr v2.16b, v2.16b, v11.16b
+ add v18.4s, v18.4s, v1.4s
+ add v17.4s, v17.4s, v13.4s
+ eor v26.16b, v20.16b, v26.16b
+ add v19.4s, v19.4s, v25.4s
+ eor v7.16b, v18.16b, v7.16b
+ add v17.4s, v17.4s, v2.4s
+ rev32 v26.8h, v26.8h
+ eor v5.16b, v19.16b, v5.16b
+ rev32 v7.8h, v7.8h
+ eor v6.16b, v17.16b, v6.16b
+ add v23.4s, v23.4s, v26.4s
+ rev32 v5.8h, v5.8h
+ add v16.4s, v16.4s, v7.4s
+ rev32 v6.8h, v6.8h
+ eor v0.16b, v23.16b, v0.16b
+ add v3.4s, v3.4s, v5.4s
+ eor v1.16b, v16.16b, v1.16b
+ add v4.4s, v4.4s, v6.4s
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v25.16b, v3.16b, v25.16b
+ ushr v11.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ orr v0.16b, v0.16b, v31.16b
+ eor v2.16b, v4.16b, v2.16b
+ ushr v31.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ orr v1.16b, v1.16b, v11.16b
+ ushr v11.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v20.4s, v20.4s, v22.4s
+ orr v25.16b, v25.16b, v31.16b
+ add v19.4s, v19.4s, v10.4s
+ mov v27.16b, v12.16b
+ mov v12.16b, v30.16b
+ mov v29.16b, v21.16b
+ mov v21.16b, v24.16b
+ ldr q24, [sp, #192]
+ mov v30.16b, v22.16b
+ ldr q22, [sp, #256]
+ orr v2.16b, v2.16b, v11.16b
+ add v20.4s, v20.4s, v0.4s
+ add v18.4s, v18.4s, v24.4s
+ add v19.4s, v19.4s, v25.4s
+ add v17.4s, v17.4s, v22.4s
+ eor v26.16b, v20.16b, v26.16b
+ add v18.4s, v18.4s, v1.4s
+ eor v5.16b, v19.16b, v5.16b
+ add v17.4s, v17.4s, v2.4s
+ ushr v31.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ ushr v11.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ eor v7.16b, v18.16b, v7.16b
+ eor v6.16b, v17.16b, v6.16b
+ orr v26.16b, v26.16b, v31.16b
+ orr v5.16b, v5.16b, v11.16b
+ ushr v31.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ ushr v11.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ add v23.4s, v26.4s, v23.4s
+ orr v7.16b, v7.16b, v31.16b
+ add v3.4s, v5.4s, v3.4s
+ orr v6.16b, v6.16b, v11.16b
+ eor v0.16b, v23.16b, v0.16b
+ add v16.4s, v7.4s, v16.4s
+ eor v25.16b, v3.16b, v25.16b
+ add v4.4s, v6.4s, v4.4s
+ ushr v31.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v11.4s, v25.4s, #7
+ shl v25.4s, v25.4s, #25
+ eor v1.16b, v16.16b, v1.16b
+ eor v2.16b, v4.16b, v2.16b
+ orr v0.16b, v0.16b, v31.16b
+ orr v25.16b, v25.16b, v11.16b
+ ushr v31.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ushr v11.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v20.4s, v20.4s, v14.4s
+ add v18.4s, v18.4s, v27.4s
+ ldr q27, [sp, #224]
+ orr v1.16b, v1.16b, v31.16b
+ orr v2.16b, v2.16b, v11.16b
+ add v20.4s, v20.4s, v25.4s
+ add v17.4s, v17.4s, v29.4s
+ add v18.4s, v18.4s, v0.4s
+ add v19.4s, v19.4s, v8.4s
+ eor v7.16b, v7.16b, v20.16b
+ add v17.4s, v17.4s, v1.4s
+ eor v6.16b, v6.16b, v18.16b
+ add v19.4s, v19.4s, v2.4s
+ rev32 v7.8h, v7.8h
+ eor v5.16b, v17.16b, v5.16b
+ rev32 v6.8h, v6.8h
+ eor v26.16b, v19.16b, v26.16b
+ add v4.4s, v4.4s, v7.4s
+ rev32 v5.8h, v5.8h
+ add v3.4s, v3.4s, v6.4s
+ rev32 v26.8h, v26.8h
+ eor v25.16b, v4.16b, v25.16b
+ add v23.4s, v23.4s, v5.4s
+ eor v0.16b, v3.16b, v0.16b
+ add v16.4s, v16.4s, v26.4s
+ ushr v29.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ ushr v31.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v1.16b, v23.16b, v1.16b
+ eor v2.16b, v16.16b, v2.16b
+ orr v25.16b, v25.16b, v29.16b
+ orr v0.16b, v0.16b, v31.16b
+ ushr v29.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ ushr v31.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ add v18.4s, v18.4s, v21.4s
+ ldr q21, [sp, #240]
+ add v20.4s, v20.4s, v27.4s
+ prfm pldl1keep, [x17, #256]
+ orr v1.16b, v1.16b, v29.16b
+ prfm pldl1keep, [x21, #256]
+ orr v2.16b, v2.16b, v31.16b
+ prfm pldl1keep, [x16, #256]
+ add v18.4s, v18.4s, v0.4s
+ prfm pldl1keep, [x6, #256]
+ add v17.4s, v17.4s, v21.4s
+ add v19.4s, v19.4s, v22.4s
+ add v20.4s, v20.4s, v25.4s
+ eor v6.16b, v18.16b, v6.16b
+ add v17.4s, v17.4s, v1.4s
+ add v19.4s, v19.4s, v2.4s
+ eor v7.16b, v20.16b, v7.16b
+ ushr v22.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ eor v5.16b, v17.16b, v5.16b
+ eor v26.16b, v19.16b, v26.16b
+ ushr v21.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ orr v6.16b, v6.16b, v22.16b
+ ushr v22.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ ushr v29.4s, v26.4s, #8
+ shl v26.4s, v26.4s, #24
+ orr v7.16b, v7.16b, v21.16b
+ orr v5.16b, v5.16b, v22.16b
+ add v3.4s, v6.4s, v3.4s
+ orr v21.16b, v26.16b, v29.16b
+ add v4.4s, v7.4s, v4.4s
+ add v22.4s, v5.4s, v23.4s
+ eor v0.16b, v3.16b, v0.16b
+ add v16.4s, v21.4s, v16.4s
+ eor v23.16b, v4.16b, v25.16b
+ eor v1.16b, v22.16b, v1.16b
+ ushr v25.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v2.16b, v16.16b, v2.16b
+ ushr v26.4s, v23.4s, #7
+ shl v23.4s, v23.4s, #25
+ orr v0.16b, v0.16b, v25.16b
+ ushr v25.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ushr v29.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ add v20.4s, v20.4s, v28.4s
+ orr v23.16b, v23.16b, v26.16b
+ orr v1.16b, v1.16b, v25.16b
+ orr v2.16b, v2.16b, v29.16b
+ add v20.4s, v20.4s, v0.4s
+ add v18.4s, v18.4s, v13.4s
+ add v17.4s, v17.4s, v30.4s
+ add v19.4s, v19.4s, v10.4s
+ eor v21.16b, v20.16b, v21.16b
+ add v18.4s, v18.4s, v1.4s
+ add v17.4s, v17.4s, v2.4s
+ add v19.4s, v19.4s, v23.4s
+ rev32 v21.8h, v21.8h
+ eor v7.16b, v18.16b, v7.16b
+ eor v6.16b, v17.16b, v6.16b
+ eor v5.16b, v19.16b, v5.16b
+ add v22.4s, v22.4s, v21.4s
+ rev32 v7.8h, v7.8h
+ rev32 v6.8h, v6.8h
+ rev32 v5.8h, v5.8h
+ eor v0.16b, v22.16b, v0.16b
+ add v16.4s, v16.4s, v7.4s
+ add v4.4s, v4.4s, v6.4s
+ add v3.4s, v3.4s, v5.4s
+ ushr v25.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v1.16b, v16.16b, v1.16b
+ eor v2.16b, v4.16b, v2.16b
+ eor v23.16b, v3.16b, v23.16b
+ orr v0.16b, v0.16b, v25.16b
+ ushr v25.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ ushr v26.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ushr v27.4s, v23.4s, #12
+ shl v23.4s, v23.4s, #20
+ orr v1.16b, v1.16b, v25.16b
+ add v20.4s, v20.4s, v24.4s
+ orr v2.16b, v2.16b, v26.16b
+ orr v23.16b, v23.16b, v27.16b
+ add v18.4s, v18.4s, v12.4s
+ add v17.4s, v17.4s, v9.4s
+ add v19.4s, v19.4s, v15.4s
+ add v20.4s, v20.4s, v0.4s
+ add v18.4s, v18.4s, v1.4s
+ add v17.4s, v17.4s, v2.4s
+ add v19.4s, v19.4s, v23.4s
+ eor v21.16b, v20.16b, v21.16b
+ eor v7.16b, v18.16b, v7.16b
+ eor v6.16b, v17.16b, v6.16b
+ eor v5.16b, v19.16b, v5.16b
+ ushr v24.4s, v21.4s, #8
+ shl v21.4s, v21.4s, #24
+ ushr v25.4s, v7.4s, #8
+ shl v7.4s, v7.4s, #24
+ ushr v26.4s, v6.4s, #8
+ shl v6.4s, v6.4s, #24
+ ushr v27.4s, v5.4s, #8
+ shl v5.4s, v5.4s, #24
+ orr v21.16b, v21.16b, v24.16b
+ orr v7.16b, v7.16b, v25.16b
+ orr v6.16b, v6.16b, v26.16b
+ orr v5.16b, v5.16b, v27.16b
+ add v22.4s, v21.4s, v22.4s
+ add v16.4s, v7.4s, v16.4s
+ add v4.4s, v6.4s, v4.4s
+ add v3.4s, v5.4s, v3.4s
+ eor v0.16b, v22.16b, v0.16b
+ eor v1.16b, v16.16b, v1.16b
+ eor v2.16b, v4.16b, v2.16b
+ eor v23.16b, v3.16b, v23.16b
+ ushr v24.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v25.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ushr v26.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ushr v27.4s, v23.4s, #7
+ shl v23.4s, v23.4s, #25
+ orr v0.16b, v0.16b, v24.16b
+ orr v1.16b, v1.16b, v25.16b
+ orr v2.16b, v2.16b, v26.16b
+ orr v23.16b, v23.16b, v27.16b
+ movi v24.4s, #64
+ eor v12.16b, v4.16b, v20.16b
+ eor v31.16b, v18.16b, v3.16b
+ eor v29.16b, v17.16b, v22.16b
+ eor v30.16b, v16.16b, v19.16b
+ eor v28.16b, v7.16b, v23.16b
+ eor v23.16b, v6.16b, v0.16b
+ eor v13.16b, v1.16b, v5.16b
+ eor v25.16b, v2.16b, v21.16b
+ cbnz x15, .LBB3_5
+ b .LBB3_2
+.LBB3_6:
+ cbz x24, .LBB3_14
+ orr w8, w7, w19
+ and x22, x5, #0x1
+ stur w8, [x29, #-192]
+.LBB3_8:
+ ldr x8, [sp, #40]
+ mov x28, x0
+ ldr x25, [x0]
+ mov x23, x2
+ ldur w5, [x29, #-192]
+ ldp q0, q1, [x8]
+ mov x8, x2
+ b .LBB3_11
+.LBB3_9:
+ orr w5, w5, w27
+.LBB3_10:
+ sub x0, x29, #144
+ sub x1, x29, #176
+ mov x2, x25
+ mov w3, #64
+ mov x4, x20
+ bl compress_pre
+ ldp q0, q1, [x29, #-144]
+ add x25, x25, #64
+ mov x8, x21
+ mov w5, w19
+ ldp q2, q3, [x29, #-112]
+ eor v0.16b, v2.16b, v0.16b
+ eor v1.16b, v3.16b, v1.16b
+.LBB3_11:
+ subs x21, x8, #1
+ stp q0, q1, [x29, #-176]
+ b.eq .LBB3_9
+ cbnz x8, .LBB3_10
+ ldp q1, q0, [x29, #-176]
+ mov x0, x28
+ add x20, x20, x22
+ add x0, x28, #8
+ subs x24, x24, #1
+ mov x2, x23
+ stp q1, q0, [x26], #32
+ b.ne .LBB3_8
+.LBB3_14:
+ add sp, sp, #464
+ ldp x20, x19, [sp, #144]
+ ldp x22, x21, [sp, #128]
+ ldp x24, x23, [sp, #112]
+ ldp x26, x25, [sp, #96]
+ ldp x28, x27, [sp, #80]
+ ldp x29, x30, [sp, #64]
+ ldp d9, d8, [sp, #48]
+ ldp d11, d10, [sp, #32]
+ ldp d13, d12, [sp, #16]
+ ldp d15, d14, [sp], #160
+ hint #29
+ ret
+.Lfunc_end3:
+ .size zfs_blake3_hash_many_sse2, .Lfunc_end3-zfs_blake3_hash_many_sse2
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
new file mode 100644
index 000000000000..0b719761dd4c
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
@@ -0,0 +1,2406 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves
+ * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is assembly converted from SSE4.1 to ARMv8-A using SIMDe:
+ * https://github.com/simd-everywhere/simde
+ *
+ * It should work on FreeBSD, Linux and macOS; see:
+ * https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
+ */
+
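+/*
+ * A note on the conversion: BLAKE3's G function rotates 32-bit words
+ * right by 16, 12, 8 and 7.  In the generated code below, the 12- and
+ * 7-bit rotates are spelled as ushr/shl/orr pairs, while the 16- and
+ * 8-bit rotates go through tbl with the byte-shuffle masks kept in
+ * .rodata (the .LCPI*_1 and .LCPI*_2 constants).
+ */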
+#if defined(__aarch64__)
+
+/* make gcc <= 9 happy: older toolchains lack .cfi_negate_ra_state */
+#if !defined(LD_VERSION) || LD_VERSION >= 233010000
+#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
+#else
+#define CFI_NEGATE_RA_STATE
+#endif
+
+ .text
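+/* GNU property note: pr_type 3221225472 (0xc0000000) is
+   GNU_PROPERTY_AARCH64_FEATURE_1_AND, and the value 3 advertises
+   BTI and PAC support */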
+ .section .note.gnu.property,"a",@note
+ .p2align 3
+ .word 4
+ .word 16
+ .word 5
+ .asciz "GNU"
+ .word 3221225472
+ .word 4
+ .word 3
+ .word 0
+.Lsec_end0:
+ .text
+ .globl zfs_blake3_compress_in_place_sse41
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse41,@function
+zfs_blake3_compress_in_place_sse41:
+ .cfi_startproc
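+ /* hint #25/#29 encode paciasp/autiasp and hint #34 encodes "bti c",
+    written as hints so that pre-ARMv8.3 assemblers accept them */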
+ hint #25
+ CFI_NEGATE_RA_STATE
+ sub sp, sp, #96
+ stp x29, x30, [sp, #64]
+ add x29, sp, #64
+ str x19, [sp, #80]
+ .cfi_def_cfa w29, 32
+ .cfi_offset w19, -16
+ .cfi_offset w30, -24
+ .cfi_offset w29, -32
+ mov x19, x0
+ mov w5, w4
+ mov x4, x3
+ mov w3, w2
+ mov x2, x1
+ mov x0, sp
+ mov x1, x19
+ bl compress_pre
+ ldp q0, q1, [sp]
+ ldp q2, q3, [sp, #32]
+ eor v0.16b, v2.16b, v0.16b
+ eor v1.16b, v3.16b, v1.16b
+ ldp x29, x30, [sp, #64]
+ stp q0, q1, [x19]
+ ldr x19, [sp, #80]
+ add sp, sp, #96
+ hint #29
+ ret
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
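+/* .LCPI1_0 packs IV[0..3]: 0x6A09E667, 0xBB67AE85, 0x3C6EF372,
+   0xA54FF53A (the BLAKE3 IV, shared with SHA-256) */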
+.LCPI1_0:
+ .xword -4942790177982912921
+ .xword -6534734903820487822
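+/* tbl mask: rotate each 32-bit lane right by 16 bits */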
+.LCPI1_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
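+/* tbl mask: rotate each 32-bit lane right by 8 bits */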
+.LCPI1_2:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+ .text
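+/*
+ * compress_pre runs the seven BLAKE3 rounds and leaves the four
+ * 16-byte state rows at [x0]; callers finish the compression by
+ * xor-ing rows 0/2 and 1/3.  The register assignment appears to be
+ * x0 = state scratch, x1 = cv, x2 = 64-byte block, w3 = block_len,
+ * x4 = counter, w5 = flags.
+ */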
+ .p2align 2
+ .type compress_pre,@function
+compress_pre:
+ .cfi_startproc
+ hint #34
+ fmov s1, w3
+ movi d0, #0x0000ff000000ff
+ ldr q2, [x1]
+ adrp x8, .LCPI1_0
+ mov v1.s[1], w5
+ str q2, [x0]
+ ldr q4, [x8, :lo12:.LCPI1_0]
+ ldr q5, [x1, #16]
+ adrp x8, .LCPI1_1
+ and v0.8b, v1.8b, v0.8b
+ fmov d1, x4
+ stp q5, q4, [x0, #16]
+ mov v1.d[1], v0.d[0]
+ str q1, [x0, #48]
+ ldp q6, q7, [x2]
+ uzp1 v3.4s, v6.4s, v7.4s
+ add v0.4s, v2.4s, v3.4s
+ uzp2 v2.4s, v6.4s, v7.4s
+ add v16.4s, v0.4s, v5.4s
+ ldr q0, [x8, :lo12:.LCPI1_1]
+ adrp x8, .LCPI1_2
+ eor v1.16b, v16.16b, v1.16b
+ add v7.4s, v16.4s, v2.4s
+ tbl v1.16b, { v1.16b }, v0.16b
+ add v4.4s, v1.4s, v4.4s
+ eor v5.16b, v4.16b, v5.16b
+ ushr v6.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v6.16b
+ add v6.4s, v7.4s, v5.4s
+ eor v7.16b, v1.16b, v6.16b
+ ldr q1, [x8, :lo12:.LCPI1_2]
+ add x8, x2, #32
+ tbl v7.16b, { v7.16b }, v1.16b
+ ld2 { v16.4s, v17.4s }, [x8]
+ add v4.4s, v4.4s, v7.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ add v6.4s, v6.4s, v16.4s
+ eor v5.16b, v4.16b, v5.16b
+ ext v4.16b, v4.16b, v4.16b, #4
+ ext v16.16b, v16.16b, v16.16b, #12
+ ext v6.16b, v6.16b, v6.16b, #12
+ ushr v18.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v18.16b
+ ext v18.16b, v17.16b, v17.16b, #12
+ add v6.4s, v6.4s, v5.4s
+ mov v17.16b, v18.16b
+ eor v7.16b, v7.16b, v6.16b
+ add v6.4s, v6.4s, v18.4s
+ mov v17.s[1], v16.s[2]
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v4.4s, v4.4s, v7.4s
+ eor v5.16b, v4.16b, v5.16b
+ ushr v19.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v19.16b
+ uzp1 v19.4s, v3.4s, v3.4s
+ add v6.4s, v6.4s, v5.4s
+ ext v19.16b, v19.16b, v3.16b, #8
+ eor v7.16b, v7.16b, v6.16b
+ uzp2 v19.4s, v19.4s, v2.4s
+ tbl v7.16b, { v7.16b }, v1.16b
+ add v6.4s, v6.4s, v19.4s
+ add v4.4s, v4.4s, v7.4s
+ ext v6.16b, v6.16b, v6.16b, #4
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v5.16b, v4.16b, v5.16b
+ ext v4.16b, v4.16b, v4.16b, #12
+ ushr v20.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v20.16b
+ ext v20.16b, v3.16b, v3.16b, #12
+ add v6.4s, v6.4s, v5.4s
+ ext v3.16b, v3.16b, v20.16b, #12
+ eor v7.16b, v7.16b, v6.16b
+ rev64 v3.4s, v3.4s
+ tbl v7.16b, { v7.16b }, v0.16b
+ trn2 v3.4s, v3.4s, v17.4s
+ add v4.4s, v4.4s, v7.4s
+ add v6.4s, v6.4s, v3.4s
+ eor v5.16b, v4.16b, v5.16b
+ ushr v17.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v17.16b
+ zip1 v17.2d, v18.2d, v2.2d
+ zip2 v2.4s, v2.4s, v18.4s
+ add v6.4s, v6.4s, v5.4s
+ mov v17.s[3], v16.s[3]
+ zip1 v18.4s, v2.4s, v16.4s
+ zip1 v2.4s, v16.4s, v2.4s
+ eor v7.16b, v7.16b, v6.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ ext v16.16b, v2.16b, v18.16b, #8
+ tbl v7.16b, { v7.16b }, v1.16b
+ add v20.4s, v4.4s, v7.4s
+ ext v4.16b, v17.16b, v17.16b, #12
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v5.16b, v20.16b, v5.16b
+ uzp1 v4.4s, v17.4s, v4.4s
+ ushr v17.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v6.4s, v6.4s, v4.4s
+ orr v5.16b, v5.16b, v17.16b
+ ext v17.16b, v20.16b, v20.16b, #4
+ add v6.4s, v6.4s, v5.4s
+ eor v7.16b, v7.16b, v6.16b
+ add v6.4s, v6.4s, v16.4s
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v17.4s, v17.4s, v7.4s
+ eor v5.16b, v17.16b, v5.16b
+ ushr v2.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v2.16b, v5.16b, v2.16b
+ add v5.4s, v6.4s, v2.4s
+ ext v6.16b, v19.16b, v19.16b, #4
+ eor v7.16b, v7.16b, v5.16b
+ uzp1 v18.4s, v6.4s, v6.4s
+ tbl v7.16b, { v7.16b }, v1.16b
+ ext v18.16b, v18.16b, v6.16b, #8
+ add v17.4s, v17.4s, v7.4s
+ uzp2 v18.4s, v18.4s, v3.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v2.16b, v17.16b, v2.16b
+ add v5.4s, v5.4s, v18.4s
+ ext v17.16b, v17.16b, v17.16b, #12
+ ushr v19.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v2.16b, v2.16b, v19.16b
+ ext v19.16b, v6.16b, v6.16b, #12
+ add v5.4s, v5.4s, v2.4s
+ ext v6.16b, v6.16b, v19.16b, #12
+ mov v19.16b, v16.16b
+ eor v7.16b, v7.16b, v5.16b
+ rev64 v6.4s, v6.4s
+ mov v19.s[1], v4.s[2]
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v17.4s, v17.4s, v7.4s
+ eor v20.16b, v17.16b, v2.16b
+ trn2 v2.4s, v6.4s, v19.4s
+ ushr v6.4s, v20.4s, #12
+ shl v19.4s, v20.4s, #20
+ add v5.4s, v5.4s, v2.4s
+ orr v6.16b, v19.16b, v6.16b
+ add v19.4s, v5.4s, v6.4s
+ eor v5.16b, v7.16b, v19.16b
+ zip1 v7.2d, v16.2d, v3.2d
+ zip2 v3.4s, v3.4s, v16.4s
+ tbl v20.16b, { v5.16b }, v1.16b
+ mov v7.s[3], v4.s[3]
+ add v17.4s, v17.4s, v20.4s
+ ext v5.16b, v7.16b, v7.16b, #12
+ eor v6.16b, v17.16b, v6.16b
+ uzp1 v5.4s, v7.4s, v5.4s
+ ext v7.16b, v19.16b, v19.16b, #12
+ ext v17.16b, v17.16b, v17.16b, #4
+ ushr v19.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ add v7.4s, v7.4s, v5.4s
+ orr v6.16b, v6.16b, v19.16b
+ ext v19.16b, v20.16b, v20.16b, #8
+ add v7.4s, v7.4s, v6.4s
+ eor v19.16b, v19.16b, v7.16b
+ tbl v19.16b, { v19.16b }, v0.16b
+ add v16.4s, v17.4s, v19.4s
+ zip1 v17.4s, v3.4s, v4.4s
+ zip1 v3.4s, v4.4s, v3.4s
+ eor v4.16b, v16.16b, v6.16b
+ ext v17.16b, v3.16b, v17.16b, #8
+ ushr v3.4s, v4.4s, #12
+ shl v4.4s, v4.4s, #20
+ add v6.4s, v7.4s, v17.4s
+ orr v3.16b, v4.16b, v3.16b
+ add v4.4s, v6.4s, v3.4s
+ ext v6.16b, v18.16b, v18.16b, #4
+ eor v7.16b, v19.16b, v4.16b
+ uzp1 v18.4s, v6.4s, v6.4s
+ tbl v7.16b, { v7.16b }, v1.16b
+ ext v18.16b, v18.16b, v6.16b, #8
+ add v16.4s, v16.4s, v7.4s
+ uzp2 v18.4s, v18.4s, v2.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v3.16b, v16.16b, v3.16b
+ add v4.4s, v4.4s, v18.4s
+ ext v16.16b, v16.16b, v16.16b, #12
+ ushr v19.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ ext v4.16b, v4.16b, v4.16b, #4
+ orr v3.16b, v3.16b, v19.16b
+ ext v19.16b, v6.16b, v6.16b, #12
+ add v4.4s, v4.4s, v3.4s
+ ext v6.16b, v6.16b, v19.16b, #12
+ mov v19.16b, v17.16b
+ eor v7.16b, v7.16b, v4.16b
+ rev64 v6.4s, v6.4s
+ mov v19.s[1], v5.s[2]
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v16.4s, v16.4s, v7.4s
+ eor v20.16b, v16.16b, v3.16b
+ trn2 v3.4s, v6.4s, v19.4s
+ ushr v6.4s, v20.4s, #12
+ shl v19.4s, v20.4s, #20
+ add v4.4s, v4.4s, v3.4s
+ orr v6.16b, v19.16b, v6.16b
+ zip1 v19.2d, v17.2d, v2.2d
+ zip2 v2.4s, v2.4s, v17.4s
+ add v4.4s, v4.4s, v6.4s
+ mov v19.s[3], v5.s[3]
+ zip1 v17.4s, v2.4s, v5.4s
+ zip1 v2.4s, v5.4s, v2.4s
+ eor v7.16b, v7.16b, v4.16b
+ ext v20.16b, v19.16b, v19.16b, #12
+ ext v4.16b, v4.16b, v4.16b, #12
+ ext v2.16b, v2.16b, v17.16b, #8
+ tbl v7.16b, { v7.16b }, v1.16b
+ add v16.4s, v16.4s, v7.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v21.16b, v16.16b, v6.16b
+ uzp1 v6.4s, v19.4s, v20.4s
+ ext v16.16b, v16.16b, v16.16b, #4
+ ushr v19.4s, v21.4s, #7
+ shl v20.4s, v21.4s, #25
+ add v4.4s, v4.4s, v6.4s
+ orr v19.16b, v20.16b, v19.16b
+ add v4.4s, v4.4s, v19.4s
+ eor v7.16b, v7.16b, v4.16b
+ add v4.4s, v4.4s, v2.4s
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v16.4s, v16.4s, v7.4s
+ eor v5.16b, v16.16b, v19.16b
+ ushr v17.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v17.16b
+ ext v17.16b, v18.16b, v18.16b, #4
+ add v4.4s, v4.4s, v5.4s
+ uzp1 v18.4s, v17.4s, v17.4s
+ eor v7.16b, v7.16b, v4.16b
+ ext v18.16b, v18.16b, v17.16b, #8
+ tbl v7.16b, { v7.16b }, v1.16b
+ uzp2 v18.4s, v18.4s, v3.4s
+ add v16.4s, v16.4s, v7.4s
+ add v4.4s, v4.4s, v18.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v5.16b, v16.16b, v5.16b
+ ext v4.16b, v4.16b, v4.16b, #4
+ ext v16.16b, v16.16b, v16.16b, #12
+ ushr v19.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v19.16b
+ add v19.4s, v4.4s, v5.4s
+ eor v4.16b, v7.16b, v19.16b
+ ext v7.16b, v17.16b, v17.16b, #12
+ tbl v20.16b, { v4.16b }, v0.16b
+ ext v4.16b, v17.16b, v7.16b, #12
+ mov v7.16b, v2.16b
+ add v16.4s, v16.4s, v20.4s
+ rev64 v4.4s, v4.4s
+ mov v7.s[1], v6.s[2]
+ eor v5.16b, v16.16b, v5.16b
+ trn2 v4.4s, v4.4s, v7.4s
+ ushr v7.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v17.4s, v19.4s, v4.4s
+ zip1 v19.2d, v2.2d, v3.2d
+ zip2 v2.4s, v3.4s, v2.4s
+ orr v5.16b, v5.16b, v7.16b
+ mov v19.s[3], v6.s[3]
+ add v7.4s, v17.4s, v5.4s
+ eor v17.16b, v20.16b, v7.16b
+ ext v20.16b, v19.16b, v19.16b, #12
+ ext v7.16b, v7.16b, v7.16b, #12
+ tbl v17.16b, { v17.16b }, v1.16b
+ add v16.4s, v16.4s, v17.4s
+ ext v17.16b, v17.16b, v17.16b, #8
+ eor v21.16b, v16.16b, v5.16b
+ uzp1 v5.4s, v19.4s, v20.4s
+ ext v16.16b, v16.16b, v16.16b, #4
+ ushr v19.4s, v21.4s, #7
+ shl v20.4s, v21.4s, #25
+ add v7.4s, v7.4s, v5.4s
+ orr v19.16b, v20.16b, v19.16b
+ add v7.4s, v7.4s, v19.4s
+ eor v17.16b, v17.16b, v7.16b
+ tbl v17.16b, { v17.16b }, v0.16b
+ add v3.4s, v16.4s, v17.4s
+ zip1 v16.4s, v2.4s, v6.4s
+ zip1 v2.4s, v6.4s, v2.4s
+ eor v6.16b, v3.16b, v19.16b
+ ext v16.16b, v2.16b, v16.16b, #8
+ ushr v2.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ add v7.4s, v7.4s, v16.4s
+ orr v2.16b, v6.16b, v2.16b
+ add v6.4s, v7.4s, v2.4s
+ ext v7.16b, v18.16b, v18.16b, #4
+ eor v17.16b, v17.16b, v6.16b
+ uzp1 v18.4s, v7.4s, v7.4s
+ tbl v17.16b, { v17.16b }, v1.16b
+ ext v18.16b, v18.16b, v7.16b, #8
+ add v3.4s, v3.4s, v17.4s
+ uzp2 v18.4s, v18.4s, v4.4s
+ eor v2.16b, v3.16b, v2.16b
+ add v6.4s, v6.4s, v18.4s
+ ext v3.16b, v3.16b, v3.16b, #12
+ ext v18.16b, v18.16b, v18.16b, #4
+ ushr v19.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v19.16b, v2.16b, v19.16b
+ ext v2.16b, v17.16b, v17.16b, #8
+ ext v17.16b, v7.16b, v7.16b, #12
+ add v6.4s, v6.4s, v19.4s
+ eor v2.16b, v2.16b, v6.16b
+ tbl v20.16b, { v2.16b }, v0.16b
+ ext v2.16b, v7.16b, v17.16b, #12
+ mov v7.16b, v16.16b
+ add v17.4s, v3.4s, v20.4s
+ rev64 v3.4s, v2.4s
+ mov v7.s[1], v5.s[2]
+ eor v19.16b, v17.16b, v19.16b
+ trn2 v3.4s, v3.4s, v7.4s
+ ushr v21.4s, v19.4s, #12
+ shl v19.4s, v19.4s, #20
+ add v6.4s, v6.4s, v3.4s
+ orr v19.16b, v19.16b, v21.16b
+ add v21.4s, v6.4s, v19.4s
+ eor v6.16b, v20.16b, v21.16b
+ zip1 v20.2d, v16.2d, v4.2d
+ zip2 v4.4s, v4.4s, v16.4s
+ tbl v22.16b, { v6.16b }, v1.16b
+ mov v20.s[3], v5.s[3]
+ add v17.4s, v17.4s, v22.4s
+ ext v6.16b, v20.16b, v20.16b, #12
+ eor v19.16b, v17.16b, v19.16b
+ uzp1 v6.4s, v20.4s, v6.4s
+ ext v20.16b, v21.16b, v21.16b, #12
+ ext v17.16b, v17.16b, v17.16b, #4
+ ushr v21.4s, v19.4s, #7
+ shl v19.4s, v19.4s, #25
+ add v20.4s, v20.4s, v6.4s
+ orr v19.16b, v19.16b, v21.16b
+ ext v21.16b, v22.16b, v22.16b, #8
+ add v20.4s, v20.4s, v19.4s
+ eor v21.16b, v21.16b, v20.16b
+ tbl v21.16b, { v21.16b }, v0.16b
+ add v16.4s, v17.4s, v21.4s
+ zip1 v17.4s, v4.4s, v5.4s
+ zip1 v4.4s, v5.4s, v4.4s
+ eor v5.16b, v16.16b, v19.16b
+ ext v4.16b, v4.16b, v17.16b, #8
+ ushr v17.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v19.4s, v20.4s, v4.4s
+ ext v20.16b, v18.16b, v18.16b, #8
+ zip1 v3.2d, v4.2d, v3.2d
+ orr v5.16b, v5.16b, v17.16b
+ zip2 v2.4s, v2.4s, v4.4s
+ uzp2 v7.4s, v20.4s, v7.4s
+ mov v3.s[3], v6.s[3]
+ add v17.4s, v19.4s, v5.4s
+ ext v7.16b, v7.16b, v20.16b, #4
+ eor v19.16b, v21.16b, v17.16b
+ ext v17.16b, v17.16b, v17.16b, #4
+ tbl v19.16b, { v19.16b }, v1.16b
+ add v7.4s, v17.4s, v7.4s
+ add v16.4s, v16.4s, v19.4s
+ ext v17.16b, v19.16b, v19.16b, #8
+ ext v19.16b, v18.16b, v18.16b, #12
+ eor v5.16b, v16.16b, v5.16b
+ ext v16.16b, v16.16b, v16.16b, #12
+ ext v18.16b, v18.16b, v19.16b, #12
+ mov v19.16b, v4.16b
+ ushr v20.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ rev64 v18.4s, v18.4s
+ mov v19.s[1], v6.s[2]
+ orr v5.16b, v5.16b, v20.16b
+ trn2 v18.4s, v18.4s, v19.4s
+ add v7.4s, v5.4s, v7.4s
+ eor v17.16b, v17.16b, v7.16b
+ add v7.4s, v7.4s, v18.4s
+ ext v18.16b, v3.16b, v3.16b, #12
+ tbl v17.16b, { v17.16b }, v0.16b
+ uzp1 v3.4s, v3.4s, v18.4s
+ add v16.4s, v16.4s, v17.4s
+ eor v5.16b, v16.16b, v5.16b
+ ushr v19.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v5.16b, v5.16b, v19.16b
+ add v7.4s, v7.4s, v5.4s
+ eor v17.16b, v17.16b, v7.16b
+ ext v7.16b, v7.16b, v7.16b, #12
+ tbl v17.16b, { v17.16b }, v1.16b
+ add v3.4s, v7.4s, v3.4s
+ add v16.4s, v16.4s, v17.4s
+ ext v7.16b, v17.16b, v17.16b, #8
+ eor v5.16b, v16.16b, v5.16b
+ ext v16.16b, v16.16b, v16.16b, #4
+ ushr v18.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v5.16b, v5.16b, v18.16b
+ add v3.4s, v3.4s, v5.4s
+ eor v7.16b, v7.16b, v3.16b
+ tbl v0.16b, { v7.16b }, v0.16b
+ zip1 v7.4s, v2.4s, v6.4s
+ zip1 v2.4s, v6.4s, v2.4s
+ add v4.4s, v16.4s, v0.4s
+ ext v2.16b, v2.16b, v7.16b, #8
+ eor v5.16b, v4.16b, v5.16b
+ add v2.4s, v3.4s, v2.4s
+ ushr v6.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ orr v3.16b, v5.16b, v6.16b
+ add v2.4s, v2.4s, v3.4s
+ eor v0.16b, v0.16b, v2.16b
+ ext v2.16b, v2.16b, v2.16b, #4
+ tbl v0.16b, { v0.16b }, v1.16b
+ add v1.4s, v4.4s, v0.4s
+ ext v0.16b, v0.16b, v0.16b, #8
+ eor v3.16b, v1.16b, v3.16b
+ ext v1.16b, v1.16b, v1.16b, #12
+ ushr v4.4s, v3.4s, #7
+ shl v3.4s, v3.4s, #25
+ stp q1, q0, [x0, #32]
+ orr v3.16b, v3.16b, v4.16b
+ stp q2, q3, [x0]
+ ret
+.Lfunc_end1:
+ .size compress_pre, .Lfunc_end1-compress_pre
+ .cfi_endproc
+
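+/*
+ * The xof variant stores the full 64-byte extended output at x5:
+ * the two xor-folded state halves, followed by the upper state rows
+ * xor-ed with the input chaining value.
+ */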
+ .globl zfs_blake3_compress_xof_sse41
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse41,@function
+zfs_blake3_compress_xof_sse41:
+ .cfi_startproc
+ hint #25
+ CFI_NEGATE_RA_STATE
+ sub sp, sp, #96
+ stp x29, x30, [sp, #64]
+ add x29, sp, #64
+ stp x20, x19, [sp, #80]
+ .cfi_def_cfa w29, 32
+ .cfi_offset w19, -8
+ .cfi_offset w20, -16
+ .cfi_offset w30, -24
+ .cfi_offset w29, -32
+ mov x20, x0
+ mov x19, x5
+ mov w5, w4
+ mov x4, x3
+ mov w3, w2
+ mov x2, x1
+ mov x0, sp
+ mov x1, x20
+ bl compress_pre
+ ldp q0, q1, [sp]
+ ldp q2, q3, [sp, #32]
+ eor v0.16b, v2.16b, v0.16b
+ eor v1.16b, v3.16b, v1.16b
+ ldp x29, x30, [sp, #64]
+ stp q0, q1, [x19]
+ ldr q0, [x20]
+ eor v0.16b, v0.16b, v2.16b
+ str q0, [x19, #32]
+ ldr q0, [x20, #16]
+ eor v0.16b, v0.16b, v3.16b
+ str q0, [x19, #48]
+ ldp x20, x19, [sp, #80]
+ add sp, sp, #96
+ hint #29
+ ret
+.Lfunc_end2:
+ .size zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
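+/* .LCPI3_0 holds the lane offsets 0..3; they are masked by what looks
+   like the increment_counter flag and added to the chunk counter so
+   each of the four parallel lanes sees its own counter value */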
+.LCPI3_0:
+ .word 0
+ .word 1
+ .word 2
+ .word 3
+.LCPI3_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI3_2:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
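+/* .LCPI3_3: IV[0..3] again, as decimal words */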
+.LCPI3_3:
+ .word 1779033703
+ .word 3144134277
+ .word 1013904242
+ .word 2773480762
+ .text
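+/*
+ * hash_many processes whole inputs in parallel: while at least four
+ * remain they are hashed together, with the message words transposed
+ * across the four NEON lanes, and any leftovers fall through to the
+ * one-at-a-time loop at .LBB3_6.
+ */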
+ .globl zfs_blake3_hash_many_sse41
+ .p2align 2
+ .type zfs_blake3_hash_many_sse41,@function
+zfs_blake3_hash_many_sse41:
+ .cfi_startproc
+ hint #34
+ stp d15, d14, [sp, #-144]!
+ stp d13, d12, [sp, #16]
+ stp d11, d10, [sp, #32]
+ stp d9, d8, [sp, #48]
+ stp x29, x27, [sp, #64]
+ stp x26, x25, [sp, #80]
+ stp x24, x23, [sp, #96]
+ stp x22, x21, [sp, #112]
+ stp x20, x19, [sp, #128]
+ sub sp, sp, #368
+ .cfi_def_cfa_offset 512
+ .cfi_offset w19, -8
+ .cfi_offset w20, -16
+ .cfi_offset w21, -24
+ .cfi_offset w22, -32
+ .cfi_offset w23, -40
+ .cfi_offset w24, -48
+ .cfi_offset w25, -56
+ .cfi_offset w26, -64
+ .cfi_offset w27, -72
+ .cfi_offset w29, -80
+ .cfi_offset b8, -88
+ .cfi_offset b9, -96
+ .cfi_offset b10, -104
+ .cfi_offset b11, -112
+ .cfi_offset b12, -120
+ .cfi_offset b13, -128
+ .cfi_offset b14, -136
+ .cfi_offset b15, -144
+ ldr x8, [sp, #520]
+ adrp x11, .LCPI3_1
+ ldrb w9, [sp, #512]
+ adrp x10, .LCPI3_2
+ cmp x1, #4
+ b.lo .LBB3_6
+ adrp x12, .LCPI3_0
+ sbfx w13, w5, #0, #1
+ mov w15, #58983
+ mov w16, #44677
+ movk w15, #27145, lsl #16
+ movk w16, #47975, lsl #16
+ ldr q0, [x12, :lo12:.LCPI3_0]
+ dup v1.4s, w13
+ movi v13.4s, #64
+ mov w13, #62322
+ mov w14, #62778
+ orr w12, w7, w6
+ and v0.16b, v1.16b, v0.16b
+ ldr q1, [x11, :lo12:.LCPI3_1]
+ movk w13, #15470, lsl #16
+ movk w14, #42319, lsl #16
+ dup v14.4s, w15
+ stp q0, q1, [sp, #16]
+ orr v0.4s, #128, lsl #24
+ str q0, [sp]
+ dup v0.4s, w16
+ stp q0, q14, [sp, #48]
+ b .LBB3_3
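+/* End of a 4-way pass: zip the per-lane state words back into row
+   order and store one 32-byte chaining value per input */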
+.LBB3_2:
+ zip1 v0.4s, v29.4s, v8.4s
+ add x15, x4, #4
+ zip1 v1.4s, v30.4s, v31.4s
+ tst w5, #0x1
+ zip1 v2.4s, v24.4s, v18.4s
+ csel x4, x15, x4, ne
+ zip1 v3.4s, v25.4s, v26.4s
+ add x0, x0, #32
+ zip2 v6.4s, v29.4s, v8.4s
+ sub x1, x1, #4
+ zip1 v4.2d, v0.2d, v1.2d
+ cmp x1, #3
+ zip2 v7.4s, v30.4s, v31.4s
+ zip1 v5.2d, v2.2d, v3.2d
+ zip2 v0.2d, v0.2d, v1.2d
+ zip2 v1.2d, v2.2d, v3.2d
+ zip2 v2.4s, v24.4s, v18.4s
+ zip2 v3.4s, v25.4s, v26.4s
+ stp q4, q5, [x8]
+ zip2 v4.2d, v6.2d, v7.2d
+ stp q0, q1, [x8, #32]
+ zip1 v0.2d, v6.2d, v7.2d
+ zip1 v1.2d, v2.2d, v3.2d
+ zip2 v2.2d, v2.2d, v3.2d
+ stp q0, q1, [x8, #64]
+ stp q4, q2, [x8, #96]
+ add x8, x8, #128
+ b.ls .LBB3_6
+.LBB3_3:
+ mov x15, x3
+ add x16, x3, #8
+ add x17, x3, #12
+ add x19, x3, #16
+ add x20, x3, #20
+ ld1r { v29.4s }, [x15], #4
+ ld1r { v30.4s }, [x16]
+ add x16, x3, #24
+ ld1r { v31.4s }, [x17]
+ add x17, x3, #28
+ ld1r { v24.4s }, [x19]
+ ld1r { v18.4s }, [x20]
+ ld1r { v25.4s }, [x16]
+ ld1r { v8.4s }, [x15]
+ ld1r { v26.4s }, [x17]
+ cbz x2, .LBB3_2
+ ldr q1, [sp, #16]
+ dup v0.4s, w4
+ lsr x17, x4, #32
+ mov x15, xzr
+ ldp x19, x20, [x0, #16]
+ add v1.4s, v0.4s, v1.4s
+ mov x21, x2
+ movi v0.4s, #128, lsl #24
+ mov w26, w12
+ str q1, [sp, #96]
+ eor v0.16b, v1.16b, v0.16b
+ ldr q1, [sp]
+ cmgt v0.4s, v1.4s, v0.4s
+ dup v1.4s, w17
+ ldp x16, x17, [x0]
+ sub v0.4s, v1.4s, v0.4s
+ str q0, [sp, #80]
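+/* Inner loop: one 64-byte block per lane per iteration.  The four
+   blocks are loaded and zip-transposed into word vectors; on the
+   final block, csel or-s in what appears to be the end-of-chunk
+   flags byte */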
+.LBB3_5:
+ add x23, x16, x15
+ add x24, x17, x15
+ add x22, x19, x15
+ add x25, x20, x15
+ subs x21, x21, #1
+ add x15, x15, #64
+ ldp q1, q2, [x23]
+ csel w27, w9, wzr, eq
+ orr w26, w27, w26
+ and w26, w26, #0xff
+ ldp q4, q5, [x24]
+ dup v0.4s, w26
+ mov w26, w6
+ zip1 v22.4s, v1.4s, v4.4s
+ zip2 v20.4s, v1.4s, v4.4s
+ ldp q6, q7, [x22]
+ zip1 v17.4s, v2.4s, v5.4s
+ zip2 v23.4s, v2.4s, v5.4s
+ ldp q16, q21, [x25]
+ zip1 v19.4s, v6.4s, v16.4s
+ zip2 v1.4s, v6.4s, v16.4s
+ ldp q27, q28, [x23, #32]
+ zip1 v4.4s, v7.4s, v21.4s
+ zip2 v5.4s, v7.4s, v21.4s
+ zip2 v15.2d, v17.2d, v4.2d
+ ldp q9, q10, [x24, #32]
+ mov v17.d[1], v4.d[0]
+ add v4.4s, v30.4s, v25.4s
+ zip2 v11.2d, v23.2d, v5.2d
+ zip2 v3.4s, v27.4s, v9.4s
+ zip1 v7.4s, v27.4s, v9.4s
+ ldp q12, q6, [x22, #32]
+ mov v23.d[1], v5.d[0]
+ stp q11, q3, [sp, #256]
+ add v5.4s, v31.4s, v26.4s
+ add v4.4s, v4.4s, v17.4s
+ str q23, [sp, #352]
+ ldp q16, q2, [x25, #32]
+ add v5.4s, v5.4s, v23.4s
+ zip1 v3.4s, v12.4s, v16.4s
+ eor v0.16b, v5.16b, v0.16b
+ zip1 v9.4s, v6.4s, v2.4s
+ zip2 v2.4s, v6.4s, v2.4s
+ stp q7, q3, [sp, #208]
+ zip2 v3.4s, v12.4s, v16.4s
+ zip1 v12.4s, v28.4s, v10.4s
+ zip2 v10.4s, v28.4s, v10.4s
+ stp q17, q2, [sp, #160]
+ zip2 v28.2d, v22.2d, v19.2d
+ mov v22.d[1], v19.d[0]
+ str q3, [sp, #240]
+ add v2.4s, v8.4s, v18.4s
+ eor v16.16b, v4.16b, v13.16b
+ dup v17.4s, w13
+ mov v3.16b, v22.16b
+ stp q22, q28, [sp, #320]
+ zip2 v22.2d, v20.2d, v1.2d
+ mov v20.d[1], v1.d[0]
+ add v1.4s, v29.4s, v24.4s
+ add v4.4s, v4.4s, v15.4s
+ add v5.4s, v5.4s, v11.4s
+ add v2.4s, v2.4s, v20.4s
+ stp q15, q20, [sp, #288]
+ add v1.4s, v1.4s, v3.4s
+ ldr q3, [sp, #96]
+ dup v20.4s, w14
+ mov v23.16b, v22.16b
+ mov v15.16b, v10.16b
+ eor v6.16b, v1.16b, v3.16b
+ ldr q3, [sp, #80]
+ add v1.4s, v1.4s, v28.4s
+ ldr q28, [sp, #272]
+ str q23, [sp, #128]
+ eor v7.16b, v2.16b, v3.16b
+ ldp q27, q3, [sp, #32]
+ add v2.4s, v2.4s, v22.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v7.16b, { v7.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ tbl v0.16b, { v0.16b }, v27.16b
+ add v19.4s, v6.4s, v14.4s
+ add v21.4s, v7.4s, v3.4s
+ add v30.4s, v16.4s, v17.4s
+ add v31.4s, v0.4s, v20.4s
+ eor v24.16b, v19.16b, v24.16b
+ eor v17.16b, v21.16b, v18.16b
+ ushr v18.4s, v24.4s, #12
+ shl v20.4s, v24.4s, #20
+ eor v24.16b, v30.16b, v25.16b
+ eor v25.16b, v31.16b, v26.16b
+ ushr v26.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ ushr v29.4s, v24.4s, #12
+ shl v24.4s, v24.4s, #20
+ ushr v8.4s, v25.4s, #12
+ shl v25.4s, v25.4s, #20
+ orr v3.16b, v20.16b, v18.16b
+ ldr q18, [x10, :lo12:.LCPI3_2]
+ orr v13.16b, v17.16b, v26.16b
+ orr v24.16b, v24.16b, v29.16b
+ orr v14.16b, v25.16b, v8.16b
+ add v8.4s, v1.4s, v3.4s
+ add v29.4s, v2.4s, v13.4s
+ add v17.4s, v4.4s, v24.4s
+ add v20.4s, v5.4s, v14.4s
+ eor v1.16b, v6.16b, v8.16b
+ eor v2.16b, v7.16b, v29.16b
+ eor v4.16b, v16.16b, v17.16b
+ eor v0.16b, v0.16b, v20.16b
+ tbl v25.16b, { v1.16b }, v18.16b
+ tbl v16.16b, { v2.16b }, v18.16b
+ tbl v6.16b, { v4.16b }, v18.16b
+ tbl v4.16b, { v0.16b }, v18.16b
+ add v19.4s, v19.4s, v25.4s
+ add v21.4s, v21.4s, v16.4s
+ add v26.4s, v30.4s, v6.4s
+ add v7.4s, v31.4s, v4.4s
+ eor v0.16b, v19.16b, v3.16b
+ eor v1.16b, v21.16b, v13.16b
+ eor v2.16b, v26.16b, v24.16b
+ eor v3.16b, v7.16b, v14.16b
+ ushr v5.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v24.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ushr v30.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ orr v5.16b, v0.16b, v5.16b
+ orr v0.16b, v1.16b, v24.16b
+ ushr v31.4s, v3.4s, #7
+ orr v2.16b, v2.16b, v30.16b
+ ldp q24, q30, [sp, #208]
+ shl v3.4s, v3.4s, #25
+ zip2 v14.2d, v12.2d, v9.2d
+ mov v22.16b, v24.16b
+ orr v1.16b, v3.16b, v31.16b
+ zip2 v3.2d, v24.2d, v30.2d
+ mov v24.16b, v28.16b
+ mov v22.d[1], v30.d[0]
+ ldr q30, [sp, #240]
+ mov v31.16b, v12.16b
+ stp q22, q14, [sp, #224]
+ mov v24.d[1], v30.d[0]
+ add v12.4s, v8.4s, v22.4s
+ mov v31.d[1], v9.d[0]
+ add v22.4s, v29.4s, v24.4s
+ ldr q29, [sp, #176]
+ zip2 v28.2d, v28.2d, v30.2d
+ mov v9.16b, v24.16b
+ mov v15.d[1], v29.d[0]
+ zip2 v8.2d, v10.2d, v29.2d
+ add v10.4s, v12.4s, v0.4s
+ add v22.4s, v22.4s, v2.4s
+ str q9, [sp, #144]
+ add v20.4s, v20.4s, v15.4s
+ add v17.4s, v17.4s, v31.4s
+ stp q3, q8, [sp, #192]
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v27.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v10.4s, v10.4s, v3.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v22.4s, v22.4s, v28.4s
+ ushr v12.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v13.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v8.4s
+ orr v1.16b, v1.16b, v12.16b
+ add v17.4s, v17.4s, v14.4s
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v18.16b
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v22.4s, v22.4s, v23.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v17.4s, v17.4s, v11.4s
+ mov v30.16b, v28.16b
+ mov v28.16b, v23.16b
+ ldr q23, [sp, #304]
+ ushr v12.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v22.4s, v22.4s, v0.4s
+ mov v29.16b, v31.16b
+ ldr q31, [sp, #160]
+ orr v5.16b, v5.16b, v13.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v23.4s
+ orr v1.16b, v1.16b, v12.16b
+ str q29, [sp, #272]
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v31.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v27.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v5.16b, v19.16b
+ add v22.4s, v22.4s, v24.4s
+ ldr q24, [sp, #320]
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v17.4s, v17.4s, v24.4s
+ ldr q24, [sp, #352]
+ ushr v13.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v22.4s, v22.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v24.4s
+ ldr q24, [sp, #336]
+ orr v1.16b, v1.16b, v13.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v14.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v18.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v19.16b, v5.16b
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ add v10.4s, v10.4s, v24.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v22.4s, v22.4s, v29.4s
+ ushr v13.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v8.4s
+ ldr q8, [sp, #288]
+ orr v1.16b, v1.16b, v13.16b
+ add v17.4s, v17.4s, v3.4s
+ ldr q3, [sp, #352]
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v27.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v10.4s, v10.4s, v30.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v22.4s, v22.4s, v8.4s
+ mov v24.16b, v30.16b
+ mov v30.16b, v15.16b
+ add v17.4s, v17.4s, v15.4s
+ ldr q15, [sp, #224]
+ ushr v12.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v10.4s, v10.4s, v0.4s
+ str q30, [sp, #176]
+ orr v5.16b, v5.16b, v13.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v15.4s
+ orr v1.16b, v1.16b, v12.16b
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v18.16b
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v22.4s, v22.4s, v9.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v17.4s, v17.4s, v14.4s
+ ushr v12.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v22.4s, v22.4s, v0.4s
+ orr v5.16b, v5.16b, v13.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v28.4s
+ orr v1.16b, v1.16b, v12.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v11.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v27.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v5.16b, v19.16b
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ add v22.4s, v22.4s, v29.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v17.4s, v17.4s, v23.4s
+ ushr v13.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v22.4s, v22.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v31.4s
+ orr v1.16b, v1.16b, v13.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v30.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v18.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v19.16b, v5.16b
+ add v10.4s, v10.4s, v3.4s
+ ldr q3, [sp, #192]
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v22.4s, v22.4s, v3.4s
+ ushr v13.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v15.4s
+ ldr q15, [sp, #128]
+ orr v1.16b, v1.16b, v13.16b
+ add v17.4s, v17.4s, v24.4s
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v27.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v21.16b, v5.16b
+ ldp q23, q11, [sp, #320]
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v10.4s, v10.4s, v8.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v22.4s, v22.4s, v23.4s
+ ushr v12.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v10.4s, v10.4s, v0.4s
+ mov v28.16b, v31.16b
+ mov v31.16b, v8.16b
+ ldr q8, [sp, #208]
+ orr v5.16b, v5.16b, v13.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v11.4s
+ orr v1.16b, v1.16b, v12.16b
+ add v17.4s, v17.4s, v8.4s
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v18.16b
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v22.4s, v22.4s, v29.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v17.4s, v17.4s, v30.4s
+ ushr v12.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v22.4s, v22.4s, v0.4s
+ orr v5.16b, v5.16b, v13.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v9.4s
+ orr v1.16b, v1.16b, v12.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v14.4s
+ ldr q14, [sp, #256]
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v27.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v5.16b, v19.16b
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ add v22.4s, v22.4s, v3.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v17.4s, v17.4s, v15.4s
+ ushr v13.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v22.4s, v22.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v14.4s
+ orr v1.16b, v1.16b, v13.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v8.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v18.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v19.16b, v5.16b
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ add v10.4s, v10.4s, v28.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v22.4s, v22.4s, v24.4s
+ ushr v13.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v11.4s
+ ldr q11, [sp, #304]
+ orr v1.16b, v1.16b, v13.16b
+ add v17.4s, v17.4s, v31.4s
+ ldr q31, [sp, #224]
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v27.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v10.4s, v10.4s, v23.4s
+ ldr q23, [sp, #240]
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v22.4s, v22.4s, v11.4s
+ mov v30.16b, v8.16b
+ mov v8.16b, v24.16b
+ ldr q24, [sp, #352]
+ ushr v12.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v13.16b
+ str q8, [sp, #112]
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v24.4s
+ orr v1.16b, v1.16b, v12.16b
+ add v17.4s, v17.4s, v31.4s
+ eor v4.16b, v4.16b, v10.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v18.16b
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ mov v29.16b, v3.16b
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v21.16b, v5.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v22.4s, v22.4s, v29.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v17.4s, v17.4s, v30.4s
+ ldr q30, [sp, #272]
+ ushr v12.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v22.4s, v22.4s, v0.4s
+ mov v3.16b, v28.16b
+ ldr q28, [sp, #176]
+ orr v5.16b, v5.16b, v13.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v30.4s
+ orr v1.16b, v1.16b, v12.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v28.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v27.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v5.16b, v19.16b
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ add v22.4s, v22.4s, v8.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v17.4s, v17.4s, v9.4s
+ ldr q9, [sp, #320]
+ ushr v13.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v22.4s, v22.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v17.4s, v17.4s, v2.4s
+ add v10.4s, v10.4s, v23.4s
+ orr v1.16b, v1.16b, v13.16b
+ eor v16.16b, v16.16b, v22.16b
+ add v20.4s, v20.4s, v31.4s
+ eor v6.16b, v6.16b, v17.16b
+ add v10.4s, v10.4s, v5.4s
+ tbl v16.16b, { v16.16b }, v18.16b
+ add v20.4s, v20.4s, v1.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ eor v25.16b, v25.16b, v10.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v4.16b, v4.16b, v20.16b
+ add v26.4s, v26.4s, v6.4s
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v0.16b, v21.16b, v0.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v2.16b, v26.16b, v2.16b
+ add v19.4s, v19.4s, v25.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ add v7.4s, v7.4s, v4.4s
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v5.16b, v19.16b, v5.16b
+ add v10.4s, v10.4s, v14.4s
+ ldr q14, [sp, #288]
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v7.16b, v1.16b
+ orr v2.16b, v2.16b, v13.16b
+ ushr v12.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v22.4s, v22.4s, v14.4s
+ ushr v13.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v12.16b
+ add v22.4s, v22.4s, v2.4s
+ add v20.4s, v20.4s, v24.4s
+ orr v1.16b, v1.16b, v13.16b
+ eor v4.16b, v4.16b, v10.16b
+ add v17.4s, v17.4s, v9.4s
+ eor v25.16b, v25.16b, v22.16b
+ add v20.4s, v20.4s, v5.4s
+ tbl v4.16b, { v4.16b }, v27.16b
+ add v17.4s, v17.4s, v1.4s
+ tbl v25.16b, { v25.16b }, v27.16b
+ eor v6.16b, v6.16b, v20.16b
+ add v26.4s, v26.4s, v4.4s
+ eor v16.16b, v16.16b, v17.16b
+ add v7.4s, v7.4s, v25.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ eor v0.16b, v26.16b, v0.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v2.16b, v7.16b, v2.16b
+ add v21.4s, v21.4s, v6.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ add v19.4s, v19.4s, v16.4s
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ eor v5.16b, v21.16b, v5.16b
+ orr v0.16b, v0.16b, v12.16b
+ eor v1.16b, v19.16b, v1.16b
+ add v10.4s, v10.4s, v11.4s
+ orr v2.16b, v2.16b, v13.16b
+ ushr v13.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ ushr v12.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v10.4s, v10.4s, v0.4s
+ add v22.4s, v22.4s, v15.4s
+ orr v5.16b, v5.16b, v13.16b
+ add v20.4s, v20.4s, v3.4s
+ mov v24.16b, v3.16b
+ ldr q3, [sp, #336]
+ orr v1.16b, v1.16b, v12.16b
+ eor v4.16b, v4.16b, v10.16b
+ add v22.4s, v22.4s, v2.4s
+ add v17.4s, v17.4s, v3.4s
+ add v20.4s, v20.4s, v5.4s
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v25.16b, v25.16b, v22.16b
+ add v17.4s, v17.4s, v1.4s
+ eor v6.16b, v6.16b, v20.16b
+ add v26.4s, v26.4s, v4.4s
+ tbl v25.16b, { v25.16b }, v18.16b
+ eor v16.16b, v16.16b, v17.16b
+ tbl v6.16b, { v6.16b }, v18.16b
+ eor v0.16b, v26.16b, v0.16b
+ add v7.4s, v7.4s, v25.4s
+ tbl v16.16b, { v16.16b }, v18.16b
+ add v21.4s, v21.4s, v6.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v2.16b, v7.16b, v2.16b
+ add v19.4s, v19.4s, v16.4s
+ eor v5.16b, v21.16b, v5.16b
+ orr v0.16b, v0.16b, v12.16b
+ ushr v12.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ eor v1.16b, v19.16b, v1.16b
+ ushr v13.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v22.4s, v22.4s, v8.4s
+ orr v2.16b, v2.16b, v12.16b
+ ushr v12.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ orr v5.16b, v5.16b, v13.16b
+ add v22.4s, v22.4s, v0.4s
+ add v10.4s, v10.4s, v29.4s
+ ldr q29, [sp, #208]
+ add v17.4s, v17.4s, v31.4s
+ orr v1.16b, v1.16b, v12.16b
+ add v20.4s, v20.4s, v29.4s
+ eor v16.16b, v16.16b, v22.16b
+ add v10.4s, v10.4s, v5.4s
+ add v17.4s, v17.4s, v2.4s
+ add v20.4s, v20.4s, v1.4s
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v25.16b, v25.16b, v10.16b
+ eor v6.16b, v6.16b, v17.16b
+ eor v4.16b, v4.16b, v20.16b
+ add v21.4s, v21.4s, v16.4s
+ tbl v25.16b, { v25.16b }, v27.16b
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ eor v0.16b, v21.16b, v0.16b
+ add v19.4s, v19.4s, v25.4s
+ add v26.4s, v26.4s, v6.4s
+ add v7.4s, v7.4s, v4.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v5.16b, v5.16b, v19.16b
+ eor v2.16b, v26.16b, v2.16b
+ eor v1.16b, v7.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ ushr v12.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v22.4s, v22.4s, v14.4s
+ mov v8.16b, v31.16b
+ ushr v13.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ mov v31.16b, v14.16b
+ ushr v14.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ orr v5.16b, v5.16b, v12.16b
+ add v22.4s, v22.4s, v0.4s
+ add v10.4s, v10.4s, v28.4s
+ ldr q28, [sp, #352]
+ orr v2.16b, v2.16b, v13.16b
+ orr v1.16b, v1.16b, v14.16b
+ add v17.4s, v17.4s, v30.4s
+ add v20.4s, v20.4s, v3.4s
+ eor v16.16b, v16.16b, v22.16b
+ add v10.4s, v10.4s, v5.4s
+ add v17.4s, v17.4s, v2.4s
+ add v20.4s, v20.4s, v1.4s
+ tbl v16.16b, { v16.16b }, v18.16b
+ eor v25.16b, v25.16b, v10.16b
+ eor v6.16b, v6.16b, v17.16b
+ eor v4.16b, v4.16b, v20.16b
+ add v21.4s, v21.4s, v16.4s
+ tbl v25.16b, { v25.16b }, v18.16b
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v0.16b, v21.16b, v0.16b
+ add v19.4s, v19.4s, v25.4s
+ add v26.4s, v26.4s, v6.4s
+ add v7.4s, v7.4s, v4.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v5.16b, v19.16b, v5.16b
+ eor v2.16b, v26.16b, v2.16b
+ eor v1.16b, v7.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ ushr v12.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ add v10.4s, v10.4s, v23.4s
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ushr v14.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ orr v5.16b, v5.16b, v12.16b
+ add v10.4s, v10.4s, v0.4s
+ add v20.4s, v20.4s, v24.4s
+ ldr q24, [sp, #144]
+ orr v2.16b, v2.16b, v13.16b
+ orr v1.16b, v1.16b, v14.16b
+ add v22.4s, v22.4s, v9.4s
+ add v17.4s, v17.4s, v11.4s
+ eor v4.16b, v4.16b, v10.16b
+ add v20.4s, v20.4s, v5.4s
+ add v22.4s, v22.4s, v2.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v27.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v25.16b, v25.16b, v22.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ eor v0.16b, v26.16b, v0.16b
+ add v21.4s, v21.4s, v6.4s
+ add v7.4s, v7.4s, v25.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ eor v5.16b, v21.16b, v5.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ add v10.4s, v10.4s, v15.4s
+ ushr v14.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ mov v30.16b, v3.16b
+ ldr q3, [sp, #256]
+ ushr v12.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ushr v13.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ add v10.4s, v10.4s, v0.4s
+ orr v5.16b, v5.16b, v14.16b
+ add v20.4s, v20.4s, v3.4s
+ orr v2.16b, v2.16b, v12.16b
+ orr v1.16b, v1.16b, v13.16b
+ add v22.4s, v22.4s, v24.4s
+ add v17.4s, v17.4s, v28.4s
+ eor v4.16b, v4.16b, v10.16b
+ add v20.4s, v20.4s, v5.4s
+ add v22.4s, v22.4s, v2.4s
+ add v17.4s, v17.4s, v1.4s
+ tbl v4.16b, { v4.16b }, v18.16b
+ eor v6.16b, v6.16b, v20.16b
+ eor v25.16b, v25.16b, v22.16b
+ eor v16.16b, v16.16b, v17.16b
+ add v26.4s, v26.4s, v4.4s
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v25.16b, { v25.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ eor v0.16b, v26.16b, v0.16b
+ add v21.4s, v21.4s, v6.4s
+ add v7.4s, v7.4s, v25.4s
+ add v19.4s, v19.4s, v16.4s
+ ushr v12.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ eor v5.16b, v21.16b, v5.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v1.16b, v19.16b, v1.16b
+ orr v0.16b, v0.16b, v12.16b
+ ushr v12.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ mov v23.16b, v9.16b
+ ldr q9, [sp, #112]
+ ushr v13.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ushr v14.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ orr v5.16b, v5.16b, v12.16b
+ add v9.4s, v10.4s, v9.4s
+ orr v2.16b, v2.16b, v13.16b
+ orr v1.16b, v1.16b, v14.16b
+ ldr q14, [sp, #64]
+ add v22.4s, v22.4s, v31.4s
+ add v17.4s, v17.4s, v30.4s
+ add v20.4s, v20.4s, v8.4s
+ add v9.4s, v9.4s, v5.4s
+ add v22.4s, v22.4s, v0.4s
+ add v17.4s, v17.4s, v2.4s
+ add v20.4s, v20.4s, v1.4s
+ eor v25.16b, v25.16b, v9.16b
+ eor v16.16b, v16.16b, v22.16b
+ eor v6.16b, v6.16b, v17.16b
+ eor v4.16b, v4.16b, v20.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ tbl v6.16b, { v6.16b }, v27.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ add v19.4s, v19.4s, v25.4s
+ add v21.4s, v21.4s, v16.4s
+ add v26.4s, v26.4s, v6.4s
+ add v7.4s, v7.4s, v4.4s
+ eor v5.16b, v5.16b, v19.16b
+ eor v0.16b, v21.16b, v0.16b
+ eor v2.16b, v26.16b, v2.16b
+ eor v1.16b, v7.16b, v1.16b
+ ushr v30.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ ushr v10.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ ushr v12.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ushr v13.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ orr v5.16b, v5.16b, v30.16b
+ add v30.4s, v9.4s, v29.4s
+ add v22.4s, v22.4s, v23.4s
+ ldr q23, [sp, #192]
+ orr v0.16b, v0.16b, v10.16b
+ orr v2.16b, v2.16b, v12.16b
+ orr v1.16b, v1.16b, v13.16b
+ add v17.4s, v17.4s, v23.4s
+ add v20.4s, v20.4s, v28.4s
+ add v23.4s, v30.4s, v5.4s
+ add v22.4s, v22.4s, v0.4s
+ add v17.4s, v17.4s, v2.4s
+ add v20.4s, v20.4s, v1.4s
+ eor v25.16b, v25.16b, v23.16b
+ eor v16.16b, v16.16b, v22.16b
+ eor v6.16b, v6.16b, v17.16b
+ eor v4.16b, v4.16b, v20.16b
+ tbl v25.16b, { v25.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ tbl v6.16b, { v6.16b }, v18.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ add v19.4s, v19.4s, v25.4s
+ add v21.4s, v21.4s, v16.4s
+ add v26.4s, v26.4s, v6.4s
+ add v7.4s, v7.4s, v4.4s
+ eor v5.16b, v19.16b, v5.16b
+ eor v0.16b, v21.16b, v0.16b
+ eor v2.16b, v26.16b, v2.16b
+ eor v1.16b, v7.16b, v1.16b
+ ushr v28.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ ushr v30.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v31.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ushr v8.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ orr v5.16b, v5.16b, v28.16b
+ ldr q28, [sp, #176]
+ orr v0.16b, v0.16b, v30.16b
+ orr v2.16b, v2.16b, v31.16b
+ orr v1.16b, v1.16b, v8.16b
+ add v23.4s, v23.4s, v28.4s
+ add v22.4s, v22.4s, v11.4s
+ add v17.4s, v17.4s, v15.4s
+ add v20.4s, v20.4s, v3.4s
+ ldr q3, [sp, #272]
+ add v23.4s, v23.4s, v0.4s
+ add v22.4s, v22.4s, v2.4s
+ add v17.4s, v17.4s, v1.4s
+ add v20.4s, v20.4s, v5.4s
+ eor v4.16b, v4.16b, v23.16b
+ eor v25.16b, v25.16b, v22.16b
+ eor v16.16b, v16.16b, v17.16b
+ eor v6.16b, v6.16b, v20.16b
+ tbl v4.16b, { v4.16b }, v27.16b
+ tbl v25.16b, { v25.16b }, v27.16b
+ tbl v16.16b, { v16.16b }, v27.16b
+ tbl v6.16b, { v6.16b }, v27.16b
+ add v26.4s, v26.4s, v4.4s
+ add v7.4s, v7.4s, v25.4s
+ add v19.4s, v19.4s, v16.4s
+ add v21.4s, v21.4s, v6.4s
+ eor v0.16b, v26.16b, v0.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v1.16b, v19.16b, v1.16b
+ eor v5.16b, v21.16b, v5.16b
+ add v3.4s, v22.4s, v3.4s
+ ldr q22, [sp, #160]
+ ushr v28.4s, v0.4s, #12
+ shl v0.4s, v0.4s, #20
+ ushr v29.4s, v2.4s, #12
+ shl v2.4s, v2.4s, #20
+ ushr v30.4s, v1.4s, #12
+ shl v1.4s, v1.4s, #20
+ ushr v31.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ add v17.4s, v17.4s, v22.4s
+ ldr q22, [sp, #240]
+ orr v0.16b, v0.16b, v28.16b
+ prfm pldl1keep, [x23, #256]
+ orr v2.16b, v2.16b, v29.16b
+ prfm pldl1keep, [x24, #256]
+ orr v1.16b, v1.16b, v30.16b
+ prfm pldl1keep, [x22, #256]
+ orr v5.16b, v5.16b, v31.16b
+ prfm pldl1keep, [x25, #256]
+ add v23.4s, v23.4s, v24.4s
+ add v20.4s, v20.4s, v22.4s
+ add v3.4s, v3.4s, v2.4s
+ add v17.4s, v17.4s, v1.4s
+ add v22.4s, v23.4s, v0.4s
+ add v20.4s, v20.4s, v5.4s
+ eor v23.16b, v25.16b, v3.16b
+ eor v16.16b, v16.16b, v17.16b
+ eor v4.16b, v4.16b, v22.16b
+ eor v6.16b, v6.16b, v20.16b
+ tbl v23.16b, { v23.16b }, v18.16b
+ tbl v16.16b, { v16.16b }, v18.16b
+ tbl v4.16b, { v4.16b }, v18.16b
+ tbl v6.16b, { v6.16b }, v18.16b
+ add v7.4s, v7.4s, v23.4s
+ add v19.4s, v19.4s, v16.4s
+ add v18.4s, v26.4s, v4.4s
+ add v21.4s, v21.4s, v6.4s
+ eor v2.16b, v7.16b, v2.16b
+ eor v1.16b, v19.16b, v1.16b
+ eor v0.16b, v18.16b, v0.16b
+ eor v5.16b, v21.16b, v5.16b
+ ushr v25.4s, v2.4s, #7
+ shl v2.4s, v2.4s, #25
+ ushr v24.4s, v0.4s, #7
+ shl v0.4s, v0.4s, #25
+ ushr v26.4s, v1.4s, #7
+ shl v1.4s, v1.4s, #25
+ ushr v27.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ orr v0.16b, v0.16b, v24.16b
+ orr v2.16b, v2.16b, v25.16b
+ orr v1.16b, v1.16b, v26.16b
+ orr v5.16b, v5.16b, v27.16b
+ movi v13.4s, #64
+ eor v29.16b, v19.16b, v22.16b
+ eor v8.16b, v21.16b, v3.16b
+ eor v30.16b, v17.16b, v18.16b
+ eor v31.16b, v20.16b, v7.16b
+ eor v24.16b, v5.16b, v23.16b
+ eor v18.16b, v0.16b, v16.16b
+ eor v25.16b, v2.16b, v6.16b
+ eor v26.16b, v1.16b, v4.16b
+ cbnz x21, .LBB3_5
+ b .LBB3_2
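+/* Serial tail: the last one to three inputs are hashed individually,
+   with the compression rounds inlined here rather than calling out
+   to compress_pre as the SSE2 variant does */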
+.LBB3_6:
+ cbz x1, .LBB3_14
+ adrp x12, .LCPI3_3
+ ldr q0, [x11, :lo12:.LCPI3_1]
+ orr w11, w7, w6
+ ldr q2, [x10, :lo12:.LCPI3_2]
+ ldr q1, [x12, :lo12:.LCPI3_3]
+ and x12, x5, #0x1
+.LBB3_8:
+ movi v3.4s, #64
+ lsr x13, x4, #32
+ ldp q5, q4, [x3]
+ mov x15, x2
+ mov w14, w11
+ mov v3.s[0], w4
+ ldr x10, [x0]
+ mov v3.s[1], w13
+ b .LBB3_11
+.LBB3_9:
+ orr w14, w14, w9
+.LBB3_10:
+ ldp q6, q7, [x10]
+ mov v16.16b, v3.16b
+ and w14, w14, #0xff
+ add v5.4s, v5.4s, v4.4s
+ mov x15, x13
+ mov v16.s[3], w14
+ add x14, x10, #32
+ uzp1 v17.4s, v6.4s, v7.4s
+ add x10, x10, #64
+ add v5.4s, v5.4s, v17.4s
+ eor v16.16b, v5.16b, v16.16b
+ tbl v16.16b, { v16.16b }, v0.16b
+ add v18.4s, v16.4s, v1.4s
+ eor v19.16b, v18.16b, v4.16b
+ uzp2 v4.4s, v6.4s, v7.4s
+ ushr v6.4s, v19.4s, #12
+ shl v7.4s, v19.4s, #20
+ ld2 { v19.4s, v20.4s }, [x14]
+ add v5.4s, v5.4s, v4.4s
+ mov w14, w6
+ orr v6.16b, v7.16b, v6.16b
+ add v5.4s, v5.4s, v6.4s
+ eor v7.16b, v16.16b, v5.16b
+ add v5.4s, v5.4s, v19.4s
+ tbl v7.16b, { v7.16b }, v2.16b
+ ext v5.16b, v5.16b, v5.16b, #12
+ add v16.4s, v18.4s, v7.4s
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v6.16b, v6.16b, v16.16b
+ ext v16.16b, v16.16b, v16.16b, #4
+ ushr v18.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ orr v6.16b, v6.16b, v18.16b
+ ext v18.16b, v20.16b, v20.16b, #12
+ add v5.4s, v5.4s, v6.4s
+ eor v7.16b, v5.16b, v7.16b
+ add v5.4s, v5.4s, v18.4s
+ tbl v7.16b, { v7.16b }, v0.16b
+ add v16.4s, v16.4s, v7.4s
+ eor v6.16b, v6.16b, v16.16b
+ ushr v21.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ orr v6.16b, v6.16b, v21.16b
+ uzp1 v21.4s, v17.4s, v17.4s
+ add v5.4s, v5.4s, v6.4s
+ ext v21.16b, v21.16b, v17.16b, #8
+ eor v7.16b, v7.16b, v5.16b
+ uzp2 v21.4s, v21.4s, v4.4s
+ tbl v7.16b, { v7.16b }, v2.16b
+ add v5.4s, v5.4s, v21.4s
+ add v16.4s, v16.4s, v7.4s
+ ext v5.16b, v5.16b, v5.16b, #4
+ ext v7.16b, v7.16b, v7.16b, #8
+ eor v6.16b, v6.16b, v16.16b
+ ushr v22.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ orr v6.16b, v6.16b, v22.16b
+ add v22.4s, v5.4s, v6.4s
+ eor v5.16b, v22.16b, v7.16b
+ ext v7.16b, v16.16b, v16.16b, #12
+ tbl v16.16b, { v5.16b }, v0.16b
+ ext v5.16b, v17.16b, v17.16b, #12
+ add v7.4s, v7.4s, v16.4s
+ ext v5.16b, v17.16b, v5.16b, #12
+ ext v17.16b, v19.16b, v19.16b, #12
+ mov v19.16b, v18.16b
+ eor v6.16b, v6.16b, v7.16b
+ rev64 v5.4s, v5.4s
+ mov v19.s[1], v17.s[2]
+ ushr v20.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ trn2 v5.4s, v5.4s, v19.4s
+ orr v6.16b, v6.16b, v20.16b
+ zip1 v20.2d, v18.2d, v4.2d
+ zip2 v4.4s, v4.4s, v18.4s
+ add v19.4s, v6.4s, v5.4s
+ mov v20.s[3], v17.s[3]
+ add v19.4s, v19.4s, v22.4s
+ ext v22.16b, v20.16b, v20.16b, #12
+ eor v16.16b, v16.16b, v19.16b
+ ext v19.16b, v19.16b, v19.16b, #12
+ tbl v16.16b, { v16.16b }, v2.16b
+ add v7.4s, v7.4s, v16.4s
+ ext v16.16b, v16.16b, v16.16b, #8
+ eor v6.16b, v6.16b, v7.16b
+ ext v7.16b, v7.16b, v7.16b, #4
+ ushr v23.4s, v6.4s, #7
+ shl v24.4s, v6.4s, #25
+ uzp1 v6.4s, v20.4s, v22.4s
+ orr v20.16b, v24.16b, v23.16b
+ add v22.4s, v20.4s, v6.4s
+ add v19.4s, v22.4s, v19.4s
+ eor v16.16b, v19.16b, v16.16b
+ tbl v16.16b, { v16.16b }, v0.16b
+ add v7.4s, v7.4s, v16.4s
+ eor v18.16b, v20.16b, v7.16b
+ zip1 v20.4s, v4.4s, v17.4s
+ zip1 v4.4s, v17.4s, v4.4s
+ ushr v17.4s, v18.4s, #12
+ shl v18.4s, v18.4s, #20
+ ext v20.16b, v4.16b, v20.16b, #8
+ orr v4.16b, v18.16b, v17.16b
+ ext v18.16b, v21.16b, v21.16b, #4
+ add v17.4s, v4.4s, v20.4s
+ add v17.4s, v17.4s, v19.4s
+ uzp1 v19.4s, v18.4s, v18.4s
+ eor v16.16b, v16.16b, v17.16b
+ ext v19.16b, v19.16b, v18.16b, #8
+ tbl v16.16b, { v16.16b }, v2.16b
+ uzp2 v19.4s, v19.4s, v5.4s
+ add v7.4s, v7.4s, v16.4s
+ add v17.4s, v17.4s, v19.4s
+ ext v16.16b, v16.16b, v16.16b, #8
+ eor v4.16b, v4.16b, v7.16b
+ ext v17.16b, v17.16b, v17.16b, #4
+ ext v7.16b, v7.16b, v7.16b, #12
+ ushr v21.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ orr v4.16b, v4.16b, v21.16b
+ ext v21.16b, v18.16b, v18.16b, #12
+ add v17.4s, v17.4s, v4.4s
+ ext v18.16b, v18.16b, v21.16b, #12
+ mov v21.16b, v20.16b
+ eor v16.16b, v17.16b, v16.16b
+ rev64 v18.4s, v18.4s
+ mov v21.s[1], v6.s[2]
+ tbl v16.16b, { v16.16b }, v0.16b
+ add v7.4s, v7.4s, v16.4s
+ eor v4.16b, v4.16b, v7.16b
+ ushr v22.4s, v4.4s, #12
+ shl v23.4s, v4.4s, #20
+ trn2 v4.4s, v18.4s, v21.4s
+ orr v18.16b, v23.16b, v22.16b
+ add v21.4s, v18.4s, v4.4s
+ add v17.4s, v21.4s, v17.4s
+ zip1 v21.2d, v20.2d, v5.2d
+ zip2 v5.4s, v5.4s, v20.4s
+ eor v16.16b, v16.16b, v17.16b
+ mov v21.s[3], v6.s[3]
+ ext v17.16b, v17.16b, v17.16b, #12
+ zip1 v20.4s, v5.4s, v6.4s
+ tbl v16.16b, { v16.16b }, v2.16b
+ zip1 v5.4s, v6.4s, v5.4s
+ add v22.4s, v7.4s, v16.4s
+ ext v16.16b, v16.16b, v16.16b, #8
+ ext v20.16b, v5.16b, v20.16b, #8
+ eor v7.16b, v18.16b, v22.16b
+ ext v18.16b, v21.16b, v21.16b, #12
+ ushr v23.4s, v7.4s, #7
+ shl v24.4s, v7.4s, #25
+ uzp1 v7.4s, v21.4s, v18.4s
+ orr v18.16b, v24.16b, v23.16b
+ add v21.4s, v18.4s, v7.4s
+ add v17.4s, v21.4s, v17.4s
+ ext v21.16b, v22.16b, v22.16b, #4
+ eor v16.16b, v17.16b, v16.16b
+ tbl v16.16b, { v16.16b }, v0.16b
+ add v21.4s, v21.4s, v16.4s
+ eor v18.16b, v18.16b, v21.16b
+ ushr v6.4s, v18.4s, #12
+ shl v18.4s, v18.4s, #20
+ orr v5.16b, v18.16b, v6.16b
+ add v6.4s, v5.4s, v20.4s
+ add v6.4s, v6.4s, v17.4s
+ ext v17.16b, v19.16b, v19.16b, #4
+ eor v16.16b, v16.16b, v6.16b
+ uzp1 v18.4s, v17.4s, v17.4s
+ tbl v16.16b, { v16.16b }, v2.16b
+ ext v18.16b, v18.16b, v17.16b, #8
+ add v19.4s, v21.4s, v16.4s
+ uzp2 v18.4s, v18.4s, v4.4s
+ ext v16.16b, v16.16b, v16.16b, #8
+ eor v5.16b, v5.16b, v19.16b
+ add v6.4s, v6.4s, v18.4s
+ ext v19.16b, v19.16b, v19.16b, #12
+ ushr v21.4s, v5.4s, #7
+ shl v5.4s, v5.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v5.16b, v5.16b, v21.16b
+ ext v21.16b, v17.16b, v17.16b, #12
+ add v6.4s, v6.4s, v5.4s
+ ext v17.16b, v17.16b, v21.16b, #12
+ mov v21.16b, v20.16b
+ eor v16.16b, v6.16b, v16.16b
+ rev64 v17.4s, v17.4s
+ mov v21.s[1], v7.s[2]
+ tbl v16.16b, { v16.16b }, v0.16b
+ add v19.4s, v19.4s, v16.4s
+ eor v5.16b, v5.16b, v19.16b
+ ushr v22.4s, v5.4s, #12
+ shl v23.4s, v5.4s, #20
+ trn2 v5.4s, v17.4s, v21.4s
+ orr v17.16b, v23.16b, v22.16b
+ add v21.4s, v17.4s, v5.4s
+ add v6.4s, v21.4s, v6.4s
+ eor v16.16b, v16.16b, v6.16b
+ ext v6.16b, v6.16b, v6.16b, #12
+ tbl v21.16b, { v16.16b }, v2.16b
+ zip1 v16.2d, v20.2d, v4.2d
+ zip2 v4.4s, v4.4s, v20.4s
+ add v19.4s, v19.4s, v21.4s
+ mov v16.s[3], v7.s[3]
+ ext v21.16b, v21.16b, v21.16b, #8
+ zip1 v20.4s, v4.4s, v7.4s
+ eor v17.16b, v17.16b, v19.16b
+ ext v22.16b, v16.16b, v16.16b, #12
+ ext v19.16b, v19.16b, v19.16b, #4
+ zip1 v4.4s, v7.4s, v4.4s
+ ushr v23.4s, v17.4s, #7
+ shl v17.4s, v17.4s, #25
+ uzp1 v16.4s, v16.4s, v22.4s
+ ext v4.16b, v4.16b, v20.16b, #8
+ orr v17.16b, v17.16b, v23.16b
+ add v22.4s, v17.4s, v16.4s
+ add v6.4s, v22.4s, v6.4s
+ eor v21.16b, v6.16b, v21.16b
+ tbl v21.16b, { v21.16b }, v0.16b
+ add v19.4s, v19.4s, v21.4s
+ eor v17.16b, v17.16b, v19.16b
+ ushr v7.4s, v17.4s, #12
+ shl v17.4s, v17.4s, #20
+ orr v7.16b, v17.16b, v7.16b
+ add v17.4s, v7.4s, v4.4s
+ add v6.4s, v17.4s, v6.4s
+ ext v17.16b, v18.16b, v18.16b, #4
+ eor v18.16b, v21.16b, v6.16b
+ uzp1 v20.4s, v17.4s, v17.4s
+ tbl v18.16b, { v18.16b }, v2.16b
+ ext v20.16b, v20.16b, v17.16b, #8
+ add v19.4s, v19.4s, v18.4s
+ uzp2 v20.4s, v20.4s, v5.4s
+ ext v18.16b, v18.16b, v18.16b, #8
+ eor v7.16b, v7.16b, v19.16b
+ add v6.4s, v6.4s, v20.4s
+ ushr v21.4s, v7.4s, #7
+ shl v7.4s, v7.4s, #25
+ ext v6.16b, v6.16b, v6.16b, #4
+ orr v7.16b, v7.16b, v21.16b
+ add v21.4s, v6.4s, v7.4s
+ eor v6.16b, v21.16b, v18.16b
+ ext v18.16b, v19.16b, v19.16b, #12
+ tbl v19.16b, { v6.16b }, v0.16b
+ ext v6.16b, v17.16b, v17.16b, #12
+ add v18.4s, v18.4s, v19.4s
+ ext v6.16b, v17.16b, v6.16b, #12
+ mov v17.16b, v4.16b
+ eor v7.16b, v7.16b, v18.16b
+ rev64 v6.4s, v6.4s
+ mov v17.s[1], v16.s[2]
+ ushr v22.4s, v7.4s, #12
+ shl v7.4s, v7.4s, #20
+ trn2 v6.4s, v6.4s, v17.4s
+ orr v7.16b, v7.16b, v22.16b
+ add v17.4s, v7.4s, v6.4s
+ add v17.4s, v17.4s, v21.4s
+ zip1 v21.2d, v4.2d, v5.2d
+ zip2 v4.4s, v5.4s, v4.4s
+ eor v19.16b, v19.16b, v17.16b
+ mov v21.s[3], v16.s[3]
+ ext v17.16b, v17.16b, v17.16b, #12
+ tbl v19.16b, { v19.16b }, v2.16b
+ ext v22.16b, v21.16b, v21.16b, #12
+ add v18.4s, v18.4s, v19.4s
+ ext v19.16b, v19.16b, v19.16b, #8
+ eor v7.16b, v7.16b, v18.16b
+ ext v18.16b, v18.16b, v18.16b, #4
+ ushr v23.4s, v7.4s, #7
+ shl v24.4s, v7.4s, #25
+ uzp1 v7.4s, v21.4s, v22.4s
+ orr v21.16b, v24.16b, v23.16b
+ add v22.4s, v21.4s, v7.4s
+ add v17.4s, v22.4s, v17.4s
+ eor v19.16b, v17.16b, v19.16b
+ tbl v19.16b, { v19.16b }, v0.16b
+ add v18.4s, v18.4s, v19.4s
+ eor v5.16b, v21.16b, v18.16b
+ zip1 v21.4s, v4.4s, v16.4s
+ zip1 v4.4s, v16.4s, v4.4s
+ ushr v16.4s, v5.4s, #12
+ shl v5.4s, v5.4s, #20
+ ext v21.16b, v4.16b, v21.16b, #8
+ orr v4.16b, v5.16b, v16.16b
+ ext v16.16b, v20.16b, v20.16b, #4
+ mov v23.16b, v21.16b
+ add v5.4s, v4.4s, v21.4s
+ mov v23.s[1], v7.s[2]
+ add v5.4s, v5.4s, v17.4s
+ eor v17.16b, v19.16b, v5.16b
+ uzp1 v19.4s, v16.4s, v16.4s
+ tbl v17.16b, { v17.16b }, v2.16b
+ ext v19.16b, v19.16b, v16.16b, #8
+ add v18.4s, v18.4s, v17.4s
+ uzp2 v19.4s, v19.4s, v6.4s
+ eor v4.16b, v4.16b, v18.16b
+ add v5.4s, v5.4s, v19.4s
+ ext v19.16b, v19.16b, v19.16b, #4
+ ushr v20.4s, v4.4s, #7
+ shl v4.4s, v4.4s, #25
+ ext v5.16b, v5.16b, v5.16b, #4
+ orr v20.16b, v4.16b, v20.16b
+ ext v4.16b, v17.16b, v17.16b, #8
+ add v17.4s, v5.4s, v20.4s
+ ext v5.16b, v18.16b, v18.16b, #12
+ eor v4.16b, v17.16b, v4.16b
+ tbl v18.16b, { v4.16b }, v0.16b
+ ext v4.16b, v16.16b, v16.16b, #12
+ add v22.4s, v5.4s, v18.4s
+ ext v4.16b, v16.16b, v4.16b, #12
+ eor v5.16b, v20.16b, v22.16b
+ rev64 v16.4s, v4.4s
+ ushr v20.4s, v5.4s, #12
+ shl v24.4s, v5.4s, #20
+ trn2 v5.4s, v16.4s, v23.4s
+ orr v16.16b, v24.16b, v20.16b
+ add v20.4s, v16.4s, v5.4s
+ add v17.4s, v20.4s, v17.4s
+ zip1 v20.2d, v21.2d, v6.2d
+ zip2 v6.4s, v6.4s, v21.4s
+ eor v18.16b, v18.16b, v17.16b
+ mov v20.s[3], v7.s[3]
+ ext v17.16b, v17.16b, v17.16b, #12
+ zip1 v21.4s, v6.4s, v7.4s
+ tbl v18.16b, { v18.16b }, v2.16b
+ ext v24.16b, v20.16b, v20.16b, #12
+ zip1 v6.4s, v7.4s, v6.4s
+ add v22.4s, v22.4s, v18.4s
+ ext v18.16b, v18.16b, v18.16b, #8
+ ext v6.16b, v6.16b, v21.16b, #8
+ eor v16.16b, v16.16b, v22.16b
+ ext v22.16b, v22.16b, v22.16b, #4
+ zip1 v5.2d, v6.2d, v5.2d
+ zip2 v4.4s, v4.4s, v6.4s
+ ushr v25.4s, v16.4s, #7
+ shl v26.4s, v16.4s, #25
+ uzp1 v16.4s, v20.4s, v24.4s
+ orr v20.16b, v26.16b, v25.16b
+ mov v5.s[3], v16.s[3]
+ add v24.4s, v20.4s, v16.4s
+ add v17.4s, v24.4s, v17.4s
+ eor v18.16b, v17.16b, v18.16b
+ tbl v18.16b, { v18.16b }, v0.16b
+ add v22.4s, v22.4s, v18.4s
+ eor v20.16b, v20.16b, v22.16b
+ ushr v7.4s, v20.4s, #12
+ shl v20.4s, v20.4s, #20
+ orr v7.16b, v20.16b, v7.16b
+ add v20.4s, v7.4s, v6.4s
+ add v17.4s, v20.4s, v17.4s
+ ext v20.16b, v19.16b, v19.16b, #8
+ eor v18.16b, v18.16b, v17.16b
+ ext v17.16b, v17.16b, v17.16b, #4
+ tbl v18.16b, { v18.16b }, v2.16b
+ add v21.4s, v22.4s, v18.4s
+ uzp2 v22.4s, v20.4s, v23.4s
+ ext v18.16b, v18.16b, v18.16b, #8
+ eor v7.16b, v7.16b, v21.16b
+ ext v20.16b, v22.16b, v20.16b, #4
+ ushr v22.4s, v7.4s, #7
+ shl v7.4s, v7.4s, #25
+ add v17.4s, v17.4s, v20.4s
+ ext v20.16b, v21.16b, v21.16b, #12
+ ext v21.16b, v19.16b, v19.16b, #12
+ orr v7.16b, v7.16b, v22.16b
+ ext v19.16b, v19.16b, v21.16b, #12
+ add v17.4s, v17.4s, v7.4s
+ mov v21.16b, v6.16b
+ rev64 v19.4s, v19.4s
+ eor v18.16b, v17.16b, v18.16b
+ mov v21.s[1], v16.s[2]
+ tbl v18.16b, { v18.16b }, v0.16b
+ trn2 v19.4s, v19.4s, v21.4s
+ add v20.4s, v20.4s, v18.4s
+ eor v7.16b, v7.16b, v20.16b
+ ushr v22.4s, v7.4s, #12
+ shl v7.4s, v7.4s, #20
+ orr v7.16b, v7.16b, v22.16b
+ add v19.4s, v7.4s, v19.4s
+ add v17.4s, v19.4s, v17.4s
+ eor v18.16b, v18.16b, v17.16b
+ ext v17.16b, v17.16b, v17.16b, #12
+ tbl v18.16b, { v18.16b }, v2.16b
+ add v19.4s, v20.4s, v18.4s
+ ext v20.16b, v5.16b, v5.16b, #12
+ ext v18.16b, v18.16b, v18.16b, #8
+ eor v7.16b, v7.16b, v19.16b
+ uzp1 v5.4s, v5.4s, v20.4s
+ ushr v21.4s, v7.4s, #7
+ shl v7.4s, v7.4s, #25
+ orr v7.16b, v7.16b, v21.16b
+ add v5.4s, v7.4s, v5.4s
+ add v5.4s, v5.4s, v17.4s
+ eor v17.16b, v5.16b, v18.16b
+ ext v18.16b, v19.16b, v19.16b, #4
+ tbl v17.16b, { v17.16b }, v0.16b
+ add v18.4s, v18.4s, v17.4s
+ eor v6.16b, v7.16b, v18.16b
+ zip1 v7.4s, v4.4s, v16.4s
+ zip1 v4.4s, v16.4s, v4.4s
+ ushr v16.4s, v6.4s, #12
+ shl v6.4s, v6.4s, #20
+ ext v4.16b, v4.16b, v7.16b, #8
+ orr v6.16b, v6.16b, v16.16b
+ add v4.4s, v6.4s, v4.4s
+ add v4.4s, v4.4s, v5.4s
+ eor v5.16b, v17.16b, v4.16b
+ ext v4.16b, v4.16b, v4.16b, #4
+ tbl v5.16b, { v5.16b }, v2.16b
+ add v7.4s, v18.4s, v5.4s
+ eor v6.16b, v6.16b, v7.16b
+ ext v7.16b, v7.16b, v7.16b, #12
+ ushr v16.4s, v6.4s, #7
+ shl v6.4s, v6.4s, #25
+ orr v6.16b, v6.16b, v16.16b
+ ext v16.16b, v5.16b, v5.16b, #8
+ eor v5.16b, v4.16b, v7.16b
+ eor v4.16b, v6.16b, v16.16b
+.LBB3_11:
+ subs x13, x15, #1
+ b.eq .LBB3_9
+ cbnz x15, .LBB3_10
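+	// one input finished: store its 32-byte output at [x8], step x0 to the
+	// next input pointer, and loop while inputs remain in x1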
+ add x4, x4, x12
+ add x0, x0, #8
+ subs x1, x1, #1
+ stp q5, q4, [x8], #32
+ b.ne .LBB3_8
+.LBB3_14:
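+	// epilogue: release the stack frame and restore the callee-saved GPRs
+	// and SIMD registers saved by the prologue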
+ add sp, sp, #368
+ ldp x20, x19, [sp, #128]
+ ldp x22, x21, [sp, #112]
+ ldp x24, x23, [sp, #96]
+ ldp x26, x25, [sp, #80]
+ ldp x29, x27, [sp, #64]
+ ldp d9, d8, [sp, #48]
+ ldp d11, d10, [sp, #32]
+ ldp d13, d12, [sp, #16]
+ ldp d15, d14, [sp], #144
+ ret
+.Lfunc_end3:
+ .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S
new file mode 100644
index 000000000000..4dcdd3b65d0b
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S
@@ -0,0 +1,2012 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
+#if defined(__aarch64__)
+
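+// GNU property note: marks this object as BTI- and PAC-compatible
+// (GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000, feature bits BTI|PAC = 3)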
+ .section .note.gnu.property,"a",@note
+ .p2align 3
+ .word 4
+ .word 16
+ .word 5
+ .asciz "GNU"
+ .word 3221225472
+ .word 4
+ .word 3
+ .word 0
+.text
+
+.align 6
+.type .LK256,%object
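+// SHA-256 round constants K[0..63], zero-terminated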
+.LK256:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+	.long	0	// terminator; the round loops test for this to find the end of the table
+.size .LK256,.-.LK256
+
+.globl zfs_sha256_block_armv7
+.type zfs_sha256_block_armv7,%function
+.align 6
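+// scalar SHA-256: x0 = state (8 x 32-bit words), x1 = input, x2 = number of
+// 64-byte blocks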
+zfs_sha256_block_armv7:
+ hint #34 // bti c
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+ sub sp,sp,#4*4
+
+ ldp w20,w21,[x0] // load context
+ ldp w22,w23,[x0,#2*4]
+ ldp w24,w25,[x0,#4*4]
+ add x2,x1,x2,lsl#6 // end of input
+ ldp w26,w27,[x0,#6*4]
+ adr x30,.LK256
+ stp x0,x2,[x29,#96]
+
+.Loop:
+ ldp w3,w4,[x1],#2*4
+ ldr w19,[x30],#4 // *K++
+ eor w28,w21,w22 // magic seed
+ str x1,[x29,#112]
+#ifndef __AARCH64EB__
+ rev w3,w3 // 0
+#endif
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ eor w6,w24,w24,ror#14
+ and w17,w25,w24
+ bic w19,w26,w24
+ add w27,w27,w3 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w6,ror#11 // Sigma1(e)
+ ror w6,w20,#2
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ eor w17,w20,w20,ror#9
+ add w27,w27,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w23,w23,w27 // d+=h
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w6,w17,ror#13 // Sigma0(a)
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w27,w27,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w4,w4 // 1
+#endif
+ ldp w5,w6,[x1],#2*4
+ add w27,w27,w17 // h+=Sigma0(a)
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ eor w7,w23,w23,ror#14
+ and w17,w24,w23
+ bic w28,w25,w23
+ add w26,w26,w4 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w7,ror#11 // Sigma1(e)
+ ror w7,w27,#2
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ eor w17,w27,w27,ror#9
+ add w26,w26,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w22,w22,w26 // d+=h
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w7,w17,ror#13 // Sigma0(a)
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w26,w26,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w5,w5 // 2
+#endif
+ add w26,w26,w17 // h+=Sigma0(a)
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ eor w8,w22,w22,ror#14
+ and w17,w23,w22
+ bic w19,w24,w22
+ add w25,w25,w5 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w8,ror#11 // Sigma1(e)
+ ror w8,w26,#2
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ eor w17,w26,w26,ror#9
+ add w25,w25,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w21,w21,w25 // d+=h
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w8,w17,ror#13 // Sigma0(a)
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w25,w25,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w6,w6 // 3
+#endif
+ ldp w7,w8,[x1],#2*4
+ add w25,w25,w17 // h+=Sigma0(a)
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ eor w9,w21,w21,ror#14
+ and w17,w22,w21
+ bic w28,w23,w21
+ add w24,w24,w6 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w9,ror#11 // Sigma1(e)
+ ror w9,w25,#2
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ eor w17,w25,w25,ror#9
+ add w24,w24,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w20,w20,w24 // d+=h
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w9,w17,ror#13 // Sigma0(a)
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w24,w24,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w7,w7 // 4
+#endif
+ add w24,w24,w17 // h+=Sigma0(a)
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ eor w10,w20,w20,ror#14
+ and w17,w21,w20
+ bic w19,w22,w20
+ add w23,w23,w7 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w10,ror#11 // Sigma1(e)
+ ror w10,w24,#2
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ eor w17,w24,w24,ror#9
+ add w23,w23,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w27,w27,w23 // d+=h
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w10,w17,ror#13 // Sigma0(a)
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w23,w23,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w8,w8 // 5
+#endif
+ ldp w9,w10,[x1],#2*4
+ add w23,w23,w17 // h+=Sigma0(a)
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ eor w11,w27,w27,ror#14
+ and w17,w20,w27
+ bic w28,w21,w27
+ add w22,w22,w8 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w11,ror#11 // Sigma1(e)
+ ror w11,w23,#2
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ eor w17,w23,w23,ror#9
+ add w22,w22,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w26,w26,w22 // d+=h
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w11,w17,ror#13 // Sigma0(a)
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w22,w22,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w9,w9 // 6
+#endif
+ add w22,w22,w17 // h+=Sigma0(a)
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ eor w12,w26,w26,ror#14
+ and w17,w27,w26
+ bic w19,w20,w26
+ add w21,w21,w9 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w12,ror#11 // Sigma1(e)
+ ror w12,w22,#2
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ eor w17,w22,w22,ror#9
+ add w21,w21,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w25,w25,w21 // d+=h
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w12,w17,ror#13 // Sigma0(a)
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w21,w21,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w10,w10 // 7
+#endif
+ ldp w11,w12,[x1],#2*4
+ add w21,w21,w17 // h+=Sigma0(a)
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ eor w13,w25,w25,ror#14
+ and w17,w26,w25
+ bic w28,w27,w25
+ add w20,w20,w10 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w13,ror#11 // Sigma1(e)
+ ror w13,w21,#2
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ eor w17,w21,w21,ror#9
+ add w20,w20,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w24,w24,w20 // d+=h
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w13,w17,ror#13 // Sigma0(a)
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w20,w20,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w11,w11 // 8
+#endif
+ add w20,w20,w17 // h+=Sigma0(a)
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ eor w14,w24,w24,ror#14
+ and w17,w25,w24
+ bic w19,w26,w24
+ add w27,w27,w11 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w14,ror#11 // Sigma1(e)
+ ror w14,w20,#2
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ eor w17,w20,w20,ror#9
+ add w27,w27,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w23,w23,w27 // d+=h
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w14,w17,ror#13 // Sigma0(a)
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w27,w27,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w12,w12 // 9
+#endif
+ ldp w13,w14,[x1],#2*4
+ add w27,w27,w17 // h+=Sigma0(a)
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ eor w15,w23,w23,ror#14
+ and w17,w24,w23
+ bic w28,w25,w23
+ add w26,w26,w12 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w15,ror#11 // Sigma1(e)
+ ror w15,w27,#2
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ eor w17,w27,w27,ror#9
+ add w26,w26,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w22,w22,w26 // d+=h
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w15,w17,ror#13 // Sigma0(a)
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w26,w26,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w13,w13 // 10
+#endif
+ add w26,w26,w17 // h+=Sigma0(a)
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ eor w0,w22,w22,ror#14
+ and w17,w23,w22
+ bic w19,w24,w22
+ add w25,w25,w13 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w0,ror#11 // Sigma1(e)
+ ror w0,w26,#2
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ eor w17,w26,w26,ror#9
+ add w25,w25,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w21,w21,w25 // d+=h
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w0,w17,ror#13 // Sigma0(a)
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w25,w25,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w14,w14 // 11
+#endif
+ ldp w15,w0,[x1],#2*4
+ add w25,w25,w17 // h+=Sigma0(a)
+ str w6,[sp,#12]
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ eor w6,w21,w21,ror#14
+ and w17,w22,w21
+ bic w28,w23,w21
+ add w24,w24,w14 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w6,ror#11 // Sigma1(e)
+ ror w6,w25,#2
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ eor w17,w25,w25,ror#9
+ add w24,w24,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w20,w20,w24 // d+=h
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w6,w17,ror#13 // Sigma0(a)
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w24,w24,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w15,w15 // 12
+#endif
+ add w24,w24,w17 // h+=Sigma0(a)
+ str w7,[sp,#0]
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ eor w7,w20,w20,ror#14
+ and w17,w21,w20
+ bic w19,w22,w20
+ add w23,w23,w15 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w7,ror#11 // Sigma1(e)
+ ror w7,w24,#2
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ eor w17,w24,w24,ror#9
+ add w23,w23,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w27,w27,w23 // d+=h
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w7,w17,ror#13 // Sigma0(a)
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w23,w23,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w0,w0 // 13
+#endif
+ ldp w1,w2,[x1]
+ add w23,w23,w17 // h+=Sigma0(a)
+ str w8,[sp,#4]
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ eor w8,w27,w27,ror#14
+ and w17,w20,w27
+ bic w28,w21,w27
+ add w22,w22,w0 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w8,ror#11 // Sigma1(e)
+ ror w8,w23,#2
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ eor w17,w23,w23,ror#9
+ add w22,w22,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w26,w26,w22 // d+=h
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w8,w17,ror#13 // Sigma0(a)
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w22,w22,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w1,w1 // 14
+#endif
+ ldr w6,[sp,#12]
+ add w22,w22,w17 // h+=Sigma0(a)
+ str w9,[sp,#8]
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ eor w9,w26,w26,ror#14
+ and w17,w27,w26
+ bic w19,w20,w26
+ add w21,w21,w1 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w9,ror#11 // Sigma1(e)
+ ror w9,w22,#2
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ eor w17,w22,w22,ror#9
+ add w21,w21,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w25,w25,w21 // d+=h
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w9,w17,ror#13 // Sigma0(a)
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w21,w21,w17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev w2,w2 // 15
+#endif
+ ldr w7,[sp,#0]
+ add w21,w21,w17 // h+=Sigma0(a)
+ str w10,[sp,#12]
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ ror w9,w4,#7
+ and w17,w26,w25
+ ror w8,w1,#17
+ bic w28,w27,w25
+ ror w10,w21,#2
+ add w20,w20,w2 // h+=X[i]
+ eor w16,w16,w25,ror#11
+ eor w9,w9,w4,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w25,ror#25 // Sigma1(e)
+ eor w10,w10,w21,ror#13
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w8,w8,w1,ror#19
+ eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
+ add w20,w20,w16 // h+=Sigma1(e)
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w10,w21,ror#22 // Sigma0(a)
+ eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
+ add w3,w3,w12
+ add w24,w24,w20 // d+=h
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w3,w3,w9
+ add w20,w20,w17 // h+=Sigma0(a)
+ add w3,w3,w8
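+// rounds 16..63: same round function, plus on-the-fly message schedule with
+// sigma0/sigma1; exits once the zero terminator of .LK256 is loaded into w19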
+.Loop_16_xx:
+ ldr w8,[sp,#4]
+ str w11,[sp,#0]
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ ror w10,w5,#7
+ and w17,w25,w24
+ ror w9,w2,#17
+ bic w19,w26,w24
+ ror w11,w20,#2
+ add w27,w27,w3 // h+=X[i]
+ eor w16,w16,w24,ror#11
+ eor w10,w10,w5,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w24,ror#25 // Sigma1(e)
+ eor w11,w11,w20,ror#13
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w9,w9,w2,ror#19
+ eor w10,w10,w5,lsr#3 // sigma0(X[i+1])
+ add w27,w27,w16 // h+=Sigma1(e)
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w11,w20,ror#22 // Sigma0(a)
+ eor w9,w9,w2,lsr#10 // sigma1(X[i+14])
+ add w4,w4,w13
+ add w23,w23,w27 // d+=h
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w4,w4,w10
+ add w27,w27,w17 // h+=Sigma0(a)
+ add w4,w4,w9
+ ldr w9,[sp,#8]
+ str w12,[sp,#4]
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ ror w11,w6,#7
+ and w17,w24,w23
+ ror w10,w3,#17
+ bic w28,w25,w23
+ ror w12,w27,#2
+ add w26,w26,w4 // h+=X[i]
+ eor w16,w16,w23,ror#11
+ eor w11,w11,w6,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w23,ror#25 // Sigma1(e)
+ eor w12,w12,w27,ror#13
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w10,w10,w3,ror#19
+ eor w11,w11,w6,lsr#3 // sigma0(X[i+1])
+ add w26,w26,w16 // h+=Sigma1(e)
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w12,w27,ror#22 // Sigma0(a)
+ eor w10,w10,w3,lsr#10 // sigma1(X[i+14])
+ add w5,w5,w14
+ add w22,w22,w26 // d+=h
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w5,w5,w11
+ add w26,w26,w17 // h+=Sigma0(a)
+ add w5,w5,w10
+ ldr w10,[sp,#12]
+ str w13,[sp,#8]
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ ror w12,w7,#7
+ and w17,w23,w22
+ ror w11,w4,#17
+ bic w19,w24,w22
+ ror w13,w26,#2
+ add w25,w25,w5 // h+=X[i]
+ eor w16,w16,w22,ror#11
+ eor w12,w12,w7,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w22,ror#25 // Sigma1(e)
+ eor w13,w13,w26,ror#13
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w11,w11,w4,ror#19
+ eor w12,w12,w7,lsr#3 // sigma0(X[i+1])
+ add w25,w25,w16 // h+=Sigma1(e)
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w13,w26,ror#22 // Sigma0(a)
+ eor w11,w11,w4,lsr#10 // sigma1(X[i+14])
+ add w6,w6,w15
+ add w21,w21,w25 // d+=h
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w6,w6,w12
+ add w25,w25,w17 // h+=Sigma0(a)
+ add w6,w6,w11
+ ldr w11,[sp,#0]
+ str w14,[sp,#12]
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ ror w13,w8,#7
+ and w17,w22,w21
+ ror w12,w5,#17
+ bic w28,w23,w21
+ ror w14,w25,#2
+ add w24,w24,w6 // h+=X[i]
+ eor w16,w16,w21,ror#11
+ eor w13,w13,w8,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w21,ror#25 // Sigma1(e)
+ eor w14,w14,w25,ror#13
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w12,w12,w5,ror#19
+ eor w13,w13,w8,lsr#3 // sigma0(X[i+1])
+ add w24,w24,w16 // h+=Sigma1(e)
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w14,w25,ror#22 // Sigma0(a)
+ eor w12,w12,w5,lsr#10 // sigma1(X[i+14])
+ add w7,w7,w0
+ add w20,w20,w24 // d+=h
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w7,w7,w13
+ add w24,w24,w17 // h+=Sigma0(a)
+ add w7,w7,w12
+ ldr w12,[sp,#4]
+ str w15,[sp,#0]
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ ror w14,w9,#7
+ and w17,w21,w20
+ ror w13,w6,#17
+ bic w19,w22,w20
+ ror w15,w24,#2
+ add w23,w23,w7 // h+=X[i]
+ eor w16,w16,w20,ror#11
+ eor w14,w14,w9,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w20,ror#25 // Sigma1(e)
+ eor w15,w15,w24,ror#13
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w13,w13,w6,ror#19
+ eor w14,w14,w9,lsr#3 // sigma0(X[i+1])
+ add w23,w23,w16 // h+=Sigma1(e)
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w15,w24,ror#22 // Sigma0(a)
+ eor w13,w13,w6,lsr#10 // sigma1(X[i+14])
+ add w8,w8,w1
+ add w27,w27,w23 // d+=h
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w8,w8,w14
+ add w23,w23,w17 // h+=Sigma0(a)
+ add w8,w8,w13
+ ldr w13,[sp,#8]
+ str w0,[sp,#4]
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ ror w15,w10,#7
+ and w17,w20,w27
+ ror w14,w7,#17
+ bic w28,w21,w27
+ ror w0,w23,#2
+ add w22,w22,w8 // h+=X[i]
+ eor w16,w16,w27,ror#11
+ eor w15,w15,w10,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w27,ror#25 // Sigma1(e)
+ eor w0,w0,w23,ror#13
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w14,w14,w7,ror#19
+ eor w15,w15,w10,lsr#3 // sigma0(X[i+1])
+ add w22,w22,w16 // h+=Sigma1(e)
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w0,w23,ror#22 // Sigma0(a)
+ eor w14,w14,w7,lsr#10 // sigma1(X[i+14])
+ add w9,w9,w2
+ add w26,w26,w22 // d+=h
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w9,w9,w15
+ add w22,w22,w17 // h+=Sigma0(a)
+ add w9,w9,w14
+ ldr w14,[sp,#12]
+ str w1,[sp,#8]
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ ror w0,w11,#7
+ and w17,w27,w26
+ ror w15,w8,#17
+ bic w19,w20,w26
+ ror w1,w22,#2
+ add w21,w21,w9 // h+=X[i]
+ eor w16,w16,w26,ror#11
+ eor w0,w0,w11,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w26,ror#25 // Sigma1(e)
+ eor w1,w1,w22,ror#13
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w15,w15,w8,ror#19
+ eor w0,w0,w11,lsr#3 // sigma0(X[i+1])
+ add w21,w21,w16 // h+=Sigma1(e)
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w1,w22,ror#22 // Sigma0(a)
+ eor w15,w15,w8,lsr#10 // sigma1(X[i+14])
+ add w10,w10,w3
+ add w25,w25,w21 // d+=h
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w10,w10,w0
+ add w21,w21,w17 // h+=Sigma0(a)
+ add w10,w10,w15
+ ldr w15,[sp,#0]
+ str w2,[sp,#12]
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ ror w1,w12,#7
+ and w17,w26,w25
+ ror w0,w9,#17
+ bic w28,w27,w25
+ ror w2,w21,#2
+ add w20,w20,w10 // h+=X[i]
+ eor w16,w16,w25,ror#11
+ eor w1,w1,w12,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w25,ror#25 // Sigma1(e)
+ eor w2,w2,w21,ror#13
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w0,w0,w9,ror#19
+ eor w1,w1,w12,lsr#3 // sigma0(X[i+1])
+ add w20,w20,w16 // h+=Sigma1(e)
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w2,w21,ror#22 // Sigma0(a)
+ eor w0,w0,w9,lsr#10 // sigma1(X[i+14])
+ add w11,w11,w4
+ add w24,w24,w20 // d+=h
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w11,w11,w1
+ add w20,w20,w17 // h+=Sigma0(a)
+ add w11,w11,w0
+ ldr w0,[sp,#4]
+ str w3,[sp,#0]
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ ror w2,w13,#7
+ and w17,w25,w24
+ ror w1,w10,#17
+ bic w19,w26,w24
+ ror w3,w20,#2
+ add w27,w27,w11 // h+=X[i]
+ eor w16,w16,w24,ror#11
+ eor w2,w2,w13,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w24,ror#25 // Sigma1(e)
+ eor w3,w3,w20,ror#13
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w1,w1,w10,ror#19
+ eor w2,w2,w13,lsr#3 // sigma0(X[i+1])
+ add w27,w27,w16 // h+=Sigma1(e)
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w3,w20,ror#22 // Sigma0(a)
+ eor w1,w1,w10,lsr#10 // sigma1(X[i+14])
+ add w12,w12,w5
+ add w23,w23,w27 // d+=h
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w12,w12,w2
+ add w27,w27,w17 // h+=Sigma0(a)
+ add w12,w12,w1
+ ldr w1,[sp,#8]
+ str w4,[sp,#4]
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ ror w3,w14,#7
+ and w17,w24,w23
+ ror w2,w11,#17
+ bic w28,w25,w23
+ ror w4,w27,#2
+ add w26,w26,w12 // h+=X[i]
+ eor w16,w16,w23,ror#11
+ eor w3,w3,w14,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w23,ror#25 // Sigma1(e)
+ eor w4,w4,w27,ror#13
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w2,w2,w11,ror#19
+ eor w3,w3,w14,lsr#3 // sigma0(X[i+1])
+ add w26,w26,w16 // h+=Sigma1(e)
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w4,w27,ror#22 // Sigma0(a)
+ eor w2,w2,w11,lsr#10 // sigma1(X[i+14])
+ add w13,w13,w6
+ add w22,w22,w26 // d+=h
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w13,w13,w3
+ add w26,w26,w17 // h+=Sigma0(a)
+ add w13,w13,w2
+ ldr w2,[sp,#12]
+ str w5,[sp,#8]
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ ror w4,w15,#7
+ and w17,w23,w22
+ ror w3,w12,#17
+ bic w19,w24,w22
+ ror w5,w26,#2
+ add w25,w25,w13 // h+=X[i]
+ eor w16,w16,w22,ror#11
+ eor w4,w4,w15,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w22,ror#25 // Sigma1(e)
+ eor w5,w5,w26,ror#13
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w3,w3,w12,ror#19
+ eor w4,w4,w15,lsr#3 // sigma0(X[i+1])
+ add w25,w25,w16 // h+=Sigma1(e)
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w5,w26,ror#22 // Sigma0(a)
+ eor w3,w3,w12,lsr#10 // sigma1(X[i+14])
+ add w14,w14,w7
+ add w21,w21,w25 // d+=h
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w14,w14,w4
+ add w25,w25,w17 // h+=Sigma0(a)
+ add w14,w14,w3
+ ldr w3,[sp,#0]
+ str w6,[sp,#12]
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ ror w5,w0,#7
+ and w17,w22,w21
+ ror w4,w13,#17
+ bic w28,w23,w21
+ ror w6,w25,#2
+ add w24,w24,w14 // h+=X[i]
+ eor w16,w16,w21,ror#11
+ eor w5,w5,w0,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w21,ror#25 // Sigma1(e)
+ eor w6,w6,w25,ror#13
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w4,w4,w13,ror#19
+ eor w5,w5,w0,lsr#3 // sigma0(X[i+1])
+ add w24,w24,w16 // h+=Sigma1(e)
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w6,w25,ror#22 // Sigma0(a)
+ eor w4,w4,w13,lsr#10 // sigma1(X[i+14])
+ add w15,w15,w8
+ add w20,w20,w24 // d+=h
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w15,w15,w5
+ add w24,w24,w17 // h+=Sigma0(a)
+ add w15,w15,w4
+ ldr w4,[sp,#4]
+ str w7,[sp,#0]
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ ror w6,w1,#7
+ and w17,w21,w20
+ ror w5,w14,#17
+ bic w19,w22,w20
+ ror w7,w24,#2
+ add w23,w23,w15 // h+=X[i]
+ eor w16,w16,w20,ror#11
+ eor w6,w6,w1,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w20,ror#25 // Sigma1(e)
+ eor w7,w7,w24,ror#13
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w5,w5,w14,ror#19
+ eor w6,w6,w1,lsr#3 // sigma0(X[i+1])
+ add w23,w23,w16 // h+=Sigma1(e)
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w7,w24,ror#22 // Sigma0(a)
+ eor w5,w5,w14,lsr#10 // sigma1(X[i+14])
+ add w0,w0,w9
+ add w27,w27,w23 // d+=h
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w0,w0,w6
+ add w23,w23,w17 // h+=Sigma0(a)
+ add w0,w0,w5
+ ldr w5,[sp,#8]
+ str w8,[sp,#4]
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ ror w7,w2,#7
+ and w17,w20,w27
+ ror w6,w15,#17
+ bic w28,w21,w27
+ ror w8,w23,#2
+ add w22,w22,w0 // h+=X[i]
+ eor w16,w16,w27,ror#11
+ eor w7,w7,w2,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w27,ror#25 // Sigma1(e)
+ eor w8,w8,w23,ror#13
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w6,w6,w15,ror#19
+ eor w7,w7,w2,lsr#3 // sigma0(X[i+1])
+ add w22,w22,w16 // h+=Sigma1(e)
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w8,w23,ror#22 // Sigma0(a)
+ eor w6,w6,w15,lsr#10 // sigma1(X[i+14])
+ add w1,w1,w10
+ add w26,w26,w22 // d+=h
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w1,w1,w7
+ add w22,w22,w17 // h+=Sigma0(a)
+ add w1,w1,w6
+ ldr w6,[sp,#12]
+ str w9,[sp,#8]
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ ror w8,w3,#7
+ and w17,w27,w26
+ ror w7,w0,#17
+ bic w19,w20,w26
+ ror w9,w22,#2
+ add w21,w21,w1 // h+=X[i]
+ eor w16,w16,w26,ror#11
+ eor w8,w8,w3,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w26,ror#25 // Sigma1(e)
+ eor w9,w9,w22,ror#13
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w7,w7,w0,ror#19
+ eor w8,w8,w3,lsr#3 // sigma0(X[i+1])
+ add w21,w21,w16 // h+=Sigma1(e)
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w9,w22,ror#22 // Sigma0(a)
+ eor w7,w7,w0,lsr#10 // sigma1(X[i+14])
+ add w2,w2,w11
+ add w25,w25,w21 // d+=h
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w2,w2,w8
+ add w21,w21,w17 // h+=Sigma0(a)
+ add w2,w2,w7
+ ldr w7,[sp,#0]
+ str w10,[sp,#12]
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ ror w9,w4,#7
+ and w17,w26,w25
+ ror w8,w1,#17
+ bic w28,w27,w25
+ ror w10,w21,#2
+ add w20,w20,w2 // h+=X[i]
+ eor w16,w16,w25,ror#11
+ eor w9,w9,w4,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w25,ror#25 // Sigma1(e)
+ eor w10,w10,w21,ror#13
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w8,w8,w1,ror#19
+ eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
+ add w20,w20,w16 // h+=Sigma1(e)
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w10,w21,ror#22 // Sigma0(a)
+ eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
+ add w3,w3,w12
+ add w24,w24,w20 // d+=h
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w3,w3,w9
+ add w20,w20,w17 // h+=Sigma0(a)
+ add w3,w3,w8
+ cbnz w19,.Loop_16_xx
+
+ ldp x0,x2,[x29,#96]
+ ldr x1,[x29,#112]
+	sub	x30,x30,#260		// rewind K256 pointer (64 constants + terminator)
+
+ ldp w3,w4,[x0]
+ ldp w5,w6,[x0,#2*4]
+ add x1,x1,#14*4 // advance input pointer
+ ldp w7,w8,[x0,#4*4]
+ add w20,w20,w3
+ ldp w9,w10,[x0,#6*4]
+ add w21,w21,w4
+ add w22,w22,w5
+ add w23,w23,w6
+ stp w20,w21,[x0]
+ add w24,w24,w7
+ add w25,w25,w8
+ stp w22,w23,[x0,#2*4]
+ add w26,w26,w9
+ add w27,w27,w10
+ cmp x1,x2
+ stp w24,w25,[x0,#4*4]
+ stp w26,w27,[x0,#6*4]
+ b.ne .Loop
+
+ ldp x19,x20,[x29,#16]
+ add sp,sp,#4*4
+ ldp x21,x22,[x29,#32]
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#128
+ ret
+.size zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
+
+.globl zfs_sha256_block_armv8
+.type zfs_sha256_block_armv8,%function
+.align 6
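+// same arguments as zfs_sha256_block_armv7, using the ARMv8 Crypto Extensions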
+zfs_sha256_block_armv8:
+ hint #34 // bti c
+.Lv8_entry:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ ld1 {v0.4s,v1.4s},[x0]
+ adr x3,.LK256
+
+.Loop_hw:
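+	// sha256h/sha256h2/sha256su0/sha256su1 are emitted as raw .inst words so
+	// that assemblers without crypto support still accept this file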
+ ld1 {v4.16b-v7.16b},[x1],#64
+ sub x2,x2,#1
+ ld1 {v16.4s},[x3],#16
+ rev32 v4.16b,v4.16b
+ rev32 v5.16b,v5.16b
+ rev32 v6.16b,v6.16b
+ rev32 v7.16b,v7.16b
+ orr v18.16b,v0.16b,v0.16b // offload
+ orr v19.16b,v1.16b,v1.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+
+ ld1 {v17.4s},[x3]
+ add v16.4s,v16.4s,v6.4s
+ sub x3,x3,#64*4-16 // rewind
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+
+ add v17.4s,v17.4s,v7.4s
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+
+ add v0.4s,v0.4s,v18.4s
+ add v1.4s,v1.4s,v19.4s
+
+ cbnz x2,.Loop_hw
+
+ st1 {v0.4s,v1.4s},[x0]
+
+ ldr x29,[sp],#16
+ ret
+.size zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
+
+.globl zfs_sha256_block_neon
+.type zfs_sha256_block_neon,%function
+.align 4
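+// hybrid variant: NEON computes the message schedule while the round function
+// runs on the general-purpose registers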
+zfs_sha256_block_neon:
+ hint #34 // bti c
+.Lneon_entry:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ sub sp,sp,#16*4
+
+ adr x16,.LK256
+	add	x2,x1,x2,lsl#6	// x2 = end of input (x1 + 64*blocks)
+
+ ld1 {v0.16b},[x1], #16
+ ld1 {v1.16b},[x1], #16
+ ld1 {v2.16b},[x1], #16
+ ld1 {v3.16b},[x1], #16
+ ld1 {v4.4s},[x16], #16
+ ld1 {v5.4s},[x16], #16
+ ld1 {v6.4s},[x16], #16
+ ld1 {v7.4s},[x16], #16
+ rev32 v0.16b,v0.16b // yes, even on
+ rev32 v1.16b,v1.16b // big-endian
+ rev32 v2.16b,v2.16b
+ rev32 v3.16b,v3.16b
+ mov x17,sp
+ add v4.4s,v4.4s,v0.4s
+ add v5.4s,v5.4s,v1.4s
+ add v6.4s,v6.4s,v2.4s
+ st1 {v4.4s-v5.4s},[x17], #32
+ add v7.4s,v7.4s,v3.4s
+ st1 {v6.4s-v7.4s},[x17]
+ sub x17,x17,#32
+
+ ldp w3,w4,[x0]
+ ldp w5,w6,[x0,#8]
+ ldp w7,w8,[x0,#16]
+ ldp w9,w10,[x0,#24]
+ ldr w12,[sp,#0]
+ mov w13,wzr
+ eor w14,w4,w5
+ mov w15,wzr
+ b .L_00_48
+
+.align 4
+.L_00_48:
+ ext v4.16b,v0.16b,v1.16b,#4
+ add w10,w10,w12
+ add w3,w3,w15
+ and w12,w8,w7
+ bic w15,w9,w7
+ ext v7.16b,v2.16b,v3.16b,#4
+ eor w11,w7,w7,ror#5
+ add w3,w3,w13
+ mov d19,v3.d[1]
+ orr w12,w12,w15
+ eor w11,w11,w7,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w15,w3,w3,ror#11
+ ushr v5.4s,v4.4s,#3
+ add w10,w10,w12
+ add v0.4s,v0.4s,v7.4s
+ ror w11,w11,#6
+ sli v6.4s,v4.4s,#25
+ eor w13,w3,w4
+ eor w15,w15,w3,ror#20
+ ushr v7.4s,v4.4s,#18
+ add w10,w10,w11
+ ldr w12,[sp,#4]
+ and w14,w14,w13
+ eor v5.16b,v5.16b,v6.16b
+ ror w15,w15,#2
+ add w6,w6,w10
+ sli v7.4s,v4.4s,#14
+ eor w14,w14,w4
+ ushr v16.4s,v19.4s,#17
+ add w9,w9,w12
+ add w10,w10,w15
+ and w12,w7,w6
+ eor v5.16b,v5.16b,v7.16b
+ bic w15,w8,w6
+ eor w11,w6,w6,ror#5
+ sli v16.4s,v19.4s,#15
+ add w10,w10,w14
+ orr w12,w12,w15
+ ushr v17.4s,v19.4s,#10
+ eor w11,w11,w6,ror#19
+ eor w15,w10,w10,ror#11
+ ushr v7.4s,v19.4s,#19
+ add w9,w9,w12
+ ror w11,w11,#6
+ add v0.4s,v0.4s,v5.4s
+ eor w14,w10,w3
+ eor w15,w15,w10,ror#20
+ sli v7.4s,v19.4s,#13
+ add w9,w9,w11
+ ldr w12,[sp,#8]
+ and w13,w13,w14
+ eor v17.16b,v17.16b,v16.16b
+ ror w15,w15,#2
+ add w5,w5,w9
+ eor w13,w13,w3
+ eor v17.16b,v17.16b,v7.16b
+ add w8,w8,w12
+ add w9,w9,w15
+ and w12,w6,w5
+ add v0.4s,v0.4s,v17.4s
+ bic w15,w7,w5
+ eor w11,w5,w5,ror#5
+ add w9,w9,w13
+ ushr v18.4s,v0.4s,#17
+ orr w12,w12,w15
+ ushr v19.4s,v0.4s,#10
+ eor w11,w11,w5,ror#19
+ eor w15,w9,w9,ror#11
+ sli v18.4s,v0.4s,#15
+ add w8,w8,w12
+ ushr v17.4s,v0.4s,#19
+ ror w11,w11,#6
+ eor w13,w9,w10
+ eor v19.16b,v19.16b,v18.16b
+ eor w15,w15,w9,ror#20
+ add w8,w8,w11
+ sli v17.4s,v0.4s,#13
+ ldr w12,[sp,#12]
+ and w14,w14,w13
+ ror w15,w15,#2
+ ld1 {v4.4s},[x16], #16
+ add w4,w4,w8
+ eor v19.16b,v19.16b,v17.16b
+ eor w14,w14,w10
+ eor v17.16b,v17.16b,v17.16b
+ add w7,w7,w12
+ add w8,w8,w15
+ and w12,w5,w4
+ mov v17.d[1],v19.d[0]
+ bic w15,w6,w4
+ eor w11,w4,w4,ror#5
+ add w8,w8,w14
+ add v0.4s,v0.4s,v17.4s
+ orr w12,w12,w15
+ eor w11,w11,w4,ror#19
+ eor w15,w8,w8,ror#11
+ add v4.4s,v4.4s,v0.4s
+ add w7,w7,w12
+ ror w11,w11,#6
+ eor w14,w8,w9
+ eor w15,w15,w8,ror#20
+ add w7,w7,w11
+ ldr w12,[sp,#16]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w3,w3,w7
+ eor w13,w13,w9
+ st1 {v4.4s},[x17], #16
+ ext v4.16b,v1.16b,v2.16b,#4
+ add w6,w6,w12
+ add w7,w7,w15
+ and w12,w4,w3
+ bic w15,w5,w3
+ ext v7.16b,v3.16b,v0.16b,#4
+ eor w11,w3,w3,ror#5
+ add w7,w7,w13
+ mov d19,v0.d[1]
+ orr w12,w12,w15
+ eor w11,w11,w3,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w15,w7,w7,ror#11
+ ushr v5.4s,v4.4s,#3
+ add w6,w6,w12
+ add v1.4s,v1.4s,v7.4s
+ ror w11,w11,#6
+ sli v6.4s,v4.4s,#25
+ eor w13,w7,w8
+ eor w15,w15,w7,ror#20
+ ushr v7.4s,v4.4s,#18
+ add w6,w6,w11
+ ldr w12,[sp,#20]
+ and w14,w14,w13
+ eor v5.16b,v5.16b,v6.16b
+ ror w15,w15,#2
+ add w10,w10,w6
+ sli v7.4s,v4.4s,#14
+ eor w14,w14,w8
+ ushr v16.4s,v19.4s,#17
+ add w5,w5,w12
+ add w6,w6,w15
+ and w12,w3,w10
+ eor v5.16b,v5.16b,v7.16b
+ bic w15,w4,w10
+ eor w11,w10,w10,ror#5
+ sli v16.4s,v19.4s,#15
+ add w6,w6,w14
+ orr w12,w12,w15
+ ushr v17.4s,v19.4s,#10
+ eor w11,w11,w10,ror#19
+ eor w15,w6,w6,ror#11
+ ushr v7.4s,v19.4s,#19
+ add w5,w5,w12
+ ror w11,w11,#6
+ add v1.4s,v1.4s,v5.4s
+ eor w14,w6,w7
+ eor w15,w15,w6,ror#20
+ sli v7.4s,v19.4s,#13
+ add w5,w5,w11
+ ldr w12,[sp,#24]
+ and w13,w13,w14
+ eor v17.16b,v17.16b,v16.16b
+ ror w15,w15,#2
+ add w9,w9,w5
+ eor w13,w13,w7
+ eor v17.16b,v17.16b,v7.16b
+ add w4,w4,w12
+ add w5,w5,w15
+ and w12,w10,w9
+ add v1.4s,v1.4s,v17.4s
+ bic w15,w3,w9
+ eor w11,w9,w9,ror#5
+ add w5,w5,w13
+ ushr v18.4s,v1.4s,#17
+ orr w12,w12,w15
+ ushr v19.4s,v1.4s,#10
+ eor w11,w11,w9,ror#19
+ eor w15,w5,w5,ror#11
+ sli v18.4s,v1.4s,#15
+ add w4,w4,w12
+ ushr v17.4s,v1.4s,#19
+ ror w11,w11,#6
+ eor w13,w5,w6
+ eor v19.16b,v19.16b,v18.16b
+ eor w15,w15,w5,ror#20
+ add w4,w4,w11
+ sli v17.4s,v1.4s,#13
+ ldr w12,[sp,#28]
+ and w14,w14,w13
+ ror w15,w15,#2
+ ld1 {v4.4s},[x16], #16
+ add w8,w8,w4
+ eor v19.16b,v19.16b,v17.16b
+ eor w14,w14,w6
+ eor v17.16b,v17.16b,v17.16b
+ add w3,w3,w12
+ add w4,w4,w15
+ and w12,w9,w8
+ mov v17.d[1],v19.d[0]
+ bic w15,w10,w8
+ eor w11,w8,w8,ror#5
+ add w4,w4,w14
+ add v1.4s,v1.4s,v17.4s
+ orr w12,w12,w15
+ eor w11,w11,w8,ror#19
+ eor w15,w4,w4,ror#11
+ add v4.4s,v4.4s,v1.4s
+ add w3,w3,w12
+ ror w11,w11,#6
+ eor w14,w4,w5
+ eor w15,w15,w4,ror#20
+ add w3,w3,w11
+ ldr w12,[sp,#32]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w7,w7,w3
+ eor w13,w13,w5
+ st1 {v4.4s},[x17], #16
+ ext v4.16b,v2.16b,v3.16b,#4
+ add w10,w10,w12
+ add w3,w3,w15
+ and w12,w8,w7
+ bic w15,w9,w7
+ ext v7.16b,v0.16b,v1.16b,#4
+ eor w11,w7,w7,ror#5
+ add w3,w3,w13
+ mov d19,v1.d[1]
+ orr w12,w12,w15
+ eor w11,w11,w7,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w15,w3,w3,ror#11
+ ushr v5.4s,v4.4s,#3
+ add w10,w10,w12
+ add v2.4s,v2.4s,v7.4s
+ ror w11,w11,#6
+ sli v6.4s,v4.4s,#25
+ eor w13,w3,w4
+ eor w15,w15,w3,ror#20
+ ushr v7.4s,v4.4s,#18
+ add w10,w10,w11
+ ldr w12,[sp,#36]
+ and w14,w14,w13
+ eor v5.16b,v5.16b,v6.16b
+ ror w15,w15,#2
+ add w6,w6,w10
+ sli v7.4s,v4.4s,#14
+ eor w14,w14,w4
+ ushr v16.4s,v19.4s,#17
+ add w9,w9,w12
+ add w10,w10,w15
+ and w12,w7,w6
+ eor v5.16b,v5.16b,v7.16b
+ bic w15,w8,w6
+ eor w11,w6,w6,ror#5
+ sli v16.4s,v19.4s,#15
+ add w10,w10,w14
+ orr w12,w12,w15
+ ushr v17.4s,v19.4s,#10
+ eor w11,w11,w6,ror#19
+ eor w15,w10,w10,ror#11
+ ushr v7.4s,v19.4s,#19
+ add w9,w9,w12
+ ror w11,w11,#6
+ add v2.4s,v2.4s,v5.4s
+ eor w14,w10,w3
+ eor w15,w15,w10,ror#20
+ sli v7.4s,v19.4s,#13
+ add w9,w9,w11
+ ldr w12,[sp,#40]
+ and w13,w13,w14
+ eor v17.16b,v17.16b,v16.16b
+ ror w15,w15,#2
+ add w5,w5,w9
+ eor w13,w13,w3
+ eor v17.16b,v17.16b,v7.16b
+ add w8,w8,w12
+ add w9,w9,w15
+ and w12,w6,w5
+ add v2.4s,v2.4s,v17.4s
+ bic w15,w7,w5
+ eor w11,w5,w5,ror#5
+ add w9,w9,w13
+ ushr v18.4s,v2.4s,#17
+ orr w12,w12,w15
+ ushr v19.4s,v2.4s,#10
+ eor w11,w11,w5,ror#19
+ eor w15,w9,w9,ror#11
+ sli v18.4s,v2.4s,#15
+ add w8,w8,w12
+ ushr v17.4s,v2.4s,#19
+ ror w11,w11,#6
+ eor w13,w9,w10
+ eor v19.16b,v19.16b,v18.16b
+ eor w15,w15,w9,ror#20
+ add w8,w8,w11
+ sli v17.4s,v2.4s,#13
+ ldr w12,[sp,#44]
+ and w14,w14,w13
+ ror w15,w15,#2
+ ld1 {v4.4s},[x16], #16
+ add w4,w4,w8
+ eor v19.16b,v19.16b,v17.16b
+ eor w14,w14,w10
+ eor v17.16b,v17.16b,v17.16b
+ add w7,w7,w12
+ add w8,w8,w15
+ and w12,w5,w4
+ mov v17.d[1],v19.d[0]
+ bic w15,w6,w4
+ eor w11,w4,w4,ror#5
+ add w8,w8,w14
+ add v2.4s,v2.4s,v17.4s
+ orr w12,w12,w15
+ eor w11,w11,w4,ror#19
+ eor w15,w8,w8,ror#11
+ add v4.4s,v4.4s,v2.4s
+ add w7,w7,w12
+ ror w11,w11,#6
+ eor w14,w8,w9
+ eor w15,w15,w8,ror#20
+ add w7,w7,w11
+ ldr w12,[sp,#48]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w3,w3,w7
+ eor w13,w13,w9
+ st1 {v4.4s},[x17], #16
+ ext v4.16b,v3.16b,v0.16b,#4
+ add w6,w6,w12
+ add w7,w7,w15
+ and w12,w4,w3
+ bic w15,w5,w3
+ ext v7.16b,v1.16b,v2.16b,#4
+ eor w11,w3,w3,ror#5
+ add w7,w7,w13
+ mov d19,v2.d[1]
+ orr w12,w12,w15
+ eor w11,w11,w3,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w15,w7,w7,ror#11
+ ushr v5.4s,v4.4s,#3
+ add w6,w6,w12
+ add v3.4s,v3.4s,v7.4s
+ ror w11,w11,#6
+ sli v6.4s,v4.4s,#25
+ eor w13,w7,w8
+ eor w15,w15,w7,ror#20
+ ushr v7.4s,v4.4s,#18
+ add w6,w6,w11
+ ldr w12,[sp,#52]
+ and w14,w14,w13
+ eor v5.16b,v5.16b,v6.16b
+ ror w15,w15,#2
+ add w10,w10,w6
+ sli v7.4s,v4.4s,#14
+ eor w14,w14,w8
+ ushr v16.4s,v19.4s,#17
+ add w5,w5,w12
+ add w6,w6,w15
+ and w12,w3,w10
+ eor v5.16b,v5.16b,v7.16b
+ bic w15,w4,w10
+ eor w11,w10,w10,ror#5
+ sli v16.4s,v19.4s,#15
+ add w6,w6,w14
+ orr w12,w12,w15
+ ushr v17.4s,v19.4s,#10
+ eor w11,w11,w10,ror#19
+ eor w15,w6,w6,ror#11
+ ushr v7.4s,v19.4s,#19
+ add w5,w5,w12
+ ror w11,w11,#6
+ add v3.4s,v3.4s,v5.4s
+ eor w14,w6,w7
+ eor w15,w15,w6,ror#20
+ sli v7.4s,v19.4s,#13
+ add w5,w5,w11
+ ldr w12,[sp,#56]
+ and w13,w13,w14
+ eor v17.16b,v17.16b,v16.16b
+ ror w15,w15,#2
+ add w9,w9,w5
+ eor w13,w13,w7
+ eor v17.16b,v17.16b,v7.16b
+ add w4,w4,w12
+ add w5,w5,w15
+ and w12,w10,w9
+ add v3.4s,v3.4s,v17.4s
+ bic w15,w3,w9
+ eor w11,w9,w9,ror#5
+ add w5,w5,w13
+ ushr v18.4s,v3.4s,#17
+ orr w12,w12,w15
+ ushr v19.4s,v3.4s,#10
+ eor w11,w11,w9,ror#19
+ eor w15,w5,w5,ror#11
+ sli v18.4s,v3.4s,#15
+ add w4,w4,w12
+ ushr v17.4s,v3.4s,#19
+ ror w11,w11,#6
+ eor w13,w5,w6
+ eor v19.16b,v19.16b,v18.16b
+ eor w15,w15,w5,ror#20
+ add w4,w4,w11
+ sli v17.4s,v3.4s,#13
+ ldr w12,[sp,#60]
+ and w14,w14,w13
+ ror w15,w15,#2
+ ld1 {v4.4s},[x16], #16
+ add w8,w8,w4
+ eor v19.16b,v19.16b,v17.16b
+ eor w14,w14,w6
+ eor v17.16b,v17.16b,v17.16b
+ add w3,w3,w12
+ add w4,w4,w15
+ and w12,w9,w8
+ mov v17.d[1],v19.d[0]
+ bic w15,w10,w8
+ eor w11,w8,w8,ror#5
+ add w4,w4,w14
+ add v3.4s,v3.4s,v17.4s
+ orr w12,w12,w15
+ eor w11,w11,w8,ror#19
+ eor w15,w4,w4,ror#11
+ add v4.4s,v4.4s,v3.4s
+ add w3,w3,w12
+ ror w11,w11,#6
+ eor w14,w4,w5
+ eor w15,w15,w4,ror#20
+ add w3,w3,w11
+ ldr w12,[x16]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w7,w7,w3
+ eor w13,w13,w5
+ st1 {v4.4s},[x17], #16
+ cmp w12,#0 // check for K256 terminator
+ ldr w12,[sp,#0]
+ sub x17,x17,#64
+ bne .L_00_48
+
+ sub x16,x16,#256 // rewind x16
+ cmp x1,x2
+ mov x17, #64
+ csel x17, x17, xzr, eq
+ sub x1,x1,x17 // avoid SEGV
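+	// if that cmp found the end of the input, x1 was pulled back by 64 so the
+	// ld1 loads below re-read the last block instead of running past the
+	// buffer; nothing below alters the flags, so the closing b.ne still sees
+	// the cmp result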
+ mov x17,sp
+ add w10,w10,w12
+ add w3,w3,w15
+ and w12,w8,w7
+ ld1 {v0.16b},[x1],#16
+ bic w15,w9,w7
+ eor w11,w7,w7,ror#5
+ ld1 {v4.4s},[x16],#16
+ add w3,w3,w13
+ orr w12,w12,w15
+ eor w11,w11,w7,ror#19
+ eor w15,w3,w3,ror#11
+ rev32 v0.16b,v0.16b
+ add w10,w10,w12
+ ror w11,w11,#6
+ eor w13,w3,w4
+ eor w15,w15,w3,ror#20
+ add v4.4s,v4.4s,v0.4s
+ add w10,w10,w11
+ ldr w12,[sp,#4]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w6,w6,w10
+ eor w14,w14,w4
+ add w9,w9,w12
+ add w10,w10,w15
+ and w12,w7,w6
+ bic w15,w8,w6
+ eor w11,w6,w6,ror#5
+ add w10,w10,w14
+ orr w12,w12,w15
+ eor w11,w11,w6,ror#19
+ eor w15,w10,w10,ror#11
+ add w9,w9,w12
+ ror w11,w11,#6
+ eor w14,w10,w3
+ eor w15,w15,w10,ror#20
+ add w9,w9,w11
+ ldr w12,[sp,#8]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w5,w5,w9
+ eor w13,w13,w3
+ add w8,w8,w12
+ add w9,w9,w15
+ and w12,w6,w5
+ bic w15,w7,w5
+ eor w11,w5,w5,ror#5
+ add w9,w9,w13
+ orr w12,w12,w15
+ eor w11,w11,w5,ror#19
+ eor w15,w9,w9,ror#11
+ add w8,w8,w12
+ ror w11,w11,#6
+ eor w13,w9,w10
+ eor w15,w15,w9,ror#20
+ add w8,w8,w11
+ ldr w12,[sp,#12]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w4,w4,w8
+ eor w14,w14,w10
+ add w7,w7,w12
+ add w8,w8,w15
+ and w12,w5,w4
+ bic w15,w6,w4
+ eor w11,w4,w4,ror#5
+ add w8,w8,w14
+ orr w12,w12,w15
+ eor w11,w11,w4,ror#19
+ eor w15,w8,w8,ror#11
+ add w7,w7,w12
+ ror w11,w11,#6
+ eor w14,w8,w9
+ eor w15,w15,w8,ror#20
+ add w7,w7,w11
+ ldr w12,[sp,#16]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w3,w3,w7
+ eor w13,w13,w9
+ st1 {v4.4s},[x17], #16
+ add w6,w6,w12
+ add w7,w7,w15
+ and w12,w4,w3
+ ld1 {v1.16b},[x1],#16
+ bic w15,w5,w3
+ eor w11,w3,w3,ror#5
+ ld1 {v4.4s},[x16],#16
+ add w7,w7,w13
+ orr w12,w12,w15
+ eor w11,w11,w3,ror#19
+ eor w15,w7,w7,ror#11
+ rev32 v1.16b,v1.16b
+ add w6,w6,w12
+ ror w11,w11,#6
+ eor w13,w7,w8
+ eor w15,w15,w7,ror#20
+ add v4.4s,v4.4s,v1.4s
+ add w6,w6,w11
+ ldr w12,[sp,#20]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w10,w10,w6
+ eor w14,w14,w8
+ add w5,w5,w12
+ add w6,w6,w15
+ and w12,w3,w10
+ bic w15,w4,w10
+ eor w11,w10,w10,ror#5
+ add w6,w6,w14
+ orr w12,w12,w15
+ eor w11,w11,w10,ror#19
+ eor w15,w6,w6,ror#11
+ add w5,w5,w12
+ ror w11,w11,#6
+ eor w14,w6,w7
+ eor w15,w15,w6,ror#20
+ add w5,w5,w11
+ ldr w12,[sp,#24]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w9,w9,w5
+ eor w13,w13,w7
+ add w4,w4,w12
+ add w5,w5,w15
+ and w12,w10,w9
+ bic w15,w3,w9
+ eor w11,w9,w9,ror#5
+ add w5,w5,w13
+ orr w12,w12,w15
+ eor w11,w11,w9,ror#19
+ eor w15,w5,w5,ror#11
+ add w4,w4,w12
+ ror w11,w11,#6
+ eor w13,w5,w6
+ eor w15,w15,w5,ror#20
+ add w4,w4,w11
+ ldr w12,[sp,#28]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w8,w8,w4
+ eor w14,w14,w6
+ add w3,w3,w12
+ add w4,w4,w15
+ and w12,w9,w8
+ bic w15,w10,w8
+ eor w11,w8,w8,ror#5
+ add w4,w4,w14
+ orr w12,w12,w15
+ eor w11,w11,w8,ror#19
+ eor w15,w4,w4,ror#11
+ add w3,w3,w12
+ ror w11,w11,#6
+ eor w14,w4,w5
+ eor w15,w15,w4,ror#20
+ add w3,w3,w11
+ ldr w12,[sp,#32]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w7,w7,w3
+ eor w13,w13,w5
+ st1 {v4.4s},[x17], #16
+ add w10,w10,w12
+ add w3,w3,w15
+ and w12,w8,w7
+ ld1 {v2.16b},[x1],#16
+ bic w15,w9,w7
+ eor w11,w7,w7,ror#5
+ ld1 {v4.4s},[x16],#16
+ add w3,w3,w13
+ orr w12,w12,w15
+ eor w11,w11,w7,ror#19
+ eor w15,w3,w3,ror#11
+ rev32 v2.16b,v2.16b
+ add w10,w10,w12
+ ror w11,w11,#6
+ eor w13,w3,w4
+ eor w15,w15,w3,ror#20
+ add v4.4s,v4.4s,v2.4s
+ add w10,w10,w11
+ ldr w12,[sp,#36]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w6,w6,w10
+ eor w14,w14,w4
+ add w9,w9,w12
+ add w10,w10,w15
+ and w12,w7,w6
+ bic w15,w8,w6
+ eor w11,w6,w6,ror#5
+ add w10,w10,w14
+ orr w12,w12,w15
+ eor w11,w11,w6,ror#19
+ eor w15,w10,w10,ror#11
+ add w9,w9,w12
+ ror w11,w11,#6
+ eor w14,w10,w3
+ eor w15,w15,w10,ror#20
+ add w9,w9,w11
+ ldr w12,[sp,#40]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w5,w5,w9
+ eor w13,w13,w3
+ add w8,w8,w12
+ add w9,w9,w15
+ and w12,w6,w5
+ bic w15,w7,w5
+ eor w11,w5,w5,ror#5
+ add w9,w9,w13
+ orr w12,w12,w15
+ eor w11,w11,w5,ror#19
+ eor w15,w9,w9,ror#11
+ add w8,w8,w12
+ ror w11,w11,#6
+ eor w13,w9,w10
+ eor w15,w15,w9,ror#20
+ add w8,w8,w11
+ ldr w12,[sp,#44]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w4,w4,w8
+ eor w14,w14,w10
+ add w7,w7,w12
+ add w8,w8,w15
+ and w12,w5,w4
+ bic w15,w6,w4
+ eor w11,w4,w4,ror#5
+ add w8,w8,w14
+ orr w12,w12,w15
+ eor w11,w11,w4,ror#19
+ eor w15,w8,w8,ror#11
+ add w7,w7,w12
+ ror w11,w11,#6
+ eor w14,w8,w9
+ eor w15,w15,w8,ror#20
+ add w7,w7,w11
+ ldr w12,[sp,#48]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w3,w3,w7
+ eor w13,w13,w9
+ st1 {v4.4s},[x17], #16
+ add w6,w6,w12
+ add w7,w7,w15
+ and w12,w4,w3
+ ld1 {v3.16b},[x1],#16
+ bic w15,w5,w3
+ eor w11,w3,w3,ror#5
+ ld1 {v4.4s},[x16],#16
+ add w7,w7,w13
+ orr w12,w12,w15
+ eor w11,w11,w3,ror#19
+ eor w15,w7,w7,ror#11
+ rev32 v3.16b,v3.16b
+ add w6,w6,w12
+ ror w11,w11,#6
+ eor w13,w7,w8
+ eor w15,w15,w7,ror#20
+ add v4.4s,v4.4s,v3.4s
+ add w6,w6,w11
+ ldr w12,[sp,#52]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w10,w10,w6
+ eor w14,w14,w8
+ add w5,w5,w12
+ add w6,w6,w15
+ and w12,w3,w10
+ bic w15,w4,w10
+ eor w11,w10,w10,ror#5
+ add w6,w6,w14
+ orr w12,w12,w15
+ eor w11,w11,w10,ror#19
+ eor w15,w6,w6,ror#11
+ add w5,w5,w12
+ ror w11,w11,#6
+ eor w14,w6,w7
+ eor w15,w15,w6,ror#20
+ add w5,w5,w11
+ ldr w12,[sp,#56]
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w9,w9,w5
+ eor w13,w13,w7
+ add w4,w4,w12
+ add w5,w5,w15
+ and w12,w10,w9
+ bic w15,w3,w9
+ eor w11,w9,w9,ror#5
+ add w5,w5,w13
+ orr w12,w12,w15
+ eor w11,w11,w9,ror#19
+ eor w15,w5,w5,ror#11
+ add w4,w4,w12
+ ror w11,w11,#6
+ eor w13,w5,w6
+ eor w15,w15,w5,ror#20
+ add w4,w4,w11
+ ldr w12,[sp,#60]
+ and w14,w14,w13
+ ror w15,w15,#2
+ add w8,w8,w4
+ eor w14,w14,w6
+ add w3,w3,w12
+ add w4,w4,w15
+ and w12,w9,w8
+ bic w15,w10,w8
+ eor w11,w8,w8,ror#5
+ add w4,w4,w14
+ orr w12,w12,w15
+ eor w11,w11,w8,ror#19
+ eor w15,w4,w4,ror#11
+ add w3,w3,w12
+ ror w11,w11,#6
+ eor w14,w4,w5
+ eor w15,w15,w4,ror#20
+ add w3,w3,w11
+ and w13,w13,w14
+ ror w15,w15,#2
+ add w7,w7,w3
+ eor w13,w13,w5
+ st1 {v4.4s},[x17], #16
+ add w3,w3,w15 // h+=Sigma0(a) from the past
+ ldp w11,w12,[x0,#0]
+ add w3,w3,w13 // h+=Maj(a,b,c) from the past
+ ldp w13,w14,[x0,#8]
+ add w3,w3,w11 // accumulate
+ add w4,w4,w12
+ ldp w11,w12,[x0,#16]
+ add w5,w5,w13
+ add w6,w6,w14
+ ldp w13,w14,[x0,#24]
+ add w7,w7,w11
+ add w8,w8,w12
+ ldr w12,[sp,#0]
+ stp w3,w4,[x0,#0]
+ add w9,w9,w13
+ mov w13,wzr
+ stp w5,w6,[x0,#8]
+ add w10,w10,w14
+ stp w7,w8,[x0,#16]
+ eor w14,w4,w5
+ stp w9,w10,[x0,#24]
+ mov w15,wzr
+ mov x17,sp
+ b.ne .L_00_48
+
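+	// Done: restore the caller's frame pointer from the frame record and
+	// release the X[16] scratch area plus the frame record itself.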
+ ldr x29,[x29]
+ add sp,sp,#16*4+16
+ ret
+.size zfs_sha256_block_neon,.-zfs_sha256_block_neon
+
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S
new file mode 100644
index 000000000000..f6c8f7742912
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S
@@ -0,0 +1,1570 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
+#if defined(__aarch64__)
+
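+// GNU property note (name "GNU", type 5 = NT_GNU_PROPERTY_TYPE_0):
+// property 0xc0000000 (GNU_PROPERTY_AARCH64_FEATURE_1_AND) with value 3
+// marks this object as BTI- and PAC-compatible.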
+ .section .note.gnu.property,"a",@note
+ .p2align 3
+ .word 4
+ .word 16
+ .word 5
+ .asciz "GNU"
+ .word 3221225472
+ .word 4
+ .word 3
+ .word 0
+.text
+
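+// SHA-512 round constants K[0..79] (FIPS 180-4); the trailing zero quad
+// lets the scalar loop detect the last round (see cbnz x19,.Loop_16_xx).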
+.align 6
+.type .LK512,%object
+.LK512:
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ .quad 0 // terminator
+.size .LK512,.-.LK512
+
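+// Scalar SHA-512 block routine: x0 = eight 64-bit state words,
+// x1 = message, x2 = number of 128-byte blocks (turned into an end
+// pointer via lsl#7 below).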
+.globl zfs_sha512_block_armv7
+.type zfs_sha512_block_armv7,%function
+.align 6
+zfs_sha512_block_armv7:
+ hint #34 // bti c
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+ sub sp,sp,#4*8
+
+ ldp x20,x21,[x0] // load context
+ ldp x22,x23,[x0,#2*8]
+ ldp x24,x25,[x0,#4*8]
+ add x2,x1,x2,lsl#7 // end of input
+ ldp x26,x27,[x0,#6*8]
+ adr x30,.LK512
+ stp x0,x2,[x29,#96]
+
+.Loop:
+ ldp x3,x4,[x1],#2*8
+ ldr x19,[x30],#8 // *K++
+ eor x28,x21,x22 // magic seed
+ str x1,[x29,#112]
+#ifndef __AARCH64EB__
+ rev x3,x3 // 0
+#endif
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ eor x6,x24,x24,ror#23
+ and x17,x25,x24
+ bic x19,x26,x24
+ add x27,x27,x3 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x6,ror#18 // Sigma1(e)
+ ror x6,x20,#28
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ eor x17,x20,x20,ror#5
+ add x27,x27,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x23,x23,x27 // d+=h
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x6,x17,ror#34 // Sigma0(a)
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x27,x27,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x4,x4 // 1
+#endif
+ ldp x5,x6,[x1],#2*8
+ add x27,x27,x17 // h+=Sigma0(a)
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ eor x7,x23,x23,ror#23
+ and x17,x24,x23
+ bic x28,x25,x23
+ add x26,x26,x4 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x7,ror#18 // Sigma1(e)
+ ror x7,x27,#28
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ eor x17,x27,x27,ror#5
+ add x26,x26,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x22,x22,x26 // d+=h
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x7,x17,ror#34 // Sigma0(a)
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x26,x26,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x5,x5 // 2
+#endif
+ add x26,x26,x17 // h+=Sigma0(a)
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ eor x8,x22,x22,ror#23
+ and x17,x23,x22
+ bic x19,x24,x22
+ add x25,x25,x5 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x8,ror#18 // Sigma1(e)
+ ror x8,x26,#28
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ eor x17,x26,x26,ror#5
+ add x25,x25,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x21,x21,x25 // d+=h
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x8,x17,ror#34 // Sigma0(a)
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x25,x25,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x6,x6 // 3
+#endif
+ ldp x7,x8,[x1],#2*8
+ add x25,x25,x17 // h+=Sigma0(a)
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ eor x9,x21,x21,ror#23
+ and x17,x22,x21
+ bic x28,x23,x21
+ add x24,x24,x6 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x9,ror#18 // Sigma1(e)
+ ror x9,x25,#28
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ eor x17,x25,x25,ror#5
+ add x24,x24,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x20,x20,x24 // d+=h
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x9,x17,ror#34 // Sigma0(a)
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x24,x24,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x7,x7 // 4
+#endif
+ add x24,x24,x17 // h+=Sigma0(a)
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ eor x10,x20,x20,ror#23
+ and x17,x21,x20
+ bic x19,x22,x20
+ add x23,x23,x7 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x10,ror#18 // Sigma1(e)
+ ror x10,x24,#28
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ eor x17,x24,x24,ror#5
+ add x23,x23,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x27,x27,x23 // d+=h
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x10,x17,ror#34 // Sigma0(a)
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x23,x23,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x8,x8 // 5
+#endif
+ ldp x9,x10,[x1],#2*8
+ add x23,x23,x17 // h+=Sigma0(a)
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ eor x11,x27,x27,ror#23
+ and x17,x20,x27
+ bic x28,x21,x27
+ add x22,x22,x8 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x11,ror#18 // Sigma1(e)
+ ror x11,x23,#28
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ eor x17,x23,x23,ror#5
+ add x22,x22,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x26,x26,x22 // d+=h
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x11,x17,ror#34 // Sigma0(a)
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x22,x22,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x9,x9 // 6
+#endif
+ add x22,x22,x17 // h+=Sigma0(a)
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ eor x12,x26,x26,ror#23
+ and x17,x27,x26
+ bic x19,x20,x26
+ add x21,x21,x9 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x12,ror#18 // Sigma1(e)
+ ror x12,x22,#28
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ eor x17,x22,x22,ror#5
+ add x21,x21,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x25,x25,x21 // d+=h
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x12,x17,ror#34 // Sigma0(a)
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x21,x21,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x10,x10 // 7
+#endif
+ ldp x11,x12,[x1],#2*8
+ add x21,x21,x17 // h+=Sigma0(a)
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ eor x13,x25,x25,ror#23
+ and x17,x26,x25
+ bic x28,x27,x25
+ add x20,x20,x10 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x13,ror#18 // Sigma1(e)
+ ror x13,x21,#28
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ eor x17,x21,x21,ror#5
+ add x20,x20,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x24,x24,x20 // d+=h
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x13,x17,ror#34 // Sigma0(a)
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x20,x20,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x11,x11 // 8
+#endif
+ add x20,x20,x17 // h+=Sigma0(a)
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ eor x14,x24,x24,ror#23
+ and x17,x25,x24
+ bic x19,x26,x24
+ add x27,x27,x11 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x14,ror#18 // Sigma1(e)
+ ror x14,x20,#28
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ eor x17,x20,x20,ror#5
+ add x27,x27,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x23,x23,x27 // d+=h
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x14,x17,ror#34 // Sigma0(a)
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x27,x27,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x12,x12 // 9
+#endif
+ ldp x13,x14,[x1],#2*8
+ add x27,x27,x17 // h+=Sigma0(a)
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ eor x15,x23,x23,ror#23
+ and x17,x24,x23
+ bic x28,x25,x23
+ add x26,x26,x12 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x15,ror#18 // Sigma1(e)
+ ror x15,x27,#28
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ eor x17,x27,x27,ror#5
+ add x26,x26,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x22,x22,x26 // d+=h
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x15,x17,ror#34 // Sigma0(a)
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x26,x26,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x13,x13 // 10
+#endif
+ add x26,x26,x17 // h+=Sigma0(a)
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ eor x0,x22,x22,ror#23
+ and x17,x23,x22
+ bic x19,x24,x22
+ add x25,x25,x13 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x0,ror#18 // Sigma1(e)
+ ror x0,x26,#28
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ eor x17,x26,x26,ror#5
+ add x25,x25,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x21,x21,x25 // d+=h
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x0,x17,ror#34 // Sigma0(a)
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x25,x25,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x14,x14 // 11
+#endif
+ ldp x15,x0,[x1],#2*8
+ add x25,x25,x17 // h+=Sigma0(a)
+ str x6,[sp,#24]
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ eor x6,x21,x21,ror#23
+ and x17,x22,x21
+ bic x28,x23,x21
+ add x24,x24,x14 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x6,ror#18 // Sigma1(e)
+ ror x6,x25,#28
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ eor x17,x25,x25,ror#5
+ add x24,x24,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x20,x20,x24 // d+=h
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x6,x17,ror#34 // Sigma0(a)
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x24,x24,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x15,x15 // 12
+#endif
+ add x24,x24,x17 // h+=Sigma0(a)
+ str x7,[sp,#0]
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ eor x7,x20,x20,ror#23
+ and x17,x21,x20
+ bic x19,x22,x20
+ add x23,x23,x15 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x7,ror#18 // Sigma1(e)
+ ror x7,x24,#28
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ eor x17,x24,x24,ror#5
+ add x23,x23,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x27,x27,x23 // d+=h
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x7,x17,ror#34 // Sigma0(a)
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x23,x23,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x0,x0 // 13
+#endif
+ ldp x1,x2,[x1]
+ add x23,x23,x17 // h+=Sigma0(a)
+ str x8,[sp,#8]
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ eor x8,x27,x27,ror#23
+ and x17,x20,x27
+ bic x28,x21,x27
+ add x22,x22,x0 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x8,ror#18 // Sigma1(e)
+ ror x8,x23,#28
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ eor x17,x23,x23,ror#5
+ add x22,x22,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x26,x26,x22 // d+=h
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x8,x17,ror#34 // Sigma0(a)
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x22,x22,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x1,x1 // 14
+#endif
+ ldr x6,[sp,#24]
+ add x22,x22,x17 // h+=Sigma0(a)
+ str x9,[sp,#16]
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ eor x9,x26,x26,ror#23
+ and x17,x27,x26
+ bic x19,x20,x26
+ add x21,x21,x1 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x9,ror#18 // Sigma1(e)
+ ror x9,x22,#28
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ eor x17,x22,x22,ror#5
+ add x21,x21,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x25,x25,x21 // d+=h
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x9,x17,ror#34 // Sigma0(a)
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x21,x21,x17 // h+=Sigma0(a)
+#ifndef __AARCH64EB__
+ rev x2,x2 // 15
+#endif
+ ldr x7,[sp,#0]
+ add x21,x21,x17 // h+=Sigma0(a)
+ str x10,[sp,#24]
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ ror x9,x4,#1
+ and x17,x26,x25
+ ror x8,x1,#19
+ bic x28,x27,x25
+ ror x10,x21,#28
+ add x20,x20,x2 // h+=X[i]
+ eor x16,x16,x25,ror#18
+ eor x9,x9,x4,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x25,ror#41 // Sigma1(e)
+ eor x10,x10,x21,ror#34
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x8,x8,x1,ror#61
+ eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
+ add x20,x20,x16 // h+=Sigma1(e)
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x10,x21,ror#39 // Sigma0(a)
+ eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
+ add x3,x3,x12
+ add x24,x24,x20 // d+=h
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x3,x3,x9
+ add x20,x20,x17 // h+=Sigma0(a)
+ add x3,x3,x8
+.Loop_16_xx:
+ ldr x8,[sp,#8]
+ str x11,[sp,#0]
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ ror x10,x5,#1
+ and x17,x25,x24
+ ror x9,x2,#19
+ bic x19,x26,x24
+ ror x11,x20,#28
+ add x27,x27,x3 // h+=X[i]
+ eor x16,x16,x24,ror#18
+ eor x10,x10,x5,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x24,ror#41 // Sigma1(e)
+ eor x11,x11,x20,ror#34
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x9,x9,x2,ror#61
+ eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
+ add x27,x27,x16 // h+=Sigma1(e)
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x11,x20,ror#39 // Sigma0(a)
+ eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
+ add x4,x4,x13
+ add x23,x23,x27 // d+=h
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x4,x4,x10
+ add x27,x27,x17 // h+=Sigma0(a)
+ add x4,x4,x9
+ ldr x9,[sp,#16]
+ str x12,[sp,#8]
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ ror x11,x6,#1
+ and x17,x24,x23
+ ror x10,x3,#19
+ bic x28,x25,x23
+ ror x12,x27,#28
+ add x26,x26,x4 // h+=X[i]
+ eor x16,x16,x23,ror#18
+ eor x11,x11,x6,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x23,ror#41 // Sigma1(e)
+ eor x12,x12,x27,ror#34
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x10,x10,x3,ror#61
+ eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
+ add x26,x26,x16 // h+=Sigma1(e)
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x12,x27,ror#39 // Sigma0(a)
+ eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
+ add x5,x5,x14
+ add x22,x22,x26 // d+=h
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x5,x5,x11
+ add x26,x26,x17 // h+=Sigma0(a)
+ add x5,x5,x10
+ ldr x10,[sp,#24]
+ str x13,[sp,#16]
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ ror x12,x7,#1
+ and x17,x23,x22
+ ror x11,x4,#19
+ bic x19,x24,x22
+ ror x13,x26,#28
+ add x25,x25,x5 // h+=X[i]
+ eor x16,x16,x22,ror#18
+ eor x12,x12,x7,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x22,ror#41 // Sigma1(e)
+ eor x13,x13,x26,ror#34
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x11,x11,x4,ror#61
+ eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
+ add x25,x25,x16 // h+=Sigma1(e)
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x13,x26,ror#39 // Sigma0(a)
+ eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
+ add x6,x6,x15
+ add x21,x21,x25 // d+=h
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x6,x6,x12
+ add x25,x25,x17 // h+=Sigma0(a)
+ add x6,x6,x11
+ ldr x11,[sp,#0]
+ str x14,[sp,#24]
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ ror x13,x8,#1
+ and x17,x22,x21
+ ror x12,x5,#19
+ bic x28,x23,x21
+ ror x14,x25,#28
+ add x24,x24,x6 // h+=X[i]
+ eor x16,x16,x21,ror#18
+ eor x13,x13,x8,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x21,ror#41 // Sigma1(e)
+ eor x14,x14,x25,ror#34
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x12,x12,x5,ror#61
+ eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
+ add x24,x24,x16 // h+=Sigma1(e)
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x14,x25,ror#39 // Sigma0(a)
+ eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
+ add x7,x7,x0
+ add x20,x20,x24 // d+=h
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x7,x7,x13
+ add x24,x24,x17 // h+=Sigma0(a)
+ add x7,x7,x12
+ ldr x12,[sp,#8]
+ str x15,[sp,#0]
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ ror x14,x9,#1
+ and x17,x21,x20
+ ror x13,x6,#19
+ bic x19,x22,x20
+ ror x15,x24,#28
+ add x23,x23,x7 // h+=X[i]
+ eor x16,x16,x20,ror#18
+ eor x14,x14,x9,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x20,ror#41 // Sigma1(e)
+ eor x15,x15,x24,ror#34
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x13,x13,x6,ror#61
+ eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
+ add x23,x23,x16 // h+=Sigma1(e)
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x15,x24,ror#39 // Sigma0(a)
+ eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
+ add x8,x8,x1
+ add x27,x27,x23 // d+=h
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x8,x8,x14
+ add x23,x23,x17 // h+=Sigma0(a)
+ add x8,x8,x13
+ ldr x13,[sp,#16]
+ str x0,[sp,#8]
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ ror x15,x10,#1
+ and x17,x20,x27
+ ror x14,x7,#19
+ bic x28,x21,x27
+ ror x0,x23,#28
+ add x22,x22,x8 // h+=X[i]
+ eor x16,x16,x27,ror#18
+ eor x15,x15,x10,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x27,ror#41 // Sigma1(e)
+ eor x0,x0,x23,ror#34
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x14,x14,x7,ror#61
+ eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
+ add x22,x22,x16 // h+=Sigma1(e)
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x0,x23,ror#39 // Sigma0(a)
+ eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
+ add x9,x9,x2
+ add x26,x26,x22 // d+=h
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x9,x9,x15
+ add x22,x22,x17 // h+=Sigma0(a)
+ add x9,x9,x14
+ ldr x14,[sp,#24]
+ str x1,[sp,#16]
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ ror x0,x11,#1
+ and x17,x27,x26
+ ror x15,x8,#19
+ bic x19,x20,x26
+ ror x1,x22,#28
+ add x21,x21,x9 // h+=X[i]
+ eor x16,x16,x26,ror#18
+ eor x0,x0,x11,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x26,ror#41 // Sigma1(e)
+ eor x1,x1,x22,ror#34
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x15,x15,x8,ror#61
+ eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
+ add x21,x21,x16 // h+=Sigma1(e)
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x1,x22,ror#39 // Sigma0(a)
+ eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
+ add x10,x10,x3
+ add x25,x25,x21 // d+=h
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x10,x10,x0
+ add x21,x21,x17 // h+=Sigma0(a)
+ add x10,x10,x15
+ ldr x15,[sp,#0]
+ str x2,[sp,#24]
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ ror x1,x12,#1
+ and x17,x26,x25
+ ror x0,x9,#19
+ bic x28,x27,x25
+ ror x2,x21,#28
+ add x20,x20,x10 // h+=X[i]
+ eor x16,x16,x25,ror#18
+ eor x1,x1,x12,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x25,ror#41 // Sigma1(e)
+ eor x2,x2,x21,ror#34
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x0,x0,x9,ror#61
+ eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
+ add x20,x20,x16 // h+=Sigma1(e)
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x2,x21,ror#39 // Sigma0(a)
+ eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
+ add x11,x11,x4
+ add x24,x24,x20 // d+=h
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x11,x11,x1
+ add x20,x20,x17 // h+=Sigma0(a)
+ add x11,x11,x0
+ ldr x0,[sp,#8]
+ str x3,[sp,#0]
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ ror x2,x13,#1
+ and x17,x25,x24
+ ror x1,x10,#19
+ bic x19,x26,x24
+ ror x3,x20,#28
+ add x27,x27,x11 // h+=X[i]
+ eor x16,x16,x24,ror#18
+ eor x2,x2,x13,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x24,ror#41 // Sigma1(e)
+ eor x3,x3,x20,ror#34
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x1,x1,x10,ror#61
+ eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
+ add x27,x27,x16 // h+=Sigma1(e)
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x3,x20,ror#39 // Sigma0(a)
+ eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
+ add x12,x12,x5
+ add x23,x23,x27 // d+=h
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x12,x12,x2
+ add x27,x27,x17 // h+=Sigma0(a)
+ add x12,x12,x1
+ ldr x1,[sp,#16]
+ str x4,[sp,#8]
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ ror x3,x14,#1
+ and x17,x24,x23
+ ror x2,x11,#19
+ bic x28,x25,x23
+ ror x4,x27,#28
+ add x26,x26,x12 // h+=X[i]
+ eor x16,x16,x23,ror#18
+ eor x3,x3,x14,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x23,ror#41 // Sigma1(e)
+ eor x4,x4,x27,ror#34
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x2,x2,x11,ror#61
+ eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
+ add x26,x26,x16 // h+=Sigma1(e)
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x4,x27,ror#39 // Sigma0(a)
+ eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
+ add x13,x13,x6
+ add x22,x22,x26 // d+=h
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x13,x13,x3
+ add x26,x26,x17 // h+=Sigma0(a)
+ add x13,x13,x2
+ ldr x2,[sp,#24]
+ str x5,[sp,#16]
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ ror x4,x15,#1
+ and x17,x23,x22
+ ror x3,x12,#19
+ bic x19,x24,x22
+ ror x5,x26,#28
+ add x25,x25,x13 // h+=X[i]
+ eor x16,x16,x22,ror#18
+ eor x4,x4,x15,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x22,ror#41 // Sigma1(e)
+ eor x5,x5,x26,ror#34
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x3,x3,x12,ror#61
+ eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
+ add x25,x25,x16 // h+=Sigma1(e)
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x5,x26,ror#39 // Sigma0(a)
+ eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
+ add x14,x14,x7
+ add x21,x21,x25 // d+=h
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x14,x14,x4
+ add x25,x25,x17 // h+=Sigma0(a)
+ add x14,x14,x3
+ ldr x3,[sp,#0]
+ str x6,[sp,#24]
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ ror x5,x0,#1
+ and x17,x22,x21
+ ror x4,x13,#19
+ bic x28,x23,x21
+ ror x6,x25,#28
+ add x24,x24,x14 // h+=X[i]
+ eor x16,x16,x21,ror#18
+ eor x5,x5,x0,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x21,ror#41 // Sigma1(e)
+ eor x6,x6,x25,ror#34
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x4,x4,x13,ror#61
+ eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
+ add x24,x24,x16 // h+=Sigma1(e)
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x6,x25,ror#39 // Sigma0(a)
+ eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
+ add x15,x15,x8
+ add x20,x20,x24 // d+=h
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x15,x15,x5
+ add x24,x24,x17 // h+=Sigma0(a)
+ add x15,x15,x4
+ ldr x4,[sp,#8]
+ str x7,[sp,#0]
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ ror x6,x1,#1
+ and x17,x21,x20
+ ror x5,x14,#19
+ bic x19,x22,x20
+ ror x7,x24,#28
+ add x23,x23,x15 // h+=X[i]
+ eor x16,x16,x20,ror#18
+ eor x6,x6,x1,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x20,ror#41 // Sigma1(e)
+ eor x7,x7,x24,ror#34
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x5,x5,x14,ror#61
+ eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
+ add x23,x23,x16 // h+=Sigma1(e)
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x7,x24,ror#39 // Sigma0(a)
+ eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
+ add x0,x0,x9
+ add x27,x27,x23 // d+=h
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x0,x0,x6
+ add x23,x23,x17 // h+=Sigma0(a)
+ add x0,x0,x5
+ ldr x5,[sp,#16]
+ str x8,[sp,#8]
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ ror x7,x2,#1
+ and x17,x20,x27
+ ror x6,x15,#19
+ bic x28,x21,x27
+ ror x8,x23,#28
+ add x22,x22,x0 // h+=X[i]
+ eor x16,x16,x27,ror#18
+ eor x7,x7,x2,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x27,ror#41 // Sigma1(e)
+ eor x8,x8,x23,ror#34
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x6,x6,x15,ror#61
+ eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
+ add x22,x22,x16 // h+=Sigma1(e)
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x8,x23,ror#39 // Sigma0(a)
+ eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
+ add x1,x1,x10
+ add x26,x26,x22 // d+=h
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x1,x1,x7
+ add x22,x22,x17 // h+=Sigma0(a)
+ add x1,x1,x6
+ ldr x6,[sp,#24]
+ str x9,[sp,#16]
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ ror x8,x3,#1
+ and x17,x27,x26
+ ror x7,x0,#19
+ bic x19,x20,x26
+ ror x9,x22,#28
+ add x21,x21,x1 // h+=X[i]
+ eor x16,x16,x26,ror#18
+ eor x8,x8,x3,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x26,ror#41 // Sigma1(e)
+ eor x9,x9,x22,ror#34
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x7,x7,x0,ror#61
+ eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
+ add x21,x21,x16 // h+=Sigma1(e)
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x9,x22,ror#39 // Sigma0(a)
+ eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
+ add x2,x2,x11
+ add x25,x25,x21 // d+=h
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x2,x2,x8
+ add x21,x21,x17 // h+=Sigma0(a)
+ add x2,x2,x7
+ ldr x7,[sp,#0]
+ str x10,[sp,#24]
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ ror x9,x4,#1
+ and x17,x26,x25
+ ror x8,x1,#19
+ bic x28,x27,x25
+ ror x10,x21,#28
+ add x20,x20,x2 // h+=X[i]
+ eor x16,x16,x25,ror#18
+ eor x9,x9,x4,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x25,ror#41 // Sigma1(e)
+ eor x10,x10,x21,ror#34
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x8,x8,x1,ror#61
+ eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
+ add x20,x20,x16 // h+=Sigma1(e)
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x10,x21,ror#39 // Sigma0(a)
+ eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
+ add x3,x3,x12
+ add x24,x24,x20 // d+=h
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x3,x3,x9
+ add x20,x20,x17 // h+=Sigma0(a)
+ add x3,x3,x8
+ cbnz x19,.Loop_16_xx
+
+ ldp x0,x2,[x29,#96]
+ ldr x1,[x29,#112]
+	sub	x30,x30,#648		// rewind K pointer (80 constants + terminator = 81*8)
+
+ ldp x3,x4,[x0]
+ ldp x5,x6,[x0,#2*8]
+ add x1,x1,#14*8 // advance input pointer
+ ldp x7,x8,[x0,#4*8]
+ add x20,x20,x3
+ ldp x9,x10,[x0,#6*8]
+ add x21,x21,x4
+ add x22,x22,x5
+ add x23,x23,x6
+ stp x20,x21,[x0]
+ add x24,x24,x7
+ add x25,x25,x8
+ stp x22,x23,[x0,#2*8]
+ add x26,x26,x9
+ add x27,x27,x10
+ cmp x1,x2
+ stp x24,x25,[x0,#4*8]
+ stp x26,x27,[x0,#6*8]
+ b.ne .Loop
+
+ ldp x19,x20,[x29,#16]
+ add sp,sp,#4*8
+ ldp x21,x22,[x29,#32]
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#128
+ ret
+.size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
+
+
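+// Same interface as the scalar routine above, but implemented with the
+// optional Armv8 SHA-512 crypto extension; sha512h/sha512h2/sha512su0/
+// sha512su1 are emitted as raw .inst words so that assemblers without
+// support for the extension can still build this file.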
+.globl zfs_sha512_block_armv8
+.type zfs_sha512_block_armv8,%function
+.align 6
+zfs_sha512_block_armv8:
+ hint #34 // bti c
+.Lv8_entry:
+	// Armv8.3-A PAuth: x30 is pushed to the stack here but is never popped later
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ ld1 {v16.16b-v19.16b},[x1],#64 // load input
+ ld1 {v20.16b-v23.16b},[x1],#64
+
+ ld1 {v0.2d-v3.2d},[x0] // load context
+ adr x3,.LK512
+
+ rev64 v16.16b,v16.16b
+ rev64 v17.16b,v17.16b
+ rev64 v18.16b,v18.16b
+ rev64 v19.16b,v19.16b
+ rev64 v20.16b,v20.16b
+ rev64 v21.16b,v21.16b
+ rev64 v22.16b,v22.16b
+ rev64 v23.16b,v23.16b
+ b .Loop_hw
+
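+// Each iteration consumes one 128-byte block: v0-v3 hold the 512-bit
+// state (offloaded to v26-v29 for the final accumulate), v16-v23 hold
+// the message schedule, and x3 walks the .LK512 table.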
+.align 4
+.Loop_hw:
+ ld1 {v24.2d},[x3],#16
+ subs x2,x2,#1
+ sub x4,x1,#128
+ orr v26.16b,v0.16b,v0.16b // offload
+ orr v27.16b,v1.16b,v1.16b
+ orr v28.16b,v2.16b,v2.16b
+ orr v29.16b,v3.16b,v3.16b
+ csel x1,x1,x4,ne // conditional rewind
+ add v24.2d,v24.2d,v16.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
+ ext v7.16b,v20.16b,v21.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v25.2d,v25.2d,v17.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
+ ext v7.16b,v21.16b,v22.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ add v24.2d,v24.2d,v18.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
+ ext v7.16b,v22.16b,v23.16b,#8
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ add v25.2d,v25.2d,v19.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
+ ext v7.16b,v23.16b,v16.16b,#8
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ add v24.2d,v24.2d,v20.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
+ ext v7.16b,v16.16b,v17.16b,#8
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v25.2d,v25.2d,v21.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
+ ext v7.16b,v17.16b,v18.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v24.2d,v24.2d,v22.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
+ ext v7.16b,v18.16b,v19.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ add v25.2d,v25.2d,v23.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
+ ext v7.16b,v19.16b,v20.16b,#8
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ add v24.2d,v24.2d,v16.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
+ ext v7.16b,v20.16b,v21.16b,#8
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ add v25.2d,v25.2d,v17.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
+ ext v7.16b,v21.16b,v22.16b,#8
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v24.2d,v24.2d,v18.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
+ ext v7.16b,v22.16b,v23.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v25.2d,v25.2d,v19.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
+ ext v7.16b,v23.16b,v16.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ add v24.2d,v24.2d,v20.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
+ ext v7.16b,v16.16b,v17.16b,#8
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ add v25.2d,v25.2d,v21.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
+ ext v7.16b,v17.16b,v18.16b,#8
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ add v24.2d,v24.2d,v22.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
+ ext v7.16b,v18.16b,v19.16b,#8
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v25.2d,v25.2d,v23.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
+ ext v7.16b,v19.16b,v20.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v24.2d,v24.2d,v16.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
+ ext v7.16b,v20.16b,v21.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ add v25.2d,v25.2d,v17.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
+ ext v7.16b,v21.16b,v22.16b,#8
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ add v24.2d,v24.2d,v18.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
+ ext v7.16b,v22.16b,v23.16b,#8
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ add v25.2d,v25.2d,v19.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
+ ext v7.16b,v23.16b,v16.16b,#8
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v24.2d,v24.2d,v20.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
+ ext v7.16b,v16.16b,v17.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v25.2d,v25.2d,v21.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
+ ext v7.16b,v17.16b,v18.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ add v24.2d,v24.2d,v22.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
+ ext v7.16b,v18.16b,v19.16b,#8
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ add v25.2d,v25.2d,v23.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
+ ext v7.16b,v19.16b,v20.16b,#8
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ add v24.2d,v24.2d,v16.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
+ ext v7.16b,v20.16b,v21.16b,#8
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v25.2d,v25.2d,v17.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
+ ext v7.16b,v21.16b,v22.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v24.2d,v24.2d,v18.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
+ ext v7.16b,v22.16b,v23.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ add v25.2d,v25.2d,v19.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
+ ext v7.16b,v23.16b,v16.16b,#8
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ add v24.2d,v24.2d,v20.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
+ ext v7.16b,v16.16b,v17.16b,#8
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ add v25.2d,v25.2d,v21.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
+ ext v7.16b,v17.16b,v18.16b,#8
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v24.2d,v24.2d,v22.2d
+ ld1 {v25.2d},[x3],#16
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
+ ext v7.16b,v18.16b,v19.16b,#8
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ add v25.2d,v25.2d,v23.2d
+ ld1 {v24.2d},[x3],#16
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
+ ext v7.16b,v19.16b,v20.16b,#8
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ ld1 {v25.2d},[x3],#16
+ add v24.2d,v24.2d,v16.2d
+ ld1 {v16.16b},[x1],#16 // load next input
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ rev64 v16.16b,v16.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ ld1 {v24.2d},[x3],#16
+ add v25.2d,v25.2d,v17.2d
+ ld1 {v17.16b},[x1],#16 // load next input
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ rev64 v17.16b,v17.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+ ld1 {v25.2d},[x3],#16
+ add v24.2d,v24.2d,v18.2d
+ ld1 {v18.16b},[x1],#16 // load next input
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ rev64 v18.16b,v18.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ ld1 {v24.2d},[x3],#16
+ add v25.2d,v25.2d,v19.2d
+ ld1 {v19.16b},[x1],#16 // load next input
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v2.16b,v3.16b,#8
+ ext v6.16b,v1.16b,v2.16b,#8
+ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
+ rev64 v19.16b,v19.16b
+ add v4.2d,v1.2d,v3.2d // "D + T1"
+ .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
+ ld1 {v25.2d},[x3],#16
+ add v24.2d,v24.2d,v20.2d
+ ld1 {v20.16b},[x1],#16 // load next input
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v4.16b,v2.16b,#8
+ ext v6.16b,v0.16b,v4.16b,#8
+ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
+ rev64 v20.16b,v20.16b
+ add v1.2d,v0.2d,v2.2d // "D + T1"
+ .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
+ ld1 {v24.2d},[x3],#16
+ add v25.2d,v25.2d,v21.2d
+ ld1 {v21.16b},[x1],#16 // load next input
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v1.16b,v4.16b,#8
+ ext v6.16b,v3.16b,v1.16b,#8
+ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
+ rev64 v21.16b,v21.16b
+ add v0.2d,v3.2d,v4.2d // "D + T1"
+ .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
+ ld1 {v25.2d},[x3],#16
+ add v24.2d,v24.2d,v22.2d
+ ld1 {v22.16b},[x1],#16 // load next input
+ ext v24.16b,v24.16b,v24.16b,#8
+ ext v5.16b,v0.16b,v1.16b,#8
+ ext v6.16b,v2.16b,v0.16b,#8
+ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
+ rev64 v22.16b,v22.16b
+ add v3.2d,v2.2d,v1.2d // "D + T1"
+ .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
+	sub	x3,x3,#80*8	// rewind K pointer (40 ld1 loads of 16 bytes)
+ add v25.2d,v25.2d,v23.2d
+ ld1 {v23.16b},[x1],#16 // load next input
+ ext v25.16b,v25.16b,v25.16b,#8
+ ext v5.16b,v3.16b,v0.16b,#8
+ ext v6.16b,v4.16b,v3.16b,#8
+ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
+ .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
+ rev64 v23.16b,v23.16b
+ add v2.2d,v4.2d,v0.2d // "D + T1"
+ .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
+ add v0.2d,v0.2d,v26.2d // accumulate
+ add v1.2d,v1.2d,v27.2d
+ add v2.2d,v2.2d,v28.2d
+ add v3.2d,v3.2d,v29.2d
+
+ cbnz x2,.Loop_hw
+
+ st1 {v0.2d-v3.2d},[x0] // store context
+
+ ldr x29,[sp],#16
+ ret
+.size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S
new file mode 100644
index 000000000000..3ae66626df31
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S
@@ -0,0 +1,2774 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
+#if defined(__arm__)
+
+#ifndef __ARM_ARCH
+# define __ARM_ARCH__ 7
+#else
+# define __ARM_ARCH__ __ARM_ARCH
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+
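+@ Scalar SHA-256 block routine: r0 = eight 32-bit state words,
+@ r1 = message, r2 = number of 64-byte blocks (turned into an end
+@ pointer via lsl#6 below).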
+.align 5
+.globl zfs_sha256_block_armv7
+.type zfs_sha256_block_armv7,%function
+zfs_sha256_block_armv7:
+.Lzfs_sha256_block_armv7:
+
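+@ In ARM state, pc reads as the address of the current instruction plus 8,
+@ so the sub below computes this label's address.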
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
+ sub r3,pc,#8 @ zfs_sha256_block_armv7
+#else
+ adr r3,.Lzfs_sha256_block_armv7
+#endif
+
+	add	r2,r1,r2,lsl#6	@ point r2 at the end of the input (len is in 64-byte blocks)
+ stmdb sp!,{r0,r1,r2,r4-r11,lr}
+ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+ sub r14,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ magic
+ eor r12,r12,r12
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 0
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 0
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 0==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 0<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 1
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 1
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 1==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 1<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 2
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 2
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 2==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 2<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 3
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 3
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 3==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 3<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 4
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 4
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 4==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 4<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 5
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 5==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 5<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 6
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 6
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 6==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 6<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 7
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 7==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 7<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 8
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 8
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 8==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 8<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 9
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 9
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 9==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 9<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 10
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 10
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 10==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 10<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 11
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 11
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 11==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 11<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 12
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 12
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 12==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 12<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 13
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 13
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 13==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 13<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 14
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 14
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 14==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 14<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 15
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 15
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 15==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 15<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+.Lrounds_16_xx:
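+@ Rounds 16..63 run here, sixteen per pass. Each round first extends the
+@ message schedule in place on the stack,
+@   X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9]  (indices mod 16,
+@ i.e. the standard W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]),
+@ then performs the same compression step as rounds 0..15 above.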
+ @ ldr r2,[sp,#1*4] @ 16
+ @ ldr r1,[sp,#14*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#0*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#9*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 16==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 16<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#2*4] @ 17
+ @ ldr r1,[sp,#15*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#1*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#10*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 17==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 17<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#3*4] @ 18
+ @ ldr r1,[sp,#0*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#2*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#11*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 18==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 18<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#4*4] @ 19
+ @ ldr r1,[sp,#1*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#3*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#12*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 19==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 19<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#5*4] @ 20
+ @ ldr r1,[sp,#2*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#4*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#13*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 20==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 20<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#6*4] @ 21
+ @ ldr r1,[sp,#3*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#5*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#14*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 21==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 21<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#7*4] @ 22
+ @ ldr r1,[sp,#4*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#6*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#15*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 22==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 22<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#8*4] @ 23
+ @ ldr r1,[sp,#5*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#7*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#0*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 23==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 23<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#9*4] @ 24
+ @ ldr r1,[sp,#6*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#8*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#1*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 24==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 24<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#10*4] @ 25
+ @ ldr r1,[sp,#7*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#9*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#2*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 25==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 25<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#11*4] @ 26
+ @ ldr r1,[sp,#8*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#10*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#3*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 26==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 26<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#12*4] @ 27
+ @ ldr r1,[sp,#9*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#11*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#4*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 27==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 27<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#13*4] @ 28
+ @ ldr r1,[sp,#10*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#12*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#5*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 28==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 28<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#14*4] @ 29
+ @ ldr r1,[sp,#11*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#13*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#6*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 29==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 29<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#15*4] @ 30
+ @ ldr r1,[sp,#12*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#14*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#7*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 30==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 30<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#0*4] @ 31
+ @ ldr r1,[sp,#13*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#15*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#8*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 31==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 31<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#ifdef __thumb2__
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq r3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
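+@ The "cmp #0xf2" test in round 31 matched the low byte of the final K256
+@ constant (0xc67178f2), so reaching this point means all 64 rounds are
+@ complete: fold the working registers back into the context, reload the
+@ saved input and limit pointers, rewind the K256 pointer, and loop for
+@ the next 64-byte block.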
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r0,[r3,#0]
+ ldr r2,[r3,#4]
+ ldr r12,[r3,#8]
+ add r4,r4,r0
+ ldr r0,[r3,#12]
+ add r5,r5,r2
+ ldr r2,[r3,#16]
+ add r6,r6,r12
+ ldr r12,[r3,#20]
+ add r7,r7,r0
+ ldr r0,[r3,#24]
+ add r8,r8,r2
+ ldr r2,[r3,#28]
+ add r9,r9,r12
+ ldr r1,[sp,#17*4] @ pull inp
+ ldr r12,[sp,#18*4] @ pull inp+len
+ add r10,r10,r0
+ add r11,r11,r2
+ stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+ cmp r1,r12
+ sub r14,r14,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#19*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
+
+#if __ARM_ARCH__ >= 7
+.arch armv7-a
+.fpu neon
+
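+@ The NEON variant keeps the eight working variables in scalar registers
+@ (the round function itself does not vectorize well) and uses the q
+@ registers only for the message schedule: the X[i]+K256[i] sums are
+@ computed four at a time and staged on a 16-byte-aligned stack frame for
+@ the scalar rounds to consume.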
+.globl zfs_sha256_block_neon
+.type zfs_sha256_block_neon,%function
+.align 5
+.skip 16
+zfs_sha256_block_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub r11,sp,#16*4+16
+ adr r14,K256
+ bic r11,r11,#15 @ align for 128-bit stores
+ mov r12,sp
+ mov sp,r11 @ alloca
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {q0},[r1]!
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ vld1.32 {q8},[r14,:128]!
+ vld1.32 {q9},[r14,:128]!
+ vld1.32 {q10},[r14,:128]!
+ vld1.32 {q11},[r14,:128]!
+ vrev32.8 q0,q0 @ yes, even on
+ str r0,[sp,#64]
+ vrev32.8 q1,q1 @ big-endian
+ str r1,[sp,#68]
+ mov r1,sp
+ vrev32.8 q2,q2
+ str r2,[sp,#72]
+ vrev32.8 q3,q3
+ str r12,[sp,#76] @ save original sp
+ vadd.i32 q8,q8,q0
+ vadd.i32 q9,q9,q1
+ vst1.32 {q8},[r1,:128]!
+ vadd.i32 q10,q10,q2
+ vst1.32 {q9},[r1,:128]!
+ vadd.i32 q11,q11,q3
+ vst1.32 {q10},[r1,:128]!
+ vst1.32 {q11},[r1,:128]!
+
+ ldmia r0,{r4-r11}
+ sub r1,r1,#64
+ ldr r2,[sp,#0]
+ eor r12,r12,r12
+ eor r3,r5,r6
+ b .L_00_48
+
+.align 4
+.L_00_48:
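+@ Each pass interleaves sixteen scalar rounds with the NEON schedule
+@ update for the next sixteen words: vext.8 lines up W[t-15] and W[t-7],
+@ the vshr/vsli pairs build the sigma0/sigma1 rotations, and vadd.i32
+@ pre-adds the K256 constants before vst1.32 spills the round inputs to
+@ the stack.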
+ vext.8 q8,q0,q1,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q2,q3,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q0,q0,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d7,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d7,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d7,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q0,q0,q9
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d7,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d7,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d0,d0,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d0,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d0,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d0,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ vshr.u32 d24,d0,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d0,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d1,d1,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q0
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q1,q2,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q3,q0,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q1,q1,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d1,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d1,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d1,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q1,q1,q9
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d1,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d1,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d2,d2,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d2,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d2,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d2,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ vshr.u32 d24,d2,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d2,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d3,d3,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q1
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vext.8 q8,q2,q3,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q0,q1,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q2,q2,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d3,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d3,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d3,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q2,q2,q9
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d3,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d3,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d4,d4,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d4,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d4,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d4,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ vshr.u32 d24,d4,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d4,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d5,d5,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q2
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q3,q0,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q1,q2,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q3,q3,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d5,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d5,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d5,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q3,q3,q9
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d5,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d5,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d6,d6,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d6,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d6,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d6,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ vshr.u32 d24,d6,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d6,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d7,d7,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q3
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[r14]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ teq r2,#0 @ check for K256 terminator
+ ldr r2,[sp,#0]
+ sub r1,r1,#64
+ bne .L_00_48
+
+ ldr r1,[sp,#68]
+ ldr r0,[sp,#72]
+ sub r14,r14,#256 @ rewind r14
+ teq r1,r0
+ it eq
+ subeq r1,r1,#64 @ avoid SEGV
+ vld1.8 {q0},[r1]! @ load next input block
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ it ne
+ strne r1,[sp,#68]
+ mov r1,sp
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q0,q0
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q0
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q1,q1
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q1
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q2,q2
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q2
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q3,q3
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q3
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#64]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ ldr r0,[r2,#0]
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r12,[r2,#4]
+ ldr r3,[r2,#8]
+ ldr r1,[r2,#12]
+ add r4,r4,r0 @ accumulate
+ ldr r0,[r2,#16]
+ add r5,r5,r12
+ ldr r12,[r2,#20]
+ add r6,r6,r3
+ ldr r3,[r2,#24]
+ add r7,r7,r1
+ ldr r1,[r2,#28]
+ add r8,r8,r0
+ str r4,[r2],#4
+ add r9,r9,r12
+ str r5,[r2],#4
+ add r10,r10,r3
+ str r6,[r2],#4
+ add r11,r11,r1
+ str r7,[r2],#4
+ stmia r2,{r8-r11}
+
+ ittte ne
+ movne r1,sp
+ ldrne r2,[sp,#0]
+ eorne r12,r12,r12
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne r3,r5,r6
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size zfs_sha256_block_neon,.-zfs_sha256_block_neon
+
+# if defined(__thumb2__)
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
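+@ INST() emits the ARMv8 SHA-256 instructions (sha256h, sha256h2,
+@ sha256su0, sha256su1) as raw byte sequences so the file still assembles
+@ on toolchains that predate the crypto-extension mnemonics; the Thumb-2
+@ form reorders the bytes to match that encoding.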
+.globl zfs_sha256_block_armv8
+.type zfs_sha256_block_armv8,%function
+.align 5
+zfs_sha256_block_armv8:
+.LARMv8:
+ vld1.32 {q0,q1},[r0]
+ sub r3,r3,#256+32
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+ b .Loop_v8
+
+.align 4
+.Loop_v8:
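+@ With the crypto extensions the whole hash state lives in q0/q1
+@ (snapshotted to q14/q15 per block). Each group below loads four K256
+@ constants, adds them to a schedule vector, and retires four rounds with
+@ one sha256h/sha256h2 pair while sha256su0/sha256su1 extend the schedule.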
+ vld1.8 {q8-q9},[r1]!
+ vld1.8 {q10-q11},[r1]!
+ vld1.32 {q12},[r3]!
+ vrev32.8 q8,q8
+ vrev32.8 q9,q9
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vmov q14,q0 @ offload
+ vmov q15,q1
+ teq r1,r2
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vld1.32 {q13},[r3]
+ vadd.i32 q12,q12,q10
+ sub r3,r3,#256-16 @ rewind
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vadd.i32 q13,q13,q11
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vadd.i32 q0,q0,q14
+ vadd.i32 q1,q1,q15
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {q0,q1},[r0]
+
+ bx lr @ bx lr
+.size zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
+
+#endif // #if __ARM_ARCH__ >= 7
+#endif // #if defined(__arm__)
diff --git a/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S
new file mode 100644
index 000000000000..66d7dd3cf0f7
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S
@@ -0,0 +1,1827 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
+#if defined(__arm__)
+
+#ifndef __ARM_ARCH
+# define __ARM_ARCH__ 7
+#else
+# define __ARM_ARCH__ __ARM_ARCH
+#endif
+
+#ifndef __KERNEL__
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
+#endif
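+@ Example: K[0] = 0x428a2f98d728ae22 is emitted as .word 0xd728ae22,0x428a2f98
+@ on little-endian (LO=0, HI=4) and as .word 0x428a2f98,0xd728ae22 on
+@ big-endian, so ldr rX,[rK,#LO] / ldr rX,[rK,#HI] always fetch the halves.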
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+# define adrl adr
+#else
+.code 32
+#endif
+
+.text
+
+.type K512,%object
+.align 5
+K512:
+ WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+ WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+ WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+ WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+ WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+ WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+ WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+ WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+ WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+ WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+ WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+ WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+ WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+ WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+ WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+ WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+ WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+ WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+ WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+ WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+ WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+ WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+ WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+ WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+ WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+ WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+ WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+ WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+ WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+ WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+ WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+ WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+ WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+ WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+ WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+ WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+ WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size K512,.-K512
+.word 0 @ terminator
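+@ (K512 holds the 80 round constants of FIPS 180-4: the first 64 bits of the
+@ fractional parts of the cube roots of the first eighty primes; the zero
+@ word above merely terminates the table)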
+
+.align 5
+.globl zfs_sha512_block_armv7
+.type zfs_sha512_block_armv7,%function
+zfs_sha512_block_armv7:
+.Lzfs_sha512_block_armv7:
+
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
+ sub r3,pc,#8 @ zfs_sha512_block_armv7
+#else
+ adr r3,.Lzfs_sha512_block_armv7
+#endif
+
+ add r2,r1,r2,lsl#7 @ len to point at the end of inp
+ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
+ sub r14,r3,#672 @ K512
+ sub sp,sp,#9*8
+
+ ldr r7,[r0,#32+LO]
+ ldr r8,[r0,#32+HI]
+ ldr r9, [r0,#48+LO]
+ ldr r10, [r0,#48+HI]
+ ldr r11, [r0,#56+LO]
+ ldr r12, [r0,#56+HI]
+.Loop:
+ str r9, [sp,#48+0]
+ str r10, [sp,#48+4]
+ str r11, [sp,#56+0]
+ str r12, [sp,#56+4]
+ ldr r5,[r0,#0+LO]
+ ldr r6,[r0,#0+HI]
+ ldr r3,[r0,#8+LO]
+ ldr r4,[r0,#8+HI]
+ ldr r9, [r0,#16+LO]
+ ldr r10, [r0,#16+HI]
+ ldr r11, [r0,#24+LO]
+ ldr r12, [r0,#24+HI]
+ str r3,[sp,#8+0]
+ str r4,[sp,#8+4]
+ str r9, [sp,#16+0]
+ str r10, [sp,#16+4]
+ str r11, [sp,#24+0]
+ str r12, [sp,#24+4]
+ ldr r3,[r0,#40+LO]
+ ldr r4,[r0,#40+HI]
+ str r3,[sp,#40+0]
+ str r4,[sp,#40+4]
+
+.L00_15:
+#if __ARM_ARCH__<7
+ ldrb r3,[r1,#7]
+ ldrb r9, [r1,#6]
+ ldrb r10, [r1,#5]
+ ldrb r11, [r1,#4]
+ ldrb r4,[r1,#3]
+ ldrb r12, [r1,#2]
+ orr r3,r3,r9,lsl#8
+ ldrb r9, [r1,#1]
+ orr r3,r3,r10,lsl#16
+ ldrb r10, [r1],#8
+ orr r3,r3,r11,lsl#24
+ orr r4,r4,r12,lsl#8
+ orr r4,r4,r9,lsl#16
+ orr r4,r4,r10,lsl#24
+#else
+ ldr r3,[r1,#4]
+ ldr r4,[r1],#8
+#ifdef __ARMEL__
+ rev r3,r3
+ rev r4,r4
+#endif
+#endif
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
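+	@ (rule: for n<32, ROTR64(x,n).lo = lo>>n|hi<<(32-n) and
+	@ .hi = hi>>n|lo<<(32-n); for n>=32 the halves swap roles, shift n-32)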
+ mov r9,r7,lsr#14
+ str r3,[sp,#64+0]
+ mov r10,r8,lsr#14
+ str r4,[sp,#64+4]
+ eor r9,r9,r8,lsl#18
+ ldr r11,[sp,#56+0] @ h.lo
+ eor r10,r10,r7,lsl#18
+ ldr r12,[sp,#56+4] @ h.hi
+ eor r9,r9,r7,lsr#18
+ eor r10,r10,r8,lsr#18
+ eor r9,r9,r8,lsl#14
+ eor r10,r10,r7,lsl#14
+ eor r9,r9,r8,lsr#9
+ eor r10,r10,r7,lsr#9
+ eor r9,r9,r7,lsl#23
+ eor r10,r10,r8,lsl#23 @ Sigma1(e)
+ adds r3,r3,r9
+ ldr r9,[sp,#40+0] @ f.lo
+ adc r4,r4,r10 @ T += Sigma1(e)
+ ldr r10,[sp,#40+4] @ f.hi
+ adds r3,r3,r11
+ ldr r11,[sp,#48+0] @ g.lo
+ adc r4,r4,r12 @ T += h
+ ldr r12,[sp,#48+4] @ g.hi
+
+ eor r9,r9,r11
+ str r7,[sp,#32+0]
+ eor r10,r10,r12
+ str r8,[sp,#32+4]
+ and r9,r9,r7
+ str r5,[sp,#0+0]
+ and r10,r10,r8
+ str r6,[sp,#0+4]
+ eor r9,r9,r11
+ ldr r11,[r14,#LO] @ K[i].lo
+ eor r10,r10,r12 @ Ch(e,f,g)
+ ldr r12,[r14,#HI] @ K[i].hi
+
+ adds r3,r3,r9
+ ldr r7,[sp,#24+0] @ d.lo
+ adc r4,r4,r10 @ T += Ch(e,f,g)
+ ldr r8,[sp,#24+4] @ d.hi
+ adds r3,r3,r11
+ and r9,r11,#0xff
+ adc r4,r4,r12 @ T += K[i]
+ adds r7,r7,r3
+ ldr r11,[sp,#8+0] @ b.lo
+ adc r8,r8,r4 @ d += T
+ teq r9,#148
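+	@ (0x94 is the low byte of K[15] = 0xc19bf174cf692694, i.e. the
+	@ last round handled by .L00_15)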
+
+ ldr r12,[sp,#16+0] @ c.lo
+#ifdef __thumb2__
+ it eq @ Thumb2 thing, sanity check in ARM
+#endif
+ orreq r14,r14,#1
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov r9,r5,lsr#28
+ mov r10,r6,lsr#28
+ eor r9,r9,r6,lsl#4
+ eor r10,r10,r5,lsl#4
+ eor r9,r9,r6,lsr#2
+ eor r10,r10,r5,lsr#2
+ eor r9,r9,r5,lsl#30
+ eor r10,r10,r6,lsl#30
+ eor r9,r9,r6,lsr#7
+ eor r10,r10,r5,lsr#7
+ eor r9,r9,r5,lsl#25
+ eor r10,r10,r6,lsl#25 @ Sigma0(a)
+ adds r3,r3,r9
+ and r9,r5,r11
+ adc r4,r4,r10 @ T += Sigma0(a)
+
+ ldr r10,[sp,#8+4] @ b.hi
+ orr r5,r5,r11
+ ldr r11,[sp,#16+4] @ c.hi
+ and r5,r5,r12
+ and r12,r6,r10
+ orr r6,r6,r10
+ orr r5,r5,r9 @ Maj(a,b,c).lo
+ and r6,r6,r11
+ adds r5,r5,r3
+ orr r6,r6,r12 @ Maj(a,b,c).hi
+ sub sp,sp,#8
+ adc r6,r6,r4 @ h += T
+ tst r14,#1
+ add r14,r14,#8
+ tst r14,#1
+ beq .L00_15
+ ldr r9,[sp,#184+0]
+ ldr r10,[sp,#184+4]
+ bic r14,r14,#1
+.L16_79:
+ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
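+	@ ((x)>>7 is a plain shift, not a rotate, so its HI half takes no
+	@ bits from lo)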
+ mov r3,r9,lsr#1
+ ldr r11,[sp,#80+0]
+ mov r4,r10,lsr#1
+ ldr r12,[sp,#80+4]
+ eor r3,r3,r10,lsl#31
+ eor r4,r4,r9,lsl#31
+ eor r3,r3,r9,lsr#8
+ eor r4,r4,r10,lsr#8
+ eor r3,r3,r10,lsl#24
+ eor r4,r4,r9,lsl#24
+ eor r3,r3,r9,lsr#7
+ eor r4,r4,r10,lsr#7
+ eor r3,r3,r10,lsl#25
+
+ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+ mov r9,r11,lsr#19
+ mov r10,r12,lsr#19
+ eor r9,r9,r12,lsl#13
+ eor r10,r10,r11,lsl#13
+ eor r9,r9,r12,lsr#29
+ eor r10,r10,r11,lsr#29
+ eor r9,r9,r11,lsl#3
+ eor r10,r10,r12,lsl#3
+ eor r9,r9,r11,lsr#6
+ eor r10,r10,r12,lsr#6
+ ldr r11,[sp,#120+0]
+ eor r9,r9,r12,lsl#26
+
+ ldr r12,[sp,#120+4]
+ adds r3,r3,r9
+ ldr r9,[sp,#192+0]
+ adc r4,r4,r10
+
+ ldr r10,[sp,#192+4]
+ adds r3,r3,r11
+ adc r4,r4,r12
+ adds r3,r3,r9
+ adc r4,r4,r10
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov r9,r7,lsr#14
+ str r3,[sp,#64+0]
+ mov r10,r8,lsr#14
+ str r4,[sp,#64+4]
+ eor r9,r9,r8,lsl#18
+ ldr r11,[sp,#56+0] @ h.lo
+ eor r10,r10,r7,lsl#18
+ ldr r12,[sp,#56+4] @ h.hi
+ eor r9,r9,r7,lsr#18
+ eor r10,r10,r8,lsr#18
+ eor r9,r9,r8,lsl#14
+ eor r10,r10,r7,lsl#14
+ eor r9,r9,r8,lsr#9
+ eor r10,r10,r7,lsr#9
+ eor r9,r9,r7,lsl#23
+ eor r10,r10,r8,lsl#23 @ Sigma1(e)
+ adds r3,r3,r9
+ ldr r9,[sp,#40+0] @ f.lo
+ adc r4,r4,r10 @ T += Sigma1(e)
+ ldr r10,[sp,#40+4] @ f.hi
+ adds r3,r3,r11
+ ldr r11,[sp,#48+0] @ g.lo
+ adc r4,r4,r12 @ T += h
+ ldr r12,[sp,#48+4] @ g.hi
+
+ eor r9,r9,r11
+ str r7,[sp,#32+0]
+ eor r10,r10,r12
+ str r8,[sp,#32+4]
+ and r9,r9,r7
+ str r5,[sp,#0+0]
+ and r10,r10,r8
+ str r6,[sp,#0+4]
+ eor r9,r9,r11
+ ldr r11,[r14,#LO] @ K[i].lo
+ eor r10,r10,r12 @ Ch(e,f,g)
+ ldr r12,[r14,#HI] @ K[i].hi
+
+ adds r3,r3,r9
+ ldr r7,[sp,#24+0] @ d.lo
+ adc r4,r4,r10 @ T += Ch(e,f,g)
+ ldr r8,[sp,#24+4] @ d.hi
+ adds r3,r3,r11
+ and r9,r11,#0xff
+ adc r4,r4,r12 @ T += K[i]
+ adds r7,r7,r3
+ ldr r11,[sp,#8+0] @ b.lo
+ adc r8,r8,r4 @ d += T
+ teq r9,#23
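+	@ (0x17 is the low byte of K[79] = 0x6c44198c4a475817: the final round)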
+
+ ldr r12,[sp,#16+0] @ c.lo
+#ifdef __thumb2__
+ it eq @ Thumb2 thing, sanity check in ARM
+#endif
+ orreq r14,r14,#1
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov r9,r5,lsr#28
+ mov r10,r6,lsr#28
+ eor r9,r9,r6,lsl#4
+ eor r10,r10,r5,lsl#4
+ eor r9,r9,r6,lsr#2
+ eor r10,r10,r5,lsr#2
+ eor r9,r9,r5,lsl#30
+ eor r10,r10,r6,lsl#30
+ eor r9,r9,r6,lsr#7
+ eor r10,r10,r5,lsr#7
+ eor r9,r9,r5,lsl#25
+ eor r10,r10,r6,lsl#25 @ Sigma0(a)
+ adds r3,r3,r9
+ and r9,r5,r11
+ adc r4,r4,r10 @ T += Sigma0(a)
+
+ ldr r10,[sp,#8+4] @ b.hi
+ orr r5,r5,r11
+ ldr r11,[sp,#16+4] @ c.hi
+ and r5,r5,r12
+ and r12,r6,r10
+ orr r6,r6,r10
+ orr r5,r5,r9 @ Maj(a,b,c).lo
+ and r6,r6,r11
+ adds r5,r5,r3
+ orr r6,r6,r12 @ Maj(a,b,c).hi
+ sub sp,sp,#8
+ adc r6,r6,r4 @ h += T
+ tst r14,#1
+ add r14,r14,#8
+#ifdef __thumb2__
+ ittt eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq r9,[sp,#184+0]
+ ldreq r10,[sp,#184+4]
+ beq .L16_79
+ bic r14,r14,#1
+
+ ldr r3,[sp,#8+0]
+ ldr r4,[sp,#8+4]
+ ldr r9, [r0,#0+LO]
+ ldr r10, [r0,#0+HI]
+ ldr r11, [r0,#8+LO]
+ ldr r12, [r0,#8+HI]
+ adds r9,r5,r9
+ str r9, [r0,#0+LO]
+ adc r10,r6,r10
+ str r10, [r0,#0+HI]
+ adds r11,r3,r11
+ str r11, [r0,#8+LO]
+ adc r12,r4,r12
+ str r12, [r0,#8+HI]
+
+ ldr r5,[sp,#16+0]
+ ldr r6,[sp,#16+4]
+ ldr r3,[sp,#24+0]
+ ldr r4,[sp,#24+4]
+ ldr r9, [r0,#16+LO]
+ ldr r10, [r0,#16+HI]
+ ldr r11, [r0,#24+LO]
+ ldr r12, [r0,#24+HI]
+ adds r9,r5,r9
+ str r9, [r0,#16+LO]
+ adc r10,r6,r10
+ str r10, [r0,#16+HI]
+ adds r11,r3,r11
+ str r11, [r0,#24+LO]
+ adc r12,r4,r12
+ str r12, [r0,#24+HI]
+
+ ldr r3,[sp,#40+0]
+ ldr r4,[sp,#40+4]
+ ldr r9, [r0,#32+LO]
+ ldr r10, [r0,#32+HI]
+ ldr r11, [r0,#40+LO]
+ ldr r12, [r0,#40+HI]
+ adds r7,r7,r9
+ str r7,[r0,#32+LO]
+ adc r8,r8,r10
+ str r8,[r0,#32+HI]
+ adds r11,r3,r11
+ str r11, [r0,#40+LO]
+ adc r12,r4,r12
+ str r12, [r0,#40+HI]
+
+ ldr r5,[sp,#48+0]
+ ldr r6,[sp,#48+4]
+ ldr r3,[sp,#56+0]
+ ldr r4,[sp,#56+4]
+ ldr r9, [r0,#48+LO]
+ ldr r10, [r0,#48+HI]
+ ldr r11, [r0,#56+LO]
+ ldr r12, [r0,#56+HI]
+ adds r9,r5,r9
+ str r9, [r0,#48+LO]
+ adc r10,r6,r10
+ str r10, [r0,#48+HI]
+ adds r11,r3,r11
+ str r11, [r0,#56+LO]
+ adc r12,r4,r12
+ str r12, [r0,#56+HI]
+
+ add sp,sp,#640
+ sub r14,r14,#640
+
+ teq r1,r2
+ bne .Loop
+
+ add sp,sp,#8*9 @ destroy frame
+
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
+#else
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+.word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
+
+#if __ARM_ARCH__ >= 7
+.arch armv7-a
+.fpu neon
+
+.globl zfs_sha512_block_neon
+.type zfs_sha512_block_neon,%function
+.align 4
+zfs_sha512_block_neon:
+.LNEON:
+ dmb @ errata #451034 on early Cortex A8
+ add r2,r1,r2,lsl#7 @ len to point at the end of inp
+ adr r3,K512
+ VFP_ABI_PUSH
+ vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
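+	@ d16-d23 = a,b,c,d,e,f,g,h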
+.Loop_neon:
+ vshr.u64 d24,d20,#14 @ 0
+#if 0<16
+ vld1.64 {d0},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d20,#18
+#if 0>0
+ vadd.i64 d16,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d20,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 0<16 && defined(__ARMEL__)
+ vrev64.8 d0,d0
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d0
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 1
+#if 1<16
+ vld1.64 {d1},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 1>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 1<16 && defined(__ARMEL__)
+ vrev64.8 d1,d1
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d1
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 d24,d18,#14 @ 2
+#if 2<16
+ vld1.64 {d2},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d18,#18
+#if 2>0
+ vadd.i64 d22,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d18,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 2<16 && defined(__ARMEL__)
+ vrev64.8 d2,d2
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d2
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 3
+#if 3<16
+ vld1.64 {d3},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 3>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 3<16 && defined(__ARMEL__)
+ vrev64.8 d3,d3
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d3
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 d24,d16,#14 @ 4
+#if 4<16
+ vld1.64 {d4},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d16,#18
+#if 4>0
+ vadd.i64 d20,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d16,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 4<16 && defined(__ARMEL__)
+ vrev64.8 d4,d4
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d4
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 5
+#if 5<16
+ vld1.64 {d5},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 5>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 5<16 && defined(__ARMEL__)
+ vrev64.8 d5,d5
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d5
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 d24,d22,#14 @ 6
+#if 6<16
+ vld1.64 {d6},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d22,#18
+#if 6>0
+ vadd.i64 d18,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d22,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 6<16 && defined(__ARMEL__)
+ vrev64.8 d6,d6
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d6
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 7
+#if 7<16
+ vld1.64 {d7},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 7>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 7<16 && defined(__ARMEL__)
+ vrev64.8 d7,d7
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d7
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ vshr.u64 d24,d20,#14 @ 8
+#if 8<16
+ vld1.64 {d8},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d20,#18
+#if 8>0
+ vadd.i64 d16,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d20,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 8<16 && defined(__ARMEL__)
+ vrev64.8 d8,d8
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d8
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 9
+#if 9<16
+ vld1.64 {d9},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 9>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 9<16 && defined(__ARMEL__)
+ vrev64.8 d9,d9
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d9
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 d24,d18,#14 @ 10
+#if 10<16
+ vld1.64 {d10},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d18,#18
+#if 10>0
+ vadd.i64 d22,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d18,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 10<16 && defined(__ARMEL__)
+ vrev64.8 d10,d10
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d10
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 11
+#if 11<16
+ vld1.64 {d11},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 11>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 11<16 && defined(__ARMEL__)
+ vrev64.8 d11,d11
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d11
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 d24,d16,#14 @ 12
+#if 12<16
+ vld1.64 {d12},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d16,#18
+#if 12>0
+ vadd.i64 d20,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d16,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 12<16 && defined(__ARMEL__)
+ vrev64.8 d12,d12
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d12
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 13
+#if 13<16
+ vld1.64 {d13},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 13>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 13<16 && defined(__ARMEL__)
+ vrev64.8 d13,d13
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d13
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 d24,d22,#14 @ 14
+#if 14<16
+ vld1.64 {d14},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d22,#18
+#if 14>0
+ vadd.i64 d18,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d22,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 14<16 && defined(__ARMEL__)
+ vrev64.8 d14,d14
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d14
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 15
+#if 15<16
+ vld1.64 {d15},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 15>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 15<16 && defined(__ARMEL__)
+ vrev64.8 d15,d15
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d15
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ mov r12,#4
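+	@ four 16-round passes cover rounds 16..79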
+.L16_79_neon:
+ subs r12,#1
+ vshr.u64 q12,q7,#19
+ vshr.u64 q13,q7,#61
+ vadd.i64 d16,d30 @ h+=Maj from the past
+ vshr.u64 q15,q7,#6
+ vsli.64 q12,q7,#45
+ vext.8 q14,q0,q1,#8 @ X[i+1]
+ vsli.64 q13,q7,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q0,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q4,q5,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d20,#14 @ from NEON_00_15
+ vadd.i64 q0,q14
+ vshr.u64 d25,d20,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d20,#41 @ from NEON_00_15
+ vadd.i64 q0,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 16<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
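+	@ (the guard above is constant-false for rounds >= 16, where the
+	@ message schedule already sits in registers; the generator leaves an
+	@ empty-operand vrev64.8 in this dead branch, and it never assembles)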
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d0
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 17
+#if 17<16
+ vld1.64 {d1},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 17>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 17<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d1
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 q12,q0,#19
+ vshr.u64 q13,q0,#61
+ vadd.i64 d22,d30 @ h+=Maj from the past
+ vshr.u64 q15,q0,#6
+ vsli.64 q12,q0,#45
+ vext.8 q14,q1,q2,#8 @ X[i+1]
+ vsli.64 q13,q0,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q1,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q5,q6,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d18,#14 @ from NEON_00_15
+ vadd.i64 q1,q14
+ vshr.u64 d25,d18,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d18,#41 @ from NEON_00_15
+ vadd.i64 q1,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 18<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d2
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 19
+#if 19<16
+ vld1.64 {d3},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 19>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 19<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d3
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 q12,q1,#19
+ vshr.u64 q13,q1,#61
+ vadd.i64 d20,d30 @ h+=Maj from the past
+ vshr.u64 q15,q1,#6
+ vsli.64 q12,q1,#45
+ vext.8 q14,q2,q3,#8 @ X[i+1]
+ vsli.64 q13,q1,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q2,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q6,q7,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d16,#14 @ from NEON_00_15
+ vadd.i64 q2,q14
+ vshr.u64 d25,d16,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d16,#41 @ from NEON_00_15
+ vadd.i64 q2,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 20<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d4
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 21
+#if 21<16
+ vld1.64 {d5},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 21>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 21<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d5
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 q12,q2,#19
+ vshr.u64 q13,q2,#61
+ vadd.i64 d18,d30 @ h+=Maj from the past
+ vshr.u64 q15,q2,#6
+ vsli.64 q12,q2,#45
+ vext.8 q14,q3,q4,#8 @ X[i+1]
+ vsli.64 q13,q2,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q3,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q7,q0,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d22,#14 @ from NEON_00_15
+ vadd.i64 q3,q14
+ vshr.u64 d25,d22,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d22,#41 @ from NEON_00_15
+ vadd.i64 q3,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 22<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d6
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 23
+#if 23<16
+ vld1.64 {d7},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 23>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 23<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d7
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ vshr.u64 q12,q3,#19
+ vshr.u64 q13,q3,#61
+ vadd.i64 d16,d30 @ h+=Maj from the past
+ vshr.u64 q15,q3,#6
+ vsli.64 q12,q3,#45
+ vext.8 q14,q4,q5,#8 @ X[i+1]
+ vsli.64 q13,q3,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q4,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q0,q1,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d20,#14 @ from NEON_00_15
+ vadd.i64 q4,q14
+ vshr.u64 d25,d20,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d20,#41 @ from NEON_00_15
+ vadd.i64 q4,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vmov d29,d20
+ vsli.64 d26,d20,#23
+#if 24<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d21,d22 @ Ch(e,f,g)
+ vshr.u64 d24,d16,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d23
+ vshr.u64 d25,d16,#34
+ vsli.64 d24,d16,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d16,#39
+ vadd.i64 d28,d8
+ vsli.64 d25,d16,#30
+ veor d30,d16,d17
+ vsli.64 d26,d16,#25
+ veor d23,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d18,d17 @ Maj(a,b,c)
+ veor d23,d26 @ Sigma0(a)
+ vadd.i64 d19,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 25
+#if 25<16
+ vld1.64 {d9},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+#if 25>0
+ vadd.i64 d23,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vmov d29,d19
+ vsli.64 d26,d19,#23
+#if 25<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d20,d21 @ Ch(e,f,g)
+ vshr.u64 d24,d23,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d22
+ vshr.u64 d25,d23,#34
+ vsli.64 d24,d23,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d23,#39
+ vadd.i64 d28,d9
+ vsli.64 d25,d23,#30
+ veor d30,d23,d16
+ vsli.64 d26,d23,#25
+ veor d22,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d17,d16 @ Maj(a,b,c)
+ veor d22,d26 @ Sigma0(a)
+ vadd.i64 d18,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d22,d30
+ vshr.u64 q12,q4,#19
+ vshr.u64 q13,q4,#61
+ vadd.i64 d22,d30 @ h+=Maj from the past
+ vshr.u64 q15,q4,#6
+ vsli.64 q12,q4,#45
+ vext.8 q14,q5,q6,#8 @ X[i+1]
+ vsli.64 q13,q4,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q5,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q1,q2,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d18,#14 @ from NEON_00_15
+ vadd.i64 q5,q14
+ vshr.u64 d25,d18,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d18,#41 @ from NEON_00_15
+ vadd.i64 q5,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vmov d29,d18
+ vsli.64 d26,d18,#23
+#if 26<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d19,d20 @ Ch(e,f,g)
+ vshr.u64 d24,d22,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d21
+ vshr.u64 d25,d22,#34
+ vsli.64 d24,d22,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d22,#39
+ vadd.i64 d28,d10
+ vsli.64 d25,d22,#30
+ veor d30,d22,d23
+ vsli.64 d26,d22,#25
+ veor d21,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d16,d23 @ Maj(a,b,c)
+ veor d21,d26 @ Sigma0(a)
+ vadd.i64 d17,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 27
+#if 27<16
+ vld1.64 {d11},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+#if 27>0
+ vadd.i64 d21,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vmov d29,d17
+ vsli.64 d26,d17,#23
+#if 27<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d18,d19 @ Ch(e,f,g)
+ vshr.u64 d24,d21,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d20
+ vshr.u64 d25,d21,#34
+ vsli.64 d24,d21,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d21,#39
+ vadd.i64 d28,d11
+ vsli.64 d25,d21,#30
+ veor d30,d21,d22
+ vsli.64 d26,d21,#25
+ veor d20,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d23,d22 @ Maj(a,b,c)
+ veor d20,d26 @ Sigma0(a)
+ vadd.i64 d16,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d20,d30
+ vshr.u64 q12,q5,#19
+ vshr.u64 q13,q5,#61
+ vadd.i64 d20,d30 @ h+=Maj from the past
+ vshr.u64 q15,q5,#6
+ vsli.64 q12,q5,#45
+ vext.8 q14,q6,q7,#8 @ X[i+1]
+ vsli.64 q13,q5,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q6,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q2,q3,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d16,#14 @ from NEON_00_15
+ vadd.i64 q6,q14
+ vshr.u64 d25,d16,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d16,#41 @ from NEON_00_15
+ vadd.i64 q6,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vmov d29,d16
+ vsli.64 d26,d16,#23
+#if 28<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d17,d18 @ Ch(e,f,g)
+ vshr.u64 d24,d20,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d19
+ vshr.u64 d25,d20,#34
+ vsli.64 d24,d20,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d20,#39
+ vadd.i64 d28,d12
+ vsli.64 d25,d20,#30
+ veor d30,d20,d21
+ vsli.64 d26,d20,#25
+ veor d19,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d22,d21 @ Maj(a,b,c)
+ veor d19,d26 @ Sigma0(a)
+ vadd.i64 d23,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 29
+#if 29<16
+ vld1.64 {d13},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+#if 29>0
+ vadd.i64 d19,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vmov d29,d23
+ vsli.64 d26,d23,#23
+#if 29<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d16,d17 @ Ch(e,f,g)
+ vshr.u64 d24,d19,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d18
+ vshr.u64 d25,d19,#34
+ vsli.64 d24,d19,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d19,#39
+ vadd.i64 d28,d13
+ vsli.64 d25,d19,#30
+ veor d30,d19,d20
+ vsli.64 d26,d19,#25
+ veor d18,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d21,d20 @ Maj(a,b,c)
+ veor d18,d26 @ Sigma0(a)
+ vadd.i64 d22,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d18,d30
+ vshr.u64 q12,q6,#19
+ vshr.u64 q13,q6,#61
+ vadd.i64 d18,d30 @ h+=Maj from the past
+ vshr.u64 q15,q6,#6
+ vsli.64 q12,q6,#45
+ vext.8 q14,q7,q0,#8 @ X[i+1]
+ vsli.64 q13,q6,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q7,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q3,q4,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d22,#14 @ from NEON_00_15
+ vadd.i64 q7,q14
+ vshr.u64 d25,d22,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d22,#41 @ from NEON_00_15
+ vadd.i64 q7,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vmov d29,d22
+ vsli.64 d26,d22,#23
+#if 30<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d23,d16 @ Ch(e,f,g)
+ vshr.u64 d24,d18,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d17
+ vshr.u64 d25,d18,#34
+ vsli.64 d24,d18,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d18,#39
+ vadd.i64 d28,d14
+ vsli.64 d25,d18,#30
+ veor d30,d18,d19
+ vsli.64 d26,d18,#25
+ veor d17,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d20,d19 @ Maj(a,b,c)
+ veor d17,d26 @ Sigma0(a)
+ vadd.i64 d21,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 31
+#if 31<16
+ vld1.64 {d15},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+#if 31>0
+ vadd.i64 d17,d30 @ h+=Maj from the past
+#endif
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vmov d29,d21
+ vsli.64 d26,d21,#23
+#if 31<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ veor d25,d24
+ vbsl d29,d22,d23 @ Ch(e,f,g)
+ vshr.u64 d24,d17,#28
+ veor d26,d25 @ Sigma1(e)
+ vadd.i64 d27,d29,d16
+ vshr.u64 d25,d17,#34
+ vsli.64 d24,d17,#36
+ vadd.i64 d27,d26
+ vshr.u64 d26,d17,#39
+ vadd.i64 d28,d15
+ vsli.64 d25,d17,#30
+ veor d30,d17,d18
+ vsli.64 d26,d17,#25
+ veor d16,d24,d25
+ vadd.i64 d27,d28
+ vbsl d30,d19,d18 @ Maj(a,b,c)
+ veor d16,d26 @ Sigma0(a)
+ vadd.i64 d20,d27
+ vadd.i64 d30,d27
+ @ vadd.i64 d16,d30
+ bne .L16_79_neon
+
+ vadd.i64 d16,d30 @ h+=Maj from the past
+ vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp
+ vadd.i64 q8,q12 @ vectorized accumulate
+ vadd.i64 q9,q13
+ vadd.i64 q10,q14
+ vadd.i64 q11,q15
+ vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context
+ teq r1,r2
+ sub r3,#640 @ rewind K512
+ bne .Loop_neon
+
+ VFP_ABI_POP
+ bx lr @ .word 0xe12fff1e
+.size zfs_sha512_block_neon,.-zfs_sha512_block_neon
+#endif // #if __ARM_ARCH__ >= 7
+#endif // #if defined(__arm__)
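A C restatement of the LO/HI comment lines inside .L00_15 and .L16_79 above,
as a sketch only: it makes the two-register formulation of the 64-bit rotates
explicit and is not code from the file.

#include <stdint.h>

/* ROTR64 on a (hi,lo) 32-bit pair; valid for 0 < n < 64, n != 32.
 * For n >= 32 the halves swap roles and the shift count becomes n - 32. */
static inline uint32_t
rotr64_lo(uint32_t hi, uint32_t lo, unsigned n)
{
	return (n < 32) ? ((lo >> n) | (hi << (32 - n)))
	    : ((hi >> (n - 32)) | (lo << (64 - n)));
}

static inline uint32_t
rotr64_hi(uint32_t hi, uint32_t lo, unsigned n)
{
	return (n < 32) ? ((hi >> n) | (lo << (32 - n)))
	    : ((lo >> (n - 32)) | (hi << (64 - n)));
}

/* Sigma1(e).lo == lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23,
 * exactly the comment above the integer rounds. */
static inline uint32_t
Sigma1_lo(uint32_t hi, uint32_t lo)
{
	return rotr64_lo(hi, lo, 14) ^ rotr64_lo(hi, lo, 18) ^
	    rotr64_lo(hi, lo, 41);
}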
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S
new file mode 100644
index 000000000000..ae8d0fad7c83
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S
@@ -0,0 +1,2823 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is converted assembly: SSE2 translated to POWER8 PPC64 little endian.
+ * Tools used: SIMDe https://github.com/simd-everywhere/simde
+ */
+
+#if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ .text
+ .abiversion 2
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
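+/*
+ * The .LCPI0_* tables below are 16-byte vperm shuffle masks and mask/IV
+ * constants emitted by the SSE2-to-VSX translation in place of x86
+ * shuffle immediates; .LCPI0_1 is the first half of the BLAKE3 IV
+ * (the SHA-256 initial hash words).
+ */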
+.LCPI0_0:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI0_1:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI0_2:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_3:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_4:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_5:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_6:
+ .short 1
+ .short 2
+ .short 4
+ .short 8
+ .short 16
+ .short 32
+ .short 64
+ .short 128
+.LCPI0_7:
+ .short 0
+ .short 0
+ .short 4
+ .short 8
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI0_8:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI0_9:
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI0_10:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 7
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_12:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI0_13:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_14:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .text
+ .globl zfs_blake3_compress_in_place_sse2
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse2,@function
+zfs_blake3_compress_in_place_sse2:
+.Lfunc_begin0:
+ .cfi_startproc
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha
+ addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+ .localentry zfs_blake3_compress_in_place_sse2, .Lfunc_lep0-.Lfunc_gep0
+ li 8, -64
+ mtvsrd 35, 5
+ li 5, 16
+ lfdx 0, 0, 4
+ vspltisw 12, 9
+ stxvd2x 60, 1, 8
+ li 8, -48
+ mtvsrd 36, 7
+ lfd 2, 16(4)
+ stxvd2x 61, 1, 8
+ li 8, -32
+ lfd 1, 8(4)
+ mtvsrwz 37, 6
+ rldicl 6, 6, 32, 32
+ addis 7, 2, .LCPI0_2@toc@ha
+ stxvd2x 62, 1, 8
+ li 8, -16
+ addi 7, 7, .LCPI0_2@toc@l
+ stxvd2x 63, 1, 8
+ li 8, 0
+ lvx 9, 0, 7
+ li 7, 48
+ mtvsrd 34, 8
+ xxmrghd 32, 1, 0
+ lxvd2x 0, 0, 3
+ lxvd2x 1, 3, 5
+ lfd 3, 24(4)
+ addis 8, 2, .LCPI0_5@toc@ha
+ vmrghb 3, 2, 3
+ addi 8, 8, .LCPI0_5@toc@l
+ vmrghb 4, 2, 4
+ vspltb 2, 2, 7
+ xxmrghd 33, 3, 2
+ vpkudum 7, 1, 0
+ vmrglh 3, 2, 3
+ vmrglh 2, 2, 4
+ mtvsrwz 36, 6
+ addis 6, 2, .LCPI0_0@toc@ha
+ addi 6, 6, .LCPI0_0@toc@l
+ vperm 10, 1, 0, 9
+ vmrghw 4, 4, 5
+ xxswapd 37, 1
+ lxvd2x 1, 4, 7
+ addis 7, 2, .LCPI0_8@toc@ha
+ addi 7, 7, .LCPI0_8@toc@l
+ vmrglw 2, 2, 3
+ xxswapd 35, 0
+ xxswapd 41, 1
+ xxspltd 62, 42, 1
+ vadduwm 3, 7, 3
+ vadduwm 6, 3, 5
+ xxmrgld 36, 34, 36
+ lvx 2, 0, 6
+ addis 6, 2, .LCPI0_1@toc@ha
+ addi 6, 6, .LCPI0_1@toc@l
+ xxlxor 35, 38, 36
+ lvx 4, 0, 6
+ li 6, 32
+ lxvd2x 0, 4, 6
+ addis 4, 2, .LCPI0_3@toc@ha
+ addis 6, 2, .LCPI0_7@toc@ha
+ vperm 8, 3, 3, 2
+ vspltisw 3, 10
+ addi 4, 4, .LCPI0_3@toc@l
+ addi 6, 6, .LCPI0_7@toc@l
+ vadduwm 3, 3, 3
+ vadduwm 11, 8, 4
+ xxlxor 36, 43, 37
+ vadduwm 5, 6, 10
+ vrlw 0, 4, 3
+ vspltisw 4, 12
+ vadduwm 4, 4, 4
+ vadduwm 1, 0, 5
+ xxlxor 37, 33, 40
+ xxswapd 40, 0
+ vrlw 6, 5, 4
+ vspltisw 5, -16
+ vpkudum 13, 9, 8
+ vsubuwm 5, 12, 5
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI0_4@toc@ha
+ addi 4, 4, .LCPI0_4@toc@l
+ vadduwm 11, 6, 11
+ xxswapd 0, 38
+ vadduwm 1, 1, 13
+ xxsldwi 50, 45, 45, 1
+ xxlxor 32, 43, 32
+ xxsldwi 43, 43, 43, 3
+ xxsldwi 33, 33, 33, 1
+ vperm 12, 8, 9, 12
+ vrlw 0, 0, 5
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 0
+ vadduwm 1, 1, 12
+ vperm 6, 6, 6, 2
+ vadduwm 15, 6, 11
+ lvx 11, 0, 4
+ addis 4, 2, .LCPI0_6@toc@ha
+ addi 4, 4, .LCPI0_6@toc@l
+ xxlxor 32, 47, 32
+ lvx 17, 0, 4
+ addis 4, 2, .LCPI0_9@toc@ha
+ vperm 14, 10, 7, 11
+ addi 4, 4, .LCPI0_9@toc@l
+ vrlw 0, 0, 3
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 38
+ vrlw 6, 6, 4
+ vadduwm 8, 6, 15
+ xxswapd 0, 38
+ lvx 6, 0, 8
+ xxlxor 32, 40, 32
+ xxsldwi 40, 40, 40, 1
+ vperm 13, 12, 18, 6
+ vrlw 9, 0, 5
+ vadduwm 0, 1, 14
+ lvx 1, 0, 7
+ xxsldwi 46, 46, 46, 3
+ xxsldwi 32, 32, 32, 3
+ vperm 7, 7, 7, 1
+ vadduwm 15, 9, 0
+ xxlxor 32, 47, 0
+ vperm 16, 0, 0, 2
+ lvx 0, 0, 6
+ addis 6, 2, .LCPI0_10@toc@ha
+ vcmpequh 0, 0, 17
+ vadduwm 19, 16, 8
+ xxlxor 40, 51, 41
+ xxsel 45, 39, 45, 32
+ vrlw 31, 8, 3
+ lvx 8, 0, 4
+ addis 4, 2, .LCPI0_11@toc@ha
+ addi 4, 4, .LCPI0_11@toc@l
+ vcmpequh 7, 8, 17
+ vadduwm 8, 15, 13
+ vadduwm 15, 31, 8
+ lvx 8, 0, 4
+ addi 4, 6, .LCPI0_10@toc@l
+ lvx 17, 0, 4
+ addis 4, 2, .LCPI0_12@toc@ha
+ xxlxor 41, 47, 48
+ xxsldwi 47, 47, 47, 1
+ addi 4, 4, .LCPI0_12@toc@l
+ xxlnor 48, 39, 39
+ vrlw 29, 9, 4
+ vperm 9, 16, 16, 8
+ xxland 48, 50, 39
+ vperm 17, 30, 12, 17
+ vperm 16, 16, 16, 8
+ vmrghw 12, 12, 10
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI0_13@toc@ha
+ vadduwm 19, 29, 19
+ addi 4, 4, .LCPI0_13@toc@l
+ xxlxor 63, 51, 63
+ xxsldwi 51, 51, 51, 3
+ xxland 0, 49, 41
+ vrlw 17, 31, 5
+ xxlor 48, 0, 48
+ xxswapd 0, 61
+ vperm 18, 12, 18, 10
+ vadduwm 15, 15, 16
+ xxland 60, 48, 39
+ vadduwm 15, 17, 15
+ vperm 28, 28, 28, 8
+ xxlxor 63, 47, 0
+ vadduwm 15, 15, 18
+ vperm 31, 31, 31, 2
+ vperm 30, 18, 16, 6
+ vadduwm 19, 31, 19
+ xxlxor 44, 51, 49
+ vrlw 12, 12, 3
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 63
+ vperm 31, 13, 14, 11
+ vrlw 17, 17, 4
+ vperm 14, 14, 14, 1
+ vadduwm 15, 15, 31
+ vadduwm 19, 17, 19
+ xxswapd 0, 49
+ xxsldwi 47, 47, 47, 3
+ xxsel 46, 46, 62, 32
+ xxlxor 44, 51, 44
+ xxsldwi 51, 51, 51, 1
+ vrlw 12, 12, 5
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 19, 17, 19
+ xxlxor 44, 51, 44
+ vrlw 29, 12, 3
+ vadduwm 12, 15, 14
+ vadduwm 15, 29, 12
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI0_14@toc@ha
+ addi 4, 4, .LCPI0_14@toc@l
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ vperm 30, 13, 18, 12
+ vrlw 17, 17, 4
+ vmrghw 13, 18, 13
+ xxland 0, 62, 41
+ vadduwm 19, 17, 19
+ vperm 16, 13, 16, 10
+ xxlxor 61, 51, 61
+ xxsldwi 50, 51, 51, 3
+ xxsldwi 51, 63, 63, 3
+ vrlw 30, 29, 5
+ xxlor 61, 60, 0
+ xxswapd 0, 49
+ vperm 31, 14, 19, 11
+ vadduwm 15, 15, 29
+ vperm 19, 19, 19, 1
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 16
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 62
+ vperm 30, 16, 29, 6
+ vrlw 13, 13, 3
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 31
+ xxsldwi 63, 63, 63, 3
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 45, 50, 45
+ xxsldwi 50, 50, 50, 1
+ vrlw 13, 13, 5
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 45
+ vrlw 28, 13, 3
+ xxsel 45, 51, 62, 32
+ xxland 51, 61, 39
+ vperm 30, 14, 16, 12
+ vadduwm 15, 15, 13
+ vperm 19, 19, 19, 8
+ vmrghw 14, 16, 14
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 62, 41
+ vrlw 17, 17, 4
+ xxlor 51, 51, 0
+ vadduwm 15, 15, 19
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 60, 50, 60
+ xxsldwi 48, 50, 50, 3
+ vperm 18, 14, 29, 10
+ vrlw 30, 28, 5
+ vperm 29, 18, 19, 6
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 18
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 62
+ vperm 30, 13, 31, 11
+ vrlw 14, 14, 3
+ vperm 31, 31, 31, 1
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 46, 48, 46
+ xxsldwi 48, 48, 48, 1
+ vrlw 14, 14, 5
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 46
+ vrlw 28, 14, 3
+ xxsel 46, 63, 61, 32
+ xxland 63, 51, 39
+ vperm 29, 13, 18, 12
+ vadduwm 15, 15, 14
+ vperm 31, 31, 31, 8
+ vmrghw 13, 18, 13
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 61, 41
+ vrlw 17, 17, 4
+ xxlor 63, 63, 0
+ vperm 13, 13, 19, 10
+ xxsldwi 51, 62, 62, 3
+ vadduwm 15, 15, 31
+ vperm 30, 14, 19, 11
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 60, 48, 60
+ xxsldwi 48, 48, 48, 3
+ vrlw 29, 28, 5
+ vadduwm 15, 29, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 50, 48, 61
+ vrlw 18, 18, 3
+ vadduwm 15, 18, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 11, 17, 16
+ xxswapd 0, 49
+ xxlxor 48, 43, 50
+ xxsldwi 43, 43, 43, 1
+ vperm 18, 19, 19, 1
+ vrlw 16, 16, 5
+ vperm 19, 13, 31, 6
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 29, 17, 11
+ xxlxor 43, 61, 48
+ vrlw 16, 11, 3
+ xxsel 43, 50, 51, 32
+ xxland 50, 63, 39
+ vperm 19, 14, 13, 12
+ vadduwm 15, 15, 11
+ vperm 18, 18, 18, 8
+ vmrghw 13, 13, 14
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 51, 41
+ lvx 19, 0, 4
+ vrlw 17, 17, 4
+ xxlor 50, 50, 0
+ vperm 13, 13, 31, 10
+ xxsldwi 63, 62, 62, 3
+ vadduwm 15, 15, 18
+ vperm 19, 11, 31, 19
+ vadduwm 29, 17, 29
+ xxswapd 0, 49
+ vperm 1, 31, 31, 1
+ xxlxor 48, 61, 48
+ xxsldwi 46, 61, 61, 3
+ vperm 6, 13, 18, 6
+ vrlw 16, 16, 5
+ xxsel 32, 33, 38, 32
+ xxland 38, 50, 39
+ vadduwm 15, 16, 15
+ vperm 7, 11, 13, 12
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vperm 6, 6, 6, 8
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 3
+ vrlw 17, 17, 4
+ vadduwm 15, 15, 19
+ vadduwm 14, 17, 14
+ xxswapd 0, 49
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 5
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vadduwm 0, 15, 0
+ vperm 17, 17, 17, 2
+ xxland 0, 39, 41
+ xxlor 38, 38, 0
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 0, 16, 0
+ xxlxor 33, 32, 49
+ xxsldwi 32, 32, 32, 1
+ vrlw 1, 1, 4
+ vadduwm 0, 0, 6
+ vadduwm 8, 1, 14
+ xxswapd 0, 33
+ xxlxor 44, 40, 48
+ xxsldwi 38, 40, 40, 3
+ vrlw 7, 12, 5
+ vadduwm 0, 7, 0
+ xxlxor 33, 32, 0
+ vperm 2, 1, 1, 2
+ vmrghw 1, 13, 11
+ vadduwm 6, 2, 6
+ vperm 1, 1, 18, 10
+ xxlxor 39, 38, 39
+ vrlw 3, 7, 3
+ vadduwm 0, 0, 1
+ vadduwm 0, 3, 0
+ xxlxor 34, 32, 34
+ xxsldwi 0, 32, 32, 3
+ vrlw 2, 2, 4
+ vadduwm 4, 2, 6
+ xxswapd 2, 34
+ xxlxor 35, 36, 35
+ xxsldwi 1, 36, 36, 1
+ vrlw 3, 3, 5
+ xxlxor 0, 1, 0
+ xxswapd 0, 0
+ xxlxor 1, 35, 2
+ stxvd2x 0, 0, 3
+ xxswapd 1, 1
+ stxvd2x 1, 3, 5
+ li 3, -16
+ lxvd2x 63, 1, 3
+ li 3, -32
+ lxvd2x 62, 1, 3
+ li 3, -48
+ lxvd2x 61, 1, 3
+ li 3, -64
+ lxvd2x 60, 1, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+
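+/*
+ * For reference, a sketch (not from this file): the five arguments set up
+ * in the prologue above - r3 = cv, r4 = block, r5 = block_len, r6 = 64-bit
+ * counter (split with rldicl), r7 = flags - match a C prototype along the
+ * lines of:
+ *
+ *	extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+ *	    const uint8_t block[64], uint8_t block_len, uint64_t counter,
+ *	    uint8_t flags);
+ *
+ * The authoritative declaration lives in the accompanying BLAKE3 headers.
+ */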
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI1_0:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI1_1:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI1_2:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_3:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_4:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_5:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_6:
+ .short 1
+ .short 2
+ .short 4
+ .short 8
+ .short 16
+ .short 32
+ .short 64
+ .short 128
+.LCPI1_7:
+ .short 0
+ .short 0
+ .short 4
+ .short 8
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI1_8:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI1_9:
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 0
+ .short 64
+ .short 128
+.LCPI1_10:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 7
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_12:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI1_13:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_14:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .text
+ .globl zfs_blake3_compress_xof_sse2
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse2,@function
+zfs_blake3_compress_xof_sse2:
+.Lfunc_begin1:
+ .cfi_startproc
+.Lfunc_gep1:
+ addis 2, 12, .TOC.-.Lfunc_gep1@ha
+ addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1:
+ .localentry zfs_blake3_compress_xof_sse2, .Lfunc_lep1-.Lfunc_gep1
+ li 9, -80
+ mtvsrd 35, 5
+ li 5, 16
+ lfdx 0, 0, 4
+ addis 10, 2, .LCPI1_2@toc@ha
+ vspltisw 12, 9
+ std 30, -16(1)
+ addis 12, 2, .LCPI1_8@toc@ha
+ addis 30, 2, .LCPI1_5@toc@ha
+ addis 11, 2, .LCPI1_7@toc@ha
+ stxvd2x 60, 1, 9
+ li 9, -64
+ mtvsrd 36, 7
+ lfd 2, 16(4)
+ addi 10, 10, .LCPI1_2@toc@l
+ addi 12, 12, .LCPI1_8@toc@l
+ addi 11, 11, .LCPI1_7@toc@l
+ stxvd2x 61, 1, 9
+ li 9, -48
+ lfd 3, 24(4)
+ mtvsrwz 37, 6
+ rldicl 6, 6, 32, 32
+ lvx 9, 0, 10
+ stxvd2x 62, 1, 9
+ li 9, -32
+ li 10, 32
+ stxvd2x 63, 1, 9
+ li 9, 0
+ mtvsrd 34, 9
+ xxmrghd 33, 3, 2
+ lfd 1, 8(4)
+ vmrghb 3, 2, 3
+ vmrghb 4, 2, 4
+ vspltb 2, 2, 7
+ xxmrghd 32, 1, 0
+ lxvd2x 0, 0, 3
+ lxvd2x 1, 3, 5
+ vpkudum 7, 1, 0
+ vmrglh 3, 2, 3
+ vmrglh 2, 2, 4
+ mtvsrwz 36, 6
+ addis 6, 2, .LCPI1_0@toc@ha
+ addi 6, 6, .LCPI1_0@toc@l
+ vperm 10, 1, 0, 9
+ vmrghw 4, 4, 5
+ xxswapd 37, 1
+ vmrglw 2, 2, 3
+ xxswapd 35, 0
+ lxvd2x 0, 4, 10
+ xxspltd 62, 42, 1
+ vadduwm 3, 7, 3
+ vadduwm 6, 3, 5
+ xxmrgld 36, 34, 36
+ lvx 2, 0, 6
+ addis 6, 2, .LCPI1_1@toc@ha
+ addi 6, 6, .LCPI1_1@toc@l
+ xxlxor 35, 38, 36
+ lvx 4, 0, 6
+ li 6, 48
+ lxvd2x 1, 4, 6
+ addis 4, 2, .LCPI1_3@toc@ha
+ vperm 8, 3, 3, 2
+ vspltisw 3, 10
+ addi 4, 4, .LCPI1_3@toc@l
+ xxswapd 41, 1
+ vadduwm 3, 3, 3
+ vadduwm 11, 8, 4
+ xxlxor 36, 43, 37
+ vadduwm 5, 6, 10
+ vrlw 0, 4, 3
+ vspltisw 4, 12
+ vadduwm 4, 4, 4
+ vadduwm 1, 0, 5
+ xxlxor 37, 33, 40
+ xxswapd 40, 0
+ vrlw 6, 5, 4
+ vspltisw 5, -16
+ vpkudum 13, 9, 8
+ vsubuwm 5, 12, 5
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI1_4@toc@ha
+ addi 4, 4, .LCPI1_4@toc@l
+ vadduwm 11, 6, 11
+ xxswapd 0, 38
+ vadduwm 1, 1, 13
+ xxsldwi 50, 45, 45, 1
+ xxlxor 32, 43, 32
+ xxsldwi 43, 43, 43, 3
+ xxsldwi 33, 33, 33, 1
+ vperm 12, 8, 9, 12
+ vrlw 0, 0, 5
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 0
+ vadduwm 1, 1, 12
+ vperm 6, 6, 6, 2
+ vadduwm 15, 6, 11
+ lvx 11, 0, 4
+ addis 4, 2, .LCPI1_6@toc@ha
+ addi 4, 4, .LCPI1_6@toc@l
+ xxlxor 32, 47, 32
+ lvx 17, 0, 4
+ addi 4, 30, .LCPI1_5@toc@l
+ vperm 14, 10, 7, 11
+ vrlw 0, 0, 3
+ vadduwm 1, 0, 1
+ xxlxor 38, 33, 38
+ vrlw 6, 6, 4
+ vadduwm 8, 6, 15
+ xxswapd 0, 38
+ lvx 6, 0, 4
+ addis 4, 2, .LCPI1_9@toc@ha
+ addi 4, 4, .LCPI1_9@toc@l
+ xxlxor 32, 40, 32
+ xxsldwi 40, 40, 40, 1
+ vperm 13, 12, 18, 6
+ vrlw 9, 0, 5
+ vadduwm 0, 1, 14
+ lvx 1, 0, 12
+ xxsldwi 46, 46, 46, 3
+ xxsldwi 32, 32, 32, 3
+ vperm 7, 7, 7, 1
+ vadduwm 15, 9, 0
+ xxlxor 32, 47, 0
+ vperm 16, 0, 0, 2
+ lvx 0, 0, 11
+ addis 11, 2, .LCPI1_10@toc@ha
+ vcmpequh 0, 0, 17
+ vadduwm 19, 16, 8
+ xxlxor 40, 51, 41
+ xxsel 45, 39, 45, 32
+ vrlw 31, 8, 3
+ lvx 8, 0, 4
+ addis 4, 2, .LCPI1_11@toc@ha
+ addi 4, 4, .LCPI1_11@toc@l
+ vcmpequh 7, 8, 17
+ vadduwm 8, 15, 13
+ vadduwm 15, 31, 8
+ lvx 8, 0, 4
+ addi 4, 11, .LCPI1_10@toc@l
+ lvx 17, 0, 4
+ addis 4, 2, .LCPI1_12@toc@ha
+ xxlxor 41, 47, 48
+ xxsldwi 47, 47, 47, 1
+ addi 4, 4, .LCPI1_12@toc@l
+ xxlnor 48, 39, 39
+ vrlw 29, 9, 4
+ vperm 9, 16, 16, 8
+ xxland 48, 50, 39
+ vperm 17, 30, 12, 17
+ vperm 16, 16, 16, 8
+ vmrghw 12, 12, 10
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI1_13@toc@ha
+ vadduwm 19, 29, 19
+ addi 4, 4, .LCPI1_13@toc@l
+ xxlxor 63, 51, 63
+ xxsldwi 51, 51, 51, 3
+ xxland 0, 49, 41
+ vrlw 17, 31, 5
+ xxlor 48, 0, 48
+ xxswapd 0, 61
+ vperm 18, 12, 18, 10
+ vadduwm 15, 15, 16
+ xxland 60, 48, 39
+ vadduwm 15, 17, 15
+ vperm 28, 28, 28, 8
+ xxlxor 63, 47, 0
+ vadduwm 15, 15, 18
+ vperm 31, 31, 31, 2
+ vperm 30, 18, 16, 6
+ vadduwm 19, 31, 19
+ xxlxor 44, 51, 49
+ vrlw 12, 12, 3
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 63
+ vperm 31, 13, 14, 11
+ vrlw 17, 17, 4
+ vperm 14, 14, 14, 1
+ vadduwm 15, 15, 31
+ vadduwm 19, 17, 19
+ xxswapd 0, 49
+ xxsldwi 47, 47, 47, 3
+ xxsel 46, 46, 62, 32
+ xxlxor 44, 51, 44
+ xxsldwi 51, 51, 51, 1
+ vrlw 12, 12, 5
+ vadduwm 15, 12, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 19, 17, 19
+ xxlxor 44, 51, 44
+ vrlw 29, 12, 3
+ vadduwm 12, 15, 14
+ vadduwm 15, 29, 12
+ lvx 12, 0, 4
+ addis 4, 2, .LCPI1_14@toc@ha
+ addi 4, 4, .LCPI1_14@toc@l
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ vperm 30, 13, 18, 12
+ vrlw 17, 17, 4
+ vmrghw 13, 18, 13
+ xxland 0, 62, 41
+ vadduwm 19, 17, 19
+ vperm 16, 13, 16, 10
+ xxlxor 61, 51, 61
+ xxsldwi 50, 51, 51, 3
+ xxsldwi 51, 63, 63, 3
+ vrlw 30, 29, 5
+ xxlor 61, 60, 0
+ xxswapd 0, 49
+ vperm 31, 14, 19, 11
+ vadduwm 15, 15, 29
+ vperm 19, 19, 19, 1
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 16
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 62
+ vperm 30, 16, 29, 6
+ vrlw 13, 13, 3
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 31
+ xxsldwi 63, 63, 63, 3
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 45, 50, 45
+ xxsldwi 50, 50, 50, 1
+ vrlw 13, 13, 5
+ vadduwm 15, 13, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 18, 17, 18
+ xxlxor 45, 50, 45
+ vrlw 28, 13, 3
+ xxsel 45, 51, 62, 32
+ xxland 51, 61, 39
+ vperm 30, 14, 16, 12
+ vadduwm 15, 15, 13
+ vperm 19, 19, 19, 8
+ vmrghw 14, 16, 14
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 62, 41
+ vrlw 17, 17, 4
+ xxlor 51, 51, 0
+ vadduwm 15, 15, 19
+ vadduwm 18, 17, 18
+ xxswapd 0, 49
+ xxlxor 60, 50, 60
+ xxsldwi 48, 50, 50, 3
+ vperm 18, 14, 29, 10
+ vrlw 30, 28, 5
+ vperm 29, 18, 19, 6
+ vadduwm 15, 30, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 18
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 62
+ vperm 30, 13, 31, 11
+ vrlw 14, 14, 3
+ vperm 31, 31, 31, 1
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 46, 48, 46
+ xxsldwi 48, 48, 48, 1
+ vrlw 14, 14, 5
+ vadduwm 15, 14, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 46, 48, 46
+ vrlw 28, 14, 3
+ xxsel 46, 63, 61, 32
+ xxland 63, 51, 39
+ vperm 29, 13, 18, 12
+ vadduwm 15, 15, 14
+ vperm 31, 31, 31, 8
+ vmrghw 13, 18, 13
+ vadduwm 15, 28, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 61, 41
+ vrlw 17, 17, 4
+ xxlor 63, 63, 0
+ vperm 13, 13, 19, 10
+ xxsldwi 51, 62, 62, 3
+ vadduwm 15, 15, 31
+ vperm 30, 14, 19, 11
+ vadduwm 16, 17, 16
+ xxswapd 0, 49
+ xxlxor 60, 48, 60
+ xxsldwi 48, 48, 48, 3
+ vrlw 29, 28, 5
+ vadduwm 15, 29, 15
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vadduwm 16, 17, 16
+ xxlxor 50, 48, 61
+ vrlw 18, 18, 3
+ vadduwm 15, 18, 15
+ xxlxor 49, 47, 49
+ vadduwm 15, 15, 30
+ vrlw 17, 17, 4
+ xxsldwi 47, 47, 47, 3
+ vadduwm 11, 17, 16
+ xxswapd 0, 49
+ xxlxor 48, 43, 50
+ xxsldwi 43, 43, 43, 1
+ vperm 18, 19, 19, 1
+ vrlw 16, 16, 5
+ vperm 19, 13, 31, 6
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vperm 17, 17, 17, 2
+ vadduwm 29, 17, 11
+ xxlxor 43, 61, 48
+ vrlw 16, 11, 3
+ xxsel 43, 50, 51, 32
+ xxland 50, 63, 39
+ vperm 19, 14, 13, 12
+ vadduwm 15, 15, 11
+ vperm 18, 18, 18, 8
+ vmrghw 13, 13, 14
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 1
+ xxland 0, 51, 41
+ lvx 19, 0, 4
+ vrlw 17, 17, 4
+ xxlor 50, 50, 0
+ vperm 13, 13, 31, 10
+ xxsldwi 63, 62, 62, 3
+ vadduwm 15, 15, 18
+ vperm 19, 11, 31, 19
+ vadduwm 29, 17, 29
+ xxswapd 0, 49
+ vperm 1, 31, 31, 1
+ xxlxor 48, 61, 48
+ xxsldwi 46, 61, 61, 3
+ vperm 6, 13, 18, 6
+ vrlw 16, 16, 5
+ xxsel 32, 33, 38, 32
+ xxland 38, 50, 39
+ vadduwm 15, 16, 15
+ vperm 7, 11, 13, 12
+ xxlxor 49, 47, 0
+ vadduwm 15, 15, 13
+ vperm 17, 17, 17, 2
+ vperm 6, 6, 6, 8
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 49
+ xxsldwi 47, 47, 47, 3
+ vrlw 17, 17, 4
+ vadduwm 15, 15, 19
+ vadduwm 14, 17, 14
+ xxswapd 0, 49
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 5
+ vadduwm 15, 16, 15
+ xxlxor 49, 47, 0
+ vadduwm 0, 15, 0
+ vperm 17, 17, 17, 2
+ xxland 0, 39, 41
+ xxlor 38, 38, 0
+ vadduwm 14, 17, 14
+ xxlxor 48, 46, 48
+ vrlw 16, 16, 3
+ vadduwm 0, 16, 0
+ xxlxor 33, 32, 49
+ xxsldwi 32, 32, 32, 1
+ vrlw 1, 1, 4
+ vadduwm 0, 0, 6
+ vadduwm 8, 1, 14
+ xxswapd 0, 33
+ xxlxor 44, 40, 48
+ xxsldwi 38, 40, 40, 3
+ vrlw 7, 12, 5
+ vadduwm 0, 7, 0
+ xxlxor 33, 32, 0
+ vperm 2, 1, 1, 2
+ vmrghw 1, 13, 11
+ vadduwm 6, 2, 6
+ vperm 1, 1, 18, 10
+ xxlxor 39, 38, 39
+ vrlw 3, 7, 3
+ vadduwm 0, 0, 1
+ vadduwm 0, 3, 0
+ xxlxor 34, 32, 34
+ xxsldwi 0, 32, 32, 3
+ vrlw 2, 2, 4
+ vadduwm 4, 2, 6
+ xxswapd 2, 34
+ xxlxor 35, 36, 35
+ xxsldwi 1, 36, 36, 1
+ vrlw 3, 3, 5
+ xxlxor 0, 1, 0
+ xxswapd 0, 0
+ xxlxor 3, 35, 2
+ stxvd2x 0, 0, 8
+ xxswapd 3, 3
+ stxvd2x 3, 8, 5
+ lfdx 0, 0, 3
+ lfd 3, 8(3)
+ xxmrghd 34, 3, 0
+ xxlxor 0, 1, 34
+ xxswapd 0, 0
+ stxvd2x 0, 8, 10
+ lfd 0, 16(3)
+ lfd 1, 24(3)
+ li 3, -32
+ xxmrghd 34, 1, 0
+ xxlxor 0, 2, 34
+ xxswapd 0, 0
+ stxvd2x 0, 8, 6
+ lxvd2x 63, 1, 3
+ li 3, -48
+ ld 30, -16(1)
+ lxvd2x 62, 1, 3
+ li 3, -64
+ lxvd2x 61, 1, 3
+ li 3, -80
+ lxvd2x 60, 1, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end1:
+ .size zfs_blake3_compress_xof_sse2, .Lfunc_end1-.Lfunc_begin1
+ .cfi_endproc
+
+ .globl zfs_blake3_hash_many_sse2
+ .p2align 2
+ .type zfs_blake3_hash_many_sse2,@function
+zfs_blake3_hash_many_sse2:
+.Lfunc_begin2:
+ .cfi_startproc
+.Lfunc_gep2:
+ addis 2, 12, .TOC.-.Lfunc_gep2@ha
+ addi 2, 2, .TOC.-.Lfunc_gep2@l
+.Lfunc_lep2:
+ .localentry zfs_blake3_hash_many_sse2, .Lfunc_lep2-.Lfunc_gep2
+ mfocrf 12, 32
+ mflr 0
+ std 0, 16(1)
+ stw 12, 8(1)
+ stdu 1, -256(1)
+ .cfi_def_cfa_offset 256
+ .cfi_offset lr, 16
+ .cfi_offset r17, -120
+ .cfi_offset r18, -112
+ .cfi_offset r19, -104
+ .cfi_offset r20, -96
+ .cfi_offset r21, -88
+ .cfi_offset r22, -80
+ .cfi_offset r23, -72
+ .cfi_offset r24, -64
+ .cfi_offset r25, -56
+ .cfi_offset r26, -48
+ .cfi_offset r27, -40
+ .cfi_offset r28, -32
+ .cfi_offset r29, -24
+ .cfi_offset r30, -16
+ .cfi_offset cr2, 8
+ std 26, 208(1)
+ mr 26, 4
+ cmpldi 1, 4, 4
+ andi. 4, 8, 1
+ std 18, 144(1)
+ std 19, 152(1)
+ crmove 8, 1
+ ld 19, 360(1)
+ lwz 18, 352(1)
+ std 24, 192(1)
+ std 25, 200(1)
+ std 27, 216(1)
+ std 28, 224(1)
+ mr 24, 10
+ mr 28, 6
+ mr 27, 5
+ mr 25, 3
+ std 29, 232(1)
+ std 30, 240(1)
+ mr 30, 9
+ mr 29, 7
+ std 17, 136(1)
+ std 20, 160(1)
+ std 21, 168(1)
+ std 22, 176(1)
+ std 23, 184(1)
+ blt 1, .LBB2_3
+ li 3, 0
+ li 4, 1
+ clrldi 23, 30, 32
+ isel 22, 4, 3, 8
+ clrldi 21, 24, 32
+ clrldi 20, 18, 32
+.LBB2_2:
+ mr 3, 25
+ mr 4, 27
+ mr 5, 28
+ mr 6, 29
+ mr 7, 22
+ mr 8, 23
+ mr 9, 21
+ mr 10, 20
+ std 19, 32(1)
+ bl blake3_hash4_sse2
+ addi 26, 26, -4
+ addi 3, 29, 4
+ addi 25, 25, 32
+ addi 19, 19, 128
+ cmpldi 26, 3
+ isel 29, 3, 29, 8
+ bgt 0, .LBB2_2
+.LBB2_3:
+ cmpldi 26, 0
+ beq 0, .LBB2_11
+ li 3, 0
+ li 4, 1
+ or 21, 24, 30
+ li 20, 16
+ addi 24, 1, 96
+ isel 22, 4, 3, 8
+.LBB2_5:
+ lxvd2x 0, 28, 20
+ ld 23, 0(25)
+ mr 17, 27
+ mr 3, 21
+ stxvd2x 0, 24, 20
+ lxvd2x 0, 0, 28
+ stxvd2x 0, 0, 24
+.LBB2_6:
+ cmpldi 17, 1
+ beq 0, .LBB2_8
+ cmpldi 17, 0
+ bne 0, .LBB2_9
+ b .LBB2_10
+.LBB2_8:
+ or 3, 3, 18
+.LBB2_9:
+ clrldi 7, 3, 56
+ mr 3, 24
+ mr 4, 23
+ li 5, 64
+ mr 6, 29
+ bl zfs_blake3_compress_in_place_sse2
+ addi 23, 23, 64
+ addi 17, 17, -1
+ mr 3, 30
+ b .LBB2_6
+.LBB2_10:
+ lxvd2x 0, 24, 20
+ addi 26, 26, -1
+ add 29, 29, 22
+ addi 25, 25, 8
+ cmpldi 26, 0
+ stxvd2x 0, 19, 20
+ lxvd2x 0, 0, 24
+ stxvd2x 0, 0, 19
+ addi 19, 19, 32
+ bne 0, .LBB2_5
+.LBB2_11:
+ ld 30, 240(1)
+ ld 29, 232(1)
+ ld 28, 224(1)
+ ld 27, 216(1)
+ ld 26, 208(1)
+ ld 25, 200(1)
+ ld 24, 192(1)
+ ld 23, 184(1)
+ ld 22, 176(1)
+ ld 21, 168(1)
+ ld 20, 160(1)
+ ld 19, 152(1)
+ ld 18, 144(1)
+ ld 17, 136(1)
+ addi 1, 1, 256
+ ld 0, 16(1)
+ lwz 12, 8(1)
+ mtocrf 32, 12
+ mtlr 0
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end2:
+ .size zfs_blake3_hash_many_sse2, .Lfunc_end2-.Lfunc_begin2
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI3_0:
+ .quad 4294967296
+ .quad 12884901890
+.LCPI3_1:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI3_2:
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+.LCPI3_3:
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+.LCPI3_4:
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+.LCPI3_5:
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ .text
+ .p2align 2
+ .type blake3_hash4_sse2,@function
+blake3_hash4_sse2:
+.Lfunc_begin3:
+ .cfi_startproc
+.Lfunc_gep3:
+ addis 2, 12, .TOC.-.Lfunc_gep3@ha
+ addi 2, 2, .TOC.-.Lfunc_gep3@l
+.Lfunc_lep3:
+ .localentry blake3_hash4_sse2, .Lfunc_lep3-.Lfunc_gep3
+ stdu 1, -400(1)
+ .cfi_def_cfa_offset 400
+ .cfi_offset r22, -152
+ .cfi_offset r23, -144
+ .cfi_offset r24, -136
+ .cfi_offset r25, -128
+ .cfi_offset r26, -120
+ .cfi_offset r27, -112
+ .cfi_offset r28, -104
+ .cfi_offset r29, -96
+ .cfi_offset r30, -88
+ .cfi_offset f23, -72
+ .cfi_offset f24, -64
+ .cfi_offset f25, -56
+ .cfi_offset f26, -48
+ .cfi_offset f27, -40
+ .cfi_offset f28, -32
+ .cfi_offset f29, -24
+ .cfi_offset f30, -16
+ .cfi_offset f31, -8
+ .cfi_offset v20, -352
+ .cfi_offset v21, -336
+ .cfi_offset v22, -320
+ .cfi_offset v23, -304
+ .cfi_offset v24, -288
+ .cfi_offset v25, -272
+ .cfi_offset v26, -256
+ .cfi_offset v27, -240
+ .cfi_offset v28, -224
+ .cfi_offset v29, -208
+ .cfi_offset v30, -192
+ .cfi_offset v31, -176
+ li 11, 48
+ li 0, 8
+ std 30, 312(1)
+ li 30, 12
+ li 12, 4
+ lfiwzx 0, 0, 5
+ stxvd2x 52, 1, 11
+ li 11, 64
+ lfiwzx 2, 5, 0
+ li 0, 20
+ lfiwzx 3, 5, 30
+ stxvd2x 53, 1, 11
+ li 11, 80
+ li 30, 24
+ lfiwzx 4, 5, 0
+ li 0, 28
+ stxvd2x 54, 1, 11
+ li 11, 96
+ lfiwzx 1, 5, 12
+ lfiwzx 6, 5, 30
+ xxspltw 45, 0, 1
+ cmpldi 4, 0
+ std 22, 248(1)
+ stxvd2x 55, 1, 11
+ li 11, 112
+ lfiwzx 7, 5, 0
+ xxspltw 40, 2, 1
+ std 23, 256(1)
+ xxspltw 38, 3, 1
+ xxspltw 50, 4, 1
+ std 24, 264(1)
+ std 25, 272(1)
+ std 26, 280(1)
+ xxspltw 54, 7, 1
+ std 27, 288(1)
+ std 28, 296(1)
+ std 29, 304(1)
+ stxvd2x 56, 1, 11
+ li 11, 128
+ stfd 23, 328(1)
+ stxvd2x 57, 1, 11
+ li 11, 144
+ stfd 24, 336(1)
+ stxvd2x 58, 1, 11
+ li 11, 160
+ stfd 25, 344(1)
+ stxvd2x 59, 1, 11
+ li 11, 176
+ xxspltw 59, 1, 1
+ stxvd2x 60, 1, 11
+ li 11, 192
+ stfd 26, 352(1)
+ stxvd2x 61, 1, 11
+ li 11, 208
+ stfd 27, 360(1)
+ stxvd2x 62, 1, 11
+ li 11, 224
+ xxspltw 62, 6, 1
+ stxvd2x 63, 1, 11
+ li 11, 16
+ stfd 28, 368(1)
+ lfiwzx 5, 5, 11
+ ld 5, 432(1)
+ stfd 29, 376(1)
+ stfd 30, 384(1)
+ stfd 31, 392(1)
+ xxspltw 61, 5, 1
+ beq 0, .LBB3_5
+ addis 30, 2, .LCPI3_0@toc@ha
+ neg 7, 7
+ xxleqv 34, 34, 34
+ addis 28, 2, .LCPI3_2@toc@ha
+ addis 27, 2, .LCPI3_3@toc@ha
+ addis 26, 2, .LCPI3_4@toc@ha
+ addis 25, 2, .LCPI3_5@toc@ha
+ ld 29, 24(3)
+ addi 0, 30, .LCPI3_0@toc@l
+ mtfprwz 1, 7
+ addis 7, 2, .LCPI3_1@toc@ha
+ ld 30, 16(3)
+ lxvd2x 0, 0, 0
+ mtfprwz 2, 6
+ rldicl 6, 6, 32, 32
+ addi 0, 7, .LCPI3_1@toc@l
+ ld 7, 8(3)
+ vslw 2, 2, 2
+ lvx 5, 0, 0
+ addi 0, 28, .LCPI3_2@toc@l
+ addi 28, 27, .LCPI3_3@toc@l
+ addi 27, 26, .LCPI3_4@toc@l
+ addi 26, 25, .LCPI3_5@toc@l
+ or 25, 9, 8
+ li 9, 0
+ xxspltw 36, 2, 1
+ xxswapd 35, 0
+ xxspltw 0, 1, 1
+ xxland 35, 0, 35
+ mtfprwz 0, 6
+ ld 6, 0(3)
+ addi 3, 3, -8
+ vadduwm 4, 3, 4
+ xxlor 35, 35, 34
+ xxlxor 34, 36, 34
+ xxlor 9, 36, 36
+ vspltisw 4, 4
+ vcmpgtsw 2, 3, 2
+ xxspltw 35, 0, 1
+ xxlor 10, 36, 36
+ vsubuwm 2, 3, 2
+ xxlor 11, 34, 34
+ lvx 2, 0, 0
+ li 0, 32
+ xxlor 12, 34, 34
+ lvx 2, 0, 28
+ li 28, 48
+ xxlor 13, 34, 34
+ lvx 2, 0, 27
+ li 27, 0
+ xxlor 31, 34, 34
+ lvx 2, 0, 26
+ xxlor 30, 34, 34
+.LBB3_2:
+ mr 26, 27
+ addi 27, 27, 1
+ xxlor 28, 40, 40
+ cmpld 27, 4
+ sldi 26, 26, 6
+ xxlor 24, 45, 45
+ iseleq 24, 10, 9
+ add 23, 6, 26
+ add 22, 30, 26
+ lxvd2x 0, 6, 26
+ lxvd2x 1, 7, 26
+ or 25, 24, 25
+ add 24, 7, 26
+ lxvd2x 2, 30, 26
+ lxvd2x 3, 29, 26
+ xxlor 29, 38, 38
+ lxvd2x 4, 23, 11
+ lxvd2x 6, 24, 11
+ clrlwi 25, 25, 24
+ lxvd2x 7, 22, 11
+ lxvd2x 8, 23, 0
+ mtfprd 5, 25
+ add 25, 29, 26
+ xxswapd 34, 0
+ lxvd2x 0, 25, 11
+ xxswapd 36, 1
+ xxswapd 33, 2
+ lxvd2x 1, 24, 0
+ lxvd2x 2, 22, 0
+ xxswapd 39, 3
+ xxswapd 32, 4
+ lxvd2x 3, 25, 0
+ lxvd2x 4, 23, 28
+ xxswapd 49, 6
+ xxswapd 51, 7
+ lxvd2x 6, 24, 28
+ xxswapd 58, 8
+ lxvd2x 7, 22, 28
+ lxvd2x 8, 25, 28
+ xxswapd 60, 0
+ mr 25, 3
+ xxswapd 57, 1
+ xxswapd 53, 2
+ xxswapd 52, 3
+ xxswapd 56, 4
+ xxswapd 55, 6
+ xxswapd 0, 5
+ xxswapd 40, 7
+ xxswapd 41, 8
+ mtctr 12
+.LBB3_3:
+ ldu 24, 8(25)
+ add 24, 24, 26
+ addi 24, 24, 256
+ dcbt 0, 24
+ bdnz .LBB3_3
+ vmrgew 3, 4, 2
+ vspltisw 31, 9
+ mr 25, 8
+ vmrglw 10, 4, 2
+ vspltisw 14, 10
+ vmrghw 6, 4, 2
+ xxspltw 0, 0, 3
+ vmrgew 4, 17, 0
+ vmrglw 11, 17, 0
+ vmrghw 16, 17, 0
+ vmrgew 0, 25, 26
+ vmrgew 13, 7, 1
+ vmrglw 2, 7, 1
+ vmrghw 7, 7, 1
+ xxlor 25, 36, 36
+ vmrgew 4, 28, 19
+ xxlor 26, 32, 32
+ vmrglw 0, 25, 26
+ vmrglw 1, 28, 19
+ xxmrgld 47, 34, 42
+ xxlor 44, 28, 28
+ vmrghw 25, 25, 26
+ xxlor 23, 36, 36
+ vmrghw 4, 28, 19
+ vspltisw 19, -16
+ xxlor 5, 32, 32
+ vmrgew 0, 20, 21
+ xxmrgld 34, 33, 43
+ vmrglw 28, 20, 21
+ vmrghw 21, 20, 21
+ vmrglw 20, 23, 24
+ vmrghw 26, 23, 24
+ vmrglw 17, 9, 8
+ xxlor 8, 32, 32
+ vmrgew 0, 23, 24
+ xxmrgld 56, 39, 38
+ vmrgew 23, 9, 8
+ xxlor 33, 24, 24
+ xxlor 2, 34, 34
+ vadduwm 11, 15, 1
+ xxmrgld 33, 36, 48
+ xxlor 6, 47, 47
+ xxlor 27, 32, 32
+ vmrghw 0, 9, 8
+ vspltisw 9, 12
+ vsubuwm 8, 31, 19
+ xxmrgld 51, 23, 25
+ vadduwm 31, 2, 12
+ xxlor 34, 10, 10
+ vadduwm 10, 14, 14
+ vslw 15, 2, 2
+ xxlor 34, 29, 29
+ vadduwm 14, 24, 27
+ xxlor 24, 48, 48
+ vadduwm 16, 1, 2
+ xxmrgld 34, 45, 35
+ vadduwm 31, 31, 30
+ xxmrghd 36, 36, 24
+ vadduwm 11, 11, 29
+ vadduwm 14, 14, 18
+ vadduwm 13, 16, 22
+ xxlxor 47, 63, 47
+ xxlor 1, 9, 9
+ xxlor 1, 11, 11
+ xxlxor 48, 43, 9
+ vadduwm 11, 11, 2
+ xxlor 7, 34, 34
+ xxmrghd 34, 39, 38
+ xxlxor 39, 46, 11
+ xxlor 1, 50, 50
+ xxlxor 50, 45, 0
+ vperm 15, 15, 15, 5
+ vperm 16, 16, 16, 5
+ vperm 7, 7, 7, 5
+ vperm 18, 18, 18, 5
+ xxlor 4, 33, 33
+ xxlor 33, 31, 31
+ vadduwm 14, 14, 2
+ xxlor 3, 34, 34
+ xxlor 34, 12, 12
+ xxlor 35, 13, 13
+ vadduwm 6, 15, 1
+ xxlor 33, 30, 30
+ vadduwm 2, 16, 2
+ vadduwm 3, 7, 3
+ vadduwm 12, 18, 1
+ xxlxor 59, 34, 61
+ xxlxor 61, 35, 1
+ xxlxor 33, 38, 62
+ xxlxor 62, 44, 54
+ vrlw 22, 27, 10
+ vrlw 29, 29, 10
+ vrlw 1, 1, 10
+ vrlw 30, 30, 10
+ vadduwm 31, 31, 19
+ vadduwm 13, 13, 4
+ vadduwm 11, 22, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 1, 31
+ vadduwm 13, 30, 13
+ vadduwm 9, 9, 9
+ xxlor 1, 36, 36
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 39
+ xxmrgld 39, 60, 5
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 28, 4, 9
+ xxmrgld 36, 53, 57
+ vrlw 15, 15, 9
+ xxmrghd 57, 53, 57
+ vrlw 18, 18, 9
+ vadduwm 14, 14, 4
+ xxlor 0, 36, 36
+ xxmrgld 36, 49, 52
+ vadduwm 2, 16, 2
+ xxmrgld 49, 8, 26
+ vadduwm 3, 28, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 54, 34, 54
+ xxlxor 61, 35, 61
+ xxlxor 33, 38, 33
+ xxlxor 62, 44, 62
+ vrlw 29, 29, 8
+ vrlw 20, 1, 8
+ xxmrgld 33, 55, 27
+ vrlw 30, 30, 8
+ vrlw 22, 22, 8
+ vadduwm 11, 11, 7
+ xxlor 5, 39, 39
+ xxmrgld 39, 32, 58
+ vadduwm 31, 31, 4
+ vadduwm 11, 29, 11
+ vadduwm 13, 13, 7
+ vadduwm 14, 20, 14
+ vadduwm 31, 30, 31
+ vadduwm 13, 22, 13
+ xxlor 28, 36, 36
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 60
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vadduwm 11, 11, 17
+ vmr 28, 17
+ xxmrghd 49, 32, 58
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 21, 4, 2
+ vadduwm 3, 15, 3
+ xxlxor 34, 38, 61
+ xxlxor 61, 44, 52
+ xxlxor 62, 53, 62
+ xxlxor 54, 35, 54
+ vrlw 20, 2, 10
+ vrlw 29, 29, 10
+ vrlw 0, 30, 10
+ vrlw 30, 22, 10
+ vadduwm 14, 14, 25
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 17
+ vadduwm 11, 20, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 11, 11, 24
+ xxlor 8, 56, 56
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 21
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 52
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ xxlor 25, 51, 51
+ vmr 26, 17
+ xxlor 49, 3, 3
+ xxlor 52, 1, 1
+ xxlor 51, 2, 2
+ vadduwm 14, 14, 17
+ vadduwm 31, 31, 20
+ vadduwm 13, 13, 19
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 29, 39, 39
+ xxlor 59, 4, 4
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 53, 0, 0
+ xxlor 39, 6, 6
+ vadduwm 11, 11, 27
+ vadduwm 14, 14, 21
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 1
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 34, 7, 7
+ vadduwm 31, 31, 28
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 11, 11, 2
+ xxlor 34, 28, 28
+ vadduwm 13, 13, 26
+ vadduwm 14, 14, 2
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 2, 58, 58
+ xxlor 39, 25, 25
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ xxlor 54, 29, 29
+ xxlor 58, 5, 5
+ vadduwm 11, 11, 25
+ vadduwm 14, 14, 7
+ vadduwm 31, 31, 22
+ vadduwm 13, 13, 26
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 21
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 20
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 0, 33, 33
+ xxlor 33, 8, 8
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vadduwm 11, 11, 19
+ vadduwm 14, 14, 2
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 22
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ vadduwm 11, 11, 27
+ vadduwm 14, 14, 28
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 31, 31, 25
+ vadduwm 13, 13, 26
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 3, 7, 7
+ vadduwm 11, 11, 7
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ xxlor 33, 6, 6
+ xxlor 58, 2, 2
+ xxlor 39, 3, 3
+ vadduwm 14, 14, 1
+ vadduwm 31, 31, 26
+ vadduwm 13, 13, 7
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ xxlor 52, 0, 0
+ vadduwm 11, 11, 21
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vadduwm 14, 14, 2
+ vadduwm 31, 31, 22
+ vadduwm 13, 13, 20
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 7, 49, 49
+ vmr 17, 2
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 54, 1, 1
+ xxlor 34, 7, 7
+ vadduwm 11, 11, 22
+ vadduwm 14, 14, 28
+ vadduwm 31, 31, 2
+ vadduwm 13, 13, 26
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 59, 25, 25
+ vadduwm 11, 11, 19
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 14, 14, 25
+ vadduwm 31, 31, 27
+ vadduwm 13, 13, 7
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vmr 2, 19
+ xxlor 0, 7, 7
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ xxlor 1, 51, 51
+ xxlor 7, 39, 39
+ xxlor 51, 8, 8
+ xxlor 39, 5, 5
+ xxlor 34, 4, 4
+ vadduwm 11, 11, 1
+ vadduwm 14, 14, 19
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 2
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ xxlor 2, 53, 53
+ vmr 21, 28
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ xxlor 53, 29, 29
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 28
+ vadduwm 31, 31, 26
+ vadduwm 13, 13, 21
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ vadduwm 11, 11, 20
+ xxlor 5, 52, 52
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 52, 2, 2
+ vadduwm 14, 14, 25
+ vadduwm 31, 31, 20
+ vadduwm 13, 13, 7
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ vadduwm 11, 11, 22
+ vadduwm 14, 14, 27
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 31, 31, 1
+ vadduwm 13, 13, 2
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 3, 29, 29
+ xxlor 4, 49, 49
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ vmr 17, 28
+ xxlor 2, 54, 54
+ xxlor 3, 34, 34
+ xxlor 34, 8, 8
+ xxlor 51, 0, 0
+ xxlor 60, 7, 7
+ xxlor 54, 1, 1
+ vadduwm 11, 11, 2
+ vadduwm 14, 14, 19
+ vadduwm 31, 31, 28
+ vadduwm 13, 13, 22
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 25
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 56, 32
+ vrlw 30, 30, 8
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 26
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 6, 39, 39
+ xxlor 39, 4, 4
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 30, 30, 10
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vadduwm 11, 11, 21
+ vadduwm 14, 14, 27
+ vadduwm 31, 31, 7
+ vadduwm 13, 13, 28
+ vadduwm 11, 30, 11
+ vadduwm 14, 23, 14
+ vadduwm 31, 29, 31
+ vadduwm 13, 0, 13
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 63, 47
+ xxlxor 50, 45, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 0, 49, 49
+ xxlor 49, 5, 5
+ vadduwm 24, 16, 24
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 62, 56, 62
+ xxlxor 55, 35, 55
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 23, 23, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ vrlw 30, 30, 8
+ vadduwm 11, 11, 17
+ vadduwm 14, 14, 1
+ vadduwm 31, 31, 2
+ vadduwm 13, 13, 22
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 34, 3, 3
+ xxlor 49, 2, 2
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 24, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 55, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 32, 56, 32
+ xxlxor 62, 35, 62
+ vrlw 23, 23, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ vrlw 30, 30, 10
+ vadduwm 11, 11, 19
+ vadduwm 14, 14, 20
+ vadduwm 31, 31, 2
+ vadduwm 13, 13, 17
+ vadduwm 11, 23, 11
+ vadduwm 14, 29, 14
+ vadduwm 31, 0, 31
+ vadduwm 13, 30, 13
+ xxlxor 50, 43, 50
+ xxlxor 48, 46, 48
+ xxlxor 36, 63, 36
+ xxlxor 47, 45, 47
+ vrlw 18, 18, 9
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vadduwm 14, 14, 27
+ vadduwm 11, 11, 25
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 27, 4, 24
+ vadduwm 3, 15, 3
+ xxlxor 57, 38, 55
+ xxlxor 61, 44, 61
+ xxlxor 62, 35, 62
+ xxlxor 32, 59, 32
+ xxlor 39, 7, 7
+ vrlw 30, 30, 8
+ vrlw 25, 25, 8
+ vrlw 29, 29, 8
+ vrlw 0, 0, 8
+ xxlor 1, 58, 58
+ vmr 26, 19
+ vadduwm 19, 31, 7
+ xxlor 39, 6, 6
+ vadduwm 11, 30, 11
+ vadduwm 7, 13, 7
+ vadduwm 13, 25, 14
+ vadduwm 14, 29, 19
+ vadduwm 7, 0, 7
+ xxlxor 48, 43, 48
+ xxlxor 36, 45, 36
+ xxlxor 47, 46, 47
+ xxlxor 50, 39, 50
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ vperm 18, 18, 18, 5
+ xxlor 51, 1, 1
+ vadduwm 13, 13, 1
+ vadduwm 11, 11, 19
+ vadduwm 19, 16, 27
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 63, 51, 62
+ xxlxor 62, 35, 57
+ xxlxor 61, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 31, 31, 10
+ vrlw 30, 30, 10
+ vrlw 29, 29, 10
+ vrlw 0, 0, 10
+ xxlor 33, 0, 0
+ vadduwm 7, 7, 2
+ vadduwm 14, 14, 1
+ vadduwm 11, 31, 11
+ vadduwm 13, 30, 13
+ vadduwm 14, 29, 14
+ vadduwm 7, 0, 7
+ xxlxor 48, 43, 48
+ xxlxor 36, 45, 36
+ xxlxor 47, 46, 47
+ xxlxor 50, 39, 50
+ vrlw 16, 16, 9
+ vrlw 4, 4, 9
+ vrlw 15, 15, 9
+ vrlw 18, 18, 9
+ xxlor 60, 8, 8
+ vadduwm 1, 11, 21
+ vadduwm 11, 13, 28
+ vadduwm 13, 16, 19
+ vadduwm 3, 4, 3
+ vadduwm 6, 15, 6
+ vadduwm 12, 18, 12
+ xxlxor 51, 45, 63
+ xxlxor 63, 35, 62
+ xxlxor 62, 38, 61
+ xxlxor 32, 44, 32
+ vrlw 31, 31, 8
+ vrlw 30, 30, 8
+ vrlw 0, 0, 8
+ vrlw 19, 19, 8
+ vadduwm 14, 14, 26
+ vadduwm 7, 7, 17
+ vadduwm 1, 31, 1
+ vadduwm 11, 30, 11
+ vadduwm 14, 0, 14
+ vadduwm 7, 19, 7
+ xxlxor 50, 33, 50
+ xxlxor 48, 43, 48
+ xxlxor 36, 46, 36
+ xxlxor 47, 39, 47
+ vperm 18, 18, 18, 5
+ vperm 16, 16, 16, 5
+ vperm 4, 4, 4, 5
+ vperm 15, 15, 15, 5
+ xxlor 34, 4, 4
+ vadduwm 14, 14, 22
+ vadduwm 6, 18, 6
+ vadduwm 12, 16, 12
+ vadduwm 13, 4, 13
+ vadduwm 3, 15, 3
+ xxlxor 49, 38, 63
+ xxlxor 63, 44, 62
+ xxlxor 32, 45, 32
+ xxlxor 51, 35, 51
+ vrlw 17, 17, 10
+ vrlw 31, 31, 10
+ vrlw 0, 0, 10
+ vrlw 10, 19, 10
+ vadduwm 11, 11, 2
+ xxlor 34, 5, 5
+ vadduwm 1, 1, 20
+ vadduwm 2, 7, 2
+ vadduwm 7, 31, 11
+ vadduwm 11, 0, 14
+ vadduwm 2, 10, 2
+ vadduwm 1, 17, 1
+ xxlxor 36, 43, 36
+ xxlxor 46, 34, 47
+ vrlw 4, 4, 9
+ vrlw 14, 14, 9
+ xxlxor 47, 33, 50
+ xxlxor 48, 39, 48
+ vrlw 15, 15, 9
+ vrlw 9, 16, 9
+ vadduwm 13, 4, 13
+ vadduwm 3, 14, 3
+ xxlxor 32, 45, 32
+ xxlxor 45, 45, 33
+ xxlxor 33, 35, 42
+ xxlxor 59, 35, 39
+ vadduwm 3, 15, 6
+ vadduwm 6, 9, 12
+ xxlxor 39, 35, 49
+ xxlxor 42, 38, 63
+ vrlw 1, 1, 8
+ vrlw 7, 7, 8
+ vrlw 10, 10, 8
+ vrlw 0, 0, 8
+ xxlxor 40, 35, 43
+ xxlxor 38, 38, 34
+ xxlxor 61, 33, 41
+ xxlxor 50, 39, 36
+ xxlxor 62, 42, 46
+ xxlxor 54, 32, 47
+ bne 0, .LBB3_2
+.LBB3_5:
+ vmrglw 2, 27, 13
+ li 3, 32
+ li 4, 48
+ vmrglw 4, 6, 8
+ vmrglw 0, 18, 29
+ vmrglw 1, 22, 30
+ vmrghw 3, 27, 13
+ vmrghw 5, 6, 8
+ vmrghw 6, 18, 29
+ vmrghw 7, 22, 30
+ xxmrgld 40, 36, 34
+ xxmrghd 34, 36, 34
+ xxmrgld 41, 33, 32
+ xxswapd 0, 40
+ xxmrgld 36, 37, 35
+ xxmrghd 35, 37, 35
+ xxmrghd 37, 33, 32
+ xxswapd 1, 41
+ xxmrgld 32, 39, 38
+ xxmrghd 33, 39, 38
+ xxswapd 2, 34
+ xxswapd 4, 36
+ xxswapd 3, 37
+ stxvd2x 0, 0, 5
+ xxswapd 5, 32
+ stxvd2x 1, 5, 11
+ xxswapd 0, 35
+ xxswapd 1, 33
+ stxvd2x 2, 5, 3
+ li 3, 64
+ stxvd2x 3, 5, 4
+ li 4, 80
+ stxvd2x 4, 5, 3
+ li 3, 96
+ stxvd2x 5, 5, 4
+ li 4, 112
+ stxvd2x 0, 5, 3
+ stxvd2x 1, 5, 4
+ li 3, 224
+ lxvd2x 63, 1, 3
+ li 3, 208
+ lfd 31, 392(1)
+ ld 30, 312(1)
+ ld 29, 304(1)
+ lxvd2x 62, 1, 3
+ li 3, 192
+ lfd 30, 384(1)
+ ld 28, 296(1)
+ ld 27, 288(1)
+ lxvd2x 61, 1, 3
+ li 3, 176
+ lfd 29, 376(1)
+ ld 26, 280(1)
+ ld 25, 272(1)
+ lxvd2x 60, 1, 3
+ li 3, 160
+ lfd 28, 368(1)
+ ld 24, 264(1)
+ ld 23, 256(1)
+ lxvd2x 59, 1, 3
+ li 3, 144
+ lfd 27, 360(1)
+ ld 22, 248(1)
+ lxvd2x 58, 1, 3
+ li 3, 128
+ lfd 26, 352(1)
+ lxvd2x 57, 1, 3
+ li 3, 112
+ lfd 25, 344(1)
+ lxvd2x 56, 1, 3
+ li 3, 96
+ lfd 24, 336(1)
+ lxvd2x 55, 1, 3
+ li 3, 80
+ lfd 23, 328(1)
+ lxvd2x 54, 1, 3
+ li 3, 64
+ lxvd2x 53, 1, 3
+ li 3, 48
+ lxvd2x 52, 1, 3
+ addi 1, 1, 400
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end3:
+ .size blake3_hash4_sse2, .Lfunc_end3-.Lfunc_begin3
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
new file mode 100644
index 000000000000..315561d4497a
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
@@ -0,0 +1,3064 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2022 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ *
+ * This is converted assembly: SSE4.1 -> POWER8 PPC64 Little Endian
+ * Tools used: SIMDe https://github.com/simd-everywhere/simde
+ */
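[Note on the conversion approach described in the header above: SIMDe works by recompiling the original SSE4.1 C sources against portable intrinsic definitions, which the compiler then lowers to native VSX/AltiVec instructions (vadduwm, xxlxor, vperm, and so on); the captured assembly output is what these .S files contain. A minimal sketch of the idea, assuming only SIMDe's public simde_mm_* API; the rot16/g_step helpers below are illustrative, not the actual BLAKE3 sources:

    /* Illustrative sketch, not code from this commit. */
    #include <simde/x86/sse4.1.h>  /* portable SSE intrinsics; lowered to VSX on ppc64le */

    /* Rotate each 32-bit lane by 16, as in the BLAKE3 G function. */
    static inline simde__m128i rot16(simde__m128i x)
    {
        return simde_mm_or_si128(simde_mm_srli_epi32(x, 16),
                                 simde_mm_slli_epi32(x, 16));
    }

    /* One quarter of a G step: a += b; d ^= a; d = rot16(d). */
    static inline simde__m128i g_step(simde__m128i *a, simde__m128i b, simde__m128i d)
    {
        *a = simde_mm_add_epi32(*a, b);   /* -> vadduwm on POWER8 */
        d  = simde_mm_xor_si128(d, *a);   /* -> xxlxor            */
        return rot16(d);                  /* -> shifts plus vor   */
    }

Compiling such code on ppc64le (for example with clang -O3 -S) and keeping the emitted assembly is what yields the vadduwm/xxlxor/vperm sequences that make up the functions in these files, rather than hand-scheduled assembly.]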
+
+#if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ .text
+ .abiversion 2
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI0_0:
+ .byte 31
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 30
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 29
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 28
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI0_2:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI0_3:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI0_4:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_5:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+.LCPI0_6:
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+.LCPI0_7:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_8:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_9:
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_10:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+.LCPI0_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI0_12:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI0_13:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI0_14:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .text
+ .globl zfs_blake3_compress_in_place_sse41
+ .p2align 2
+ .type zfs_blake3_compress_in_place_sse41,@function
+zfs_blake3_compress_in_place_sse41:
+.Lfunc_begin0:
+ .cfi_startproc
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha
+ addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+ .localentry zfs_blake3_compress_in_place_sse41, .Lfunc_lep0-.Lfunc_gep0
+ li 8, -64
+ mtvsrd 34, 5
+ li 5, 16
+ lfdx 0, 0, 4
+ vspltisw 13, -16
+ stxvd2x 60, 1, 8
+ li 8, -48
+ mtvsrd 35, 7
+ lfd 2, 16(4)
+ lfd 3, 24(4)
+ addis 7, 2, .LCPI0_0@toc@ha
+ stxvd2x 61, 1, 8
+ li 8, -32
+ mtvsrwz 36, 6
+ rldicl 6, 6, 32, 32
+ stxvd2x 62, 1, 8
+ li 8, -16
+ vmrghb 2, 3, 2
+ stxvd2x 63, 1, 8
+ mtvsrwz 35, 6
+ addi 6, 7, .LCPI0_0@toc@l
+ addis 7, 2, .LCPI0_2@toc@ha
+ lfd 1, 8(4)
+ xxmrghd 32, 3, 2
+ lvx 6, 0, 6
+ xxlxor 33, 33, 33
+ addis 6, 2, .LCPI0_1@toc@ha
+ addi 7, 7, .LCPI0_2@toc@l
+ vmrghw 3, 3, 4
+ addi 6, 6, .LCPI0_1@toc@l
+ vspltisw 14, 9
+ xxmrghd 37, 1, 0
+ lxvd2x 0, 0, 3
+ lxvd2x 1, 3, 5
+ vperm 2, 1, 2, 6
+ vpkudum 9, 0, 5
+ xxswapd 36, 0
+ xxswapd 38, 1
+ xxmrgld 34, 34, 35
+ lvx 3, 0, 7
+ addis 7, 2, .LCPI0_4@toc@ha
+ addi 7, 7, .LCPI0_4@toc@l
+ vadduwm 4, 9, 4
+ lvx 11, 0, 7
+ addis 7, 2, .LCPI0_6@toc@ha
+ addi 7, 7, .LCPI0_6@toc@l
+ vadduwm 7, 4, 6
+ lvx 4, 0, 6
+ addis 6, 2, .LCPI0_3@toc@ha
+ addi 6, 6, .LCPI0_3@toc@l
+ vperm 11, 0, 5, 11
+ lvx 0, 0, 7
+ li 7, 48
+ xxlxor 40, 39, 34
+ lvx 10, 0, 6
+ addis 6, 2, .LCPI0_5@toc@ha
+ lxvd2x 1, 4, 7
+ vcmpgtsb 2, 1, 4
+ addi 6, 6, .LCPI0_5@toc@l
+ vperm 4, 8, 8, 3
+ vspltisw 8, 10
+ xxlandc 44, 36, 34
+ vadduwm 4, 8, 8
+ vadduwm 8, 12, 10
+ xxlxor 37, 40, 38
+ vrlw 6, 5, 4
+ vadduwm 5, 7, 11
+ vadduwm 7, 6, 5
+ lvx 5, 0, 6
+ li 6, 32
+ lxvd2x 0, 4, 6
+ addis 4, 2, .LCPI0_7@toc@ha
+ addis 6, 2, .LCPI0_9@toc@ha
+ xxlxor 42, 39, 44
+ xxswapd 44, 1
+ addi 4, 4, .LCPI0_7@toc@l
+ addi 6, 6, .LCPI0_9@toc@l
+ vcmpgtsb 5, 1, 5
+ vperm 1, 10, 10, 0
+ xxswapd 42, 0
+ vpkudum 16, 12, 10
+ xxlandc 47, 33, 37
+ vsubuwm 1, 14, 13
+ lvx 14, 0, 4
+ addis 4, 2, .LCPI0_8@toc@ha
+ vadduwm 8, 15, 8
+ xxswapd 45, 47
+ addi 4, 4, .LCPI0_8@toc@l
+ vadduwm 7, 7, 16
+ xxsldwi 48, 48, 48, 1
+ xxlxor 38, 40, 38
+ xxsldwi 40, 40, 40, 3
+ xxsldwi 39, 39, 39, 1
+ vperm 14, 10, 12, 14
+ vrlw 6, 6, 1
+ vadduwm 7, 6, 7
+ xxlxor 45, 39, 45
+ vperm 13, 13, 13, 3
+ xxlandc 45, 45, 34
+ vadduwm 8, 13, 8
+ xxlxor 38, 40, 38
+ vrlw 10, 6, 4
+ vadduwm 6, 7, 14
+ vadduwm 7, 10, 6
+ xxlxor 38, 39, 45
+ vperm 12, 6, 6, 0
+ lvx 6, 0, 4
+ addis 4, 2, .LCPI0_10@toc@ha
+ addi 4, 4, .LCPI0_10@toc@l
+ vperm 13, 11, 9, 6
+ xxlandc 44, 44, 37
+ vadduwm 15, 12, 8
+ vadduwm 7, 7, 13
+ xxsldwi 45, 45, 45, 3
+ xxlxor 40, 47, 42
+ xxsldwi 47, 47, 47, 1
+ xxsldwi 39, 39, 39, 3
+ vrlw 10, 8, 1
+ xxswapd 40, 44
+ vadduwm 17, 10, 7
+ lvx 7, 0, 4
+ addis 4, 2, .LCPI0_11@toc@ha
+ addi 4, 4, .LCPI0_11@toc@l
+ xxlxor 44, 49, 40
+ lvx 8, 0, 6
+ vperm 18, 9, 9, 7
+ lvx 9, 0, 4
+ addis 4, 2, .LCPI0_12@toc@ha
+ vperm 12, 12, 12, 3
+ addi 4, 4, .LCPI0_12@toc@l
+ vperm 19, 14, 16, 8
+ xxlandc 63, 44, 34
+ vperm 12, 19, 18, 9
+ vadduwm 15, 31, 15
+ xxlxor 42, 47, 42
+ vrlw 18, 10, 4
+ vadduwm 10, 17, 12
+ vadduwm 17, 18, 10
+ xxlxor 42, 49, 63
+ xxmrgld 63, 43, 46
+ xxsldwi 49, 49, 49, 1
+ vmrghw 14, 14, 11
+ vperm 19, 10, 10, 0
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI0_13@toc@ha
+ addi 4, 4, .LCPI0_13@toc@l
+ lvx 11, 0, 4
+ addis 4, 2, .LCPI0_14@toc@ha
+ vperm 31, 16, 31, 10
+ addi 4, 4, .LCPI0_14@toc@l
+ vperm 14, 14, 16, 11
+ xxlandc 51, 51, 37
+ vadduwm 15, 19, 15
+ xxswapd 51, 51
+ vadduwm 17, 17, 31
+ xxlxor 50, 47, 50
+ xxsldwi 47, 47, 47, 3
+ vperm 30, 14, 31, 8
+ vrlw 18, 18, 1
+ vadduwm 17, 18, 17
+ xxlxor 51, 49, 51
+ vadduwm 17, 17, 14
+ vperm 19, 19, 19, 3
+ xxlandc 51, 51, 34
+ vadduwm 15, 19, 15
+ xxlxor 48, 47, 50
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 51
+ vperm 19, 12, 13, 6
+ vperm 18, 18, 18, 0
+ vperm 13, 13, 13, 7
+ vadduwm 17, 17, 19
+ xxlandc 50, 50, 37
+ xxsldwi 49, 49, 49, 3
+ vperm 13, 30, 13, 9
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxmrgld 62, 44, 46
+ vmrghw 12, 14, 12
+ xxlxor 48, 47, 48
+ xxsldwi 47, 47, 47, 1
+ vrlw 16, 16, 1
+ vperm 30, 31, 30, 10
+ vperm 12, 12, 31, 11
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 13
+ vperm 18, 18, 18, 3
+ vperm 31, 12, 30, 8
+ xxlandc 50, 50, 34
+ vadduwm 15, 18, 15
+ xxlxor 48, 47, 48
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ xxsldwi 49, 49, 49, 1
+ vperm 18, 18, 18, 0
+ vadduwm 17, 17, 30
+ xxlandc 50, 50, 37
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxlxor 48, 47, 48
+ xxsldwi 46, 47, 47, 3
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 12
+ vperm 18, 18, 18, 3
+ xxlandc 47, 50, 34
+ xxsldwi 50, 51, 51, 3
+ vadduwm 14, 15, 14
+ vperm 19, 13, 18, 6
+ xxlxor 48, 46, 48
+ vperm 18, 18, 18, 7
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vadduwm 17, 17, 19
+ vperm 15, 15, 15, 0
+ xxsldwi 49, 49, 49, 3
+ xxlandc 47, 47, 37
+ vadduwm 14, 15, 14
+ xxswapd 47, 47
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 29, 15, 14
+ vperm 14, 31, 18, 9
+ xxmrgld 50, 45, 44
+ xxlxor 48, 61, 48
+ vmrghw 12, 12, 13
+ vrlw 16, 16, 4
+ vperm 18, 30, 18, 10
+ vadduwm 17, 17, 14
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ xxsldwi 49, 49, 49, 1
+ vperm 15, 15, 15, 0
+ vadduwm 17, 17, 18
+ xxlandc 47, 47, 37
+ vadduwm 31, 15, 29
+ xxswapd 47, 47
+ xxlxor 48, 63, 48
+ xxsldwi 45, 63, 63, 3
+ vperm 31, 12, 30, 11
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 13, 15, 13
+ xxlxor 44, 45, 48
+ vadduwm 16, 17, 31
+ xxsldwi 49, 51, 51, 3
+ vrlw 12, 12, 4
+ vperm 19, 14, 17, 6
+ vadduwm 16, 12, 16
+ xxlxor 47, 48, 47
+ vperm 15, 15, 15, 0
+ xxlandc 47, 47, 37
+ vadduwm 13, 15, 13
+ xxswapd 47, 47
+ xxlxor 44, 45, 44
+ xxsldwi 45, 45, 45, 1
+ vrlw 30, 12, 1
+ vadduwm 12, 16, 19
+ xxsldwi 44, 44, 44, 3
+ vadduwm 16, 30, 12
+ xxlxor 44, 48, 47
+ vperm 15, 17, 17, 7
+ vperm 12, 12, 12, 3
+ vperm 17, 31, 18, 8
+ xxlandc 61, 44, 34
+ vperm 12, 17, 15, 9
+ vadduwm 13, 29, 13
+ xxlxor 47, 45, 62
+ xxmrgld 62, 46, 63
+ vmrghw 14, 31, 14
+ vrlw 15, 15, 4
+ vadduwm 16, 16, 12
+ vperm 30, 18, 30, 10
+ vperm 14, 14, 18, 11
+ xxsldwi 50, 51, 51, 3
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 61
+ xxsldwi 48, 48, 48, 1
+ vperm 19, 12, 18, 6
+ vperm 17, 17, 17, 0
+ vadduwm 16, 16, 30
+ xxmrgld 60, 44, 46
+ vmrghw 12, 14, 12
+ vperm 28, 30, 28, 10
+ xxlandc 49, 49, 37
+ vadduwm 13, 17, 13
+ xxswapd 49, 49
+ vperm 12, 12, 30, 11
+ xxlxor 47, 45, 47
+ xxsldwi 45, 45, 45, 3
+ vrlw 15, 15, 1
+ vperm 8, 12, 28, 8
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vadduwm 16, 16, 14
+ vperm 17, 17, 17, 3
+ xxlandc 49, 49, 34
+ vadduwm 13, 17, 13
+ xxlxor 47, 45, 47
+ vrlw 15, 15, 4
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vperm 17, 17, 17, 0
+ xxlandc 49, 49, 37
+ vadduwm 31, 17, 13
+ xxlxor 45, 63, 47
+ vrlw 15, 13, 1
+ vadduwm 13, 16, 19
+ xxswapd 48, 49
+ xxsldwi 51, 51, 51, 3
+ xxsldwi 45, 45, 45, 3
+ vadduwm 17, 15, 13
+ xxlxor 45, 49, 48
+ lvx 16, 0, 4
+ vperm 29, 13, 13, 3
+ vperm 13, 18, 18, 7
+ xxsldwi 50, 63, 63, 1
+ vperm 16, 14, 30, 16
+ vperm 7, 19, 19, 7
+ xxlandc 63, 61, 34
+ vadduwm 18, 31, 18
+ vperm 29, 16, 13, 9
+ xxlxor 47, 50, 47
+ vperm 6, 16, 19, 6
+ vrlw 15, 15, 4
+ vperm 7, 8, 7, 9
+ vadduwm 17, 17, 29
+ xxmrgld 41, 61, 44
+ vadduwm 17, 15, 17
+ vperm 9, 28, 9, 10
+ xxlxor 63, 49, 63
+ xxsldwi 49, 49, 49, 1
+ vperm 31, 31, 31, 0
+ vadduwm 17, 17, 28
+ xxlandc 63, 63, 37
+ vadduwm 18, 31, 18
+ xxswapd 63, 63
+ xxlxor 47, 50, 47
+ xxsldwi 46, 50, 50, 3
+ vrlw 15, 15, 1
+ vadduwm 17, 15, 17
+ xxlxor 63, 49, 63
+ vadduwm 17, 17, 12
+ vperm 31, 31, 31, 3
+ xxlandc 50, 63, 34
+ vadduwm 14, 18, 14
+ xxlxor 47, 46, 47
+ vrlw 15, 15, 4
+ vadduwm 17, 15, 17
+ xxlxor 50, 49, 50
+ vadduwm 6, 17, 6
+ vperm 18, 18, 18, 0
+ xxsldwi 38, 38, 38, 3
+ xxlandc 50, 50, 37
+ vadduwm 14, 18, 14
+ xxswapd 48, 50
+ xxlxor 47, 46, 47
+ xxsldwi 46, 46, 46, 1
+ vrlw 15, 15, 1
+ vadduwm 6, 15, 6
+ xxlxor 48, 38, 48
+ vadduwm 6, 6, 7
+ vperm 16, 16, 16, 3
+ xxlandc 48, 48, 34
+ vadduwm 14, 16, 14
+ xxlxor 40, 46, 47
+ vrlw 8, 8, 4
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 48
+ xxsldwi 38, 38, 38, 1
+ vperm 7, 7, 7, 0
+ vadduwm 6, 6, 9
+ xxlandc 39, 39, 37
+ vadduwm 14, 7, 14
+ xxswapd 39, 39
+ xxlxor 40, 46, 40
+ xxsldwi 41, 46, 46, 3
+ vrlw 8, 8, 1
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 39
+ vperm 3, 7, 7, 3
+ vmrghw 7, 12, 13
+ xxlandc 34, 35, 34
+ vperm 7, 7, 28, 11
+ vadduwm 3, 2, 9
+ xxlxor 40, 35, 40
+ vrlw 4, 8, 4
+ vadduwm 6, 6, 7
+ vadduwm 6, 4, 6
+ xxlxor 34, 38, 34
+ xxsldwi 0, 38, 38, 3
+ vperm 2, 2, 2, 0
+ xxlandc 34, 34, 37
+ vadduwm 3, 2, 3
+ xxswapd 34, 34
+ xxlxor 36, 35, 36
+ xxsldwi 1, 35, 35, 1
+ vrlw 4, 4, 1
+ xxlxor 0, 1, 0
+ xxswapd 0, 0
+ xxlxor 1, 36, 34
+ stxvd2x 0, 0, 3
+ xxswapd 1, 1
+ stxvd2x 1, 3, 5
+ li 3, -16
+ lxvd2x 63, 1, 3
+ li 3, -32
+ lxvd2x 62, 1, 3
+ li 3, -48
+ lxvd2x 61, 1, 3
+ li 3, -64
+ lxvd2x 60, 1, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI1_0:
+ .byte 31
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 30
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 29
+ .byte 6
+ .byte 5
+ .byte 4
+ .byte 28
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI1_2:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI1_3:
+ .long 1779033703
+ .long 3144134277
+ .long 1013904242
+ .long 2773480762
+.LCPI1_4:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_5:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+.LCPI1_6:
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+.LCPI1_7:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_8:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_9:
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_10:
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 31
+ .byte 31
+ .byte 31
+.LCPI1_11:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+.LCPI1_12:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+.LCPI1_13:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+.LCPI1_14:
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .text
+ .globl zfs_blake3_compress_xof_sse41
+ .p2align 2
+ .type zfs_blake3_compress_xof_sse41,@function
+zfs_blake3_compress_xof_sse41:
+.Lfunc_begin1:
+ .cfi_startproc
+.Lfunc_gep1:
+ addis 2, 12, .TOC.-.Lfunc_gep1@ha
+ addi 2, 2, .TOC.-.Lfunc_gep1@l
+.Lfunc_lep1:
+ .localentry zfs_blake3_compress_xof_sse41, .Lfunc_lep1-.Lfunc_gep1
+ li 9, -64
+ mtvsrd 34, 5
+ li 5, 16
+ lfdx 0, 0, 4
+ vspltisw 13, -16
+ addis 11, 2, .LCPI1_9@toc@ha
+ stxvd2x 60, 1, 9
+ li 9, -48
+ mtvsrd 35, 7
+ lfd 1, 8(4)
+ lfd 2, 16(4)
+ addis 7, 2, .LCPI1_0@toc@ha
+ stxvd2x 61, 1, 9
+ li 9, -32
+ mtvsrwz 36, 6
+ rldicl 6, 6, 32, 32
+ stxvd2x 62, 1, 9
+ li 9, -16
+ vmrghb 2, 3, 2
+ stxvd2x 63, 1, 9
+ mtvsrwz 35, 6
+ addi 6, 7, .LCPI1_0@toc@l
+ addis 7, 2, .LCPI1_2@toc@ha
+ lfd 3, 24(4)
+ xxmrghd 37, 1, 0
+ lvx 6, 0, 6
+ xxlxor 33, 33, 33
+ lxvd2x 0, 0, 3
+ addis 6, 2, .LCPI1_1@toc@ha
+ addi 7, 7, .LCPI1_2@toc@l
+ vmrghw 3, 3, 4
+ lxvd2x 1, 3, 5
+ addi 6, 6, .LCPI1_1@toc@l
+ vspltisw 14, 9
+ xxmrghd 32, 3, 2
+ xxswapd 36, 0
+ vperm 2, 1, 2, 6
+ xxswapd 38, 1
+ vpkudum 9, 0, 5
+ xxmrgld 34, 34, 35
+ lvx 3, 0, 7
+ addis 7, 2, .LCPI1_4@toc@ha
+ addi 7, 7, .LCPI1_4@toc@l
+ vadduwm 4, 9, 4
+ lvx 11, 0, 7
+ addis 7, 2, .LCPI1_6@toc@ha
+ addi 7, 7, .LCPI1_6@toc@l
+ vadduwm 7, 4, 6
+ lvx 4, 0, 6
+ addis 6, 2, .LCPI1_3@toc@ha
+ addi 6, 6, .LCPI1_3@toc@l
+ vperm 11, 0, 5, 11
+ lvx 0, 0, 7
+ li 7, 32
+ xxlxor 40, 39, 34
+ lvx 10, 0, 6
+ addis 6, 2, .LCPI1_5@toc@ha
+ lxvd2x 0, 4, 7
+ vcmpgtsb 2, 1, 4
+ addi 6, 6, .LCPI1_5@toc@l
+ vperm 4, 8, 8, 3
+ vspltisw 8, 10
+ xxlandc 44, 36, 34
+ vadduwm 4, 8, 8
+ vadduwm 8, 12, 10
+ xxlxor 37, 40, 38
+ vrlw 6, 5, 4
+ vadduwm 5, 7, 11
+ vadduwm 7, 6, 5
+ lvx 5, 0, 6
+ li 6, 48
+ lxvd2x 1, 4, 6
+ addis 4, 2, .LCPI1_7@toc@ha
+ xxlxor 42, 39, 44
+ addi 4, 4, .LCPI1_7@toc@l
+ vcmpgtsb 5, 1, 5
+ vperm 1, 10, 10, 0
+ xxswapd 42, 0
+ xxswapd 44, 1
+ vpkudum 16, 12, 10
+ xxlandc 47, 33, 37
+ vsubuwm 1, 14, 13
+ lvx 14, 0, 4
+ addis 4, 2, .LCPI1_8@toc@ha
+ vadduwm 8, 15, 8
+ xxswapd 45, 47
+ addi 4, 4, .LCPI1_8@toc@l
+ xxlxor 38, 40, 38
+ xxsldwi 40, 40, 40, 3
+ vadduwm 7, 7, 16
+ xxsldwi 48, 48, 48, 1
+ vrlw 6, 6, 1
+ xxsldwi 39, 39, 39, 1
+ vperm 14, 10, 12, 14
+ vadduwm 7, 6, 7
+ xxlxor 45, 39, 45
+ vperm 13, 13, 13, 3
+ xxlandc 45, 45, 34
+ vadduwm 8, 13, 8
+ xxlxor 38, 40, 38
+ vrlw 10, 6, 4
+ vadduwm 6, 7, 14
+ vadduwm 7, 10, 6
+ xxlxor 38, 39, 45
+ vperm 12, 6, 6, 0
+ lvx 6, 0, 4
+ addis 4, 2, .LCPI1_10@toc@ha
+ addi 4, 4, .LCPI1_10@toc@l
+ vperm 13, 11, 9, 6
+ xxlandc 44, 44, 37
+ vadduwm 15, 12, 8
+ vadduwm 7, 7, 13
+ xxsldwi 45, 45, 45, 3
+ xxlxor 40, 47, 42
+ xxsldwi 47, 47, 47, 1
+ xxsldwi 39, 39, 39, 3
+ vrlw 10, 8, 1
+ xxswapd 40, 44
+ vadduwm 17, 10, 7
+ lvx 7, 0, 4
+ addi 4, 11, .LCPI1_9@toc@l
+ xxlxor 44, 49, 40
+ lvx 8, 0, 4
+ addis 4, 2, .LCPI1_11@toc@ha
+ vperm 18, 9, 9, 7
+ addi 4, 4, .LCPI1_11@toc@l
+ vperm 12, 12, 12, 3
+ lvx 9, 0, 4
+ addis 4, 2, .LCPI1_12@toc@ha
+ vperm 19, 14, 16, 8
+ addi 4, 4, .LCPI1_12@toc@l
+ xxlandc 63, 44, 34
+ vperm 12, 19, 18, 9
+ vadduwm 15, 31, 15
+ xxlxor 42, 47, 42
+ vrlw 18, 10, 4
+ vadduwm 10, 17, 12
+ vadduwm 17, 18, 10
+ xxlxor 42, 49, 63
+ xxmrgld 63, 43, 46
+ xxsldwi 49, 49, 49, 1
+ vmrghw 14, 14, 11
+ vperm 19, 10, 10, 0
+ lvx 10, 0, 4
+ addis 4, 2, .LCPI1_13@toc@ha
+ addi 4, 4, .LCPI1_13@toc@l
+ lvx 11, 0, 4
+ addis 4, 2, .LCPI1_14@toc@ha
+ vperm 31, 16, 31, 10
+ addi 4, 4, .LCPI1_14@toc@l
+ vperm 14, 14, 16, 11
+ xxlandc 51, 51, 37
+ vadduwm 15, 19, 15
+ xxswapd 51, 51
+ vadduwm 17, 17, 31
+ xxlxor 50, 47, 50
+ xxsldwi 47, 47, 47, 3
+ vperm 30, 14, 31, 8
+ vrlw 18, 18, 1
+ vadduwm 17, 18, 17
+ xxlxor 51, 49, 51
+ vadduwm 17, 17, 14
+ vperm 19, 19, 19, 3
+ xxlandc 51, 51, 34
+ vadduwm 15, 19, 15
+ xxlxor 48, 47, 50
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 51
+ vperm 19, 12, 13, 6
+ vperm 18, 18, 18, 0
+ vperm 13, 13, 13, 7
+ vadduwm 17, 17, 19
+ xxlandc 50, 50, 37
+ xxsldwi 49, 49, 49, 3
+ vperm 13, 30, 13, 9
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxmrgld 62, 44, 46
+ vmrghw 12, 14, 12
+ xxlxor 48, 47, 48
+ xxsldwi 47, 47, 47, 1
+ vrlw 16, 16, 1
+ vperm 30, 31, 30, 10
+ vperm 12, 12, 31, 11
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 13
+ vperm 18, 18, 18, 3
+ vperm 31, 12, 30, 8
+ xxlandc 50, 50, 34
+ vadduwm 15, 18, 15
+ xxlxor 48, 47, 48
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ xxsldwi 49, 49, 49, 1
+ vperm 18, 18, 18, 0
+ vadduwm 17, 17, 30
+ xxlandc 50, 50, 37
+ vadduwm 15, 18, 15
+ xxswapd 50, 50
+ xxlxor 48, 47, 48
+ xxsldwi 46, 47, 47, 3
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 50, 49, 50
+ vadduwm 17, 17, 12
+ vperm 18, 18, 18, 3
+ xxlandc 47, 50, 34
+ xxsldwi 50, 51, 51, 3
+ vadduwm 14, 15, 14
+ vperm 19, 13, 18, 6
+ xxlxor 48, 46, 48
+ vperm 18, 18, 18, 7
+ vrlw 16, 16, 4
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vadduwm 17, 17, 19
+ vperm 15, 15, 15, 0
+ xxsldwi 49, 49, 49, 3
+ xxlandc 47, 47, 37
+ vadduwm 14, 15, 14
+ xxswapd 47, 47
+ xxlxor 48, 46, 48
+ xxsldwi 46, 46, 46, 1
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 29, 15, 14
+ vperm 14, 31, 18, 9
+ xxmrgld 50, 45, 44
+ xxlxor 48, 61, 48
+ vmrghw 12, 12, 13
+ vrlw 16, 16, 4
+ vperm 18, 30, 18, 10
+ vadduwm 17, 17, 14
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ xxsldwi 49, 49, 49, 1
+ vperm 15, 15, 15, 0
+ vadduwm 17, 17, 18
+ xxlandc 47, 47, 37
+ vadduwm 31, 15, 29
+ xxswapd 47, 47
+ xxlxor 48, 63, 48
+ xxsldwi 45, 63, 63, 3
+ vperm 31, 12, 30, 11
+ vrlw 16, 16, 1
+ vadduwm 17, 16, 17
+ xxlxor 47, 49, 47
+ vperm 15, 15, 15, 3
+ xxlandc 47, 47, 34
+ vadduwm 13, 15, 13
+ xxlxor 44, 45, 48
+ vadduwm 16, 17, 31
+ xxsldwi 49, 51, 51, 3
+ vrlw 12, 12, 4
+ vperm 19, 14, 17, 6
+ vadduwm 16, 12, 16
+ xxlxor 47, 48, 47
+ vperm 15, 15, 15, 0
+ xxlandc 47, 47, 37
+ vadduwm 13, 15, 13
+ xxswapd 47, 47
+ xxlxor 44, 45, 44
+ xxsldwi 45, 45, 45, 1
+ vrlw 30, 12, 1
+ vadduwm 12, 16, 19
+ xxsldwi 44, 44, 44, 3
+ vadduwm 16, 30, 12
+ xxlxor 44, 48, 47
+ vperm 15, 17, 17, 7
+ vperm 12, 12, 12, 3
+ vperm 17, 31, 18, 8
+ xxlandc 61, 44, 34
+ vperm 12, 17, 15, 9
+ vadduwm 13, 29, 13
+ xxlxor 47, 45, 62
+ xxmrgld 62, 46, 63
+ vmrghw 14, 31, 14
+ vrlw 15, 15, 4
+ vadduwm 16, 16, 12
+ vperm 30, 18, 30, 10
+ vperm 14, 14, 18, 11
+ xxsldwi 50, 51, 51, 3
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 61
+ xxsldwi 48, 48, 48, 1
+ vperm 19, 12, 18, 6
+ vperm 17, 17, 17, 0
+ vadduwm 16, 16, 30
+ xxmrgld 60, 44, 46
+ vmrghw 12, 14, 12
+ vperm 28, 30, 28, 10
+ xxlandc 49, 49, 37
+ vadduwm 13, 17, 13
+ xxswapd 49, 49
+ vperm 12, 12, 30, 11
+ xxlxor 47, 45, 47
+ xxsldwi 45, 45, 45, 3
+ vrlw 15, 15, 1
+ vperm 8, 12, 28, 8
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vadduwm 16, 16, 14
+ vperm 17, 17, 17, 3
+ xxlandc 49, 49, 34
+ vadduwm 13, 17, 13
+ xxlxor 47, 45, 47
+ vrlw 15, 15, 4
+ vadduwm 16, 15, 16
+ xxlxor 49, 48, 49
+ vperm 17, 17, 17, 0
+ xxlandc 49, 49, 37
+ vadduwm 31, 17, 13
+ xxlxor 45, 63, 47
+ vrlw 15, 13, 1
+ vadduwm 13, 16, 19
+ xxswapd 48, 49
+ xxsldwi 51, 51, 51, 3
+ xxsldwi 45, 45, 45, 3
+ vadduwm 17, 15, 13
+ xxlxor 45, 49, 48
+ lvx 16, 0, 4
+ vperm 29, 13, 13, 3
+ vperm 13, 18, 18, 7
+ xxsldwi 50, 63, 63, 1
+ vperm 16, 14, 30, 16
+ vperm 7, 19, 19, 7
+ xxlandc 63, 61, 34
+ vadduwm 18, 31, 18
+ vperm 29, 16, 13, 9
+ xxlxor 47, 50, 47
+ vperm 6, 16, 19, 6
+ vrlw 15, 15, 4
+ vperm 7, 8, 7, 9
+ vadduwm 17, 17, 29
+ xxmrgld 41, 61, 44
+ vadduwm 17, 15, 17
+ vperm 9, 28, 9, 10
+ xxlxor 63, 49, 63
+ xxsldwi 49, 49, 49, 1
+ vperm 31, 31, 31, 0
+ vadduwm 17, 17, 28
+ xxlandc 63, 63, 37
+ vadduwm 18, 31, 18
+ xxswapd 63, 63
+ xxlxor 47, 50, 47
+ xxsldwi 46, 50, 50, 3
+ vrlw 15, 15, 1
+ vadduwm 17, 15, 17
+ xxlxor 63, 49, 63
+ vadduwm 17, 17, 12
+ vperm 31, 31, 31, 3
+ xxlandc 50, 63, 34
+ vadduwm 14, 18, 14
+ xxlxor 47, 46, 47
+ vrlw 15, 15, 4
+ vadduwm 17, 15, 17
+ xxlxor 50, 49, 50
+ vadduwm 6, 17, 6
+ vperm 18, 18, 18, 0
+ xxsldwi 38, 38, 38, 3
+ xxlandc 50, 50, 37
+ vadduwm 14, 18, 14
+ xxswapd 48, 50
+ xxlxor 47, 46, 47
+ xxsldwi 46, 46, 46, 1
+ vrlw 15, 15, 1
+ vadduwm 6, 15, 6
+ xxlxor 48, 38, 48
+ vadduwm 6, 6, 7
+ vperm 16, 16, 16, 3
+ xxlandc 48, 48, 34
+ vadduwm 14, 16, 14
+ xxlxor 40, 46, 47
+ vrlw 8, 8, 4
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 48
+ xxsldwi 38, 38, 38, 1
+ vperm 7, 7, 7, 0
+ vadduwm 6, 6, 9
+ xxlandc 39, 39, 37
+ vadduwm 14, 7, 14
+ xxswapd 39, 39
+ xxlxor 40, 46, 40
+ xxsldwi 41, 46, 46, 3
+ vrlw 8, 8, 1
+ vadduwm 6, 8, 6
+ xxlxor 39, 38, 39
+ vperm 3, 7, 7, 3
+ vmrghw 7, 12, 13
+ xxlandc 34, 35, 34
+ vperm 7, 7, 28, 11
+ vadduwm 3, 2, 9
+ xxlxor 40, 35, 40
+ vrlw 4, 8, 4
+ vadduwm 6, 6, 7
+ vadduwm 6, 4, 6
+ xxlxor 34, 38, 34
+ xxsldwi 0, 38, 38, 3
+ vperm 2, 2, 2, 0
+ xxlandc 34, 34, 37
+ vadduwm 3, 2, 3
+ xxswapd 34, 34
+ xxlxor 36, 35, 36
+ xxsldwi 1, 35, 35, 1
+ vrlw 4, 4, 1
+ xxlxor 0, 1, 0
+ xxswapd 0, 0
+ xxlxor 2, 36, 34
+ stxvd2x 0, 0, 8
+ xxswapd 2, 2
+ stxvd2x 2, 8, 5
+ lfdx 0, 0, 3
+ lfd 2, 8(3)
+ xxmrghd 35, 2, 0
+ xxlxor 0, 1, 35
+ xxswapd 0, 0
+ stxvd2x 0, 8, 7
+ lfd 0, 16(3)
+ lfd 1, 24(3)
+ li 3, -16
+ xxmrghd 35, 1, 0
+ xxlxor 0, 34, 35
+ xxswapd 0, 0
+ stxvd2x 0, 8, 6
+ lxvd2x 63, 1, 3
+ li 3, -32
+ lxvd2x 62, 1, 3
+ li 3, -48
+ lxvd2x 61, 1, 3
+ li 3, -64
+ lxvd2x 60, 1, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end1:
+ .size zfs_blake3_compress_xof_sse41, .Lfunc_end1-.Lfunc_begin1
+ .cfi_endproc
+
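+/*
+ * zfs_blake3_hash_many_sse41: BLAKE3 hash_many() entry point.  Inputs are
+ * consumed four at a time through the 4-way kernel blake3_hash4_sse41
+ * below; any remaining inputs are hashed serially in the tail loop by
+ * copying the 32-byte key to a stack buffer and folding in one 64-byte
+ * block per call to zfs_blake3_compress_in_place_sse41.
+ */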
+ .globl zfs_blake3_hash_many_sse41
+ .p2align 2
+ .type zfs_blake3_hash_many_sse41,@function
+zfs_blake3_hash_many_sse41:
+.Lfunc_begin2:
+ .cfi_startproc
+.Lfunc_gep2:
+ addis 2, 12, .TOC.-.Lfunc_gep2@ha
+ addi 2, 2, .TOC.-.Lfunc_gep2@l
+.Lfunc_lep2:
+ .localentry zfs_blake3_hash_many_sse41, .Lfunc_lep2-.Lfunc_gep2
+ mfocrf 12, 32
+ mflr 0
+ std 0, 16(1)
+ stw 12, 8(1)
+ stdu 1, -256(1)
+ .cfi_def_cfa_offset 256
+ .cfi_offset lr, 16
+ .cfi_offset r17, -120
+ .cfi_offset r18, -112
+ .cfi_offset r19, -104
+ .cfi_offset r20, -96
+ .cfi_offset r21, -88
+ .cfi_offset r22, -80
+ .cfi_offset r23, -72
+ .cfi_offset r24, -64
+ .cfi_offset r25, -56
+ .cfi_offset r26, -48
+ .cfi_offset r27, -40
+ .cfi_offset r28, -32
+ .cfi_offset r29, -24
+ .cfi_offset r30, -16
+ .cfi_offset cr2, 8
+ std 26, 208(1)
+ mr 26, 4
+ cmpldi 1, 4, 4
+ andi. 4, 8, 1
+ std 18, 144(1)
+ std 19, 152(1)
+ crmove 8, 1
+ ld 19, 360(1)
+ lwz 18, 352(1)
+ std 24, 192(1)
+ std 25, 200(1)
+ std 27, 216(1)
+ std 28, 224(1)
+ mr 24, 10
+ mr 28, 6
+ mr 27, 5
+ mr 25, 3
+ std 29, 232(1)
+ std 30, 240(1)
+ mr 30, 9
+ mr 29, 7
+ std 17, 136(1)
+ std 20, 160(1)
+ std 21, 168(1)
+ std 22, 176(1)
+ std 23, 184(1)
+ blt 1, .LBB2_3
+ li 3, 0
+ li 4, 1
+ clrldi 23, 30, 32
+ isel 22, 4, 3, 8
+ clrldi 21, 24, 32
+ clrldi 20, 18, 32
+.LBB2_2:
+ mr 3, 25
+ mr 4, 27
+ mr 5, 28
+ mr 6, 29
+ mr 7, 22
+ mr 8, 23
+ mr 9, 21
+ mr 10, 20
+ std 19, 32(1)
+ bl blake3_hash4_sse41
+ addi 26, 26, -4
+ addi 3, 29, 4
+ addi 25, 25, 32
+ addi 19, 19, 128
+ cmpldi 26, 3
+ isel 29, 3, 29, 8
+ bgt 0, .LBB2_2
+.LBB2_3:
+ cmpldi 26, 0
+ beq 0, .LBB2_11
+ li 3, 0
+ li 4, 1
+ or 21, 24, 30
+ li 20, 16
+ addi 24, 1, 96
+ isel 22, 4, 3, 8
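+/*
+ * Tail loop: hash each leftover input on its own.  flags_start is OR-ed
+ * into the first block, flags_end into the last, and the resulting
+ * 32-byte chaining value is copied out to the output array.
+ */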
+.LBB2_5:
+ lxvd2x 0, 28, 20
+ ld 23, 0(25)
+ mr 17, 27
+ mr 3, 21
+ stxvd2x 0, 24, 20
+ lxvd2x 0, 0, 28
+ stxvd2x 0, 0, 24
+.LBB2_6:
+ cmpldi 17, 1
+ beq 0, .LBB2_8
+ cmpldi 17, 0
+ bne 0, .LBB2_9
+ b .LBB2_10
+.LBB2_8:
+ or 3, 3, 18
+.LBB2_9:
+ clrldi 7, 3, 56
+ mr 3, 24
+ mr 4, 23
+ li 5, 64
+ mr 6, 29
+ bl zfs_blake3_compress_in_place_sse41
+ addi 23, 23, 64
+ addi 17, 17, -1
+ mr 3, 30
+ b .LBB2_6
+.LBB2_10:
+ lxvd2x 0, 24, 20
+ addi 26, 26, -1
+ add 29, 29, 22
+ addi 25, 25, 8
+ cmpldi 26, 0
+ stxvd2x 0, 19, 20
+ lxvd2x 0, 0, 24
+ stxvd2x 0, 0, 19
+ addi 19, 19, 32
+ bne 0, .LBB2_5
+.LBB2_11:
+ ld 30, 240(1)
+ ld 29, 232(1)
+ ld 28, 224(1)
+ ld 27, 216(1)
+ ld 26, 208(1)
+ ld 25, 200(1)
+ ld 24, 192(1)
+ ld 23, 184(1)
+ ld 22, 176(1)
+ ld 21, 168(1)
+ ld 20, 160(1)
+ ld 19, 152(1)
+ ld 18, 144(1)
+ ld 17, 136(1)
+ addi 1, 1, 256
+ ld 0, 16(1)
+ lwz 12, 8(1)
+ mtocrf 32, 12
+ mtlr 0
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end2:
+ .size zfs_blake3_hash_many_sse41, .Lfunc_end2-.Lfunc_begin2
+ .cfi_endproc
+
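+/*
+ * Constant pool for blake3_hash4_sse41: .LCPI3_0 holds the per-lane
+ * counter offsets {0,1,2,3}; .LCPI3_1, .LCPI3_2, .LCPI3_3 and .LCPI3_8
+ * are vperm byte-shuffle masks implementing the 16- and 8-bit word
+ * rotates of the G function; .LCPI3_4 through .LCPI3_7 splat the first
+ * four IV words 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A across
+ * all four lanes.
+ */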
+ .section .rodata.cst16,"aM",@progbits,16
+ .p2align 4
+.LCPI3_0:
+ .quad 4294967296
+ .quad 12884901890
+.LCPI3_1:
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 5
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 9
+ .byte 14
+ .byte 15
+ .byte 12
+ .byte 13
+.LCPI3_2:
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 0
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 4
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 8
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 12
+.LCPI3_3:
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 30
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 26
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 22
+ .byte 17
+ .byte 16
+ .byte 19
+ .byte 18
+.LCPI3_4:
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+ .long 1779033703
+.LCPI3_5:
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+ .long 3144134277
+.LCPI3_6:
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+ .long 1013904242
+.LCPI3_7:
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+ .long 2773480762
+.LCPI3_8:
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 31
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 27
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 23
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 19
+ .text
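+/*
+ * blake3_hash4_sse41: local 4-way compression kernel.  State and message
+ * words are kept transposed (one 32-bit lane per input) so that each
+ * vadduwm/vrlw/vperm below advances the same G-function step for four
+ * independent hash chains at once.
+ */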
+ .p2align 2
+ .type blake3_hash4_sse41,@function
+blake3_hash4_sse41:
+.Lfunc_begin3:
+ .cfi_startproc
+.Lfunc_gep3:
+ addis 2, 12, .TOC.-.Lfunc_gep3@ha
+ addi 2, 2, .TOC.-.Lfunc_gep3@l
+.Lfunc_lep3:
+ .localentry blake3_hash4_sse41, .Lfunc_lep3-.Lfunc_gep3
+ stdu 1, -416(1)
+ .cfi_def_cfa_offset 416
+ .cfi_offset r22, -176
+ .cfi_offset r23, -168
+ .cfi_offset r24, -160
+ .cfi_offset r25, -152
+ .cfi_offset r26, -144
+ .cfi_offset r27, -136
+ .cfi_offset r28, -128
+ .cfi_offset r29, -120
+ .cfi_offset r30, -112
+ .cfi_offset f20, -96
+ .cfi_offset f21, -88
+ .cfi_offset f22, -80
+ .cfi_offset f23, -72
+ .cfi_offset f24, -64
+ .cfi_offset f25, -56
+ .cfi_offset f26, -48
+ .cfi_offset f27, -40
+ .cfi_offset f28, -32
+ .cfi_offset f29, -24
+ .cfi_offset f30, -16
+ .cfi_offset f31, -8
+ .cfi_offset v20, -368
+ .cfi_offset v21, -352
+ .cfi_offset v22, -336
+ .cfi_offset v23, -320
+ .cfi_offset v24, -304
+ .cfi_offset v25, -288
+ .cfi_offset v26, -272
+ .cfi_offset v27, -256
+ .cfi_offset v28, -240
+ .cfi_offset v29, -224
+ .cfi_offset v30, -208
+ .cfi_offset v31, -192
+ li 11, 48
+ li 0, 8
+ std 30, 304(1)
+ li 30, 12
+ li 12, 4
+ lfiwzx 0, 0, 5
+ stxvd2x 52, 1, 11
+ li 11, 64
+ lfiwzx 2, 5, 0
+ li 0, 20
+ lfiwzx 3, 5, 30
+ stxvd2x 53, 1, 11
+ li 11, 80
+ li 30, 24
+ lfiwzx 4, 5, 0
+ li 0, 28
+ stxvd2x 54, 1, 11
+ li 11, 96
+ lfiwzx 1, 5, 12
+ lfiwzx 6, 5, 30
+ xxspltw 47, 0, 1
+ cmpldi 4, 0
+ std 22, 240(1)
+ stxvd2x 55, 1, 11
+ li 11, 112
+ lfiwzx 7, 5, 0
+ xxspltw 40, 2, 1
+ std 23, 248(1)
+ xxspltw 39, 3, 1
+ std 24, 256(1)
+ std 25, 264(1)
+ xxspltw 51, 1, 1
+ xxspltw 43, 6, 1
+ std 26, 272(1)
+ xxspltw 41, 7, 1
+ std 27, 280(1)
+ std 28, 288(1)
+ std 29, 296(1)
+ stxvd2x 56, 1, 11
+ li 11, 128
+ stfd 20, 320(1)
+ stxvd2x 57, 1, 11
+ li 11, 144
+ stfd 21, 328(1)
+ stxvd2x 58, 1, 11
+ li 11, 160
+ stfd 22, 336(1)
+ stxvd2x 59, 1, 11
+ li 11, 176
+ stfd 23, 344(1)
+ stxvd2x 60, 1, 11
+ li 11, 192
+ stfd 24, 352(1)
+ stxvd2x 61, 1, 11
+ li 11, 208
+ stfd 25, 360(1)
+ stxvd2x 62, 1, 11
+ li 11, 224
+ stfd 26, 368(1)
+ stxvd2x 63, 1, 11
+ li 11, 16
+ xxspltw 63, 4, 1
+ lfiwzx 5, 5, 11
+ ld 5, 448(1)
+ stfd 27, 376(1)
+ stfd 28, 384(1)
+ stfd 29, 392(1)
+ stfd 30, 400(1)
+ stfd 31, 408(1)
+ xxspltw 50, 5, 1
+ beq 0, .LBB3_5
+ addis 30, 2, .LCPI3_0@toc@ha
+ neg 7, 7
+ xxleqv 34, 34, 34
+ addis 28, 2, .LCPI3_5@toc@ha
+ addis 27, 2, .LCPI3_6@toc@ha
+ addis 26, 2, .LCPI3_7@toc@ha
+ addis 29, 2, .LCPI3_4@toc@ha
+ addis 25, 2, .LCPI3_8@toc@ha
+ addi 0, 30, .LCPI3_0@toc@l
+ mtfprwz 2, 7
+ addis 7, 2, .LCPI3_1@toc@ha
+ addis 30, 2, .LCPI3_3@toc@ha
+ addi 24, 29, .LCPI3_4@toc@l
+ ld 29, 24(3)
+ lxvd2x 1, 0, 0
+ mtfprwz 0, 6
+ rldicl 6, 6, 32, 32
+ addi 0, 30, .LCPI3_3@toc@l
+ ld 30, 16(3)
+ xxspltw 2, 2, 1
+ vslw 2, 2, 2
+ xxspltw 37, 0, 1
+ mtfprwz 0, 6
+ addi 6, 7, .LCPI3_1@toc@l
+ addis 7, 2, .LCPI3_2@toc@ha
+ xxswapd 35, 1
+ xxlxor 36, 36, 36
+ xxspltw 33, 0, 1
+ xxland 35, 2, 35
+ vadduwm 0, 3, 5
+ lvx 5, 0, 6
+ addi 6, 7, .LCPI3_2@toc@l
+ ld 7, 8(3)
+ xxlor 35, 35, 34
+ xxlxor 34, 32, 34
+ xxlor 9, 32, 32
+ lvx 0, 0, 6
+ ld 6, 0(3)
+ addi 3, 3, -8
+ vcmpgtsw 2, 3, 2
+ lvx 3, 0, 0
+ addi 0, 28, .LCPI3_5@toc@l
+ addi 28, 27, .LCPI3_6@toc@l
+ addi 27, 26, .LCPI3_7@toc@l
+ addi 26, 25, .LCPI3_8@toc@l
+ or 25, 9, 8
+ li 9, 0
+ vcmpgtsb 5, 4, 5
+ vcmpgtsb 0, 4, 0
+ xxlor 11, 35, 35
+ lvx 3, 0, 24
+ xxlor 12, 35, 35
+ vsubuwm 2, 1, 2
+ xxlnor 10, 37, 37
+ xxlor 13, 34, 34
+ lvx 2, 0, 0
+ li 0, 32
+ xxlnor 31, 32, 32
+ xxlor 30, 34, 34
+ lvx 2, 0, 28
+ li 28, 48
+ xxlor 29, 34, 34
+ lvx 2, 0, 27
+ li 27, 0
+ xxlor 28, 34, 34
+ lvx 2, 0, 26
+ xxlor 27, 34, 34
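+/*
+ * Main loop: one iteration per 64-byte block of each input.  The short
+ * ldu/dcbt loop at .LBB3_3 touches 256 bytes ahead in all four input
+ * streams before the seven BLAKE3 rounds are evaluated.
+ */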
+.LBB3_2:
+ mr 26, 27
+ addi 27, 27, 1
+ xxlor 23, 39, 39
+ cmpld 27, 4
+ sldi 26, 26, 6
+ xxlor 24, 40, 40
+ iseleq 24, 10, 9
+ add 23, 6, 26
+ add 22, 30, 26
+ lxvd2x 0, 6, 26
+ lxvd2x 1, 7, 26
+ or 25, 24, 25
+ add 24, 7, 26
+ lxvd2x 2, 30, 26
+ lxvd2x 3, 29, 26
+ xxlor 26, 47, 47
+ lxvd2x 4, 23, 11
+ lxvd2x 6, 24, 11
+ clrlwi 25, 25, 24
+ xxlor 25, 51, 51
+ lxvd2x 7, 22, 11
+ lxvd2x 8, 23, 0
+ mtfprd 5, 25
+ add 25, 29, 26
+ xxswapd 34, 0
+ lxvd2x 0, 25, 11
+ xxswapd 38, 1
+ xxswapd 32, 2
+ lxvd2x 1, 24, 0
+ lxvd2x 2, 22, 0
+ xxswapd 40, 3
+ xxswapd 39, 4
+ lxvd2x 3, 25, 0
+ lxvd2x 4, 23, 28
+ xxswapd 60, 6
+ xxswapd 47, 7
+ lxvd2x 6, 24, 28
+ xxswapd 57, 8
+ lxvd2x 7, 22, 28
+ lxvd2x 8, 25, 28
+ xxswapd 58, 0
+ mr 25, 3
+ xxswapd 53, 1
+ xxswapd 56, 2
+ xxswapd 52, 3
+ xxswapd 55, 4
+ xxswapd 54, 6
+ xxswapd 0, 5
+ xxswapd 42, 7
+ xxswapd 48, 8
+ mtctr 12
+.LBB3_3:
+ ldu 24, 8(25)
+ add 24, 24, 26
+ addi 24, 24, 256
+ dcbt 0, 24
+ bdnz .LBB3_3
+ vmrgew 4, 28, 7
+ vspltisw 14, 9
+ mr 25, 8
+ vmrgew 27, 6, 2
+ vspltisw 17, 4
+ vmrglw 12, 6, 2
+ vspltisw 19, 10
+ vmrghw 30, 6, 2
+ xxspltw 0, 0, 3
+ vmrglw 2, 8, 0
+ vmrghw 13, 8, 0
+ xxlor 7, 36, 36
+ vmrgew 4, 21, 25
+ vmrglw 29, 28, 7
+ vmrghw 1, 28, 7
+ vmrglw 28, 26, 15
+ xxmrgld 37, 34, 44
+ vmrgew 7, 26, 15
+ vmrghw 15, 26, 15
+ xxlor 21, 36, 36
+ vmrglw 4, 21, 25
+ vmrghw 21, 21, 25
+ vmrglw 25, 20, 24
+ xxmrgld 34, 60, 61
+ vmrghw 26, 20, 24
+ xxlor 38, 26, 26
+ vmrgew 3, 8, 0
+ xxlor 5, 36, 36
+ vmrgew 4, 20, 24
+ vspltisw 24, -16
+ vmrglw 20, 22, 23
+ xxmrgld 57, 57, 5
+ vmrglw 8, 16, 10
+ vmrghw 0, 16, 10
+ vadduwm 12, 19, 19
+ xxlor 8, 37, 37
+ xxlor 20, 36, 36
+ vmrgew 4, 22, 23
+ vmrghw 23, 22, 23
+ xxmrgld 40, 40, 52
+ vmrgew 22, 16, 10
+ vsubuwm 10, 14, 24
+ vslw 14, 17, 17
+ vadduwm 17, 5, 6
+ xxmrgld 37, 47, 33
+ xxlor 22, 36, 36
+ xxmrgld 36, 45, 62
+ xxlor 38, 25, 25
+ xxlor 2, 34, 34
+ vadduwm 19, 4, 6
+ xxmrgld 38, 39, 7
+ xxlor 3, 36, 36
+ xxmrghd 39, 47, 33
+ xxlor 36, 24, 24
+ xxmrgld 33, 58, 53
+ vadduwm 17, 17, 18
+ vadduwm 29, 2, 4
+ xxmrgld 36, 35, 59
+ xxlor 34, 23, 23
+ xxmrghd 35, 45, 62
+ xxlor 1, 9, 9
+ vadduwm 28, 5, 2
+ xxlor 1, 13, 13
+ vadduwm 19, 19, 31
+ vadduwm 24, 29, 11
+ vadduwm 28, 28, 9
+ xxlxor 61, 49, 9
+ xxlor 1, 41, 41
+ xxlor 41, 11, 11
+ xxlxor 34, 51, 13
+ vperm 29, 29, 29, 9
+ xxlxor 46, 56, 46
+ vperm 2, 2, 2, 9
+ xxlxor 59, 60, 0
+ vperm 14, 14, 14, 9
+ vperm 30, 27, 27, 9
+ vadduwm 19, 19, 3
+ xxlor 4, 35, 35
+ xxland 61, 61, 10
+ xxlor 35, 12, 12
+ xxland 34, 34, 10
+ vadduwm 27, 29, 3
+ xxlor 35, 30, 30
+ vadduwm 17, 17, 4
+ xxlor 26, 36, 36
+ xxland 46, 46, 10
+ vadduwm 3, 2, 3
+ xxlor 36, 29, 29
+ xxland 62, 62, 10
+ xxlxor 45, 59, 50
+ xxlxor 50, 35, 63
+ vadduwm 31, 14, 4
+ xxlor 36, 28, 28
+ xxlor 6, 37, 37
+ vadduwm 16, 30, 4
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 1
+ vrlw 4, 13, 12
+ vrlw 18, 18, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 15, 24, 6
+ vadduwm 28, 28, 7
+ vadduwm 17, 4, 17
+ vadduwm 19, 18, 19
+ vadduwm 15, 11, 15
+ vadduwm 28, 5, 28
+ xxlor 25, 38, 38
+ xxlxor 61, 49, 61
+ xxlxor 34, 51, 34
+ xxlxor 46, 47, 46
+ xxlxor 62, 60, 62
+ xxlor 38, 27, 27
+ vadduwm 19, 19, 1
+ vperm 29, 29, 29, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 14, 14, 6
+ vperm 30, 30, 30, 6
+ xxlor 5, 33, 33
+ vadduwm 17, 17, 25
+ xxland 61, 61, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ xxland 62, 62, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 2, 3
+ vadduwm 31, 24, 31
+ vadduwm 16, 30, 16
+ xxlxor 36, 59, 36
+ xxlxor 50, 35, 50
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 18, 10
+ xxmrgld 50, 32, 55
+ vrlw 11, 11, 10
+ xxmrghd 55, 32, 55
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 15, 15, 8
+ vadduwm 28, 28, 18
+ vadduwm 17, 1, 17
+ vadduwm 19, 11, 19
+ vadduwm 15, 5, 15
+ vadduwm 28, 4, 28
+ xxlor 7, 57, 57
+ xxlxor 62, 49, 62
+ xxlxor 61, 51, 61
+ xxlxor 57, 47, 34
+ xxlxor 34, 60, 56
+ vperm 24, 30, 30, 9
+ xxmrgld 62, 20, 21
+ vperm 29, 29, 29, 9
+ vperm 25, 25, 25, 9
+ vperm 2, 2, 2, 9
+ vmr 14, 8
+ xxmrghd 40, 58, 53
+ xxmrgld 58, 54, 22
+ vadduwm 17, 17, 30
+ xxland 56, 56, 10
+ vadduwm 21, 19, 8
+ xxland 61, 61, 10
+ xxland 51, 57, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vadduwm 0, 15, 26
+ vadduwm 15, 28, 23
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 21
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vmr 13, 8
+ xxlor 53, 3, 3
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 52, 4, 4
+ xxlor 40, 2, 2
+ vadduwm 17, 17, 21
+ vadduwm 28, 28, 20
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 8
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ vmr 25, 26
+ xxlor 3, 39, 39
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ xxlor 54, 6, 6
+ xxlor 58, 5, 5
+ xxlor 39, 8, 8
+ vadduwm 17, 17, 22
+ vadduwm 28, 28, 26
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 25
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 39, 26, 26
+ vadduwm 28, 28, 14
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 17, 17, 7
+ vadduwm 0, 0, 30
+ vadduwm 15, 15, 23
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 24, 55, 55
+ vadduwm 17, 17, 13
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vmr 23, 13
+ xxlor 45, 25, 25
+ xxlor 39, 7, 7
+ vadduwm 28, 28, 13
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 7
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 2, 46, 46
+ xxlor 46, 3, 3
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vadduwm 17, 17, 20
+ vadduwm 28, 28, 26
+ vadduwm 0, 0, 25
+ vadduwm 15, 15, 14
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 52, 2, 2
+ vadduwm 17, 17, 8
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 28, 28, 20
+ vadduwm 0, 0, 21
+ vadduwm 15, 15, 18
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ vadduwm 17, 17, 22
+ vadduwm 28, 28, 30
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 0, 0, 23
+ vadduwm 15, 15, 7
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 5, 4, 4
+ xxlor 4, 58, 58
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ xxlor 39, 8, 8
+ xxlor 54, 24, 24
+ xxlor 58, 26, 26
+ vadduwm 17, 17, 13
+ vadduwm 28, 28, 7
+ vadduwm 0, 0, 22
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 3, 53, 53
+ xxlor 53, 4, 4
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vadduwm 17, 17, 21
+ vadduwm 28, 28, 20
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 25
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 2, 55, 55
+ vmr 23, 18
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ xxlor 50, 5, 5
+ vadduwm 17, 17, 14
+ vadduwm 28, 28, 30
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 22
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 25, 40, 40
+ vmr 8, 13
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ xxlor 45, 25, 25
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 17, 17, 13
+ xxlor 45, 2, 2
+ vadduwm 0, 0, 8
+ vadduwm 28, 28, 13
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 4, 57, 57
+ xxlor 26, 46, 46
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ xxlor 8, 62, 62
+ xxlor 57, 3, 3
+ xxlor 46, 7, 7
+ xxlor 62, 6, 6
+ vadduwm 17, 17, 7
+ vadduwm 28, 28, 25
+ vadduwm 0, 0, 14
+ vadduwm 15, 15, 30
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vadduwm 17, 17, 20
+ xxlor 3, 52, 52
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 52, 8, 8
+ vadduwm 0, 0, 22
+ vadduwm 28, 28, 20
+ vadduwm 15, 15, 23
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 6, 55, 55
+ xxlor 55, 4, 4
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 17, 17, 23
+ vadduwm 28, 28, 13
+ vadduwm 0, 0, 21
+ vadduwm 15, 15, 14
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 4, 53, 53
+ xxlor 53, 26, 26
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 17, 17, 21
+ vadduwm 28, 28, 8
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 30
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 5, 25, 25
+ xxlor 2, 58, 58
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vmr 22, 26
+ vadduwm 0, 0, 26
+ xxlor 58, 5, 5
+ vadduwm 17, 17, 25
+ vadduwm 28, 28, 18
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 7, 24, 24
+ xxlor 8, 57, 57
+ xxland 56, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 59, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 57, 7, 7
+ vadduwm 17, 17, 20
+ vadduwm 28, 28, 13
+ vadduwm 0, 0, 14
+ vadduwm 15, 15, 25
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 24, 24, 24, 9
+ xxlor 5, 52, 52
+ xxlor 23, 45, 45
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 56, 56, 10
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ xxlor 52, 6, 6
+ vadduwm 28, 28, 8
+ vmr 13, 8
+ xxlor 40, 3, 3
+ vadduwm 17, 17, 20
+ vadduwm 0, 0, 8
+ vadduwm 15, 15, 22
+ vadduwm 17, 4, 17
+ vadduwm 28, 1, 28
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 49, 61
+ xxlxor 51, 60, 51
+ xxlxor 34, 32, 34
+ xxlxor 56, 47, 56
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 24, 24, 24, 6
+ xxlor 25, 39, 39
+ vmr 7, 30
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 56, 56, 31
+ vadduwm 27, 29, 27
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 24, 16
+ xxlxor 36, 59, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vmr 30, 18
+ xxlor 24, 46, 46
+ xxlor 46, 25, 25
+ xxlor 50, 8, 8
+ vadduwm 17, 17, 23
+ vadduwm 28, 28, 14
+ vadduwm 0, 0, 18
+ vadduwm 15, 15, 26
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 9
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ xxlor 6, 58, 58
+ xxlor 58, 4, 4
+ xxland 56, 56, 10
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ vadduwm 31, 24, 31
+ vadduwm 16, 29, 16
+ vadduwm 27, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 59, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ vadduwm 17, 17, 30
+ vadduwm 28, 28, 26
+ vadduwm 0, 0, 7
+ vadduwm 15, 15, 21
+ vadduwm 17, 1, 17
+ vadduwm 28, 11, 28
+ vadduwm 0, 5, 0
+ vadduwm 15, 4, 15
+ xxlxor 56, 49, 56
+ xxlxor 61, 60, 61
+ xxlxor 51, 32, 51
+ xxlxor 34, 47, 34
+ vperm 24, 24, 24, 6
+ vperm 29, 29, 29, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ xxlor 40, 23, 23
+ vadduwm 13, 28, 13
+ vadduwm 8, 17, 8
+ xxland 49, 56, 31
+ xxland 61, 61, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ vadduwm 31, 17, 31
+ vadduwm 16, 29, 16
+ vadduwm 28, 19, 27
+ vadduwm 3, 2, 3
+ xxlxor 33, 63, 33
+ xxlxor 43, 48, 43
+ xxlxor 36, 35, 36
+ xxlxor 37, 60, 37
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlor 2, 55, 55
+ vmr 23, 30
+ xxlor 62, 24, 24
+ vadduwm 0, 0, 22
+ vadduwm 15, 15, 30
+ vadduwm 8, 4, 8
+ vadduwm 13, 1, 13
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 61, 40, 61
+ xxlxor 51, 45, 51
+ xxlxor 34, 32, 34
+ xxlxor 49, 47, 49
+ vperm 29, 29, 29, 9
+ vperm 19, 19, 19, 9
+ vperm 2, 2, 2, 9
+ vperm 17, 17, 17, 9
+ vadduwm 13, 13, 14
+ xxlor 46, 5, 5
+ xxland 61, 61, 10
+ xxland 51, 51, 10
+ xxland 34, 34, 10
+ xxland 49, 49, 10
+ vadduwm 28, 29, 28
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 17, 16
+ xxlxor 36, 60, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 4, 4, 12
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vadduwm 8, 8, 25
+ vadduwm 0, 0, 14
+ vadduwm 15, 15, 7
+ vadduwm 8, 4, 8
+ vadduwm 13, 1, 13
+ vadduwm 0, 11, 0
+ vadduwm 15, 5, 15
+ xxlxor 62, 40, 61
+ xxlxor 51, 45, 51
+ xxlxor 34, 32, 34
+ xxlxor 49, 47, 49
+ vperm 30, 30, 30, 6
+ vperm 19, 19, 19, 6
+ vperm 2, 2, 2, 6
+ vperm 17, 17, 17, 6
+ vadduwm 29, 8, 20
+ vadduwm 8, 13, 18
+ xxland 45, 62, 31
+ xxland 51, 51, 31
+ xxland 34, 34, 31
+ xxland 49, 49, 31
+ vadduwm 30, 13, 28
+ vadduwm 3, 19, 3
+ vadduwm 31, 2, 31
+ vadduwm 16, 17, 16
+ xxlxor 36, 62, 36
+ xxlxor 33, 35, 33
+ xxlxor 43, 63, 43
+ xxlxor 37, 48, 37
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ vrlw 4, 4, 10
+ vadduwm 0, 0, 23
+ vadduwm 7, 15, 21
+ vadduwm 29, 1, 29
+ vadduwm 8, 11, 8
+ vadduwm 0, 5, 0
+ vadduwm 7, 4, 7
+ xxlxor 47, 61, 49
+ xxlxor 45, 40, 45
+ xxlxor 49, 32, 51
+ xxlxor 34, 39, 34
+ vperm 15, 15, 15, 9
+ vperm 13, 13, 13, 9
+ vperm 17, 17, 17, 9
+ vperm 2, 2, 2, 9
+ xxlor 46, 3, 3
+ vadduwm 9, 29, 26
+ vadduwm 8, 8, 14
+ xxland 46, 47, 10
+ xxland 45, 45, 10
+ xxland 47, 49, 10
+ xxland 34, 34, 10
+ vadduwm 17, 14, 31
+ vadduwm 16, 13, 16
+ vadduwm 18, 15, 30
+ vadduwm 3, 2, 3
+ xxlxor 33, 49, 33
+ xxlxor 43, 48, 43
+ xxlxor 37, 50, 37
+ xxlxor 36, 35, 36
+ vrlw 1, 1, 12
+ vrlw 11, 11, 12
+ vrlw 5, 5, 12
+ vrlw 4, 4, 12
+ xxlor 44, 6, 6
+ xxlor 0, 10, 10
+ vadduwm 0, 0, 12
+ xxlor 44, 2, 2
+ vadduwm 9, 1, 9
+ vadduwm 7, 7, 12
+ vadduwm 8, 11, 8
+ vadduwm 7, 4, 7
+ vadduwm 0, 5, 0
+ xxlxor 34, 39, 34
+ xxlxor 44, 32, 47
+ vperm 2, 2, 2, 6
+ xxlxor 46, 41, 46
+ xxlxor 45, 40, 45
+ vperm 12, 12, 12, 6
+ vperm 14, 14, 14, 6
+ vperm 13, 13, 13, 6
+ xxland 34, 34, 31
+ xxlor 1, 31, 31
+ vadduwm 3, 2, 3
+ xxland 44, 44, 31
+ xxlxor 36, 35, 36
+ xxlxor 51, 35, 40
+ xxland 35, 46, 31
+ xxland 38, 45, 31
+ vadduwm 15, 12, 18
+ vadduwm 8, 3, 17
+ vadduwm 13, 6, 16
+ xxlxor 37, 47, 37
+ xxlxor 33, 40, 33
+ xxlxor 43, 45, 43
+ vrlw 4, 4, 10
+ vrlw 1, 1, 10
+ vrlw 11, 11, 10
+ vrlw 5, 5, 10
+ xxlxor 47, 47, 41
+ xxlxor 40, 40, 32
+ xxlxor 39, 45, 39
+ xxlxor 50, 36, 38
+ xxlxor 63, 33, 44
+ xxlxor 43, 43, 34
+ xxlxor 41, 37, 35
+ bne 0, .LBB3_2
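+/*
+ * All blocks consumed: transpose the four chaining values back to row
+ * order and store the 4 x 32 bytes of output before restoring the
+ * non-volatile registers.
+ */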
+.LBB3_5:
+ vmrglw 2, 19, 15
+ li 3, 32
+ li 4, 48
+ vmrglw 4, 7, 8
+ vmrglw 0, 31, 18
+ vmrglw 1, 9, 11
+ vmrghw 3, 19, 15
+ vmrghw 5, 7, 8
+ vmrghw 6, 31, 18
+ vmrghw 7, 9, 11
+ xxmrgld 40, 36, 34
+ xxmrghd 34, 36, 34
+ xxmrgld 41, 33, 32
+ xxswapd 0, 40
+ xxmrgld 36, 37, 35
+ xxmrghd 35, 37, 35
+ xxmrghd 37, 33, 32
+ xxswapd 1, 41
+ xxmrgld 32, 39, 38
+ xxmrghd 33, 39, 38
+ xxswapd 2, 34
+ xxswapd 4, 36
+ xxswapd 3, 37
+ stxvd2x 0, 0, 5
+ xxswapd 5, 32
+ stxvd2x 1, 5, 11
+ xxswapd 0, 35
+ xxswapd 1, 33
+ stxvd2x 2, 5, 3
+ li 3, 64
+ stxvd2x 3, 5, 4
+ li 4, 80
+ stxvd2x 4, 5, 3
+ li 3, 96
+ stxvd2x 5, 5, 4
+ li 4, 112
+ stxvd2x 0, 5, 3
+ stxvd2x 1, 5, 4
+ li 3, 224
+ lxvd2x 63, 1, 3
+ li 3, 208
+ lfd 31, 408(1)
+ ld 30, 304(1)
+ ld 29, 296(1)
+ lxvd2x 62, 1, 3
+ li 3, 192
+ lfd 30, 400(1)
+ ld 28, 288(1)
+ ld 27, 280(1)
+ lxvd2x 61, 1, 3
+ li 3, 176
+ lfd 29, 392(1)
+ ld 26, 272(1)
+ ld 25, 264(1)
+ lxvd2x 60, 1, 3
+ li 3, 160
+ lfd 28, 384(1)
+ ld 24, 256(1)
+ ld 23, 248(1)
+ lxvd2x 59, 1, 3
+ li 3, 144
+ lfd 27, 376(1)
+ ld 22, 240(1)
+ lxvd2x 58, 1, 3
+ li 3, 128
+ lfd 26, 368(1)
+ lxvd2x 57, 1, 3
+ li 3, 112
+ lfd 25, 360(1)
+ lxvd2x 56, 1, 3
+ li 3, 96
+ lfd 24, 352(1)
+ lxvd2x 55, 1, 3
+ li 3, 80
+ lfd 23, 344(1)
+ lxvd2x 54, 1, 3
+ li 3, 64
+ lfd 22, 336(1)
+ lxvd2x 53, 1, 3
+ li 3, 48
+ lfd 21, 328(1)
+ lxvd2x 52, 1, 3
+ lfd 20, 320(1)
+ addi 1, 1, 416
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end3:
+ .size blake3_hash4_sse41, .Lfunc_end3-.Lfunc_begin3
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S
new file mode 100644
index 000000000000..dc3c4cea669c
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S
@@ -0,0 +1,1520 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
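+/*
+ * POWER8 SHA-256, derived from OpenSSL's perlasm output.  POWER8-only
+ * instructions (lxvw4x/lxvd2x, stxvw4x and the vshasigmaw used for the
+ * SHA-2 sigma functions) appear below as raw .long encodings so that the
+ * file also assembles with toolchains that predate those mnemonics.
+ */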
+
+#if (defined(__PPC64__) && defined(__BIG_ENDIAN__))
+
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.text
+
+.globl zfs_sha256_power8
+.globl .zfs_sha256_power8
+.type zfs_sha256_power8,@function
+.section ".opd","aw"
+.align 3
+zfs_sha256_power8:
+.quad .zfs_sha256_power8,.TOC.@tocbase,0
+.previous
+.align 6
+.zfs_sha256_power8:
+#else
+.abiversion 2
+.text
+
+.globl zfs_sha256_power8
+.type zfs_sha256_power8,@function
+.align 6
+zfs_sha256_power8:
+.localentry zfs_sha256_power8,0
+#endif
+ stdu 1,-384(1)
+ mflr 8
+ li 10,207
+ li 11,223
+ stvx 24,10,1
+ addi 10,10,32
+ mfspr 12,256
+ stvx 25,11,1
+ addi 11,11,32
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ stvx 31,11,1
+ li 11,-4096+255
+ stw 12,332(1)
+ li 10,0x10
+ std 26,336(1)
+ li 26,0x20
+ std 27,344(1)
+ li 27,0x30
+ std 28,352(1)
+ li 28,0x40
+ std 29,360(1)
+ li 29,0x50
+ std 30,368(1)
+ li 30,0x60
+ std 31,376(1)
+ li 31,0x70
+ std 8,400(1)
+ mtspr 256,11
+
+ bl .LPICmeup
+ addi 11,1,79
+ .long 0x7C001E19
+ .long 0x7C8A1E19
+ vsldoi 1,0,0,4
+ vsldoi 2,0,0,8
+ vsldoi 3,0,0,12
+ vsldoi 5,4,4,4
+ vsldoi 6,4,4,8
+ vsldoi 7,4,4,12
+ li 0,3
+ b .Loop
+.align 5
+.Loop:
+ lvx 28,0,6
+ .long 0x7D002699
+ addi 4,4,16
+ mr 7,6
+ stvx 0,0,11
+ stvx 1,10,11
+ stvx 2,26,11
+ stvx 3,27,11
+ stvx 4,28,11
+ stvx 5,29,11
+ stvx 6,30,11
+ stvx 7,31,11
+ vadduwm 7,7,28
+ lvx 28,10,6
+ vadduwm 7,7,8
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ vsldoi 9,8,8,4
+ vadduwm 6,6,9
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ vsldoi 10,9,9,4
+ vadduwm 5,5,10
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x7D802699
+ addi 4,4,16
+ vsldoi 11,10,10,4
+ vadduwm 4,4,11
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ vadduwm 3,3,12
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ vsldoi 13,12,12,4
+ vadduwm 2,2,13
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ vsldoi 14,13,13,4
+ vadduwm 1,1,14
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ .long 0x7E002699
+ addi 4,4,16
+ vsldoi 15,14,14,4
+ vadduwm 0,0,15
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ vadduwm 7,7,16
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ vsldoi 17,16,16,4
+ vadduwm 6,6,17
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ vsldoi 18,17,17,4
+ vadduwm 5,5,18
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x7F002699
+ addi 4,4,16
+ vsldoi 19,18,18,4
+ vadduwm 4,4,19
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ vadduwm 3,3,24
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ vsldoi 25,24,24,4
+ vadduwm 2,2,25
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ vsldoi 26,25,25,4
+ vadduwm 1,1,26
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ vsldoi 27,26,26,4
+ .long 0x13C90682
+ vadduwm 8,8,30
+ .long 0x13DA7E82
+ vadduwm 8,8,30
+ vadduwm 8,8,17
+ vadduwm 0,0,27
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ mtctr 0
+ b .L16_xx
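+/*
+ * Rounds 16..63: the .long 0x13Cx0682 / 0x13Cx7E82 pairs below are raw
+ * vshasigmaw encodings expanding the message schedule; CTR was set to 3
+ * above, so this 16-round body runs three times.
+ */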
+.align 5
+.L16_xx:
+ .long 0x13CA0682
+ vadduwm 9,9,30
+ .long 0x13DB7E82
+ vadduwm 9,9,30
+ vadduwm 9,9,18
+ vadduwm 7,7,8
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ .long 0x13CB0682
+ vadduwm 10,10,30
+ .long 0x13C87E82
+ vadduwm 10,10,30
+ vadduwm 10,10,19
+ vadduwm 6,6,9
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ .long 0x13CC0682
+ vadduwm 11,11,30
+ .long 0x13C97E82
+ vadduwm 11,11,30
+ vadduwm 11,11,24
+ vadduwm 5,5,10
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x13CD0682
+ vadduwm 12,12,30
+ .long 0x13CA7E82
+ vadduwm 12,12,30
+ vadduwm 12,12,25
+ vadduwm 4,4,11
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ .long 0x13CE0682
+ vadduwm 13,13,30
+ .long 0x13CB7E82
+ vadduwm 13,13,30
+ vadduwm 13,13,26
+ vadduwm 3,3,12
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ .long 0x13CF0682
+ vadduwm 14,14,30
+ .long 0x13CC7E82
+ vadduwm 14,14,30
+ vadduwm 14,14,27
+ vadduwm 2,2,13
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13D00682
+ vadduwm 15,15,30
+ .long 0x13CD7E82
+ vadduwm 15,15,30
+ vadduwm 15,15,8
+ vadduwm 1,1,14
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ .long 0x13D10682
+ vadduwm 16,16,30
+ .long 0x13CE7E82
+ vadduwm 16,16,30
+ vadduwm 16,16,9
+ vadduwm 0,0,15
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ .long 0x13D20682
+ vadduwm 17,17,30
+ .long 0x13CF7E82
+ vadduwm 17,17,30
+ vadduwm 17,17,10
+ vadduwm 7,7,16
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ .long 0x13D30682
+ vadduwm 18,18,30
+ .long 0x13D07E82
+ vadduwm 18,18,30
+ vadduwm 18,18,11
+ vadduwm 6,6,17
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ .long 0x13D80682
+ vadduwm 19,19,30
+ .long 0x13D17E82
+ vadduwm 19,19,30
+ vadduwm 19,19,12
+ vadduwm 5,5,18
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x13D90682
+ vadduwm 24,24,30
+ .long 0x13D27E82
+ vadduwm 24,24,30
+ vadduwm 24,24,13
+ vadduwm 4,4,19
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ .long 0x13DA0682
+ vadduwm 25,25,30
+ .long 0x13D37E82
+ vadduwm 25,25,30
+ vadduwm 25,25,14
+ vadduwm 3,3,24
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ .long 0x13DB0682
+ vadduwm 26,26,30
+ .long 0x13D87E82
+ vadduwm 26,26,30
+ vadduwm 26,26,15
+ vadduwm 2,2,25
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13C80682
+ vadduwm 27,27,30
+ .long 0x13D97E82
+ vadduwm 27,27,30
+ vadduwm 27,27,16
+ vadduwm 1,1,26
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ .long 0x13C90682
+ vadduwm 8,8,30
+ .long 0x13DA7E82
+ vadduwm 8,8,30
+ vadduwm 8,8,17
+ vadduwm 0,0,27
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ bdnz .L16_xx
+
+ lvx 10,0,11
+ subic. 5,5,1
+ lvx 11,10,11
+ vadduwm 0,0,10
+ lvx 12,26,11
+ vadduwm 1,1,11
+ lvx 13,27,11
+ vadduwm 2,2,12
+ lvx 14,28,11
+ vadduwm 3,3,13
+ lvx 15,29,11
+ vadduwm 4,4,14
+ lvx 16,30,11
+ vadduwm 5,5,15
+ lvx 17,31,11
+ vadduwm 6,6,16
+ vadduwm 7,7,17
+ bne .Loop
+ lvx 8,26,7
+ vperm 0,0,1,28
+ lvx 9,27,7
+ vperm 4,4,5,28
+ vperm 0,0,2,8
+ vperm 4,4,6,8
+ vperm 0,0,3,9
+ vperm 4,4,7,9
+ .long 0x7C001F19
+ .long 0x7C8A1F19
+ addi 11,1,207
+ mtlr 8
+ mtspr 256,12
+ lvx 24,0,11
+ lvx 25,10,11
+ lvx 26,26,11
+ lvx 27,27,11
+ lvx 28,28,11
+ lvx 29,29,11
+ lvx 30,30,11
+ lvx 31,31,11
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,6,3,0
+.long 0
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.size .zfs_sha256_power8,.-.zfs_sha256_power8
+.size zfs_sha256_power8,.-.zfs_sha256_power8
+#else
+.size zfs_sha256_power8,.-zfs_sha256_power8
+#endif
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 6
+ addi 6,6,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
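+/*
+ * SHA-256 round constants K[0..63], each replicated into all four vector
+ * lanes, followed by a zero terminator and the three vperm masks used to
+ * merge the final state words before they are stored.
+ */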
+.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98
+.long 0x71374491,0x71374491,0x71374491,0x71374491
+.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf
+.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5
+.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b
+.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1
+.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4
+.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5
+.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98
+.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01
+.long 0x243185be,0x243185be,0x243185be,0x243185be
+.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3
+.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74
+.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe
+.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7
+.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174
+.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1
+.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786
+.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6
+.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc
+.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f
+.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa
+.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc
+.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da
+.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152
+.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d
+.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8
+.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7
+.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3
+.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147
+.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351
+.long 0x14292967,0x14292967,0x14292967,0x14292967
+.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85
+.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138
+.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc
+.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13
+.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354
+.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb
+.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e
+.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85
+.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1
+.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b
+.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70
+.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3
+.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819
+.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624
+.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585
+.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070
+.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116
+.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08
+.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c
+.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5
+.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3
+.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a
+.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f
+.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3
+.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee
+.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f
+.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814
+.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208
+.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa
+.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb
+.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7
+.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2
+.long 0,0,0,0
+.long 0x00010203,0x10111213,0x10111213,0x10111213
+.long 0x00010203,0x04050607,0x10111213,0x10111213
+.long 0x00010203,0x04050607,0x08090a0b,0x10111213
+
+#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+
+.abiversion 2
+.text
+
+.globl zfs_sha256_power8
+.type zfs_sha256_power8,@function
+.align 6
+zfs_sha256_power8:
+.localentry zfs_sha256_power8,0
+
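+/*
+ * Little-endian (ELFv2) variant: same round structure as the big-endian
+ * code above, with a byte-reversal mask built into v31 (lvsl/vspltisb/
+ * vxor below) and applied via vperm to each message load, since SHA-256
+ * consumes its input big-endian.
+ */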
+ stdu 1,-384(1)
+ mflr 8
+ li 10,207
+ li 11,223
+ stvx 24,10,1
+ addi 10,10,32
+ li 12,-1
+ stvx 25,11,1
+ addi 11,11,32
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ stvx 31,11,1
+ li 11,-4096+255
+ stw 12,332(1)
+ li 10,0x10
+ std 26,336(1)
+ li 26,0x20
+ std 27,344(1)
+ li 27,0x30
+ std 28,352(1)
+ li 28,0x40
+ std 29,360(1)
+ li 29,0x50
+ std 30,368(1)
+ li 30,0x60
+ std 31,376(1)
+ li 31,0x70
+ std 8,400(1)
+ or 11,11,11
+
+ bl .LPICmeup
+ addi 11,1,79
+ li 7,8
+ lvsl 31,0,7
+ vspltisb 28,0x0f
+ vxor 31,31,28
+ .long 0x7C001E19
+ .long 0x7C8A1E19
+ vsldoi 1,0,0,4
+ vsldoi 2,0,0,8
+ vsldoi 3,0,0,12
+ vsldoi 5,4,4,4
+ vsldoi 6,4,4,8
+ vsldoi 7,4,4,12
+ li 0,3
+ b .Loop
+.align 5
+.Loop:
+ lvx 28,0,6
+ .long 0x7D002699
+ addi 4,4,16
+ mr 7,6
+ stvx 0,0,11
+ stvx 1,10,11
+ stvx 2,26,11
+ stvx 3,27,11
+ stvx 4,28,11
+ stvx 5,29,11
+ stvx 6,30,11
+ stvx 7,31,11
+ vadduwm 7,7,28
+ lvx 28,10,6
+ vperm 8,8,8,31
+ vadduwm 7,7,8
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ vsldoi 9,8,8,4
+ vadduwm 6,6,9
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ vsldoi 10,9,9,4
+ vadduwm 5,5,10
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x7D802699
+ addi 4,4,16
+ vsldoi 11,10,10,4
+ vadduwm 4,4,11
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ vperm 12,12,12,31
+ vadduwm 3,3,12
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ vsldoi 13,12,12,4
+ vadduwm 2,2,13
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ vsldoi 14,13,13,4
+ vadduwm 1,1,14
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ .long 0x7E002699
+ addi 4,4,16
+ vsldoi 15,14,14,4
+ vadduwm 0,0,15
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ vperm 16,16,16,31
+ vadduwm 7,7,16
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ vsldoi 17,16,16,4
+ vadduwm 6,6,17
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ vsldoi 18,17,17,4
+ vadduwm 5,5,18
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x7F002699
+ addi 4,4,16
+ vsldoi 19,18,18,4
+ vadduwm 4,4,19
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ vperm 24,24,24,31
+ vadduwm 3,3,24
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ vsldoi 25,24,24,4
+ vadduwm 2,2,25
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ vsldoi 26,25,25,4
+ vadduwm 1,1,26
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ vsldoi 27,26,26,4
+ .long 0x13C90682
+ vadduwm 8,8,30
+ .long 0x13DA7E82
+ vadduwm 8,8,30
+ vadduwm 8,8,17
+ vadduwm 0,0,27
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ mtctr 0
+ b .L16_xx
+.align 5
+.L16_xx:
+ .long 0x13CA0682
+ vadduwm 9,9,30
+ .long 0x13DB7E82
+ vadduwm 9,9,30
+ vadduwm 9,9,18
+ vadduwm 7,7,8
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ .long 0x13CB0682
+ vadduwm 10,10,30
+ .long 0x13C87E82
+ vadduwm 10,10,30
+ vadduwm 10,10,19
+ vadduwm 6,6,9
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ .long 0x13CC0682
+ vadduwm 11,11,30
+ .long 0x13C97E82
+ vadduwm 11,11,30
+ vadduwm 11,11,24
+ vadduwm 5,5,10
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x13CD0682
+ vadduwm 12,12,30
+ .long 0x13CA7E82
+ vadduwm 12,12,30
+ vadduwm 12,12,25
+ vadduwm 4,4,11
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ .long 0x13CE0682
+ vadduwm 13,13,30
+ .long 0x13CB7E82
+ vadduwm 13,13,30
+ vadduwm 13,13,26
+ vadduwm 3,3,12
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ .long 0x13CF0682
+ vadduwm 14,14,30
+ .long 0x13CC7E82
+ vadduwm 14,14,30
+ vadduwm 14,14,27
+ vadduwm 2,2,13
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13D00682
+ vadduwm 15,15,30
+ .long 0x13CD7E82
+ vadduwm 15,15,30
+ vadduwm 15,15,8
+ vadduwm 1,1,14
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ .long 0x13D10682
+ vadduwm 16,16,30
+ .long 0x13CE7E82
+ vadduwm 16,16,30
+ vadduwm 16,16,9
+ vadduwm 0,0,15
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ .long 0x13D20682
+ vadduwm 17,17,30
+ .long 0x13CF7E82
+ vadduwm 17,17,30
+ vadduwm 17,17,10
+ vadduwm 7,7,16
+ vsel 29,6,5,4
+ vadduwm 6,6,28
+ vadduwm 7,7,29
+ .long 0x13C4FE82
+ vadduwm 7,7,30
+ vxor 29,0,1
+ vsel 29,1,2,29
+ vadduwm 3,3,7
+ .long 0x13C08682
+ vadduwm 30,30,29
+ vadduwm 7,7,30
+ lvx 28,26,7
+ .long 0x13D30682
+ vadduwm 18,18,30
+ .long 0x13D07E82
+ vadduwm 18,18,30
+ vadduwm 18,18,11
+ vadduwm 6,6,17
+ vsel 29,5,4,3
+ vadduwm 5,5,28
+ vadduwm 6,6,29
+ .long 0x13C3FE82
+ vadduwm 6,6,30
+ vxor 29,7,0
+ vsel 29,0,1,29
+ vadduwm 2,2,6
+ .long 0x13C78682
+ vadduwm 30,30,29
+ vadduwm 6,6,30
+ lvx 28,27,7
+ .long 0x13D80682
+ vadduwm 19,19,30
+ .long 0x13D17E82
+ vadduwm 19,19,30
+ vadduwm 19,19,12
+ vadduwm 5,5,18
+ vsel 29,4,3,2
+ vadduwm 4,4,28
+ vadduwm 5,5,29
+ .long 0x13C2FE82
+ vadduwm 5,5,30
+ vxor 29,6,7
+ vsel 29,7,0,29
+ vadduwm 1,1,5
+ .long 0x13C68682
+ vadduwm 30,30,29
+ vadduwm 5,5,30
+ lvx 28,28,7
+ .long 0x13D90682
+ vadduwm 24,24,30
+ .long 0x13D27E82
+ vadduwm 24,24,30
+ vadduwm 24,24,13
+ vadduwm 4,4,19
+ vsel 29,3,2,1
+ vadduwm 3,3,28
+ vadduwm 4,4,29
+ .long 0x13C1FE82
+ vadduwm 4,4,30
+ vxor 29,5,6
+ vsel 29,6,7,29
+ vadduwm 0,0,4
+ .long 0x13C58682
+ vadduwm 30,30,29
+ vadduwm 4,4,30
+ lvx 28,29,7
+ .long 0x13DA0682
+ vadduwm 25,25,30
+ .long 0x13D37E82
+ vadduwm 25,25,30
+ vadduwm 25,25,14
+ vadduwm 3,3,24
+ vsel 29,2,1,0
+ vadduwm 2,2,28
+ vadduwm 3,3,29
+ .long 0x13C0FE82
+ vadduwm 3,3,30
+ vxor 29,4,5
+ vsel 29,5,6,29
+ vadduwm 7,7,3
+ .long 0x13C48682
+ vadduwm 30,30,29
+ vadduwm 3,3,30
+ lvx 28,30,7
+ .long 0x13DB0682
+ vadduwm 26,26,30
+ .long 0x13D87E82
+ vadduwm 26,26,30
+ vadduwm 26,26,15
+ vadduwm 2,2,25
+ vsel 29,1,0,7
+ vadduwm 1,1,28
+ vadduwm 2,2,29
+ .long 0x13C7FE82
+ vadduwm 2,2,30
+ vxor 29,3,4
+ vsel 29,4,5,29
+ vadduwm 6,6,2
+ .long 0x13C38682
+ vadduwm 30,30,29
+ vadduwm 2,2,30
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13C80682
+ vadduwm 27,27,30
+ .long 0x13D97E82
+ vadduwm 27,27,30
+ vadduwm 27,27,16
+ vadduwm 1,1,26
+ vsel 29,0,7,6
+ vadduwm 0,0,28
+ vadduwm 1,1,29
+ .long 0x13C6FE82
+ vadduwm 1,1,30
+ vxor 29,2,3
+ vsel 29,3,4,29
+ vadduwm 5,5,1
+ .long 0x13C28682
+ vadduwm 30,30,29
+ vadduwm 1,1,30
+ lvx 28,0,7
+ .long 0x13C90682
+ vadduwm 8,8,30
+ .long 0x13DA7E82
+ vadduwm 8,8,30
+ vadduwm 8,8,17
+ vadduwm 0,0,27
+ vsel 29,7,6,5
+ vadduwm 7,7,28
+ vadduwm 0,0,29
+ .long 0x13C5FE82
+ vadduwm 0,0,30
+ vxor 29,1,2
+ vsel 29,2,3,29
+ vadduwm 4,4,0
+ .long 0x13C18682
+ vadduwm 30,30,29
+ vadduwm 0,0,30
+ lvx 28,10,7
+ bdnz .L16_xx
+
+ lvx 10,0,11
+ subic. 5,5,1
+ lvx 11,10,11
+ vadduwm 0,0,10
+ lvx 12,26,11
+ vadduwm 1,1,11
+ lvx 13,27,11
+ vadduwm 2,2,12
+ lvx 14,28,11
+ vadduwm 3,3,13
+ lvx 15,29,11
+ vadduwm 4,4,14
+ lvx 16,30,11
+ vadduwm 5,5,15
+ lvx 17,31,11
+ vadduwm 6,6,16
+ vadduwm 7,7,17
+ bne .Loop
+ lvx 8,26,7
+ vperm 0,0,1,28
+ lvx 9,27,7
+ vperm 4,4,5,28
+ vperm 0,0,2,8
+ vperm 4,4,6,8
+ vperm 0,0,3,9
+ vperm 4,4,7,9
+ .long 0x7C001F19
+ .long 0x7C8A1F19
+ addi 11,1,207
+ mtlr 8
+ or 12,12,12
+ lvx 24,0,11
+ lvx 25,10,11
+ lvx 26,26,11
+ lvx 27,27,11
+ lvx 28,28,11
+ lvx 29,29,11
+ lvx 30,30,11
+ lvx 31,31,11
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,6,3,0
+.long 0
+.size zfs_sha256_power8,.-zfs_sha256_power8
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 6
+ addi 6,6,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
+.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98
+.long 0x71374491,0x71374491,0x71374491,0x71374491
+.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf
+.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5
+.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b
+.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1
+.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4
+.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5
+.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98
+.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01
+.long 0x243185be,0x243185be,0x243185be,0x243185be
+.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3
+.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74
+.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe
+.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7
+.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174
+.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1
+.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786
+.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6
+.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc
+.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f
+.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa
+.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc
+.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da
+.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152
+.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d
+.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8
+.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7
+.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3
+.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147
+.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351
+.long 0x14292967,0x14292967,0x14292967,0x14292967
+.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85
+.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138
+.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc
+.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13
+.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354
+.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb
+.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e
+.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85
+.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1
+.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b
+.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70
+.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3
+.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819
+.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624
+.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585
+.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070
+.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116
+.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08
+.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c
+.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5
+.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3
+.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a
+.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f
+.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3
+.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee
+.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f
+.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814
+.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208
+.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa
+.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb
+.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7
+.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2
+.long 0,0,0,0
+.long 0x10111213,0x10111213,0x10111213,0x00010203
+.long 0x10111213,0x10111213,0x04050607,0x00010203
+.long 0x10111213,0x08090a0b,0x04050607,0x00010203
+#endif
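
The zfs_sha256_power8 routine above keeps the whole SHA-256 state in vector registers. The ".long 0x13C...82" words appear to be raw encodings of the POWER8 vshasigmaw instruction (emitted as opcode words so older assemblers without the mnemonic can still build the file), and the vsel/vxor pairs build the Ch and Maj functions bitwise; the constant table at the end replicates each K value across all four 32-bit lanes. A rough scalar sketch of the functions involved, with illustrative names not taken from the source:

	#include <stdint.h>

	static inline uint32_t rotr32(uint32_t x, unsigned n)
	{
		return (x >> n) | (x << (32 - n));
	}

	/* the sigma variants vshasigmaw can compute per 32-bit lane */
	static inline uint32_t Sigma0(uint32_t x)	/* 'a' column */
	{
		return rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22);
	}
	static inline uint32_t Sigma1(uint32_t x)	/* 'e' column */
	{
		return rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25);
	}
	static inline uint32_t sigma0(uint32_t x)	/* message schedule */
	{
		return rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3);
	}
	static inline uint32_t sigma1(uint32_t x)	/* message schedule */
	{
		return rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10);
	}

	/* Ch(e,f,g): "vsel 29,6,5,4" picks f where e is 1, g where e is 0 */
	static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)
	{
		return (g & ~e) | (f & e);
	}
	/* Maj(a,b,c): the "vxor 29,0,1 ; vsel 29,1,2,29" pair */
	static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c)
	{
		return (b & ~(a ^ b)) | (c & (a ^ b));
	}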
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S
new file mode 100644
index 000000000000..d039bc36ee11
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S
@@ -0,0 +1,2727 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
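+
+/*
+ * The entry sequence just below provides two prologues: on ELFv1
+ * (big-endian ppc64) the global symbol is a function descriptor in the
+ * ".opd" section pointing at the dot-symbol, while on ELFv2 a single
+ * symbol with ".localentry" serves both entry points.  From the register
+ * usage (state words loaded from r3, r4 as the input pointer, and
+ * "sldi 5,5,6" scaling r5 from a block count to a byte length) the
+ * C-level contract appears to be roughly the following; this prototype
+ * is inferred from the code, not quoted from a header:
+ *
+ *	void zfs_sha256_ppc(uint32_t state[8], const void *input,
+ *	    size_t num_blocks);
+ */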
+
+#if (defined(__PPC64__) && defined(__BIG_ENDIAN__))
+
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.text
+
+.globl zfs_sha256_ppc
+.globl .zfs_sha256_ppc
+.type zfs_sha256_ppc,@function
+.section ".opd","aw"
+.align 3
+zfs_sha256_ppc:
+.quad .zfs_sha256_ppc,.TOC.@tocbase,0
+.previous
+.align 6
+.zfs_sha256_ppc:
+#else
+.abiversion 2
+.text
+
+.globl zfs_sha256_ppc
+.type zfs_sha256_ppc,@function
+.align 6
+zfs_sha256_ppc:
+.localentry zfs_sha256_ppc,0
+#endif
+ stdu 1,-320(1)
+ mflr 0
+ sldi 5,5,6
+
+ std 3,144(1)
+
+ std 14,176(1)
+ std 15,184(1)
+ std 16,192(1)
+ std 17,200(1)
+ std 18,208(1)
+ std 19,216(1)
+ std 20,224(1)
+ std 21,232(1)
+ std 22,240(1)
+ std 23,248(1)
+ std 24,256(1)
+ std 25,264(1)
+ std 26,272(1)
+ std 27,280(1)
+ std 28,288(1)
+ std 29,296(1)
+ std 30,304(1)
+ std 31,312(1)
+ std 0,336(1)
+ lwz 8,0(3)
+ mr 31,4
+ lwz 9,4(3)
+ lwz 10,8(3)
+ lwz 11,12(3)
+ lwz 12,16(3)
+ lwz 6,20(3)
+ lwz 14,24(3)
+ lwz 15,28(3)
+ bl .LPICmeup
+.LPICedup:
+ andi. 0,31,3
+ bne .Lunaligned
+.Laligned:
+ add 5,31,5
+ std 5,128(1)
+ std 31,136(1)
+ bl .Lsha2_block_private
+ b .Ldone
+
+.align 4
+.Lunaligned:
+ subfic 0,31,4096
+ andi. 0,0,4032
+ beq .Lcross_page
+ cmpld 5,0
+ ble .Laligned
+ subfc 5,0,5
+ add 0,31,0
+ std 5,120(1)
+ std 0,128(1)
+ std 31,136(1)
+ bl .Lsha2_block_private
+
+ ld 5,120(1)
+.Lcross_page:
+ li 0,16
+ mtctr 0
+ addi 20,1,48
+.Lmemcpy:
+ lbz 16,0(31)
+ lbz 17,1(31)
+ lbz 18,2(31)
+ lbz 19,3(31)
+ addi 31,31,4
+ stb 16,0(20)
+ stb 17,1(20)
+ stb 18,2(20)
+ stb 19,3(20)
+ addi 20,20,4
+ bdnz .Lmemcpy
+ std 31,112(1)
+ addi 0,1,112
+ addi 31,1,48
+ std 5,120(1)
+ std 0,128(1)
+ std 31,136(1)
+ bl .Lsha2_block_private
+ ld 31,112(1)
+ ld 5,120(1)
+ addic. 5,5,-64
+ bne .Lunaligned
+
+.Ldone:
+ ld 0,336(1)
+ ld 14,176(1)
+ ld 15,184(1)
+ ld 16,192(1)
+ ld 17,200(1)
+ ld 18,208(1)
+ ld 19,216(1)
+ ld 20,224(1)
+ ld 21,232(1)
+ ld 22,240(1)
+ ld 23,248(1)
+ ld 24,256(1)
+ ld 25,264(1)
+ ld 26,272(1)
+ ld 27,280(1)
+ ld 28,288(1)
+ ld 29,296(1)
+ ld 30,304(1)
+ ld 31,312(1)
+ mtlr 0
+ addi 1,1,320
+ blr
+.long 0
+.byte 0,12,4,1,0x80,18,3,0
+.long 0
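
The .Lunaligned/.Lcross_page paths above handle input buffers that are not word-aligned: whole 64-byte blocks are hashed in place up to the next 4 KiB page boundary, and the one block that would straddle the boundary is first byte-copied (the .Lmemcpy loop) into a stack buffer so no load can touch an unmapped page. A C rendering of that flow, names illustrative, under the assumption that the length is a multiple of 64 as the caller guarantees:

	#include <stdint.h>
	#include <string.h>

	/* assumed primitive: hashes nbytes/64 blocks that are safe to read */
	void sha256_blocks(uint32_t st[8], const uint8_t *p, size_t nbytes);

	void sha256_blocks_careful(uint32_t st[8], const uint8_t *p,
	    size_t nbytes)
	{
		while (nbytes != 0) {
			/* whole blocks available before the next page edge */
			size_t room = ((4096 - ((uintptr_t)p & 4095)) & 4095) &
			    ~(size_t)63;

			if (room != 0) {
				size_t take = nbytes < room ? nbytes : room;
				sha256_blocks(st, p, take);
				p += take;
				nbytes -= take;
			} else {
				uint8_t buf[64];
				memcpy(buf, p, 64);	/* copy across page */
				sha256_blocks(st, buf, 64);
				p += 64;
				nbytes -= 64;
			}
		}
	}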
+.align 4
+.Lsha2_block_private:
+ lwz 0,0(7)
+ lwz 16,0(31)
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ lwz 0,4(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 17,4(31)
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ lwz 0,8(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 18,8(31)
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ lwz 0,12(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 19,12(31)
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ lwz 0,16(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 20,16(31)
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ lwz 0,20(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 21,20(31)
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ lwz 0,24(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 22,24(31)
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ lwz 0,28(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 23,28(31)
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ lwz 0,32(7)
+ add 8,8,3
+ add 8,8,5
+
+ lwz 24,32(31)
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ lwz 0,36(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 25,36(31)
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ lwz 0,40(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 26,40(31)
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ lwz 0,44(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 27,44(31)
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ lwz 0,48(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 28,48(31)
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ lwz 0,52(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 29,52(31)
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ lwz 0,56(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 30,56(31)
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ lwz 0,60(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 31,60(31)
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ li 5,3
+ mtctr 5
+.align 4
+.Lrounds:
+ addi 7,7,64
+ rotrwi 3,17,7
+ rotrwi 4,17,18
+ rotrwi 5,30,17
+ rotrwi 0,30,19
+ xor 3,3,4
+ srwi 4,17,3
+ xor 5,5,0
+ srwi 0,30,10
+ add 16,16,25
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,0(7)
+ add 16,16,3
+ add 16,16,5
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrwi 3,18,7
+ rotrwi 4,18,18
+ rotrwi 5,31,17
+ rotrwi 0,31,19
+ xor 3,3,4
+ srwi 4,18,3
+ xor 5,5,0
+ srwi 0,31,10
+ add 17,17,26
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,4(7)
+ add 17,17,3
+ add 17,17,5
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrwi 3,19,7
+ rotrwi 4,19,18
+ rotrwi 5,16,17
+ rotrwi 0,16,19
+ xor 3,3,4
+ srwi 4,19,3
+ xor 5,5,0
+ srwi 0,16,10
+ add 18,18,27
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,8(7)
+ add 18,18,3
+ add 18,18,5
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrwi 3,20,7
+ rotrwi 4,20,18
+ rotrwi 5,17,17
+ rotrwi 0,17,19
+ xor 3,3,4
+ srwi 4,20,3
+ xor 5,5,0
+ srwi 0,17,10
+ add 19,19,28
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,12(7)
+ add 19,19,3
+ add 19,19,5
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrwi 3,21,7
+ rotrwi 4,21,18
+ rotrwi 5,18,17
+ rotrwi 0,18,19
+ xor 3,3,4
+ srwi 4,21,3
+ xor 5,5,0
+ srwi 0,18,10
+ add 20,20,29
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,16(7)
+ add 20,20,3
+ add 20,20,5
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrwi 3,22,7
+ rotrwi 4,22,18
+ rotrwi 5,19,17
+ rotrwi 0,19,19
+ xor 3,3,4
+ srwi 4,22,3
+ xor 5,5,0
+ srwi 0,19,10
+ add 21,21,30
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,20(7)
+ add 21,21,3
+ add 21,21,5
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrwi 3,23,7
+ rotrwi 4,23,18
+ rotrwi 5,20,17
+ rotrwi 0,20,19
+ xor 3,3,4
+ srwi 4,23,3
+ xor 5,5,0
+ srwi 0,20,10
+ add 22,22,31
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,24(7)
+ add 22,22,3
+ add 22,22,5
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrwi 3,24,7
+ rotrwi 4,24,18
+ rotrwi 5,21,17
+ rotrwi 0,21,19
+ xor 3,3,4
+ srwi 4,24,3
+ xor 5,5,0
+ srwi 0,21,10
+ add 23,23,16
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,28(7)
+ add 23,23,3
+ add 23,23,5
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ rotrwi 3,25,7
+ rotrwi 4,25,18
+ rotrwi 5,22,17
+ rotrwi 0,22,19
+ xor 3,3,4
+ srwi 4,25,3
+ xor 5,5,0
+ srwi 0,22,10
+ add 24,24,17
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,32(7)
+ add 24,24,3
+ add 24,24,5
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrwi 3,26,7
+ rotrwi 4,26,18
+ rotrwi 5,23,17
+ rotrwi 0,23,19
+ xor 3,3,4
+ srwi 4,26,3
+ xor 5,5,0
+ srwi 0,23,10
+ add 25,25,18
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,36(7)
+ add 25,25,3
+ add 25,25,5
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrwi 3,27,7
+ rotrwi 4,27,18
+ rotrwi 5,24,17
+ rotrwi 0,24,19
+ xor 3,3,4
+ srwi 4,27,3
+ xor 5,5,0
+ srwi 0,24,10
+ add 26,26,19
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,40(7)
+ add 26,26,3
+ add 26,26,5
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrwi 3,28,7
+ rotrwi 4,28,18
+ rotrwi 5,25,17
+ rotrwi 0,25,19
+ xor 3,3,4
+ srwi 4,28,3
+ xor 5,5,0
+ srwi 0,25,10
+ add 27,27,20
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,44(7)
+ add 27,27,3
+ add 27,27,5
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrwi 3,29,7
+ rotrwi 4,29,18
+ rotrwi 5,26,17
+ rotrwi 0,26,19
+ xor 3,3,4
+ srwi 4,29,3
+ xor 5,5,0
+ srwi 0,26,10
+ add 28,28,21
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,48(7)
+ add 28,28,3
+ add 28,28,5
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrwi 3,30,7
+ rotrwi 4,30,18
+ rotrwi 5,27,17
+ rotrwi 0,27,19
+ xor 3,3,4
+ srwi 4,30,3
+ xor 5,5,0
+ srwi 0,27,10
+ add 29,29,22
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,52(7)
+ add 29,29,3
+ add 29,29,5
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrwi 3,31,7
+ rotrwi 4,31,18
+ rotrwi 5,28,17
+ rotrwi 0,28,19
+ xor 3,3,4
+ srwi 4,31,3
+ xor 5,5,0
+ srwi 0,28,10
+ add 30,30,23
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,56(7)
+ add 30,30,3
+ add 30,30,5
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrwi 3,16,7
+ rotrwi 4,16,18
+ rotrwi 5,29,17
+ rotrwi 0,29,19
+ xor 3,3,4
+ srwi 4,16,3
+ xor 5,5,0
+ srwi 0,29,10
+ add 31,31,24
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,60(7)
+ add 31,31,3
+ add 31,31,5
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ bdnz .Lrounds
+
+ ld 3,144(1)
+ ld 31,136(1)
+ ld 5,128(1)
+ subi 7,7,192
+
+ lwz 16,0(3)
+ lwz 17,4(3)
+ lwz 18,8(3)
+ lwz 19,12(3)
+ lwz 20,16(3)
+ lwz 21,20(3)
+ lwz 22,24(3)
+ addi 31,31,64
+ lwz 23,28(3)
+ add 8,8,16
+ add 9,9,17
+ std 31,136(1)
+ add 10,10,18
+ stw 8,0(3)
+ add 11,11,19
+ stw 9,4(3)
+ add 12,12,20
+ stw 10,8(3)
+ add 6,6,21
+ stw 11,12(3)
+ add 14,14,22
+ stw 12,16(3)
+ add 15,15,23
+ stw 6,20(3)
+ stw 14,24(3)
+ cmpld 31,5
+ stw 15,28(3)
+ bne .Lsha2_block_private
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.size .zfs_sha256_ppc,.-.zfs_sha256_ppc
+.size zfs_sha256_ppc,.-.zfs_sha256_ppc
+#else
+.size zfs_sha256_ppc,.-zfs_sha256_ppc
+#endif
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 7
+ addi 7,7,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
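The little-endian variant that follows is structurally identical to the big-endian code above; the main difference is the message-word load, where each lwz is followed by a rotlwi plus two rlwimi, a three-instruction rotate-and-insert byte swap, because SHA-256 is defined over big-endian words. The equivalent C operation:

	#include <stdint.h>

	/*
	 * What "rotlwi 16,3,8 ; rlwimi 16,3,24,0,7 ; rlwimi 16,3,24,16,23"
	 * computes: a plain 32-bit byte swap.
	 */
	static inline uint32_t swap32(uint32_t x)
	{
		return (x << 24) | ((x << 8) & 0x00ff0000) |
		    ((x >> 8) & 0x0000ff00) | (x >> 24);
	}
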
+#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+
+.abiversion 2
+.text
+
+.globl zfs_sha256_ppc
+.type zfs_sha256_ppc,@function
+.align 6
+zfs_sha256_ppc:
+.localentry zfs_sha256_ppc,0
+
+ stdu 1,-320(1)
+ mflr 0
+ sldi 5,5,6
+
+ std 3,144(1)
+
+ std 14,176(1)
+ std 15,184(1)
+ std 16,192(1)
+ std 17,200(1)
+ std 18,208(1)
+ std 19,216(1)
+ std 20,224(1)
+ std 21,232(1)
+ std 22,240(1)
+ std 23,248(1)
+ std 24,256(1)
+ std 25,264(1)
+ std 26,272(1)
+ std 27,280(1)
+ std 28,288(1)
+ std 29,296(1)
+ std 30,304(1)
+ std 31,312(1)
+ std 0,336(1)
+ lwz 8,0(3)
+ mr 31,4
+ lwz 9,4(3)
+ lwz 10,8(3)
+ lwz 11,12(3)
+ lwz 12,16(3)
+ lwz 6,20(3)
+ lwz 14,24(3)
+ lwz 15,28(3)
+ bl .LPICmeup
+.LPICedup:
+ andi. 0,31,3
+ bne .Lunaligned
+.Laligned:
+ add 5,31,5
+ std 5,128(1)
+ std 31,136(1)
+ bl .Lsha2_block_private
+ b .Ldone
+
+.align 4
+.Lunaligned:
+ subfic 0,31,4096
+ andi. 0,0,4032
+ beq .Lcross_page
+ cmpld 5,0
+ ble .Laligned
+ subfc 5,0,5
+ add 0,31,0
+ std 5,120(1)
+ std 0,128(1)
+ std 31,136(1)
+ bl .Lsha2_block_private
+
+ ld 5,120(1)
+.Lcross_page:
+ li 0,16
+ mtctr 0
+ addi 20,1,48
+.Lmemcpy:
+ lbz 16,0(31)
+ lbz 17,1(31)
+ lbz 18,2(31)
+ lbz 19,3(31)
+ addi 31,31,4
+ stb 16,0(20)
+ stb 17,1(20)
+ stb 18,2(20)
+ stb 19,3(20)
+ addi 20,20,4
+ bdnz .Lmemcpy
+ std 31,112(1)
+ addi 0,1,112
+ addi 31,1,48
+ std 5,120(1)
+ std 0,128(1)
+ std 31,136(1)
+ bl .Lsha2_block_private
+ ld 31,112(1)
+ ld 5,120(1)
+ addic. 5,5,-64
+ bne .Lunaligned
+
+.Ldone:
+ ld 0,336(1)
+ ld 14,176(1)
+ ld 15,184(1)
+ ld 16,192(1)
+ ld 17,200(1)
+ ld 18,208(1)
+ ld 19,216(1)
+ ld 20,224(1)
+ ld 21,232(1)
+ ld 22,240(1)
+ ld 23,248(1)
+ ld 24,256(1)
+ ld 25,264(1)
+ ld 26,272(1)
+ ld 27,280(1)
+ ld 28,288(1)
+ ld 29,296(1)
+ ld 30,304(1)
+ ld 31,312(1)
+ mtlr 0
+ addi 1,1,320
+ blr
+.long 0
+.byte 0,12,4,1,0x80,18,3,0
+.long 0
+.align 4
+.Lsha2_block_private:
+ lwz 0,0(7)
+ lwz 3,0(31)
+ rotlwi 16,3,8
+ rlwimi 16,3,24,0,7
+ rlwimi 16,3,24,16,23
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ lwz 0,4(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 3,4(31)
+ rotlwi 17,3,8
+ rlwimi 17,3,24,0,7
+ rlwimi 17,3,24,16,23
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ lwz 0,8(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 3,8(31)
+ rotlwi 18,3,8
+ rlwimi 18,3,24,0,7
+ rlwimi 18,3,24,16,23
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ lwz 0,12(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 3,12(31)
+ rotlwi 19,3,8
+ rlwimi 19,3,24,0,7
+ rlwimi 19,3,24,16,23
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ lwz 0,16(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 3,16(31)
+ rotlwi 20,3,8
+ rlwimi 20,3,24,0,7
+ rlwimi 20,3,24,16,23
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ lwz 0,20(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 3,20(31)
+ rotlwi 21,3,8
+ rlwimi 21,3,24,0,7
+ rlwimi 21,3,24,16,23
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ lwz 0,24(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 3,24(31)
+ rotlwi 22,3,8
+ rlwimi 22,3,24,0,7
+ rlwimi 22,3,24,16,23
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ lwz 0,28(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 3,28(31)
+ rotlwi 23,3,8
+ rlwimi 23,3,24,0,7
+ rlwimi 23,3,24,16,23
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ lwz 0,32(7)
+ add 8,8,3
+ add 8,8,5
+
+ lwz 3,32(31)
+ rotlwi 24,3,8
+ rlwimi 24,3,24,0,7
+ rlwimi 24,3,24,16,23
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ lwz 0,36(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 3,36(31)
+ rotlwi 25,3,8
+ rlwimi 25,3,24,0,7
+ rlwimi 25,3,24,16,23
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ lwz 0,40(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 3,40(31)
+ rotlwi 26,3,8
+ rlwimi 26,3,24,0,7
+ rlwimi 26,3,24,16,23
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ lwz 0,44(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 3,44(31)
+ rotlwi 27,3,8
+ rlwimi 27,3,24,0,7
+ rlwimi 27,3,24,16,23
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ lwz 0,48(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 3,48(31)
+ rotlwi 28,3,8
+ rlwimi 28,3,24,0,7
+ rlwimi 28,3,24,16,23
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ lwz 0,52(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 3,52(31)
+ rotlwi 29,3,8
+ rlwimi 29,3,24,0,7
+ rlwimi 29,3,24,16,23
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ lwz 0,56(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 3,56(31)
+ rotlwi 30,3,8
+ rlwimi 30,3,24,0,7
+ rlwimi 30,3,24,16,23
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ lwz 0,60(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 3,60(31)
+ rotlwi 31,3,8
+ rlwimi 31,3,24,0,7
+ rlwimi 31,3,24,16,23
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ li 5,3
+ mtctr 5
+.align 4
+.Lrounds:
+ addi 7,7,64
+ rotrwi 3,17,7
+ rotrwi 4,17,18
+ rotrwi 5,30,17
+ rotrwi 0,30,19
+ xor 3,3,4
+ srwi 4,17,3
+ xor 5,5,0
+ srwi 0,30,10
+ add 16,16,25
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,0(7)
+ add 16,16,3
+ add 16,16,5
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrwi 3,18,7
+ rotrwi 4,18,18
+ rotrwi 5,31,17
+ rotrwi 0,31,19
+ xor 3,3,4
+ srwi 4,18,3
+ xor 5,5,0
+ srwi 0,31,10
+ add 17,17,26
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,4(7)
+ add 17,17,3
+ add 17,17,5
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrwi 3,19,7
+ rotrwi 4,19,18
+ rotrwi 5,16,17
+ rotrwi 0,16,19
+ xor 3,3,4
+ srwi 4,19,3
+ xor 5,5,0
+ srwi 0,16,10
+ add 18,18,27
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,8(7)
+ add 18,18,3
+ add 18,18,5
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrwi 3,20,7
+ rotrwi 4,20,18
+ rotrwi 5,17,17
+ rotrwi 0,17,19
+ xor 3,3,4
+ srwi 4,20,3
+ xor 5,5,0
+ srwi 0,17,10
+ add 19,19,28
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,12(7)
+ add 19,19,3
+ add 19,19,5
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrwi 3,21,7
+ rotrwi 4,21,18
+ rotrwi 5,18,17
+ rotrwi 0,18,19
+ xor 3,3,4
+ srwi 4,21,3
+ xor 5,5,0
+ srwi 0,18,10
+ add 20,20,29
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,16(7)
+ add 20,20,3
+ add 20,20,5
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrwi 3,22,7
+ rotrwi 4,22,18
+ rotrwi 5,19,17
+ rotrwi 0,19,19
+ xor 3,3,4
+ srwi 4,22,3
+ xor 5,5,0
+ srwi 0,19,10
+ add 21,21,30
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,20(7)
+ add 21,21,3
+ add 21,21,5
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrwi 3,23,7
+ rotrwi 4,23,18
+ rotrwi 5,20,17
+ rotrwi 0,20,19
+ xor 3,3,4
+ srwi 4,23,3
+ xor 5,5,0
+ srwi 0,20,10
+ add 22,22,31
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,24(7)
+ add 22,22,3
+ add 22,22,5
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrwi 3,24,7
+ rotrwi 4,24,18
+ rotrwi 5,21,17
+ rotrwi 0,21,19
+ xor 3,3,4
+ srwi 4,24,3
+ xor 5,5,0
+ srwi 0,21,10
+ add 23,23,16
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,28(7)
+ add 23,23,3
+ add 23,23,5
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ rotrwi 3,25,7
+ rotrwi 4,25,18
+ rotrwi 5,22,17
+ rotrwi 0,22,19
+ xor 3,3,4
+ srwi 4,25,3
+ xor 5,5,0
+ srwi 0,22,10
+ add 24,24,17
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,32(7)
+ add 24,24,3
+ add 24,24,5
+ rotrwi 3,12,6
+ rotrwi 4,12,11
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrwi 4,4,14
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrwi 3,8,2
+ rotrwi 4,8,13
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrwi 3,26,7
+ rotrwi 4,26,18
+ rotrwi 5,23,17
+ rotrwi 0,23,19
+ xor 3,3,4
+ srwi 4,26,3
+ xor 5,5,0
+ srwi 0,23,10
+ add 25,25,18
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,36(7)
+ add 25,25,3
+ add 25,25,5
+ rotrwi 3,11,6
+ rotrwi 4,11,11
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrwi 4,4,14
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrwi 3,15,2
+ rotrwi 4,15,13
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrwi 3,27,7
+ rotrwi 4,27,18
+ rotrwi 5,24,17
+ rotrwi 0,24,19
+ xor 3,3,4
+ srwi 4,27,3
+ xor 5,5,0
+ srwi 0,24,10
+ add 26,26,19
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,40(7)
+ add 26,26,3
+ add 26,26,5
+ rotrwi 3,10,6
+ rotrwi 4,10,11
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrwi 4,4,14
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrwi 3,14,2
+ rotrwi 4,14,13
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrwi 3,28,7
+ rotrwi 4,28,18
+ rotrwi 5,25,17
+ rotrwi 0,25,19
+ xor 3,3,4
+ srwi 4,28,3
+ xor 5,5,0
+ srwi 0,25,10
+ add 27,27,20
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,44(7)
+ add 27,27,3
+ add 27,27,5
+ rotrwi 3,9,6
+ rotrwi 4,9,11
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrwi 4,4,14
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrwi 3,6,2
+ rotrwi 4,6,13
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrwi 3,29,7
+ rotrwi 4,29,18
+ rotrwi 5,26,17
+ rotrwi 0,26,19
+ xor 3,3,4
+ srwi 4,29,3
+ xor 5,5,0
+ srwi 0,26,10
+ add 28,28,21
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,48(7)
+ add 28,28,3
+ add 28,28,5
+ rotrwi 3,8,6
+ rotrwi 4,8,11
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrwi 4,4,14
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrwi 3,12,2
+ rotrwi 4,12,13
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrwi 3,30,7
+ rotrwi 4,30,18
+ rotrwi 5,27,17
+ rotrwi 0,27,19
+ xor 3,3,4
+ srwi 4,30,3
+ xor 5,5,0
+ srwi 0,27,10
+ add 29,29,22
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,52(7)
+ add 29,29,3
+ add 29,29,5
+ rotrwi 3,15,6
+ rotrwi 4,15,11
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrwi 4,4,14
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrwi 3,11,2
+ rotrwi 4,11,13
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrwi 3,31,7
+ rotrwi 4,31,18
+ rotrwi 5,28,17
+ rotrwi 0,28,19
+ xor 3,3,4
+ srwi 4,31,3
+ xor 5,5,0
+ srwi 0,28,10
+ add 30,30,23
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,56(7)
+ add 30,30,3
+ add 30,30,5
+ rotrwi 3,14,6
+ rotrwi 4,14,11
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrwi 4,4,14
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrwi 3,10,2
+ rotrwi 4,10,13
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrwi 3,16,7
+ rotrwi 4,16,18
+ rotrwi 5,29,17
+ rotrwi 0,29,19
+ xor 3,3,4
+ srwi 4,16,3
+ xor 5,5,0
+ srwi 0,29,10
+ add 31,31,24
+ xor 3,3,4
+ xor 5,5,0
+ lwz 0,60(7)
+ add 31,31,3
+ add 31,31,5
+ rotrwi 3,6,6
+ rotrwi 4,6,11
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrwi 4,4,14
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrwi 3,9,2
+ rotrwi 4,9,13
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrwi 4,4,9
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ bdnz .Lrounds
+
+ ld 3,144(1)
+ ld 31,136(1)
+ ld 5,128(1)
+ subi 7,7,192
+
+ lwz 16,0(3)
+ lwz 17,4(3)
+ lwz 18,8(3)
+ lwz 19,12(3)
+ lwz 20,16(3)
+ lwz 21,20(3)
+ lwz 22,24(3)
+ addi 31,31,64
+ lwz 23,28(3)
+ add 8,8,16
+ add 9,9,17
+ std 31,136(1)
+ add 10,10,18
+ stw 8,0(3)
+ add 11,11,19
+ stw 9,4(3)
+ add 12,12,20
+ stw 10,8(3)
+ add 6,6,21
+ stw 11,12(3)
+ add 14,14,22
+ stw 12,16(3)
+ add 15,15,23
+ stw 6,20(3)
+ stw 14,24(3)
+ cmpld 31,5
+ stw 15,28(3)
+ bne .Lsha2_block_private
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.size zfs_sha256_ppc,.-zfs_sha256_ppc
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 7
+ addi 7,7,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+#endif
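
Both zfs_sha256_ppc variants above share the same round layout: the first 16 rounds consume the message words directly, then "li 5,3 / mtctr 5" runs the fully unrolled .Lrounds body three times, each pass covering 16 rounds and extending the message schedule in place, for 64 rounds total. (.LPICmeup materializes the address of the K table with the "bcl 20,31,$+4 / mflr" trick, avoiding load-time relocations.) The schedule update each round performs, as plain C (illustrative):

	#include <stdint.h>

	static inline uint32_t rotr32(uint32_t x, unsigned n)
	{
		return (x >> n) | (x << (32 - n));
	}

	/* W[i] += sigma0(W[i+1]) + W[i+9] + sigma1(W[i+14]), indices mod 16 */
	static void schedule_step(uint32_t w[16], unsigned i)
	{
		uint32_t a = w[(i + 1) & 15], b = w[(i + 14) & 15];

		w[i & 15] += (rotr32(a, 7) ^ rotr32(a, 18) ^ (a >> 3)) +
		    w[(i + 9) & 15] +
		    (rotr32(b, 17) ^ rotr32(b, 19) ^ (b >> 10));
	}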
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S
new file mode 100644
index 000000000000..2409c53385d6
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S
@@ -0,0 +1,1722 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
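+
+/*
+ * This SHA-512 POWER8 routine mirrors the vector SHA-256 code earlier in
+ * the diff, but on 64-bit lanes: the ".long" words ending in 0xC0 and
+ * 0xC2 appear to be raw encodings of POWER8 doubleword ops (vaddudm and
+ * vshasigmad, respectively), emitted as opcode words so pre-POWER8
+ * assemblers can still build the file.  Scalar equivalents of the
+ * SHA-512 sigma functions, for orientation (names illustrative):
+ *
+ *	static inline uint64_t rotr64(uint64_t x, unsigned n)
+ *	{
+ *		return (x >> n) | (x << (64 - n));
+ *	}
+ *	Sigma0(x) = rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39)
+ *	Sigma1(x) = rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41)
+ *	sigma0(x) = rotr64(x, 1)  ^ rotr64(x, 8)  ^ (x >> 7)
+ *	sigma1(x) = rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6)
+ */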
+
+#if (defined(__PPC64__) && defined(__BIG_ENDIAN__))
+
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.text
+
+.globl zfs_sha512_power8
+.globl .zfs_sha512_power8
+.type zfs_sha512_power8,@function
+.section ".opd","aw"
+.align 3
+zfs_sha512_power8:
+.quad .zfs_sha512_power8,.TOC.@tocbase,0
+.previous
+.align 6
+.zfs_sha512_power8:
+#else
+.abiversion 2
+.text
+
+.globl zfs_sha512_power8
+.type zfs_sha512_power8,@function
+.align 6
+zfs_sha512_power8:
+.localentry zfs_sha512_power8,0
+#endif
+
+ stdu 1,-384(1)
+ mflr 8
+ li 10,207
+ li 11,223
+ stvx 24,10,1
+ addi 10,10,32
+ mfspr 12,256
+ stvx 25,11,1
+ addi 11,11,32
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ stvx 31,11,1
+ li 11,-4096+255
+ stw 12,332(1)
+ li 10,0x10
+ std 26,336(1)
+ li 26,0x20
+ std 27,344(1)
+ li 27,0x30
+ std 28,352(1)
+ li 28,0x40
+ std 29,360(1)
+ li 29,0x50
+ std 30,368(1)
+ li 30,0x60
+ std 31,376(1)
+ li 31,0x70
+ std 8,400(1)
+ mtspr 256,11
+
+ bl .LPICmeup
+ addi 11,1,79
+ .long 0x7C001E99
+ .long 0x7C4A1E99
+ .long 0x7C9A1E99
+ vsldoi 1,0,0,8
+ .long 0x7CDB1E99
+ vsldoi 3,2,2,8
+ vsldoi 5,4,4,8
+ vsldoi 7,6,6,8
+ li 0,4
+ b .Loop
+.align 5
+.Loop:
+ lvx 28,0,6
+ .long 0x7D002699
+ addi 4,4,16
+ mr 7,6
+ stvx 0,0,11
+ stvx 1,10,11
+ stvx 2,26,11
+ stvx 3,27,11
+ stvx 4,28,11
+ stvx 5,29,11
+ stvx 6,30,11
+ stvx 7,31,11
+ .long 0x10E7E0C0
+ lvx 28,10,6
+ .long 0x10E740C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x7D402699
+ addi 4,4,16
+ vsldoi 9,8,8,8
+ .long 0x10C648C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x10A550C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x7D802699
+ addi 4,4,16
+ vsldoi 11,10,10,8
+ .long 0x108458C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x106360C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x7DC02699
+ addi 4,4,16
+ vsldoi 13,12,12,8
+ .long 0x104268C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x102170C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x7E002699
+ addi 4,4,16
+ vsldoi 15,14,14,8
+ .long 0x100078C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ .long 0x10E780C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x7E402699
+ addi 4,4,16
+ vsldoi 17,16,16,8
+ .long 0x10C688C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x10A590C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x7F002699
+ addi 4,4,16
+ vsldoi 19,18,18,8
+ .long 0x108498C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x1063C0C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x7F402699
+ addi 4,4,16
+ vsldoi 25,24,24,8
+ .long 0x1042C8C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x1021D0C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ vsldoi 27,26,26,8
+ .long 0x13C906C2
+ .long 0x1108F0C0
+ .long 0x13DA7EC2
+ .long 0x1108F0C0
+ .long 0x110888C0
+ .long 0x1000D8C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ mtctr 0
+ b .L16_xx
+.align 5
+.L16_xx:
+ .long 0x13CA06C2
+ .long 0x1129F0C0
+ .long 0x13DB7EC2
+ .long 0x1129F0C0
+ .long 0x112990C0
+ .long 0x10E740C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x13CB06C2
+ .long 0x114AF0C0
+ .long 0x13C87EC2
+ .long 0x114AF0C0
+ .long 0x114A98C0
+ .long 0x10C648C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x13CC06C2
+ .long 0x116BF0C0
+ .long 0x13C97EC2
+ .long 0x116BF0C0
+ .long 0x116BC0C0
+ .long 0x10A550C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x13CD06C2
+ .long 0x118CF0C0
+ .long 0x13CA7EC2
+ .long 0x118CF0C0
+ .long 0x118CC8C0
+ .long 0x108458C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x13CE06C2
+ .long 0x11ADF0C0
+ .long 0x13CB7EC2
+ .long 0x11ADF0C0
+ .long 0x11ADD0C0
+ .long 0x106360C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x13CF06C2
+ .long 0x11CEF0C0
+ .long 0x13CC7EC2
+ .long 0x11CEF0C0
+ .long 0x11CED8C0
+ .long 0x104268C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13D006C2
+ .long 0x11EFF0C0
+ .long 0x13CD7EC2
+ .long 0x11EFF0C0
+ .long 0x11EF40C0
+ .long 0x102170C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x13D106C2
+ .long 0x1210F0C0
+ .long 0x13CE7EC2
+ .long 0x1210F0C0
+ .long 0x121048C0
+ .long 0x100078C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ .long 0x13D206C2
+ .long 0x1231F0C0
+ .long 0x13CF7EC2
+ .long 0x1231F0C0
+ .long 0x123150C0
+ .long 0x10E780C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x13D306C2
+ .long 0x1252F0C0
+ .long 0x13D07EC2
+ .long 0x1252F0C0
+ .long 0x125258C0
+ .long 0x10C688C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x13D806C2
+ .long 0x1273F0C0
+ .long 0x13D17EC2
+ .long 0x1273F0C0
+ .long 0x127360C0
+ .long 0x10A590C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x13D906C2
+ .long 0x1318F0C0
+ .long 0x13D27EC2
+ .long 0x1318F0C0
+ .long 0x131868C0
+ .long 0x108498C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x13DA06C2
+ .long 0x1339F0C0
+ .long 0x13D37EC2
+ .long 0x1339F0C0
+ .long 0x133970C0
+ .long 0x1063C0C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x13DB06C2
+ .long 0x135AF0C0
+ .long 0x13D87EC2
+ .long 0x135AF0C0
+ .long 0x135A78C0
+ .long 0x1042C8C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13C806C2
+ .long 0x137BF0C0
+ .long 0x13D97EC2
+ .long 0x137BF0C0
+ .long 0x137B80C0
+ .long 0x1021D0C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x13C906C2
+ .long 0x1108F0C0
+ .long 0x13DA7EC2
+ .long 0x1108F0C0
+ .long 0x110888C0
+ .long 0x1000D8C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ bdnz .L16_xx
+
+ lvx 10,0,11
+ subic. 5,5,1
+ lvx 11,10,11
+ .long 0x100050C0
+ lvx 12,26,11
+ .long 0x102158C0
+ lvx 13,27,11
+ .long 0x104260C0
+ lvx 14,28,11
+ .long 0x106368C0
+ lvx 15,29,11
+ .long 0x108470C0
+ lvx 16,30,11
+ .long 0x10A578C0
+ lvx 17,31,11
+ .long 0x10C680C0
+ .long 0x10E788C0
+ bne .Loop
+ vperm 0,0,1,28
+ vperm 2,2,3,28
+ vperm 4,4,5,28
+ vperm 6,6,7,28
+ .long 0x7C001F99
+ .long 0x7C4A1F99
+ .long 0x7C9A1F99
+ .long 0x7CDB1F99
+ addi 11,1,207
+ mtlr 8
+ mtspr 256,12
+ lvx 24,0,11
+ lvx 25,10,11
+ lvx 26,26,11
+ lvx 27,27,11
+ lvx 28,28,11
+ lvx 29,29,11
+ lvx 30,30,11
+ lvx 31,31,11
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,6,3,0
+.long 0
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.size .zfs_sha512_power8,.-.zfs_sha512_power8
+.size zfs_sha512_power8,.-.zfs_sha512_power8
+#else
+.size zfs_sha512_power8,.-zfs_sha512_power8
+#endif
+.align 6
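+/*
+ * .LPICmeup returns the address of the constant table below in a
+ * position-independent way: bcl 20,31,$+4 deposits the address of
+ * the following instruction in LR, and the addi skips the 56 bytes
+ * between that point and the first table entry.
+ */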
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 6
+ addi 6,6,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
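+/*
+ * SHA-512 round constants (K): each 64-bit value is stored twice so
+ * one 16-byte vector load yields the constant in both doublewords.
+ */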
+.long 0x428a2f98,0xd728ae22
+.long 0x428a2f98,0xd728ae22
+.long 0x71374491,0x23ef65cd
+.long 0x71374491,0x23ef65cd
+.long 0xb5c0fbcf,0xec4d3b2f
+.long 0xb5c0fbcf,0xec4d3b2f
+.long 0xe9b5dba5,0x8189dbbc
+.long 0xe9b5dba5,0x8189dbbc
+.long 0x3956c25b,0xf348b538
+.long 0x3956c25b,0xf348b538
+.long 0x59f111f1,0xb605d019
+.long 0x59f111f1,0xb605d019
+.long 0x923f82a4,0xaf194f9b
+.long 0x923f82a4,0xaf194f9b
+.long 0xab1c5ed5,0xda6d8118
+.long 0xab1c5ed5,0xda6d8118
+.long 0xd807aa98,0xa3030242
+.long 0xd807aa98,0xa3030242
+.long 0x12835b01,0x45706fbe
+.long 0x12835b01,0x45706fbe
+.long 0x243185be,0x4ee4b28c
+.long 0x243185be,0x4ee4b28c
+.long 0x550c7dc3,0xd5ffb4e2
+.long 0x550c7dc3,0xd5ffb4e2
+.long 0x72be5d74,0xf27b896f
+.long 0x72be5d74,0xf27b896f
+.long 0x80deb1fe,0x3b1696b1
+.long 0x80deb1fe,0x3b1696b1
+.long 0x9bdc06a7,0x25c71235
+.long 0x9bdc06a7,0x25c71235
+.long 0xc19bf174,0xcf692694
+.long 0xc19bf174,0xcf692694
+.long 0xe49b69c1,0x9ef14ad2
+.long 0xe49b69c1,0x9ef14ad2
+.long 0xefbe4786,0x384f25e3
+.long 0xefbe4786,0x384f25e3
+.long 0x0fc19dc6,0x8b8cd5b5
+.long 0x0fc19dc6,0x8b8cd5b5
+.long 0x240ca1cc,0x77ac9c65
+.long 0x240ca1cc,0x77ac9c65
+.long 0x2de92c6f,0x592b0275
+.long 0x2de92c6f,0x592b0275
+.long 0x4a7484aa,0x6ea6e483
+.long 0x4a7484aa,0x6ea6e483
+.long 0x5cb0a9dc,0xbd41fbd4
+.long 0x5cb0a9dc,0xbd41fbd4
+.long 0x76f988da,0x831153b5
+.long 0x76f988da,0x831153b5
+.long 0x983e5152,0xee66dfab
+.long 0x983e5152,0xee66dfab
+.long 0xa831c66d,0x2db43210
+.long 0xa831c66d,0x2db43210
+.long 0xb00327c8,0x98fb213f
+.long 0xb00327c8,0x98fb213f
+.long 0xbf597fc7,0xbeef0ee4
+.long 0xbf597fc7,0xbeef0ee4
+.long 0xc6e00bf3,0x3da88fc2
+.long 0xc6e00bf3,0x3da88fc2
+.long 0xd5a79147,0x930aa725
+.long 0xd5a79147,0x930aa725
+.long 0x06ca6351,0xe003826f
+.long 0x06ca6351,0xe003826f
+.long 0x14292967,0x0a0e6e70
+.long 0x14292967,0x0a0e6e70
+.long 0x27b70a85,0x46d22ffc
+.long 0x27b70a85,0x46d22ffc
+.long 0x2e1b2138,0x5c26c926
+.long 0x2e1b2138,0x5c26c926
+.long 0x4d2c6dfc,0x5ac42aed
+.long 0x4d2c6dfc,0x5ac42aed
+.long 0x53380d13,0x9d95b3df
+.long 0x53380d13,0x9d95b3df
+.long 0x650a7354,0x8baf63de
+.long 0x650a7354,0x8baf63de
+.long 0x766a0abb,0x3c77b2a8
+.long 0x766a0abb,0x3c77b2a8
+.long 0x81c2c92e,0x47edaee6
+.long 0x81c2c92e,0x47edaee6
+.long 0x92722c85,0x1482353b
+.long 0x92722c85,0x1482353b
+.long 0xa2bfe8a1,0x4cf10364
+.long 0xa2bfe8a1,0x4cf10364
+.long 0xa81a664b,0xbc423001
+.long 0xa81a664b,0xbc423001
+.long 0xc24b8b70,0xd0f89791
+.long 0xc24b8b70,0xd0f89791
+.long 0xc76c51a3,0x0654be30
+.long 0xc76c51a3,0x0654be30
+.long 0xd192e819,0xd6ef5218
+.long 0xd192e819,0xd6ef5218
+.long 0xd6990624,0x5565a910
+.long 0xd6990624,0x5565a910
+.long 0xf40e3585,0x5771202a
+.long 0xf40e3585,0x5771202a
+.long 0x106aa070,0x32bbd1b8
+.long 0x106aa070,0x32bbd1b8
+.long 0x19a4c116,0xb8d2d0c8
+.long 0x19a4c116,0xb8d2d0c8
+.long 0x1e376c08,0x5141ab53
+.long 0x1e376c08,0x5141ab53
+.long 0x2748774c,0xdf8eeb99
+.long 0x2748774c,0xdf8eeb99
+.long 0x34b0bcb5,0xe19b48a8
+.long 0x34b0bcb5,0xe19b48a8
+.long 0x391c0cb3,0xc5c95a63
+.long 0x391c0cb3,0xc5c95a63
+.long 0x4ed8aa4a,0xe3418acb
+.long 0x4ed8aa4a,0xe3418acb
+.long 0x5b9cca4f,0x7763e373
+.long 0x5b9cca4f,0x7763e373
+.long 0x682e6ff3,0xd6b2b8a3
+.long 0x682e6ff3,0xd6b2b8a3
+.long 0x748f82ee,0x5defb2fc
+.long 0x748f82ee,0x5defb2fc
+.long 0x78a5636f,0x43172f60
+.long 0x78a5636f,0x43172f60
+.long 0x84c87814,0xa1f0ab72
+.long 0x84c87814,0xa1f0ab72
+.long 0x8cc70208,0x1a6439ec
+.long 0x8cc70208,0x1a6439ec
+.long 0x90befffa,0x23631e28
+.long 0x90befffa,0x23631e28
+.long 0xa4506ceb,0xde82bde9
+.long 0xa4506ceb,0xde82bde9
+.long 0xbef9a3f7,0xb2c67915
+.long 0xbef9a3f7,0xb2c67915
+.long 0xc67178f2,0xe372532b
+.long 0xc67178f2,0xe372532b
+.long 0xca273ece,0xea26619c
+.long 0xca273ece,0xea26619c
+.long 0xd186b8c7,0x21c0c207
+.long 0xd186b8c7,0x21c0c207
+.long 0xeada7dd6,0xcde0eb1e
+.long 0xeada7dd6,0xcde0eb1e
+.long 0xf57d4f7f,0xee6ed178
+.long 0xf57d4f7f,0xee6ed178
+.long 0x06f067aa,0x72176fba
+.long 0x06f067aa,0x72176fba
+.long 0x0a637dc5,0xa2c898a6
+.long 0x0a637dc5,0xa2c898a6
+.long 0x113f9804,0xbef90dae
+.long 0x113f9804,0xbef90dae
+.long 0x1b710b35,0x131c471b
+.long 0x1b710b35,0x131c471b
+.long 0x28db77f5,0x23047d84
+.long 0x28db77f5,0x23047d84
+.long 0x32caab7b,0x40c72493
+.long 0x32caab7b,0x40c72493
+.long 0x3c9ebe0a,0x15c9bebc
+.long 0x3c9ebe0a,0x15c9bebc
+.long 0x431d67c4,0x9c100d4c
+.long 0x431d67c4,0x9c100d4c
+.long 0x4cc5d4be,0xcb3e42b6
+.long 0x4cc5d4be,0xcb3e42b6
+.long 0x597f299c,0xfc657e2a
+.long 0x597f299c,0xfc657e2a
+.long 0x5fcb6fab,0x3ad6faec
+.long 0x5fcb6fab,0x3ad6faec
+.long 0x6c44198c,0x4a475817
+.long 0x6c44198c,0x4a475817
+.long 0,0
+.long 0,0
+.long 0x00010203,0x04050607
+.long 0x10111213,0x14151617
+
+#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+
+.abiversion 2
+.text
+
+.globl zfs_sha512_power8
+.type zfs_sha512_power8,@function
+.align 6
+zfs_sha512_power8:
+.localentry zfs_sha512_power8,0
+
+ stdu 1,-384(1)
+ mflr 8
+ li 10,207
+ li 11,223
+ stvx 24,10,1
+ addi 10,10,32
+ li 12,-1
+ stvx 25,11,1
+ addi 11,11,32
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ stvx 31,11,1
+ li 11,-4096+255
+ stw 12,332(1)
+ li 10,0x10
+ std 26,336(1)
+ li 26,0x20
+ std 27,344(1)
+ li 27,0x30
+ std 28,352(1)
+ li 28,0x40
+ std 29,360(1)
+ li 29,0x50
+ std 30,368(1)
+ li 30,0x60
+ std 31,376(1)
+ li 31,0x70
+ std 8,400(1)
+ or 11,11,11
+
+ bl .LPICmeup
+ addi 11,1,79
+ li 7,8
+ lvsl 31,0,7
+ vspltisb 28,0x0f
+ vxor 31,31,28
+ .long 0x7C001E99
+ .long 0x7C4A1E99
+ .long 0x7C9A1E99
+ vsldoi 1,0,0,8
+ .long 0x7CDB1E99
+ vsldoi 3,2,2,8
+ vsldoi 5,4,4,8
+ vsldoi 7,6,6,8
+ li 0,4
+ b .Loop
+.align 5
+.Loop:
+ lvx 28,0,6
+ .long 0x7D002699
+ addi 4,4,16
+ mr 7,6
+ stvx 0,0,11
+ stvx 1,10,11
+ stvx 2,26,11
+ stvx 3,27,11
+ stvx 4,28,11
+ stvx 5,29,11
+ stvx 6,30,11
+ stvx 7,31,11
+ .long 0x10E7E0C0
+ lvx 28,10,6
+ vperm 8,8,8,31
+ .long 0x10E740C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x7D402699
+ addi 4,4,16
+ vsldoi 9,8,8,8
+ .long 0x10C648C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ vperm 10,10,10,31
+ .long 0x10A550C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x7D802699
+ addi 4,4,16
+ vsldoi 11,10,10,8
+ .long 0x108458C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ vperm 12,12,12,31
+ .long 0x106360C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x7DC02699
+ addi 4,4,16
+ vsldoi 13,12,12,8
+ .long 0x104268C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ vperm 14,14,14,31
+ .long 0x102170C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x7E002699
+ addi 4,4,16
+ vsldoi 15,14,14,8
+ .long 0x100078C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ vperm 16,16,16,31
+ .long 0x10E780C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x7E402699
+ addi 4,4,16
+ vsldoi 17,16,16,8
+ .long 0x10C688C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ vperm 18,18,18,31
+ .long 0x10A590C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x7F002699
+ addi 4,4,16
+ vsldoi 19,18,18,8
+ .long 0x108498C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ vperm 24,24,24,31
+ .long 0x1063C0C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x7F402699
+ addi 4,4,16
+ vsldoi 25,24,24,8
+ .long 0x1042C8C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ vperm 26,26,26,31
+ .long 0x1021D0C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ vsldoi 27,26,26,8
+ .long 0x13C906C2
+ .long 0x1108F0C0
+ .long 0x13DA7EC2
+ .long 0x1108F0C0
+ .long 0x110888C0
+ .long 0x1000D8C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ mtctr 0
+ b .L16_xx
+.align 5
+.L16_xx:
+ .long 0x13CA06C2
+ .long 0x1129F0C0
+ .long 0x13DB7EC2
+ .long 0x1129F0C0
+ .long 0x112990C0
+ .long 0x10E740C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x13CB06C2
+ .long 0x114AF0C0
+ .long 0x13C87EC2
+ .long 0x114AF0C0
+ .long 0x114A98C0
+ .long 0x10C648C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x13CC06C2
+ .long 0x116BF0C0
+ .long 0x13C97EC2
+ .long 0x116BF0C0
+ .long 0x116BC0C0
+ .long 0x10A550C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x13CD06C2
+ .long 0x118CF0C0
+ .long 0x13CA7EC2
+ .long 0x118CF0C0
+ .long 0x118CC8C0
+ .long 0x108458C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x13CE06C2
+ .long 0x11ADF0C0
+ .long 0x13CB7EC2
+ .long 0x11ADF0C0
+ .long 0x11ADD0C0
+ .long 0x106360C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x13CF06C2
+ .long 0x11CEF0C0
+ .long 0x13CC7EC2
+ .long 0x11CEF0C0
+ .long 0x11CED8C0
+ .long 0x104268C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13D006C2
+ .long 0x11EFF0C0
+ .long 0x13CD7EC2
+ .long 0x11EFF0C0
+ .long 0x11EF40C0
+ .long 0x102170C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x13D106C2
+ .long 0x1210F0C0
+ .long 0x13CE7EC2
+ .long 0x1210F0C0
+ .long 0x121048C0
+ .long 0x100078C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ .long 0x13D206C2
+ .long 0x1231F0C0
+ .long 0x13CF7EC2
+ .long 0x1231F0C0
+ .long 0x123150C0
+ .long 0x10E780C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x13D306C2
+ .long 0x1252F0C0
+ .long 0x13D07EC2
+ .long 0x1252F0C0
+ .long 0x125258C0
+ .long 0x10C688C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x13D806C2
+ .long 0x1273F0C0
+ .long 0x13D17EC2
+ .long 0x1273F0C0
+ .long 0x127360C0
+ .long 0x10A590C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x13D906C2
+ .long 0x1318F0C0
+ .long 0x13D27EC2
+ .long 0x1318F0C0
+ .long 0x131868C0
+ .long 0x108498C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x13DA06C2
+ .long 0x1339F0C0
+ .long 0x13D37EC2
+ .long 0x1339F0C0
+ .long 0x133970C0
+ .long 0x1063C0C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x13DB06C2
+ .long 0x135AF0C0
+ .long 0x13D87EC2
+ .long 0x135AF0C0
+ .long 0x135A78C0
+ .long 0x1042C8C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13C806C2
+ .long 0x137BF0C0
+ .long 0x13D97EC2
+ .long 0x137BF0C0
+ .long 0x137B80C0
+ .long 0x1021D0C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x13C906C2
+ .long 0x1108F0C0
+ .long 0x13DA7EC2
+ .long 0x1108F0C0
+ .long 0x110888C0
+ .long 0x1000D8C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ bdnz .L16_xx
+
+ lvx 10,0,11
+ subic. 5,5,1
+ lvx 11,10,11
+ .long 0x100050C0
+ lvx 12,26,11
+ .long 0x102158C0
+ lvx 13,27,11
+ .long 0x104260C0
+ lvx 14,28,11
+ .long 0x106368C0
+ lvx 15,29,11
+ .long 0x108470C0
+ lvx 16,30,11
+ .long 0x10A578C0
+ lvx 17,31,11
+ .long 0x10C680C0
+ .long 0x10E788C0
+ bne .Loop
+ vperm 0,0,1,28
+ vperm 2,2,3,28
+ vperm 4,4,5,28
+ vperm 6,6,7,28
+ .long 0x7C001F99
+ .long 0x7C4A1F99
+ .long 0x7C9A1F99
+ .long 0x7CDB1F99
+ addi 11,1,207
+ mtlr 8
+ or 12,12,12
+ lvx 24,0,11
+ lvx 25,10,11
+ lvx 26,26,11
+ lvx 27,27,11
+ lvx 28,28,11
+ lvx 29,29,11
+ lvx 30,30,11
+ lvx 31,31,11
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,6,3,0
+.long 0
+.size zfs_sha512_power8,.-zfs_sha512_power8
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 6
+ addi 6,6,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
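+/*
+ * The same round-constant table as the big-endian build, with the
+ * two 32-bit halves of each pair swapped so the 64-bit values read
+ * back correctly on little-endian.
+ */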
+.long 0xd728ae22,0x428a2f98
+.long 0xd728ae22,0x428a2f98
+.long 0x23ef65cd,0x71374491
+.long 0x23ef65cd,0x71374491
+.long 0xec4d3b2f,0xb5c0fbcf
+.long 0xec4d3b2f,0xb5c0fbcf
+.long 0x8189dbbc,0xe9b5dba5
+.long 0x8189dbbc,0xe9b5dba5
+.long 0xf348b538,0x3956c25b
+.long 0xf348b538,0x3956c25b
+.long 0xb605d019,0x59f111f1
+.long 0xb605d019,0x59f111f1
+.long 0xaf194f9b,0x923f82a4
+.long 0xaf194f9b,0x923f82a4
+.long 0xda6d8118,0xab1c5ed5
+.long 0xda6d8118,0xab1c5ed5
+.long 0xa3030242,0xd807aa98
+.long 0xa3030242,0xd807aa98
+.long 0x45706fbe,0x12835b01
+.long 0x45706fbe,0x12835b01
+.long 0x4ee4b28c,0x243185be
+.long 0x4ee4b28c,0x243185be
+.long 0xd5ffb4e2,0x550c7dc3
+.long 0xd5ffb4e2,0x550c7dc3
+.long 0xf27b896f,0x72be5d74
+.long 0xf27b896f,0x72be5d74
+.long 0x3b1696b1,0x80deb1fe
+.long 0x3b1696b1,0x80deb1fe
+.long 0x25c71235,0x9bdc06a7
+.long 0x25c71235,0x9bdc06a7
+.long 0xcf692694,0xc19bf174
+.long 0xcf692694,0xc19bf174
+.long 0x9ef14ad2,0xe49b69c1
+.long 0x9ef14ad2,0xe49b69c1
+.long 0x384f25e3,0xefbe4786
+.long 0x384f25e3,0xefbe4786
+.long 0x8b8cd5b5,0x0fc19dc6
+.long 0x8b8cd5b5,0x0fc19dc6
+.long 0x77ac9c65,0x240ca1cc
+.long 0x77ac9c65,0x240ca1cc
+.long 0x592b0275,0x2de92c6f
+.long 0x592b0275,0x2de92c6f
+.long 0x6ea6e483,0x4a7484aa
+.long 0x6ea6e483,0x4a7484aa
+.long 0xbd41fbd4,0x5cb0a9dc
+.long 0xbd41fbd4,0x5cb0a9dc
+.long 0x831153b5,0x76f988da
+.long 0x831153b5,0x76f988da
+.long 0xee66dfab,0x983e5152
+.long 0xee66dfab,0x983e5152
+.long 0x2db43210,0xa831c66d
+.long 0x2db43210,0xa831c66d
+.long 0x98fb213f,0xb00327c8
+.long 0x98fb213f,0xb00327c8
+.long 0xbeef0ee4,0xbf597fc7
+.long 0xbeef0ee4,0xbf597fc7
+.long 0x3da88fc2,0xc6e00bf3
+.long 0x3da88fc2,0xc6e00bf3
+.long 0x930aa725,0xd5a79147
+.long 0x930aa725,0xd5a79147
+.long 0xe003826f,0x06ca6351
+.long 0xe003826f,0x06ca6351
+.long 0x0a0e6e70,0x14292967
+.long 0x0a0e6e70,0x14292967
+.long 0x46d22ffc,0x27b70a85
+.long 0x46d22ffc,0x27b70a85
+.long 0x5c26c926,0x2e1b2138
+.long 0x5c26c926,0x2e1b2138
+.long 0x5ac42aed,0x4d2c6dfc
+.long 0x5ac42aed,0x4d2c6dfc
+.long 0x9d95b3df,0x53380d13
+.long 0x9d95b3df,0x53380d13
+.long 0x8baf63de,0x650a7354
+.long 0x8baf63de,0x650a7354
+.long 0x3c77b2a8,0x766a0abb
+.long 0x3c77b2a8,0x766a0abb
+.long 0x47edaee6,0x81c2c92e
+.long 0x47edaee6,0x81c2c92e
+.long 0x1482353b,0x92722c85
+.long 0x1482353b,0x92722c85
+.long 0x4cf10364,0xa2bfe8a1
+.long 0x4cf10364,0xa2bfe8a1
+.long 0xbc423001,0xa81a664b
+.long 0xbc423001,0xa81a664b
+.long 0xd0f89791,0xc24b8b70
+.long 0xd0f89791,0xc24b8b70
+.long 0x0654be30,0xc76c51a3
+.long 0x0654be30,0xc76c51a3
+.long 0xd6ef5218,0xd192e819
+.long 0xd6ef5218,0xd192e819
+.long 0x5565a910,0xd6990624
+.long 0x5565a910,0xd6990624
+.long 0x5771202a,0xf40e3585
+.long 0x5771202a,0xf40e3585
+.long 0x32bbd1b8,0x106aa070
+.long 0x32bbd1b8,0x106aa070
+.long 0xb8d2d0c8,0x19a4c116
+.long 0xb8d2d0c8,0x19a4c116
+.long 0x5141ab53,0x1e376c08
+.long 0x5141ab53,0x1e376c08
+.long 0xdf8eeb99,0x2748774c
+.long 0xdf8eeb99,0x2748774c
+.long 0xe19b48a8,0x34b0bcb5
+.long 0xe19b48a8,0x34b0bcb5
+.long 0xc5c95a63,0x391c0cb3
+.long 0xc5c95a63,0x391c0cb3
+.long 0xe3418acb,0x4ed8aa4a
+.long 0xe3418acb,0x4ed8aa4a
+.long 0x7763e373,0x5b9cca4f
+.long 0x7763e373,0x5b9cca4f
+.long 0xd6b2b8a3,0x682e6ff3
+.long 0xd6b2b8a3,0x682e6ff3
+.long 0x5defb2fc,0x748f82ee
+.long 0x5defb2fc,0x748f82ee
+.long 0x43172f60,0x78a5636f
+.long 0x43172f60,0x78a5636f
+.long 0xa1f0ab72,0x84c87814
+.long 0xa1f0ab72,0x84c87814
+.long 0x1a6439ec,0x8cc70208
+.long 0x1a6439ec,0x8cc70208
+.long 0x23631e28,0x90befffa
+.long 0x23631e28,0x90befffa
+.long 0xde82bde9,0xa4506ceb
+.long 0xde82bde9,0xa4506ceb
+.long 0xb2c67915,0xbef9a3f7
+.long 0xb2c67915,0xbef9a3f7
+.long 0xe372532b,0xc67178f2
+.long 0xe372532b,0xc67178f2
+.long 0xea26619c,0xca273ece
+.long 0xea26619c,0xca273ece
+.long 0x21c0c207,0xd186b8c7
+.long 0x21c0c207,0xd186b8c7
+.long 0xcde0eb1e,0xeada7dd6
+.long 0xcde0eb1e,0xeada7dd6
+.long 0xee6ed178,0xf57d4f7f
+.long 0xee6ed178,0xf57d4f7f
+.long 0x72176fba,0x06f067aa
+.long 0x72176fba,0x06f067aa
+.long 0xa2c898a6,0x0a637dc5
+.long 0xa2c898a6,0x0a637dc5
+.long 0xbef90dae,0x113f9804
+.long 0xbef90dae,0x113f9804
+.long 0x131c471b,0x1b710b35
+.long 0x131c471b,0x1b710b35
+.long 0x23047d84,0x28db77f5
+.long 0x23047d84,0x28db77f5
+.long 0x40c72493,0x32caab7b
+.long 0x40c72493,0x32caab7b
+.long 0x15c9bebc,0x3c9ebe0a
+.long 0x15c9bebc,0x3c9ebe0a
+.long 0x9c100d4c,0x431d67c4
+.long 0x9c100d4c,0x431d67c4
+.long 0xcb3e42b6,0x4cc5d4be
+.long 0xcb3e42b6,0x4cc5d4be
+.long 0xfc657e2a,0x597f299c
+.long 0xfc657e2a,0x597f299c
+.long 0x3ad6faec,0x5fcb6fab
+.long 0x3ad6faec,0x5fcb6fab
+.long 0x4a475817,0x6c44198c
+.long 0x4a475817,0x6c44198c
+.long 0,0
+.long 0,0
+.long 0x14151617,0x10111213
+.long 0x04050607,0x00010203
+
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S
new file mode 100644
index 000000000000..57213f68abc5
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S
@@ -0,0 +1,2973 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
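+/*
+ * zfs_sha512_ppc: generic integer-unit SHA-512 block transform for
+ * ppc64.  Big-endian and little-endian builds are selected by the
+ * preprocessor conditionals below; each carries its own copy of the
+ * round-constant table.
+ */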
+#if (defined(__PPC64__) && defined(__BIG_ENDIAN__))
+
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.text
+
+.globl zfs_sha512_ppc
+.globl .zfs_sha512_ppc
+.type zfs_sha512_ppc,@function
+.section ".opd","aw"
+.align 3
+zfs_sha512_ppc:
+.quad .zfs_sha512_ppc,.TOC.@tocbase,0
+.previous
+.align 6
+.zfs_sha512_ppc:
+#else
+.abiversion 2
+.text
+
+.globl zfs_sha512_ppc
+.type zfs_sha512_ppc,@function
+.align 6
+zfs_sha512_ppc:
+.localentry zfs_sha512_ppc,0
+#endif
+ stdu 1,-384(1)
+ mflr 0
+ sldi 5,5,7
+
+ std 3,208(1)
+
+ std 14,240(1)
+ std 15,248(1)
+ std 16,256(1)
+ std 17,264(1)
+ std 18,272(1)
+ std 19,280(1)
+ std 20,288(1)
+ std 21,296(1)
+ std 22,304(1)
+ std 23,312(1)
+ std 24,320(1)
+ std 25,328(1)
+ std 26,336(1)
+ std 27,344(1)
+ std 28,352(1)
+ std 29,360(1)
+ std 30,368(1)
+ std 31,376(1)
+ std 0,400(1)
+ ld 8,0(3)
+ mr 31,4
+ ld 9,8(3)
+ ld 10,16(3)
+ ld 11,24(3)
+ ld 12,32(3)
+ ld 6,40(3)
+ ld 14,48(3)
+ ld 15,56(3)
+ bl .LPICmeup
+.LPICedup:
+ andi. 0,31,3
+ bne .Lunaligned
+.Laligned:
+ add 5,31,5
+ std 5,192(1)
+ std 31,200(1)
+ bl .Lsha2_block_private
+ b .Ldone
+
+.align 4
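+/*
+ * Input that is not 4-byte aligned is hashed in place up to the last
+ * whole 128-byte block before a page boundary; the block that would
+ * cross the page is first byte-copied (.Lmemcpy, 32 passes of 4
+ * bytes) into an aligned stack buffer and hashed from there.
+ */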
+.Lunaligned:
+ subfic 0,31,4096
+ andi. 0,0,3968
+ beq .Lcross_page
+ cmpld 5,0
+ ble .Laligned
+ subfc 5,0,5
+ add 0,31,0
+ std 5,184(1)
+ std 0,192(1)
+ std 31,200(1)
+ bl .Lsha2_block_private
+
+ ld 5,184(1)
+.Lcross_page:
+ li 0,32
+ mtctr 0
+ addi 20,1,48
+.Lmemcpy:
+ lbz 16,0(31)
+ lbz 17,1(31)
+ lbz 18,2(31)
+ lbz 19,3(31)
+ addi 31,31,4
+ stb 16,0(20)
+ stb 17,1(20)
+ stb 18,2(20)
+ stb 19,3(20)
+ addi 20,20,4
+ bdnz .Lmemcpy
+ std 31,176(1)
+ addi 0,1,176
+ addi 31,1,48
+ std 5,184(1)
+ std 0,192(1)
+ std 31,200(1)
+ bl .Lsha2_block_private
+ ld 31,176(1)
+ ld 5,184(1)
+ addic. 5,5,-128
+ bne .Lunaligned
+
+.Ldone:
+ ld 0,400(1)
+ ld 14,240(1)
+ ld 15,248(1)
+ ld 16,256(1)
+ ld 17,264(1)
+ ld 18,272(1)
+ ld 19,280(1)
+ ld 20,288(1)
+ ld 21,296(1)
+ ld 22,304(1)
+ ld 23,312(1)
+ ld 24,320(1)
+ ld 25,328(1)
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ mtlr 0
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,18,3,0
+.long 0
+.align 4
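+/*
+ * One 128-byte block: the first 16 rounds are fully unrolled with
+ * the message-word loads folded in, then .Lrounds runs four times
+ * (li 5,4; mtctr 5) adding 16 rounds of schedule expansion each for
+ * rounds 16-79.
+ */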
+.Lsha2_block_private:
+ ld 0,0(7)
+ lwz 5,0(31)
+ lwz 16,4(31)
+ insrdi 16,5,32,0
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ ld 0,8(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 5,8(31)
+ lwz 17,12(31)
+ insrdi 17,5,32,0
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ ld 0,16(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 5,16(31)
+ lwz 18,20(31)
+ insrdi 18,5,32,0
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ ld 0,24(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 5,24(31)
+ lwz 19,28(31)
+ insrdi 19,5,32,0
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ ld 0,32(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 5,32(31)
+ lwz 20,36(31)
+ insrdi 20,5,32,0
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ ld 0,40(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 5,40(31)
+ lwz 21,44(31)
+ insrdi 21,5,32,0
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ ld 0,48(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 5,48(31)
+ lwz 22,52(31)
+ insrdi 22,5,32,0
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ ld 0,56(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 5,56(31)
+ lwz 23,60(31)
+ insrdi 23,5,32,0
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ ld 0,64(7)
+ add 8,8,3
+ add 8,8,5
+
+ lwz 5,64(31)
+ lwz 24,68(31)
+ insrdi 24,5,32,0
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ ld 0,72(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 5,72(31)
+ lwz 25,76(31)
+ insrdi 25,5,32,0
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ ld 0,80(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 5,80(31)
+ lwz 26,84(31)
+ insrdi 26,5,32,0
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ ld 0,88(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 5,88(31)
+ lwz 27,92(31)
+ insrdi 27,5,32,0
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ ld 0,96(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 5,96(31)
+ lwz 28,100(31)
+ insrdi 28,5,32,0
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ ld 0,104(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 5,104(31)
+ lwz 29,108(31)
+ insrdi 29,5,32,0
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ ld 0,112(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 5,112(31)
+ lwz 30,116(31)
+ insrdi 30,5,32,0
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ ld 0,120(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 5,120(31)
+ lwz 31,124(31)
+ insrdi 31,5,32,0
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ li 5,4
+ mtctr 5
+.align 4
+.Lrounds:
+ addi 7,7,128
+ rotrdi 3,17,1
+ rotrdi 4,17,8
+ rotrdi 5,30,19
+ rotrdi 0,30,61
+ xor 3,3,4
+ srdi 4,17,7
+ xor 5,5,0
+ srdi 0,30,6
+ add 16,16,25
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,0(7)
+ add 16,16,3
+ add 16,16,5
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrdi 3,18,1
+ rotrdi 4,18,8
+ rotrdi 5,31,19
+ rotrdi 0,31,61
+ xor 3,3,4
+ srdi 4,18,7
+ xor 5,5,0
+ srdi 0,31,6
+ add 17,17,26
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,8(7)
+ add 17,17,3
+ add 17,17,5
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrdi 3,19,1
+ rotrdi 4,19,8
+ rotrdi 5,16,19
+ rotrdi 0,16,61
+ xor 3,3,4
+ srdi 4,19,7
+ xor 5,5,0
+ srdi 0,16,6
+ add 18,18,27
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,16(7)
+ add 18,18,3
+ add 18,18,5
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrdi 3,20,1
+ rotrdi 4,20,8
+ rotrdi 5,17,19
+ rotrdi 0,17,61
+ xor 3,3,4
+ srdi 4,20,7
+ xor 5,5,0
+ srdi 0,17,6
+ add 19,19,28
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,24(7)
+ add 19,19,3
+ add 19,19,5
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrdi 3,21,1
+ rotrdi 4,21,8
+ rotrdi 5,18,19
+ rotrdi 0,18,61
+ xor 3,3,4
+ srdi 4,21,7
+ xor 5,5,0
+ srdi 0,18,6
+ add 20,20,29
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,32(7)
+ add 20,20,3
+ add 20,20,5
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrdi 3,22,1
+ rotrdi 4,22,8
+ rotrdi 5,19,19
+ rotrdi 0,19,61
+ xor 3,3,4
+ srdi 4,22,7
+ xor 5,5,0
+ srdi 0,19,6
+ add 21,21,30
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,40(7)
+ add 21,21,3
+ add 21,21,5
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrdi 3,23,1
+ rotrdi 4,23,8
+ rotrdi 5,20,19
+ rotrdi 0,20,61
+ xor 3,3,4
+ srdi 4,23,7
+ xor 5,5,0
+ srdi 0,20,6
+ add 22,22,31
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,48(7)
+ add 22,22,3
+ add 22,22,5
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrdi 3,24,1
+ rotrdi 4,24,8
+ rotrdi 5,21,19
+ rotrdi 0,21,61
+ xor 3,3,4
+ srdi 4,24,7
+ xor 5,5,0
+ srdi 0,21,6
+ add 23,23,16
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,56(7)
+ add 23,23,3
+ add 23,23,5
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ rotrdi 3,25,1
+ rotrdi 4,25,8
+ rotrdi 5,22,19
+ rotrdi 0,22,61
+ xor 3,3,4
+ srdi 4,25,7
+ xor 5,5,0
+ srdi 0,22,6
+ add 24,24,17
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,64(7)
+ add 24,24,3
+ add 24,24,5
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrdi 3,26,1
+ rotrdi 4,26,8
+ rotrdi 5,23,19
+ rotrdi 0,23,61
+ xor 3,3,4
+ srdi 4,26,7
+ xor 5,5,0
+ srdi 0,23,6
+ add 25,25,18
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,72(7)
+ add 25,25,3
+ add 25,25,5
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrdi 3,27,1
+ rotrdi 4,27,8
+ rotrdi 5,24,19
+ rotrdi 0,24,61
+ xor 3,3,4
+ srdi 4,27,7
+ xor 5,5,0
+ srdi 0,24,6
+ add 26,26,19
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,80(7)
+ add 26,26,3
+ add 26,26,5
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrdi 3,28,1
+ rotrdi 4,28,8
+ rotrdi 5,25,19
+ rotrdi 0,25,61
+ xor 3,3,4
+ srdi 4,28,7
+ xor 5,5,0
+ srdi 0,25,6
+ add 27,27,20
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,88(7)
+ add 27,27,3
+ add 27,27,5
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrdi 3,29,1
+ rotrdi 4,29,8
+ rotrdi 5,26,19
+ rotrdi 0,26,61
+ xor 3,3,4
+ srdi 4,29,7
+ xor 5,5,0
+ srdi 0,26,6
+ add 28,28,21
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,96(7)
+ add 28,28,3
+ add 28,28,5
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrdi 3,30,1
+ rotrdi 4,30,8
+ rotrdi 5,27,19
+ rotrdi 0,27,61
+ xor 3,3,4
+ srdi 4,30,7
+ xor 5,5,0
+ srdi 0,27,6
+ add 29,29,22
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,104(7)
+ add 29,29,3
+ add 29,29,5
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrdi 3,31,1
+ rotrdi 4,31,8
+ rotrdi 5,28,19
+ rotrdi 0,28,61
+ xor 3,3,4
+ srdi 4,31,7
+ xor 5,5,0
+ srdi 0,28,6
+ add 30,30,23
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,112(7)
+ add 30,30,3
+ add 30,30,5
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrdi 3,16,1
+ rotrdi 4,16,8
+ rotrdi 5,29,19
+ rotrdi 0,29,61
+ xor 3,3,4
+ srdi 4,16,7
+ xor 5,5,0
+ srdi 0,29,6
+ add 31,31,24
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,120(7)
+ add 31,31,3
+ add 31,31,5
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ bdnz .Lrounds
+
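+/*
+ * Block finished: reload the context and input pointers, rewind the
+ * K pointer (subi 7,7,512 undoes the four 128-byte advances made in
+ * .Lrounds), fold the working variables into the saved state, and
+ * loop while input remains (cmpld 31,5).
+ */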
+ ld 3,208(1)
+ ld 31,200(1)
+ ld 5,192(1)
+ subi 7,7,512
+
+ ld 16,0(3)
+ ld 17,8(3)
+ ld 18,16(3)
+ ld 19,24(3)
+ ld 20,32(3)
+ ld 21,40(3)
+ ld 22,48(3)
+ addi 31,31,128
+ ld 23,56(3)
+ add 8,8,16
+ add 9,9,17
+ std 31,200(1)
+ add 10,10,18
+ std 8,0(3)
+ add 11,11,19
+ std 9,8(3)
+ add 12,12,20
+ std 10,16(3)
+ add 6,6,21
+ std 11,24(3)
+ add 14,14,22
+ std 12,32(3)
+ add 15,15,23
+ std 6,40(3)
+ std 14,48(3)
+ cmpld 31,5
+ std 15,56(3)
+ bne .Lsha2_block_private
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+#if (!defined(_CALL_ELF) || _CALL_ELF == 1)
+.size .zfs_sha512_ppc,.-.zfs_sha512_ppc
+.size zfs_sha512_ppc,.-.zfs_sha512_ppc
+#else
+.size zfs_sha512_ppc,.-zfs_sha512_ppc
+#endif
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 7
+ addi 7,7,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
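+/*
+ * SHA-512 round constants, one copy of each, consumed with 64-bit
+ * integer loads (ld) by the round code above.
+ */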
+.long 0x428a2f98,0xd728ae22
+.long 0x71374491,0x23ef65cd
+.long 0xb5c0fbcf,0xec4d3b2f
+.long 0xe9b5dba5,0x8189dbbc
+.long 0x3956c25b,0xf348b538
+.long 0x59f111f1,0xb605d019
+.long 0x923f82a4,0xaf194f9b
+.long 0xab1c5ed5,0xda6d8118
+.long 0xd807aa98,0xa3030242
+.long 0x12835b01,0x45706fbe
+.long 0x243185be,0x4ee4b28c
+.long 0x550c7dc3,0xd5ffb4e2
+.long 0x72be5d74,0xf27b896f
+.long 0x80deb1fe,0x3b1696b1
+.long 0x9bdc06a7,0x25c71235
+.long 0xc19bf174,0xcf692694
+.long 0xe49b69c1,0x9ef14ad2
+.long 0xefbe4786,0x384f25e3
+.long 0x0fc19dc6,0x8b8cd5b5
+.long 0x240ca1cc,0x77ac9c65
+.long 0x2de92c6f,0x592b0275
+.long 0x4a7484aa,0x6ea6e483
+.long 0x5cb0a9dc,0xbd41fbd4
+.long 0x76f988da,0x831153b5
+.long 0x983e5152,0xee66dfab
+.long 0xa831c66d,0x2db43210
+.long 0xb00327c8,0x98fb213f
+.long 0xbf597fc7,0xbeef0ee4
+.long 0xc6e00bf3,0x3da88fc2
+.long 0xd5a79147,0x930aa725
+.long 0x06ca6351,0xe003826f
+.long 0x14292967,0x0a0e6e70
+.long 0x27b70a85,0x46d22ffc
+.long 0x2e1b2138,0x5c26c926
+.long 0x4d2c6dfc,0x5ac42aed
+.long 0x53380d13,0x9d95b3df
+.long 0x650a7354,0x8baf63de
+.long 0x766a0abb,0x3c77b2a8
+.long 0x81c2c92e,0x47edaee6
+.long 0x92722c85,0x1482353b
+.long 0xa2bfe8a1,0x4cf10364
+.long 0xa81a664b,0xbc423001
+.long 0xc24b8b70,0xd0f89791
+.long 0xc76c51a3,0x0654be30
+.long 0xd192e819,0xd6ef5218
+.long 0xd6990624,0x5565a910
+.long 0xf40e3585,0x5771202a
+.long 0x106aa070,0x32bbd1b8
+.long 0x19a4c116,0xb8d2d0c8
+.long 0x1e376c08,0x5141ab53
+.long 0x2748774c,0xdf8eeb99
+.long 0x34b0bcb5,0xe19b48a8
+.long 0x391c0cb3,0xc5c95a63
+.long 0x4ed8aa4a,0xe3418acb
+.long 0x5b9cca4f,0x7763e373
+.long 0x682e6ff3,0xd6b2b8a3
+.long 0x748f82ee,0x5defb2fc
+.long 0x78a5636f,0x43172f60
+.long 0x84c87814,0xa1f0ab72
+.long 0x8cc70208,0x1a6439ec
+.long 0x90befffa,0x23631e28
+.long 0xa4506ceb,0xde82bde9
+.long 0xbef9a3f7,0xb2c67915
+.long 0xc67178f2,0xe372532b
+.long 0xca273ece,0xea26619c
+.long 0xd186b8c7,0x21c0c207
+.long 0xeada7dd6,0xcde0eb1e
+.long 0xf57d4f7f,0xee6ed178
+.long 0x06f067aa,0x72176fba
+.long 0x0a637dc5,0xa2c898a6
+.long 0x113f9804,0xbef90dae
+.long 0x1b710b35,0x131c471b
+.long 0x28db77f5,0x23047d84
+.long 0x32caab7b,0x40c72493
+.long 0x3c9ebe0a,0x15c9bebc
+.long 0x431d67c4,0x9c100d4c
+.long 0x4cc5d4be,0xcb3e42b6
+.long 0x597f299c,0xfc657e2a
+.long 0x5fcb6fab,0x3ad6faec
+.long 0x6c44198c,0x4a475817
+
+#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+
+.abiversion 2
+.text
+
+.globl zfs_sha512_ppc
+.type zfs_sha512_ppc,@function
+.align 6
+zfs_sha512_ppc:
+.localentry zfs_sha512_ppc,0
+
+ stdu 1,-384(1)
+ mflr 0
+ sldi 5,5,7
+
+ std 3,208(1)
+
+ std 14,240(1)
+ std 15,248(1)
+ std 16,256(1)
+ std 17,264(1)
+ std 18,272(1)
+ std 19,280(1)
+ std 20,288(1)
+ std 21,296(1)
+ std 22,304(1)
+ std 23,312(1)
+ std 24,320(1)
+ std 25,328(1)
+ std 26,336(1)
+ std 27,344(1)
+ std 28,352(1)
+ std 29,360(1)
+ std 30,368(1)
+ std 31,376(1)
+ std 0,400(1)
+ ld 8,0(3)
+ mr 31,4
+ ld 9,8(3)
+ ld 10,16(3)
+ ld 11,24(3)
+ ld 12,32(3)
+ ld 6,40(3)
+ ld 14,48(3)
+ ld 15,56(3)
+ bl .LPICmeup
+.LPICedup:
+ andi. 0,31,3
+ bne .Lunaligned
+.Laligned:
+ add 5,31,5
+ std 5,192(1)
+ std 31,200(1)
+ bl .Lsha2_block_private
+ b .Ldone
+
+.align 4
+.Lunaligned:
+ subfic 0,31,4096
+ andi. 0,0,3968
+ beq .Lcross_page
+ cmpld 5,0
+ ble .Laligned
+ subfc 5,0,5
+ add 0,31,0
+ std 5,184(1)
+ std 0,192(1)
+ std 31,200(1)
+ bl .Lsha2_block_private
+
+ ld 5,184(1)
+.Lcross_page:
+ li 0,32
+ mtctr 0
+ addi 20,1,48
+.Lmemcpy:
+ lbz 16,0(31)
+ lbz 17,1(31)
+ lbz 18,2(31)
+ lbz 19,3(31)
+ addi 31,31,4
+ stb 16,0(20)
+ stb 17,1(20)
+ stb 18,2(20)
+ stb 19,3(20)
+ addi 20,20,4
+ bdnz .Lmemcpy
+ std 31,176(1)
+ addi 0,1,176
+ addi 31,1,48
+ std 5,184(1)
+ std 0,192(1)
+ std 31,200(1)
+ bl .Lsha2_block_private
+ ld 31,176(1)
+ ld 5,184(1)
+ addic. 5,5,-128
+ bne .Lunaligned
+
+.Ldone:
+ ld 0,400(1)
+ ld 14,240(1)
+ ld 15,248(1)
+ ld 16,256(1)
+ ld 17,264(1)
+ ld 18,272(1)
+ ld 19,280(1)
+ ld 20,288(1)
+ ld 21,296(1)
+ ld 22,304(1)
+ ld 23,312(1)
+ ld 24,320(1)
+ ld 25,328(1)
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ mtlr 0
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,18,3,0
+.long 0
+.align 4
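+/*
+ * Little-endian variant of the block routine: each message word is
+ * byte-reversed with the rotlwi/rlwimi sequences as it is loaded,
+ * since SHA-512 message words are big-endian.
+ */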
+.Lsha2_block_private:
+ ld 0,0(7)
+ lwz 3,0(31)
+ lwz 4,4(31)
+ rotlwi 5,3,8
+ rotlwi 16,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 16,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 16,4,24,16,23
+ insrdi 16,5,32,0
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ ld 0,8(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 3,8(31)
+ lwz 4,12(31)
+ rotlwi 5,3,8
+ rotlwi 17,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 17,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 17,4,24,16,23
+ insrdi 17,5,32,0
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ ld 0,16(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 3,16(31)
+ lwz 4,20(31)
+ rotlwi 5,3,8
+ rotlwi 18,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 18,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 18,4,24,16,23
+ insrdi 18,5,32,0
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ ld 0,24(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 3,24(31)
+ lwz 4,28(31)
+ rotlwi 5,3,8
+ rotlwi 19,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 19,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 19,4,24,16,23
+ insrdi 19,5,32,0
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ ld 0,32(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 3,32(31)
+ lwz 4,36(31)
+ rotlwi 5,3,8
+ rotlwi 20,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 20,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 20,4,24,16,23
+ insrdi 20,5,32,0
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ ld 0,40(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 3,40(31)
+ lwz 4,44(31)
+ rotlwi 5,3,8
+ rotlwi 21,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 21,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 21,4,24,16,23
+ insrdi 21,5,32,0
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ ld 0,48(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 3,48(31)
+ lwz 4,52(31)
+ rotlwi 5,3,8
+ rotlwi 22,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 22,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 22,4,24,16,23
+ insrdi 22,5,32,0
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ ld 0,56(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 3,56(31)
+ lwz 4,60(31)
+ rotlwi 5,3,8
+ rotlwi 23,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 23,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 23,4,24,16,23
+ insrdi 23,5,32,0
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ ld 0,64(7)
+ add 8,8,3
+ add 8,8,5
+
+ lwz 3,64(31)
+ lwz 4,68(31)
+ rotlwi 5,3,8
+ rotlwi 24,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 24,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 24,4,24,16,23
+ insrdi 24,5,32,0
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ ld 0,72(7)
+ add 15,15,3
+ add 15,15,5
+
+ lwz 3,72(31)
+ lwz 4,76(31)
+ rotlwi 5,3,8
+ rotlwi 25,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 25,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 25,4,24,16,23
+ insrdi 25,5,32,0
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ ld 0,80(7)
+ add 14,14,3
+ add 14,14,5
+
+ lwz 3,80(31)
+ lwz 4,84(31)
+ rotlwi 5,3,8
+ rotlwi 26,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 26,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 26,4,24,16,23
+ insrdi 26,5,32,0
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ ld 0,88(7)
+ add 6,6,3
+ add 6,6,5
+
+ lwz 3,88(31)
+ lwz 4,92(31)
+ rotlwi 5,3,8
+ rotlwi 27,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 27,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 27,4,24,16,23
+ insrdi 27,5,32,0
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ ld 0,96(7)
+ add 12,12,3
+ add 12,12,5
+
+ lwz 3,96(31)
+ lwz 4,100(31)
+ rotlwi 5,3,8
+ rotlwi 28,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 28,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 28,4,24,16,23
+ insrdi 28,5,32,0
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ ld 0,104(7)
+ add 11,11,3
+ add 11,11,5
+
+ lwz 3,104(31)
+ lwz 4,108(31)
+ rotlwi 5,3,8
+ rotlwi 29,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 29,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 29,4,24,16,23
+ insrdi 29,5,32,0
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ ld 0,112(7)
+ add 10,10,3
+ add 10,10,5
+
+ lwz 3,112(31)
+ lwz 4,116(31)
+ rotlwi 5,3,8
+ rotlwi 30,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 30,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 30,4,24,16,23
+ insrdi 30,5,32,0
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ ld 0,120(7)
+ add 9,9,3
+ add 9,9,5
+
+ lwz 3,120(31)
+ lwz 4,124(31)
+ rotlwi 5,3,8
+ rotlwi 31,4,8
+ rlwimi 5,3,24,0,7
+ rlwimi 31,4,24,0,7
+ rlwimi 5,3,24,16,23
+ rlwimi 31,4,24,16,23
+ insrdi 31,5,32,0
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
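+/*
+ * Rounds 0-15 are fully unrolled above; the CTR loop below runs four
+ * times at sixteen rounds per iteration for the full 80 SHA-512 rounds.
+ */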
+ li 5,4
+ mtctr 5
+.align 4
+.Lrounds:
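+/*
+ * Rounds 16-79 expand the message schedule on the fly:
+ * W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16], with
+ * sigma0 = ROTR1^ROTR8^SHR7 and sigma1 = ROTR19^ROTR61^SHR6. W[0..15]
+ * live in r16-r31 as a circular buffer, and the addi advances the K
+ * pointer by sixteen constants (128 bytes) per iteration.
+ */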
+ addi 7,7,128
+ rotrdi 3,17,1
+ rotrdi 4,17,8
+ rotrdi 5,30,19
+ rotrdi 0,30,61
+ xor 3,3,4
+ srdi 4,17,7
+ xor 5,5,0
+ srdi 0,30,6
+ add 16,16,25
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,0(7)
+ add 16,16,3
+ add 16,16,5
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,16
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrdi 3,18,1
+ rotrdi 4,18,8
+ rotrdi 5,31,19
+ rotrdi 0,31,61
+ xor 3,3,4
+ srdi 4,18,7
+ xor 5,5,0
+ srdi 0,31,6
+ add 17,17,26
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,8(7)
+ add 17,17,3
+ add 17,17,5
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,17
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrdi 3,19,1
+ rotrdi 4,19,8
+ rotrdi 5,16,19
+ rotrdi 0,16,61
+ xor 3,3,4
+ srdi 4,19,7
+ xor 5,5,0
+ srdi 0,16,6
+ add 18,18,27
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,16(7)
+ add 18,18,3
+ add 18,18,5
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,18
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrdi 3,20,1
+ rotrdi 4,20,8
+ rotrdi 5,17,19
+ rotrdi 0,17,61
+ xor 3,3,4
+ srdi 4,20,7
+ xor 5,5,0
+ srdi 0,17,6
+ add 19,19,28
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,24(7)
+ add 19,19,3
+ add 19,19,5
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,19
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrdi 3,21,1
+ rotrdi 4,21,8
+ rotrdi 5,18,19
+ rotrdi 0,18,61
+ xor 3,3,4
+ srdi 4,21,7
+ xor 5,5,0
+ srdi 0,18,6
+ add 20,20,29
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,32(7)
+ add 20,20,3
+ add 20,20,5
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,20
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrdi 3,22,1
+ rotrdi 4,22,8
+ rotrdi 5,19,19
+ rotrdi 0,19,61
+ xor 3,3,4
+ srdi 4,22,7
+ xor 5,5,0
+ srdi 0,19,6
+ add 21,21,30
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,40(7)
+ add 21,21,3
+ add 21,21,5
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,21
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrdi 3,23,1
+ rotrdi 4,23,8
+ rotrdi 5,20,19
+ rotrdi 0,20,61
+ xor 3,3,4
+ srdi 4,23,7
+ xor 5,5,0
+ srdi 0,20,6
+ add 22,22,31
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,48(7)
+ add 22,22,3
+ add 22,22,5
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,22
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrdi 3,24,1
+ rotrdi 4,24,8
+ rotrdi 5,21,19
+ rotrdi 0,21,61
+ xor 3,3,4
+ srdi 4,24,7
+ xor 5,5,0
+ srdi 0,21,6
+ add 23,23,16
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,56(7)
+ add 23,23,3
+ add 23,23,5
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,23
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ rotrdi 3,25,1
+ rotrdi 4,25,8
+ rotrdi 5,22,19
+ rotrdi 0,22,61
+ xor 3,3,4
+ srdi 4,25,7
+ xor 5,5,0
+ srdi 0,22,6
+ add 24,24,17
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,64(7)
+ add 24,24,3
+ add 24,24,5
+ rotrdi 3,12,14
+ rotrdi 4,12,18
+ and 5,6,12
+ xor 3,3,4
+ add 15,15,0
+ andc 0,14,12
+ rotrdi 4,4,23
+ or 5,5,0
+ add 15,15,24
+ xor 3,3,4
+ add 15,15,5
+ add 15,15,3
+
+ rotrdi 3,8,28
+ rotrdi 4,8,34
+ and 5,8,9
+ and 0,8,10
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,9,10
+ xor 3,3,4
+ add 11,11,15
+ xor 5,5,0
+ add 15,15,3
+ add 15,15,5
+
+ rotrdi 3,26,1
+ rotrdi 4,26,8
+ rotrdi 5,23,19
+ rotrdi 0,23,61
+ xor 3,3,4
+ srdi 4,26,7
+ xor 5,5,0
+ srdi 0,23,6
+ add 25,25,18
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,72(7)
+ add 25,25,3
+ add 25,25,5
+ rotrdi 3,11,14
+ rotrdi 4,11,18
+ and 5,12,11
+ xor 3,3,4
+ add 14,14,0
+ andc 0,6,11
+ rotrdi 4,4,23
+ or 5,5,0
+ add 14,14,25
+ xor 3,3,4
+ add 14,14,5
+ add 14,14,3
+
+ rotrdi 3,15,28
+ rotrdi 4,15,34
+ and 5,15,8
+ and 0,15,9
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,8,9
+ xor 3,3,4
+ add 10,10,14
+ xor 5,5,0
+ add 14,14,3
+ add 14,14,5
+
+ rotrdi 3,27,1
+ rotrdi 4,27,8
+ rotrdi 5,24,19
+ rotrdi 0,24,61
+ xor 3,3,4
+ srdi 4,27,7
+ xor 5,5,0
+ srdi 0,24,6
+ add 26,26,19
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,80(7)
+ add 26,26,3
+ add 26,26,5
+ rotrdi 3,10,14
+ rotrdi 4,10,18
+ and 5,11,10
+ xor 3,3,4
+ add 6,6,0
+ andc 0,12,10
+ rotrdi 4,4,23
+ or 5,5,0
+ add 6,6,26
+ xor 3,3,4
+ add 6,6,5
+ add 6,6,3
+
+ rotrdi 3,14,28
+ rotrdi 4,14,34
+ and 5,14,15
+ and 0,14,8
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,15,8
+ xor 3,3,4
+ add 9,9,6
+ xor 5,5,0
+ add 6,6,3
+ add 6,6,5
+
+ rotrdi 3,28,1
+ rotrdi 4,28,8
+ rotrdi 5,25,19
+ rotrdi 0,25,61
+ xor 3,3,4
+ srdi 4,28,7
+ xor 5,5,0
+ srdi 0,25,6
+ add 27,27,20
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,88(7)
+ add 27,27,3
+ add 27,27,5
+ rotrdi 3,9,14
+ rotrdi 4,9,18
+ and 5,10,9
+ xor 3,3,4
+ add 12,12,0
+ andc 0,11,9
+ rotrdi 4,4,23
+ or 5,5,0
+ add 12,12,27
+ xor 3,3,4
+ add 12,12,5
+ add 12,12,3
+
+ rotrdi 3,6,28
+ rotrdi 4,6,34
+ and 5,6,14
+ and 0,6,15
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,14,15
+ xor 3,3,4
+ add 8,8,12
+ xor 5,5,0
+ add 12,12,3
+ add 12,12,5
+
+ rotrdi 3,29,1
+ rotrdi 4,29,8
+ rotrdi 5,26,19
+ rotrdi 0,26,61
+ xor 3,3,4
+ srdi 4,29,7
+ xor 5,5,0
+ srdi 0,26,6
+ add 28,28,21
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,96(7)
+ add 28,28,3
+ add 28,28,5
+ rotrdi 3,8,14
+ rotrdi 4,8,18
+ and 5,9,8
+ xor 3,3,4
+ add 11,11,0
+ andc 0,10,8
+ rotrdi 4,4,23
+ or 5,5,0
+ add 11,11,28
+ xor 3,3,4
+ add 11,11,5
+ add 11,11,3
+
+ rotrdi 3,12,28
+ rotrdi 4,12,34
+ and 5,12,6
+ and 0,12,14
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,6,14
+ xor 3,3,4
+ add 15,15,11
+ xor 5,5,0
+ add 11,11,3
+ add 11,11,5
+
+ rotrdi 3,30,1
+ rotrdi 4,30,8
+ rotrdi 5,27,19
+ rotrdi 0,27,61
+ xor 3,3,4
+ srdi 4,30,7
+ xor 5,5,0
+ srdi 0,27,6
+ add 29,29,22
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,104(7)
+ add 29,29,3
+ add 29,29,5
+ rotrdi 3,15,14
+ rotrdi 4,15,18
+ and 5,8,15
+ xor 3,3,4
+ add 10,10,0
+ andc 0,9,15
+ rotrdi 4,4,23
+ or 5,5,0
+ add 10,10,29
+ xor 3,3,4
+ add 10,10,5
+ add 10,10,3
+
+ rotrdi 3,11,28
+ rotrdi 4,11,34
+ and 5,11,12
+ and 0,11,6
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,12,6
+ xor 3,3,4
+ add 14,14,10
+ xor 5,5,0
+ add 10,10,3
+ add 10,10,5
+
+ rotrdi 3,31,1
+ rotrdi 4,31,8
+ rotrdi 5,28,19
+ rotrdi 0,28,61
+ xor 3,3,4
+ srdi 4,31,7
+ xor 5,5,0
+ srdi 0,28,6
+ add 30,30,23
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,112(7)
+ add 30,30,3
+ add 30,30,5
+ rotrdi 3,14,14
+ rotrdi 4,14,18
+ and 5,15,14
+ xor 3,3,4
+ add 9,9,0
+ andc 0,8,14
+ rotrdi 4,4,23
+ or 5,5,0
+ add 9,9,30
+ xor 3,3,4
+ add 9,9,5
+ add 9,9,3
+
+ rotrdi 3,10,28
+ rotrdi 4,10,34
+ and 5,10,11
+ and 0,10,12
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,11,12
+ xor 3,3,4
+ add 6,6,9
+ xor 5,5,0
+ add 9,9,3
+ add 9,9,5
+
+ rotrdi 3,16,1
+ rotrdi 4,16,8
+ rotrdi 5,29,19
+ rotrdi 0,29,61
+ xor 3,3,4
+ srdi 4,16,7
+ xor 5,5,0
+ srdi 0,29,6
+ add 31,31,24
+ xor 3,3,4
+ xor 5,5,0
+ ld 0,120(7)
+ add 31,31,3
+ add 31,31,5
+ rotrdi 3,6,14
+ rotrdi 4,6,18
+ and 5,14,6
+ xor 3,3,4
+ add 8,8,0
+ andc 0,15,6
+ rotrdi 4,4,23
+ or 5,5,0
+ add 8,8,31
+ xor 3,3,4
+ add 8,8,5
+ add 8,8,3
+
+ rotrdi 3,9,28
+ rotrdi 4,9,34
+ and 5,9,10
+ and 0,9,11
+ xor 3,3,4
+ rotrdi 4,4,5
+ xor 5,5,0
+ and 0,10,11
+ xor 3,3,4
+ add 12,12,8
+ xor 5,5,0
+ add 8,8,3
+ add 8,8,5
+
+ bdnz .Lrounds
+
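+/*
+ * End of the 80 rounds: reload the context (r3), input (r31) and
+ * end-of-input (r5) pointers, rewind the K pointer by 512 bytes
+ * (4 x 128), add the working variables (r8-r12, r6, r14, r15) into the
+ * saved state, advance the input by one 128-byte block, and loop until
+ * the end pointer is reached.
+ */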
+ ld 3,208(1)
+ ld 31,200(1)
+ ld 5,192(1)
+ subi 7,7,512
+
+ ld 16,0(3)
+ ld 17,8(3)
+ ld 18,16(3)
+ ld 19,24(3)
+ ld 20,32(3)
+ ld 21,40(3)
+ ld 22,48(3)
+ addi 31,31,128
+ ld 23,56(3)
+ add 8,8,16
+ add 9,9,17
+ std 31,200(1)
+ add 10,10,18
+ std 8,0(3)
+ add 11,11,19
+ std 9,8(3)
+ add 12,12,20
+ std 10,16(3)
+ add 6,6,21
+ std 11,24(3)
+ add 14,14,22
+ std 12,32(3)
+ add 15,15,23
+ std 6,40(3)
+ std 14,48(3)
+ cmpld 31,5
+ std 15,56(3)
+ bne .Lsha2_block_private
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.size zfs_sha512_ppc,.-zfs_sha512_ppc
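+/*
+ * .LPICmeup returns the address of the constant table below in r7,
+ * computed position-independently from a bcl 20,31,$+4 / mflr pair
+ * plus a fixed offset.
+ */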
+.align 6
+.LPICmeup:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 7
+ addi 7,7,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
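+/*
+ * SHA-512 round constants K[0..79]: each 64-bit constant is stored as a
+ * (low, high) pair of 32-bit words so the little-endian 64-bit loads
+ * above reassemble it, e.g. K[0] = 0x428a2f98d728ae22.
+ */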
+.long 0xd728ae22,0x428a2f98
+.long 0x23ef65cd,0x71374491
+.long 0xec4d3b2f,0xb5c0fbcf
+.long 0x8189dbbc,0xe9b5dba5
+.long 0xf348b538,0x3956c25b
+.long 0xb605d019,0x59f111f1
+.long 0xaf194f9b,0x923f82a4
+.long 0xda6d8118,0xab1c5ed5
+.long 0xa3030242,0xd807aa98
+.long 0x45706fbe,0x12835b01
+.long 0x4ee4b28c,0x243185be
+.long 0xd5ffb4e2,0x550c7dc3
+.long 0xf27b896f,0x72be5d74
+.long 0x3b1696b1,0x80deb1fe
+.long 0x25c71235,0x9bdc06a7
+.long 0xcf692694,0xc19bf174
+.long 0x9ef14ad2,0xe49b69c1
+.long 0x384f25e3,0xefbe4786
+.long 0x8b8cd5b5,0x0fc19dc6
+.long 0x77ac9c65,0x240ca1cc
+.long 0x592b0275,0x2de92c6f
+.long 0x6ea6e483,0x4a7484aa
+.long 0xbd41fbd4,0x5cb0a9dc
+.long 0x831153b5,0x76f988da
+.long 0xee66dfab,0x983e5152
+.long 0x2db43210,0xa831c66d
+.long 0x98fb213f,0xb00327c8
+.long 0xbeef0ee4,0xbf597fc7
+.long 0x3da88fc2,0xc6e00bf3
+.long 0x930aa725,0xd5a79147
+.long 0xe003826f,0x06ca6351
+.long 0x0a0e6e70,0x14292967
+.long 0x46d22ffc,0x27b70a85
+.long 0x5c26c926,0x2e1b2138
+.long 0x5ac42aed,0x4d2c6dfc
+.long 0x9d95b3df,0x53380d13
+.long 0x8baf63de,0x650a7354
+.long 0x3c77b2a8,0x766a0abb
+.long 0x47edaee6,0x81c2c92e
+.long 0x1482353b,0x92722c85
+.long 0x4cf10364,0xa2bfe8a1
+.long 0xbc423001,0xa81a664b
+.long 0xd0f89791,0xc24b8b70
+.long 0x0654be30,0xc76c51a3
+.long 0xd6ef5218,0xd192e819
+.long 0x5565a910,0xd6990624
+.long 0x5771202a,0xf40e3585
+.long 0x32bbd1b8,0x106aa070
+.long 0xb8d2d0c8,0x19a4c116
+.long 0x5141ab53,0x1e376c08
+.long 0xdf8eeb99,0x2748774c
+.long 0xe19b48a8,0x34b0bcb5
+.long 0xc5c95a63,0x391c0cb3
+.long 0xe3418acb,0x4ed8aa4a
+.long 0x7763e373,0x5b9cca4f
+.long 0xd6b2b8a3,0x682e6ff3
+.long 0x5defb2fc,0x748f82ee
+.long 0x43172f60,0x78a5636f
+.long 0xa1f0ab72,0x84c87814
+.long 0x1a6439ec,0x8cc70208
+.long 0x23631e28,0x90befffa
+.long 0xde82bde9,0xa4506ceb
+.long 0xb2c67915,0xbef9a3f7
+.long 0xe372532b,0xc67178f2
+.long 0xea26619c,0xca273ece
+.long 0x21c0c207,0xd186b8c7
+.long 0xcde0eb1e,0xeada7dd6
+.long 0xee6ed178,0xf57d4f7f
+.long 0x72176fba,0x06f067aa
+.long 0xa2c898a6,0x0a637dc5
+.long 0xbef90dae,0x113f9804
+.long 0x131c471b,0x1b710b35
+.long 0x23047d84,0x28db77f5
+.long 0x40c72493,0x32caab7b
+.long 0x15c9bebc,0x3c9ebe0a
+.long 0x9c100d4c,0x431d67c4
+.long 0xcb3e42b6,0x4cc5d4be
+.long 0xfc657e2a,0x597f299c
+.long 0x3ad6faec,0x5fcb6fab
+.long 0x4a475817,0x6c44198c
+
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
index 4a80c62097ae..4f3fe3ec65d6 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
@@ -154,26 +154,26 @@
#include <sys/types.h>
-/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
uint32_t ct[4]) {
+ (void) rk, (void) Nr, (void) pt, (void) ct;
}
-/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
uint32_t pt[4]) {
+ (void) rk, (void) Nr, (void) ct, (void) pt;
}
-/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
uint64_t keyBits) {
+ (void) rk, (void) cipherKey, (void) keyBits;
return (0);
}
-/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
uint64_t keyBits) {
+ (void) rk, (void) cipherKey, (void) keyBits;
return (0);
}
@@ -208,7 +208,7 @@ _key_expansion_256a_local:
pxor %xmm1, %xmm0
movups %xmm0, (%rcx)
add $0x10, %rcx
- ret
+ RET
nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
@@ -236,7 +236,7 @@ _key_expansion_192a_local:
shufps $0b01001110, %xmm2, %xmm1
movups %xmm1, 0x10(%rcx)
add $0x20, %rcx
- ret
+ RET
SET_SIZE(_key_expansion_192a)
@@ -257,7 +257,7 @@ _key_expansion_192b_local:
movups %xmm0, (%rcx)
add $0x10, %rcx
- ret
+ RET
SET_SIZE(_key_expansion_192b)
@@ -271,7 +271,7 @@ _key_expansion_256b_local:
pxor %xmm1, %xmm2
movups %xmm2, (%rcx)
add $0x10, %rcx
- ret
+ RET
SET_SIZE(_key_expansion_256b)
@@ -376,9 +376,9 @@ rijndael_key_setup_enc_intel_local:
mov $14, %rax // return # rounds = 14
#endif
FRAME_END
- ret
+ RET
-.align 4
+.balign 4
.Lenc_key192:
cmp $192, %KEYSIZE32
jnz .Lenc_key128
@@ -413,9 +413,9 @@ rijndael_key_setup_enc_intel_local:
mov $12, %rax // return # rounds = 12
#endif
FRAME_END
- ret
+ RET
-.align 4
+.balign 4
.Lenc_key128:
cmp $128, %KEYSIZE32
jnz .Lenc_key_invalid_key_bits
@@ -453,13 +453,13 @@ rijndael_key_setup_enc_intel_local:
mov $10, %rax // return # rounds = 10
#endif
FRAME_END
- ret
+ RET
.Lenc_key_invalid_param:
#ifdef OPENSSL_INTERFACE
mov $-1, %rax // user key or AES key pointer is NULL
FRAME_END
- ret
+ RET
#else
/* FALLTHROUGH */
#endif /* OPENSSL_INTERFACE */
@@ -471,7 +471,7 @@ rijndael_key_setup_enc_intel_local:
xor %rax, %rax // a key pointer is NULL or invalid keysize
#endif /* OPENSSL_INTERFACE */
FRAME_END
- ret
+ RET
SET_SIZE(rijndael_key_setup_enc_intel)
@@ -522,7 +522,7 @@ FRAME_BEGIN
add %AESKEY, %ROUNDS64
mov %ROUNDS64, %ENDAESKEY
-.align 4
+.balign 4
.Ldec_key_reorder_loop:
movups (%AESKEY), %xmm0
movups (%ROUNDS64), %xmm1
@@ -533,7 +533,7 @@ FRAME_BEGIN
cmp %AESKEY, %ROUNDS64
ja .Ldec_key_reorder_loop
-.align 4
+.balign 4
.Ldec_key_inv_loop:
movups (%rcx), %xmm0
// Convert an encryption round key to a form usable for decryption
@@ -548,7 +548,7 @@ FRAME_BEGIN
// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
// OpenSSL: rax = 0 for OK, or non-zero for error
FRAME_END
- ret
+ RET
SET_SIZE(rijndael_key_setup_dec_intel)
@@ -622,7 +622,7 @@ ENTRY_NP(aes_encrypt_intel)
movups -0x50(%KEYP), %KEY
aesenc %KEY, %STATE
-.align 4
+.balign 4
.Lenc192:
// AES 192 and 256
movups -0x40(%KEYP), %KEY
@@ -630,7 +630,7 @@ ENTRY_NP(aes_encrypt_intel)
movups -0x30(%KEYP), %KEY
aesenc %KEY, %STATE
-.align 4
+.balign 4
.Lenc128:
// AES 128, 192, and 256
movups -0x20(%KEYP), %KEY
@@ -655,7 +655,7 @@ ENTRY_NP(aes_encrypt_intel)
aesenclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output
- ret
+ RET
SET_SIZE(aes_encrypt_intel)
@@ -705,7 +705,7 @@ ENTRY_NP(aes_decrypt_intel)
movups -0x50(%KEYP), %KEY
aesdec %KEY, %STATE
-.align 4
+.balign 4
.Ldec192:
// AES 192 and 256
movups -0x40(%KEYP), %KEY
@@ -713,7 +713,7 @@ ENTRY_NP(aes_decrypt_intel)
movups -0x30(%KEYP), %KEY
aesdec %KEY, %STATE
-.align 4
+.balign 4
.Ldec128:
// AES 128, 192, and 256
movups -0x20(%KEYP), %KEY
@@ -738,7 +738,7 @@ ENTRY_NP(aes_decrypt_intel)
aesdeclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output
- ret
+ RET
SET_SIZE(aes_decrypt_intel)
#endif /* lint || __lint */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
index 9db3a3179230..c4870a28ead6 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
@@ -186,15 +186,15 @@
#if defined(lint) || defined(__lint)
#include <sys/types.h>
-/* ARGSUSED */
void
aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4],
- uint32_t ct[4]) {
+ uint32_t ct[4]) {
+ (void) rk, (void) Nr, (void) pt, (void) ct;
}
-/* ARGSUSED */
void
aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
- uint32_t pt[4]) {
+ uint32_t pt[4]) {
+	(void) rk, (void) Nr, (void) ct, (void) pt;
}
@@ -221,23 +221,23 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
// finite field multiplies by {02}, {04} and {08}
-#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]]
-#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]]
-#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]]
+#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
+#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
+#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
// finite field multiplies required in table generation
-#define f3(x) [[f2(x)] ^ [x]]
-#define f9(x) [[f8(x)] ^ [x]]
-#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]]
-#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]]
-#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]]
+#define f3(x) ((f2(x)) ^ (x))
+#define f9(x) ((f8(x)) ^ (x))
+#define fb(x) ((f8(x)) ^ (f2(x)) ^ (x))
+#define fd(x) ((f8(x)) ^ (f4(x)) ^ (x))
+#define fe(x) ((f8(x)) ^ (f4(x)) ^ (f2(x)))
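+// f2(x) is the GF(2^8) "xtime" step: shift left, then conditionally
+// reduce by the AES field polynomial x^8+x^4+x^3+x+1 (0x11b).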
// macros for expanding S-box data
-#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)]
-#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x]
-#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
+#define u8(x) (f2(x)), (x), (x), (f3(x)), (f2(x)), (x), (x), (f3(x))
+#define v8(x) (fe(x)), (f9(x)), (fd(x)), (fb(x)), (fe(x)), (f9(x)), (fd(x)), (x)
+#define w8(x) (x), 0, 0, 0, (x), 0, 0, 0
#define enc_vals(x) \
.byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \
@@ -693,8 +693,8 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
* int aes_encrypt(const unsigned char *in,
* unsigned char *out, const aes_encrypt_ctx cx[1])/
*/
-.data
-.align 64
+SECTION_STATIC
+.balign 64
enc_tab:
enc_vals(u8)
#ifdef LAST_ROUND_TABLES
@@ -704,6 +704,7 @@ enc_tab:
ENTRY_NP(aes_encrypt_amd64)
+ ENDBR
#ifdef GLADMAN_INTERFACE
// Original interface
sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
@@ -717,7 +718,7 @@ ENTRY_NP(aes_encrypt_amd64)
#else
// OpenSolaris OS interface
- sub $[4*8], %rsp // Make room on stack to save registers
+ sub $(4*8), %rsp // Make room on stack to save registers
mov %rcx, (%rsp) // Save output pointer (P4) on stack
mov %rdi, %r8 // context (P1)
mov %rdx, %rdi // P3: save input pointer
@@ -748,11 +749,11 @@ ENTRY_NP(aes_encrypt_amd64)
lea (kptr,%rsi), kptr
// Jump based on byte key length * 16:
- cmp $[10*16], %esi
+ cmp $(10*16), %esi
je 3f
- cmp $[12*16], %esi
+ cmp $(12*16), %esi
je 2f
- cmp $[14*16], %esi
+ cmp $(14*16), %esi
je 1f
mov $-1, %rax // error
jmp 4f
@@ -784,8 +785,8 @@ ENTRY_NP(aes_encrypt_amd64)
mov 1*8(%rsp), %rbx
mov 2*8(%rsp), %rbp
mov 3*8(%rsp), %r12
- add $[4*8], %rsp
- ret
+ add $(4*8), %rsp
+ RET
SET_SIZE(aes_encrypt_amd64)
@@ -798,8 +799,8 @@ ENTRY_NP(aes_encrypt_amd64)
* int aes_decrypt(const unsigned char *in,
* unsigned char *out, const aes_encrypt_ctx cx[1])/
*/
-.data
-.align 64
+SECTION_STATIC
+.balign 64
dec_tab:
dec_vals(v8)
#ifdef LAST_ROUND_TABLES
@@ -809,6 +810,7 @@ dec_tab:
ENTRY_NP(aes_decrypt_amd64)
+ ENDBR
#ifdef GLADMAN_INTERFACE
// Original interface
sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
@@ -822,7 +824,7 @@ ENTRY_NP(aes_decrypt_amd64)
#else
// OpenSolaris OS interface
- sub $[4*8], %rsp // Make room on stack to save registers
+ sub $(4*8), %rsp // Make room on stack to save registers
mov %rcx, (%rsp) // Save output pointer (P4) on stack
mov %rdi, %r8 // context (P1)
mov %rdx, %rdi // P3: save input pointer
@@ -859,11 +861,11 @@ ENTRY_NP(aes_decrypt_amd64)
xor rofs+12(%rdi), %edx
// Jump based on byte key length * 16:
- cmp $[10*16], %esi
+ cmp $(10*16), %esi
je 3f
- cmp $[12*16], %esi
+ cmp $(12*16), %esi
je 2f
- cmp $[14*16], %esi
+ cmp $(14*16), %esi
je 1f
mov $-1, %rax // error
jmp 4f
@@ -895,11 +897,11 @@ ENTRY_NP(aes_decrypt_amd64)
mov 1*8(%rsp), %rbx
mov 2*8(%rsp), %rbp
mov 3*8(%rsp), %r12
- add $[4*8], %rsp
- ret
+ add $(4*8), %rsp
+ RET
SET_SIZE(aes_decrypt_amd64)
-#endif /* lint || __lint */
+#endif /* lint || __lint */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h
index eb13f72b10d8..003534e0fa50 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
new file mode 100644
index 000000000000..0ebec5c1095e
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
@@ -0,0 +1,1828 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_AVX2)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+.intel_syntax noprefix
+.text
+
+ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
+ ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 680
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+0x280], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0+rip]
+ vpand ymm2, ymm0, ymmword ptr [ADD1+rip]
+ vmovdqa ymmword ptr [rsp+0x220], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+0x240], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+0x260], ymm3
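+	/*
+	 * The block above builds the per-lane 64-bit block counters: lane
+	 * offsets 0..7 (masked to zero unless increment_counter is set)
+	 * are added to the low counter words, and the sign-flip/signed-
+	 * compare trick detects unsigned overflow so the carry propagates
+	 * into the high words stored at [rsp+0x260].
+	 */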
+ shl rdx, 6
+ mov qword ptr [rsp+0x2A0], rdx
+ cmp rsi, 8
+ jc 3f
+2:
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
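+	/*
+	 * Broadcast the eight key/CV words so each ymm lane hashes a
+	 * different one of the eight input streams (pointers in r8-r15).
+	 */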
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
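+	/*
+	 * eax = flags | flags_start for the first block; inside the loop
+	 * the flags_end bit is OR'd in and selected via cmove once rdx
+	 * (bytes consumed) reaches blocks*64 at [rsp+0x2A0].
+	 */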
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x2A0]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x200], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x20], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x40], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x60], ymm11
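+	/*
+	 * The unpack/shuffle cascade above transposes the eight 16-byte
+	 * loads into message words m0-m3, one word per lane; the same
+	 * pattern repeats below for the remaining twelve words.
+	 */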
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x80], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0xA0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0xC0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0xE0], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x100], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x120], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x140], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x160], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x180], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x1A0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x1C0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x1E0], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+0x200]
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
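+	/* Prefetch the next 64-byte block of all eight inputs. */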
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+0x240]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+0x260]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
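+	/*
+	 * The vpsrld/vpslld/vpor triples implement 32-bit rotate right by
+	 * 12 (and by 7 further down); the vpshufb with the ROT16/ROT8
+	 * tables performs the byte-aligned rotates of the G function.
+	 */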
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x50]
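+	/* Transpose the eight lanes' chaining values back to per-input order and store 32 bytes per input. */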
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
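+	/* Advance the per-lane 64-bit counters: add to the low words, then carry into the high words via the sign-flip compare (AVX2 lacks an unsigned vector compare). */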
+ vmovdqa ymm0, ymmword ptr [rsp+0x220]
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+0x240]
+ vmovdqa ymmword ptr [rsp+0x240], ymm1
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm0, ymm2
+ vmovdqa ymm0, ymmword ptr [rsp+0x260]
+ vpsubd ymm2, ymm0, ymm2
+ vmovdqa ymmword ptr [rsp+0x260], ymm2
+ add rdi, 64
+ add rbx, 256
+ mov qword ptr [rbp+0x50], rbx
+ sub rsi, 8
+ cmp rsi, 8
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ vzeroupper
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ RET
+.p2align 5
+3:
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, qword ptr [rsp+0x2A0]
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ test rsi, 0x4
+ je 3f
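+	/* Four-input tail: two interleaved two-lane states (inputs 0/1 in ymm0-ymm3, inputs 2/3 in ymm8-ymm11). */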
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovdqa ymm8, ymm0
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+0x240]
+ vbroadcasti128 ymm13, xmmword ptr [rsp+0x260]
+ vpunpckldq ymm14, ymm12, ymm13
+ vpunpckhdq ymm15, ymm12, ymm13
+ vpermq ymm14, ymm14, 0x50
+ vpermq ymm15, ymm15, 0x50
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpblendd ymm14, ymm14, ymm12, 0x44
+ vpblendd ymm15, ymm15, ymm12, 0x44
+ vmovdqa ymmword ptr [rsp], ymm14
+ vmovdqa ymmword ptr [rsp+0x20], ymm15
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vmovups ymm2, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm2, ymm3, 136
+ vshufps ymm5, ymm2, ymm3, 221
+ vmovups ymm2, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm2, ymm3, 136
+ vshufps ymm7, ymm2, ymm3, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ vmovups ymm10, ymmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups ymm10, ymmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 0x93
+ vpshufd ymm15, ymm15, 0x93
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ vpbroadcastd ymm2, dword ptr [rsp+0x200]
+ vmovdqa ymm3, ymmword ptr [rsp]
+ vmovdqa ymm11, ymmword ptr [rsp+0x20]
+ vpblendd ymm3, ymm3, ymm2, 0x88
+ vpblendd ymm11, ymm11, ymm2, 0x88
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa ymm10, ymm2
+ mov al, 7
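+	/* al counts the seven compression rounds. */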
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+0x40], ymm4
+ nop
+ vmovdqa ymmword ptr [rsp+0x60], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+0x80], ymm5
+ vmovdqa ymmword ptr [rsp+0xA0], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm8, ymm8, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpshufd ymm10, ymm10, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm8, ymm8, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ vpshufd ymm10, ymm10, 0x93
+ dec al
+ je 9f
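+	/* Not the final round: apply the message word permutation to both states. */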
+ vmovdqa ymm4, ymmword ptr [rsp+0x40]
+ vmovdqa ymm5, ymmword ptr [rsp+0x80]
+ vshufps ymm12, ymm4, ymm5, 214
+ vpshufd ymm13, ymm4, 0x0F
+ vpshufd ymm4, ymm12, 0x39
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0xAA
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 0x88
+ vpshufd ymm12, ymm12, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymmword ptr [rsp+0x40], ymm13
+ vmovdqa ymmword ptr [rsp+0x80], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+0x60]
+ vmovdqa ymm13, ymmword ptr [rsp+0xA0]
+ vshufps ymm5, ymm12, ymm13, 214
+ vpshufd ymm6, ymm12, 0x0F
+ vpshufd ymm12, ymm5, 0x39
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0xAA
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 0x88
+ vpshufd ymm5, ymm5, 0x78
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 0x1E
+ vmovdqa ymm13, ymm6
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+0x40]
+ vmovdqa ymm6, ymmword ptr [rsp+0x80]
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqu xmmword ptr [rbx+0x40], xmm8
+ vmovdqu xmmword ptr [rbx+0x50], xmm9
+ vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01
+ vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01
+ vmovaps xmm8, xmmword ptr [rsp+0x280]
+ vmovaps xmm0, xmmword ptr [rsp+0x240]
+ vmovaps xmm1, xmmword ptr [rsp+0x250]
+ vmovaps xmm2, xmmword ptr [rsp+0x260]
+ vmovaps xmm3, xmmword ptr [rsp+0x270]
+ vblendvps xmm0, xmm0, xmm1, xmm8
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+0x240], xmm0
+ vmovaps xmmword ptr [rsp+0x260], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test rsi, 0x2
+ je 3f
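+	/* Two-input tail: one input per 128-bit lane of each ymm row. */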
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp+0x240]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x260], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x244]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x264], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]
+ vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x200]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
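+	/* Permute the message words for the next round. */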
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovaps ymm8, ymmword ptr [rsp+0x280]
+ vmovaps ymm0, ymmword ptr [rsp+0x240]
+ vmovups ymm1, ymmword ptr [rsp+0x248]
+ vmovaps ymm2, ymmword ptr [rsp+0x260]
+ vmovups ymm3, ymmword ptr [rsp+0x268]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+0x240], ymm0
+ vmovaps ymmword ptr [rsp+0x260], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test rsi, 0x1
+ je 4b
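+	/* Single-input tail: plain 128-bit (xmm) compression. */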
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm3, dword ptr [rsp+0x240]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+0x260], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm14, xmmword ptr [ROT16+rip]
+ vmovdqa xmm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
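+	/* Permute the message words for the next round. */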
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+SET_SIZE(zfs_blake3_hash_many_avx2)
+
+SECTION_STATIC
+
+.p2align 6
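+/* ADD0: per-lane counter offsets; ADD1: stride added after each eight-input pass. */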
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ADD1:
+ .long 8, 8, 8, 8, 8, 8, 8, 8
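+/* BLAKE3 IV words 0-3 (shared with SHA-256), each broadcast to all eight lanes. */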
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
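+/* Block length (64 bytes) broadcast to all lanes. */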
+BLAKE3_BLOCK_LEN:
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
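+/* vpshufb masks rotating every 32-bit word right by 16 (ROT16) or by 8 (ROT8). */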
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
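+/* XORing with the sign bit turns signed vpcmpgtd into an unsigned compare for counter carries. */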
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
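+/* IV words 0-3 as one 128-bit row: the third row of a fresh compression state. */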
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
+#endif /* HAVE_AVX2 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
new file mode 100644
index 000000000000..39830f1556bb
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
@@ -0,0 +1,2594 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+.intel_syntax noprefix
+.text
+
+ENTRY_ALIGN(zfs_blake3_hash_many_avx512, 64)
+ ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 144
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9
+ kmovw k1, r9d
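+	/* neg maps the boolean in r9 to 0 or all-ones, so k1 enables counter increments for all lanes or none. */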
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]
+ vpcmpltud k2, ymm2, ymm0
+ vpcmpltud k3, ymm3, ymm0
+ vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}
+ vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3
+ vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4
+ vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+0x80], rdx
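+	/* [rsp+0x80] holds the input length in bytes (block count << 6). */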
+ cmp rsi, 16
+ jc 3f
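+	/* 16-way main loop: broadcast the eight key words across all sixteen lanes. */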
+2:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]
+ vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]
+ vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4]
+ vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4]
+ vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4]
+ vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4]
+ vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
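+	/* Per-block loop: select this block's flags (cmove picks the end flags on the final block), then gather and transpose the sixteen message words. */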
+9:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1+rip]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4]
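+	/* Round 1 of 7, fully unrolled; AVX-512 vprord does the 16/12/8/7-bit rotates in one instruction each. */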
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
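+	/* Rounds done: XOR the two halves of the state to form the new chaining values. */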
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x50]
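+	/* Transpose the sixteen lanes' chaining values back to per-input order before storing. */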
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 0x88
+ vshufi32x4 zmm17, zmm1, zmm5, 0x88
+ vshufi32x4 zmm18, zmm2, zmm6, 0x88
+ vshufi32x4 zmm19, zmm3, zmm7, 0x88
+ vshufi32x4 zmm20, zmm0, zmm4, 0xDD
+ vshufi32x4 zmm21, zmm1, zmm5, 0xDD
+ vshufi32x4 zmm22, zmm2, zmm6, 0xDD
+ vshufi32x4 zmm23, zmm3, zmm7, 0xDD
+ vshufi32x4 zmm0, zmm16, zmm17, 0x88
+ vshufi32x4 zmm1, zmm18, zmm19, 0x88
+ vshufi32x4 zmm2, zmm20, zmm21, 0x88
+ vshufi32x4 zmm3, zmm22, zmm23, 0x88
+ vshufi32x4 zmm4, zmm16, zmm17, 0xDD
+ vshufi32x4 zmm5, zmm18, zmm19, 0xDD
+ vshufi32x4 zmm6, zmm20, zmm21, 0xDD
+ vshufi32x4 zmm7, zmm22, zmm23, 0xDD
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1
+ vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2
+ vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3
+ vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4
+ vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5
+ vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6
+ vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7
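+	/* Advance the counters: masked add of the lane count (ADD16) to the low words; vpcmpltud flags wrapped lanes so the high words get the carry. */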
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40]
+ vmovdqa32 zmm2, zmm0
+ vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16}
+ vpcmpltud k2, zmm2, zmm0
+ vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+0x50], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ vzeroupper
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ RET
+.p2align 6
+3:
+ test esi, 0x8
+ je 3f
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+2:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa ymm12, ymmword ptr [rsp]
+ vmovdqa ymm13, ymmword ptr [rsp+0x40]
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd ymm15, dword ptr [rsp+0x88]
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
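+ /* Feed-forward: XOR the two state halves into the eight new CV rows. */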
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
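+ /*
+  * RFLAGS still hold the result of the "cmp rdx, r15" block-count test
+  * above: neither the vector instructions nor movzx modify them, so the
+  * jne below loops until every 64-byte block has been compressed.
+  */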
+ movzx eax, byte ptr [rbp+0x38]
+ jne 2b
+ mov rbx, qword ptr [rbp+0x50]
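+ /*
+  * Transpose the lane-ordered CVs back into eight contiguous 32-byte
+  * hashes and store 256 bytes of output.
+  */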
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp]
+ vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20]
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2
+ add rbx, 256
+ mov qword ptr [rbp+0x50], rbx
+ add rdi, 64
+ sub rsi, 8
+3:
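+ /*
+  * Tail: compress four inputs at once, one independent 128-bit state
+  * per lane of each zmm register.
+  */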
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, qword ptr [rsp+0x80]
+ movzx r13, byte ptr [rbp+0x38]
+ movzx r12, byte ptr [rbp+0x48]
+ test esi, 0x4
+ je 3f
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx]
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10]
+ vmovdqa xmm12, xmmword ptr [rsp]
+ vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10]
+ vpunpckldq xmm14, xmm12, xmm13
+ vpunpckhdq xmm15, xmm12, xmm13
+ vpermq ymm14, ymm14, 0xDC
+ vpermq ymm15, ymm15, 0xDC
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vinserti32x8 zmm13, zmm14, ymm15, 0x01
+ mov eax, 17476
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov eax, 43690
+ kmovw k3, eax
+ mov eax, 34952
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
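+ /*
+  * eax enters the loop as flags | flags_start; each iteration ORs in
+  * flags_end, then cmovne undoes it for every block except the last.
+  */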
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vmovdqa32 zmm2, zmm15
+ vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4]
+ vpblendmd zmm3 {k4}, zmm13, zmm8
+ vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x30]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03
+ vshufps zmm4, zmm8, zmm9, 136
+ vshufps zmm5, zmm8, zmm9, 221
+ vmovups zmm8, zmmword ptr [r8+rdx-0x20]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x10]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03
+ vshufps zmm6, zmm8, zmm9, 136
+ vshufps zmm7, zmm8, zmm9, 221
+ vpshufd zmm6, zmm6, 0x93
+ vpshufd zmm7, zmm7, 0x93
+ mov al, 7
+9:
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x93
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x39
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x39
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x93
+ dec al
+ jz 9f
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0x0F
+ vpshufd zmm4, zmm8, 0x39
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 0x78
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 0x1E
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp 9b
+9:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03
+ vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x40]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x40], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
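+ /*
+  * Tail: two inputs, one independent 128-bit state per lane of each
+  * ymm register.
+  */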
+ test esi, 0x2
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x4]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x88]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
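+ /* Tail: a single remaining input, compressed on plain 128-bit rows. */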
+ test esi, 0x1
+ je 4b
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+SET_SIZE(zfs_blake3_hash_many_avx512)
+
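+/*
+ * One-block compression in place: rdi = 32-byte chaining value (updated
+ * on return), rsi = 64-byte block, edx = block length, rcx = 64-bit
+ * counter, r8b = domain flags.
+ */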
+ENTRY_ALIGN(zfs_blake3_compress_in_place_avx512, 64)
+ ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi]
+ vmovdqu xmm1, xmmword ptr [rdi+0x10]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vmovdqu xmmword ptr [rdi], xmm0
+ vmovdqu xmmword ptr [rdi+0x10], xmm1
+ RET
+SET_SIZE(zfs_blake3_compress_in_place_avx512)
+
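+/*
+ * XOF variant of the one-block compression: same inputs, plus r9 = 64-byte
+ * output buffer.  Rows 0-1 of the final state are XORed together as usual;
+ * rows 2-3 are XORed with the input chaining value to extend the output.
+ */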
+ENTRY_ALIGN(zfs_blake3_compress_xof_avx512, 64)
+ ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi]
+ vmovdqu xmm1, xmmword ptr [rdi+0x10]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vpxor xmm2, xmm2, [rdi]
+ vpxor xmm3, xmm3, [rdi+0x10]
+ vmovdqu xmmword ptr [r9], xmm0
+ vmovdqu xmmword ptr [r9+0x10], xmm1
+ vmovdqu xmmword ptr [r9+0x20], xmm2
+ vmovdqu xmmword ptr [r9+0x30], xmm3
+ RET
+SET_SIZE(zfs_blake3_compress_xof_avx512)
+
+SECTION_STATIC
+
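+/*
+ * Lane-index and counter-increment tables for the wide paths, followed by
+ * the BLAKE3 IV: the first four words of the SHA-256 initialization vector.
+ */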
+.p2align 6
+INDEX0:
+ .long 0, 1, 2, 3, 16, 17, 18, 19
+ .long 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ .long 4, 5, 6, 7, 20, 21, 22, 23
+ .long 12, 13, 14, 15, 28, 29, 30, 31
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ .long 8, 9, 10, 11, 12, 13, 14, 15
+ADD1: .long 1
+
+ADD16: .long 16
+BLAKE3_BLOCK_LEN:
+ .long 64
+.p2align 6
+BLAKE3_IV:
+BLAKE3_IV_0:
+ .long 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A
+
+#endif /* HAVE_AVX512 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
new file mode 100644
index 000000000000..78c4ffac53a8
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
@@ -0,0 +1,2299 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves and Matthew Krupcale
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_SSE2)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+.intel_syntax noprefix
+
+SECTION_TEXT
+
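+/*
+ * SSE2 has neither rotate nor byte-shuffle instructions, so the G-function
+ * rotations in this file are emulated: rot16 as a 16-bit word swap
+ * (pshuflw/pshufhw 0xB1), rot12 and rot7 as shift pairs merged with por,
+ * and rot8 as shift pairs merged with pxor.
+ */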
+ENTRY_ALIGN(zfs_blake3_hash_many_sse2, 64)
+ ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 360
+ and rsp, 0xFFFFFFFFFFFFFFC0
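+ /*
+  * Build the per-lane 64-bit block counters.  r9d (increment_counter) is
+  * negated into an all-ones or all-zero mask that gates the ADD0/ADD1
+  * offsets; XORing with CMP_MSB_MASK biases both operands so the signed
+  * pcmpgtd acts as an unsigned compare, producing the carries that are
+  * subtracted (as -1) into the high counter words.
+  */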
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
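+ /*
+  * Load one 64-byte block from each of the four inputs and transpose
+  * 4x4 dword groups so every stored xmm holds the same message word
+  * from all four lanes.
+  */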
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
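+ /*
+  * Seven unrolled rounds follow; xmm12-xmm15 carry the fourth state row
+  * (counter lo/hi, block length, flags).
+  */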
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
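+ /* Rotate right by 16: swap the 16-bit halves of every dword. */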
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
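+ /*
+  * xmm8 is spilled above to free a scratch register; rotate right by 12
+  * via a shift pair.
+  */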
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
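+ /* Rotate right by 8 via a shift pair; pxor merges the disjoint halves. */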
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
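+	/*
+	 * All blocks of this group are done (SSE integer ops leave RFLAGS
+	 * untouched, so the jne above still sees the cmp rdx, r15 from the
+	 * loop top). Transpose the lane-oriented state back to per-input
+	 * order and store the four 32-byte chaining values at rbx.
+	 */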
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
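+	/*
+	 * Advance the four per-lane block counters. pcmpgtd is a signed
+	 * compare, so both old and new low words are XORed with
+	 * CMP_MSB_MASK (0x80000000) to make the comparison unsigned; the
+	 * resulting all-ones carry lanes are subtracted from the high
+	 * words, i.e. added as +1.
+	 */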
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ RET
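+/*
+ * Tail paths: fewer than four inputs remain. The first block below
+ * handles two inputs at once by interleaving two compression states in
+ * the xmm registers; it falls through to a single-input path at the end.
+ */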
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ movd xmm13, dword ptr [rsp+0x124]
+ punpckldq xmm14, xmm13
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
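+	/*
+	 * Pack the block length (64) into the low dword and the block flags
+	 * into the high dword of one qword; punpcklqdq below appends it to
+	 * the counter words to form the fourth state row
+	 * [ctr_lo, ctr_hi, len, flags] for each lane.
+	 */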
+ shl rax, 0x20
+ or rax, 0x40
+ movq xmm3, rax
+ movdqa xmmword ptr [rsp+0x20], xmm3
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ punpcklqdq xmm3, xmmword ptr [rsp+0x20]
+ punpcklqdq xmm11, xmmword ptr [rsp+0x20]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
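+	/*
+	 * Permute the message vectors for the next round according to the
+	 * BLAKE3 schedule. SSE2 lacks pblendw, so the word blends are
+	 * emulated with pand/por against the PBLENDW_*_MASK constants.
+	 */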
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm13, xmm12
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ movdqa xmm13, xmm6
+ pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm12, xmm13
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm6, xmm5
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ movdqa xmmword ptr [rsp+0x30], xmm2
+ movdqa xmm2, xmm14
+ pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm5, xmm2
+ movdqa xmm2, xmmword ptr [rsp+0x30]
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ mov eax, dword ptr [rsp+0x130]
+ neg eax
+ mov r10d, dword ptr [rsp+0x110+8*rax]
+ mov r11d, dword ptr [rsp+0x120+8*rax]
+ mov dword ptr [rsp+0x110], r10d
+ mov dword ptr [rsp+0x120], r11d
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl rax, 32
+ or rax, 64
+ movq xmm12, rax
+ movdqa xmm3, xmm13
+ punpcklqdq xmm3, xmm12
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+SET_SIZE(zfs_blake3_hash_many_sse2)
+
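+/*
+ * Single-block compression that overwrites the caller's chaining value:
+ * the code reads the 32-byte CV at rdi and the 64-byte block at rsi,
+ * and packs rdx (block length) and r8 (flags) around the counter in rcx
+ * to build the fourth state row; only state words 0-7 are written back.
+ */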
+ENTRY_ALIGN(zfs_blake3_compress_in_place_sse2, 64)
+ ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl r8, 32
+ add rdx, r8
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rdi], xmm0
+ movups xmmword ptr [rdi+0x10], xmm1
+ RET
+SET_SIZE(zfs_blake3_compress_in_place_sse2)
+
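+/*
+ * Extended-output variant: same inputs as compress_in_place, but the
+ * full 64-byte output goes to the buffer at r9. The first half is the
+ * usual feed-forward (low state XOR high state); the second half XORs
+ * the high state words with the input chaining value, as the BLAKE3
+ * XOF defines.
+ */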
+ENTRY_ALIGN(zfs_blake3_compress_xof_sse2, 64)
+ ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ movdqu xmm4, xmmword ptr [rdi]
+ movdqu xmm5, xmmword ptr [rdi+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r9], xmm0
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ RET
+SET_SIZE(zfs_blake3_compress_xof_sse2)
+
+SECTION_STATIC
+.p2align 6
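+/*
+ * First four words of the BLAKE3 IV (the same constants as SHA-256's
+ * initial hash values); loaded as the third row of the compression
+ * state.
+ */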
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
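+/*
+ * SSE2 has no pblendw; the four masks below emulate the word blends
+ * needed by the message permutation with pand/por.
+ */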
+PBLENDW_0x33_MASK:
+ .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xCC_MASK:
+ .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
+PBLENDW_0x3F_MASK:
+ .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xC0_MASK:
+ .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
+
+#endif /* HAVE_SSE2 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
new file mode 100644
index 000000000000..8ee7be75a0e1
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
@@ -0,0 +1,2037 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
+ * Copyright (c) 2019-2020 Samuel Neves
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+#if defined(HAVE_SSE4_1)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+.intel_syntax noprefix
+
+.text
+
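+/*
+ * Apparently the same structure as the SSE2 hash_many above: four
+ * inputs hashed in parallel with two- and one-input tail paths. The
+ * main difference visible here is that rotates use pshufb byte
+ * shuffles via the ROT16/ROT8 tables.
+ */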
+ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64)
+ ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 360
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
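+	/*
+	 * With pshufb available, the >>>16 rotate is a single byte shuffle
+	 * against the ROT16 table (and >>>8 against ROT8 below), replacing
+	 * the SSE2 pshuflw/pshufhw and shift-and-or sequences.
+	 */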
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
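Note: the punpckldq/punpckhdq/punpcklqdq/punpckhqdq block above is a 4x4 transpose of 32-bit words. The hash state is kept one-lane-per-SSE-slot during compression, so the output words of the four lanes must be regathered into contiguous 32-byte chaining values before the movdqu stores to rbx. A scalar sketch of the same reshuffle (names illustrative, not from this file):

	#include <stdint.h>

	/* in[w][l] = word w of lane l (the SIMD layout);
	 * out[l][w] = contiguous output for lane l. */
	static void
	transpose_out(const uint32_t in[4][4], uint32_t out[4][4])
	{
		for (int w = 0; w < 4; w++)
			for (int l = 0; l < 4; l++)
				out[l][w] = in[w][l];
	}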
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ RET
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ pinsrd xmm14, dword ptr [rsp+0x124], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16+rip]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8+rip]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0xCC
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0xC0
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0xCC
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0xC0
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ movdqa xmm0, xmmword ptr [rsp+0x130]
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm2, xmmword ptr [rsp+0x120]
+ movdqu xmm3, xmmword ptr [rsp+0x118]
+ movdqu xmm4, xmmword ptr [rsp+0x128]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+SET_SIZE(zfs_blake3_hash_many_sse41)
+
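Note: the rounds of zfs_blake3_hash_many_sse41 above run four input blocks in parallel, one per 32-bit SSE lane, so each instruction applies the same BLAKE3 quarter-round step to all four lanes. SSE4.1 has no 32-bit rotate: the 12- and 7-bit rotates are synthesized as psrld/pslld/por pairs, while the 16- and 8-bit rotates are byte shuffles through the ROT16/ROT8 masks defined at the end of the file; `mov al, 7` / `dec al` counts the seven rounds, and the pblendw/shufps/pshufd blocks between rounds apply the message-word permutation in registers instead of reloading. A scalar sketch of the G function being vectorized (standard BLAKE3, not copied from this file):

	#include <stdint.h>

	/* The psrld/pslld/por pattern above is the SIMD form of this rotate. */
	static inline uint32_t
	rotr32(uint32_t w, uint32_t c)
	{
		return ((w >> c) | (w << (32 - c)));
	}

	/* One BLAKE3 G: a,b,c,d are state words, x,y message words. */
	static void
	g(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,
	    uint32_t x, uint32_t y)
	{
		*a += *b + x;
		*d = rotr32(*d ^ *a, 16);	/* pshufb ROT16 */
		*c += *d;
		*b = rotr32(*b ^ *c, 12);	/* psrld 12 / pslld 20 / por */
		*a += *b + y;
		*d = rotr32(*d ^ *a, 8);	/* pshufb ROT8 */
		*c += *d;
		*b = rotr32(*b ^ *c, 7);	/* psrld 7 / pslld 25 / por */
	}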
+ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64)
+ ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl r8, 32
+ add rdx, r8
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rdi], xmm0
+ movups xmmword ptr [rdi+0x10], xmm1
+ RET
+SET_SIZE(zfs_blake3_compress_in_place_sse41)
+
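Note: zfs_blake3_compress_in_place_sse41 above builds the 4x4 state directly from its arguments: rows 0-1 are the chaining value loaded from rdi, row 2 is the BLAKE3_IV constant, and row 3 packs the 64-bit counter together with block length and flags (the `shl r8, 32` / `add rdx, r8` / punpcklqdq sequence). A sketch of the equivalent initialization, with argument names assumed from the usual BLAKE3 compress signature rather than taken from this file:

	#include <stdint.h>

	static const uint32_t iv[4] = {		/* BLAKE3_IV above */
		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
	};

	/* Initial compression state: cv, IV, then counter/len/flags. */
	static void
	init_state(uint32_t v[16], const uint32_t cv[8],
	    uint64_t counter, uint32_t block_len, uint32_t flags)
	{
		for (int i = 0; i < 8; i++)
			v[i] = cv[i];
		for (int i = 0; i < 4; i++)
			v[8 + i] = iv[i];
		v[12] = (uint32_t)counter;	/* movq xmm3, rcx */
		v[13] = (uint32_t)(counter >> 32);
		v[14] = block_len;		/* low half of rdx */
		v[15] = flags;			/* r8 shifted into rdx */
	}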
+ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64)
+ ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ movdqu xmm4, xmmword ptr [rdi]
+ movdqu xmm5, xmmword ptr [rdi+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r9], xmm0
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ RET
+SET_SIZE(zfs_blake3_compress_xof_sse41)
+
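Note: zfs_blake3_compress_xof_sse41 above shares its rounds with the in-place variant; only the tail differs. Instead of writing a 32-byte chaining value back, it reloads the input chaining value from rdi and emits the full 64-byte extended output at r9: the low half is rows 0-1 XOR rows 2-3, the high half is rows 2-3 XOR the input chaining value (the final pxor/movups block). A sketch of that finalization, little-endian assumed:

	#include <stdint.h>
	#include <string.h>

	/* XOF finalization: 64 output bytes from the 16-word state v
	 * and the original chaining value cv. */
	static void
	xof_finalize(uint8_t out[64], const uint32_t v[16],
	    const uint32_t cv[8])
	{
		uint32_t o[16];

		for (int i = 0; i < 8; i++)
			o[i] = v[i] ^ v[8 + i];		/* pxor xmm0,xmm2 ... */
		for (int i = 0; i < 8; i++)
			o[8 + i] = v[8 + i] ^ cv[i];	/* pxor xmm2,xmm4 ... */
		memcpy(out, o, sizeof (o));
	}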
+SECTION_STATIC
+
+.p2align 6
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+
+#endif /* HAVE_SSE4_1 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
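Note on the constant block that closes this file: ADD0/ADD1 hold the per-lane offsets 0..3 and the stride 4 used to maintain four block counters at once, and CMP_MSB_MASK enables an unsigned compare that SSE lacks. The counter update in the 4-way loop XORs the old and new low counter words with 0x80000000 and uses the signed pcmpgtd; lanes that wrapped produce all-ones (-1), which psubd then subtracts from the high words, adding the carry. The same trick in scalar form:

	#include <stdint.h>

	/* Unsigned a > b via a signed compare: flipping the sign bit
	 * (CMP_MSB_MASK) maps unsigned order onto signed order. */
	static inline int
	unsigned_gt(uint32_t a, uint32_t b)
	{
		return ((int32_t)(a ^ 0x80000000U) >
		    (int32_t)(b ^ 0x80000000U));
	}

	/* 64-bit counter increment from 32-bit halves, as the
	 * pcmpgtd/psubd pair does per lane. */
	static void
	ctr_add(uint32_t *lo, uint32_t *hi, uint32_t inc)
	{
		uint32_t old = *lo;

		*lo = old + inc;
		if (unsigned_gt(old, *lo))	/* wrapped: carry out */
			*hi += 1;
	}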
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index dc71ae2c1c89..909b2147dff9 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -47,15 +47,26 @@
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+#define _ASM
+#include <sys/asm_linkage.h>
+
+/* Windows userland links with OpenSSL */
+#if !defined (_WIN32) || defined (_KERNEL)
+
+/* Apple needs _ */
+#if defined (__APPLE__)
+#define gcm_avx_can_use_movbe _gcm_avx_can_use_movbe
+#endif
+
.extern gcm_avx_can_use_movbe
.text
#ifdef HAVE_MOVBE
-.type _aesni_ctr32_ghash_6x,@function
-.align 32
-_aesni_ctr32_ghash_6x:
+.balign 32
+FUNCTION(_aesni_ctr32_ghash_6x)
.cfi_startproc
+ ENDBR
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
@@ -69,7 +80,7 @@ _aesni_ctr32_ghash_6x:
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
-.align 32
+.balign 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
@@ -281,7 +292,7 @@ _aesni_ctr32_ghash_6x:
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
-.align 32
+.balign 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
@@ -303,7 +314,7 @@ _aesni_ctr32_ghash_6x:
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
-.align 32
+.balign 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
@@ -363,15 +374,15 @@ _aesni_ctr32_ghash_6x:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
+SET_SIZE(_aesni_ctr32_ghash_6x)
#endif /* ifdef HAVE_MOVBE */
-.type _aesni_ctr32_ghash_no_movbe_6x,@function
-.align 32
-_aesni_ctr32_ghash_no_movbe_6x:
+.balign 32
+FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
.cfi_startproc
+ ENDBR
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
@@ -385,7 +396,7 @@ _aesni_ctr32_ghash_no_movbe_6x:
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x_nmb
-.align 32
+.balign 32
.Loop6x_nmb:
addl $100663296,%ebx
jc .Lhandle_ctr32_nmb
@@ -609,7 +620,7 @@ _aesni_ctr32_ghash_no_movbe_6x:
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail_nmb
-.align 32
+.balign 32
.Lhandle_ctr32_nmb:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
@@ -631,7 +642,7 @@ _aesni_ctr32_ghash_no_movbe_6x:
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32_nmb
-.align 32
+.balign 32
.Lenc_tail_nmb:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
@@ -691,15 +702,13 @@ _aesni_ctr32_ghash_no_movbe_6x:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
+SET_SIZE(_aesni_ctr32_ghash_no_movbe_6x)
-.globl aesni_gcm_decrypt
-.type aesni_gcm_decrypt,@function
-.align 32
-aesni_gcm_decrypt:
+ENTRY_ALIGN(aesni_gcm_decrypt, 32)
.cfi_startproc
+ ENDBR
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
@@ -810,13 +819,14 @@ aesni_gcm_decrypt:
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
-.type _aesni_ctr32_6x,@function
-.align 32
-_aesni_ctr32_6x:
+SET_SIZE(aesni_gcm_decrypt)
+
+.balign 32
+FUNCTION(_aesni_ctr32_6x)
.cfi_startproc
+ ENDBR
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds.
@@ -838,7 +848,7 @@ _aesni_ctr32_6x:
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
-.align 16
+.balign 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
@@ -880,8 +890,8 @@ _aesni_ctr32_6x:
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
- .byte 0xf3,0xc3
-.align 32
+ RET
+.balign 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
@@ -904,13 +914,11 @@ _aesni_ctr32_6x:
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
-.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
+SET_SIZE(_aesni_ctr32_6x)
-.globl aesni_gcm_encrypt
-.type aesni_gcm_encrypt,@function
-.align 32
-aesni_gcm_encrypt:
+ENTRY_ALIGN(aesni_gcm_encrypt, 32)
.cfi_startproc
+ ENDBR
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
@@ -1186,9 +1194,11 @@ aesni_gcm_encrypt:
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
+SET_SIZE(aesni_gcm_encrypt)
+
+#endif /* !_WIN32 || _KERNEL */
/* Some utility routines */
@@ -1196,13 +1206,10 @@ aesni_gcm_encrypt:
* clear all fpu registers
* void clear_fpu_regs_avx(void);
*/
-.globl clear_fpu_regs_avx
-.type clear_fpu_regs_avx,@function
-.align 32
-clear_fpu_regs_avx:
+ENTRY_ALIGN(clear_fpu_regs_avx, 32)
vzeroall
- ret
-.size clear_fpu_regs_avx,.-clear_fpu_regs_avx
+ RET
+SET_SIZE(clear_fpu_regs_avx)
/*
* void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
@@ -1211,35 +1218,31 @@ clear_fpu_regs_avx:
* stores the result at `dst'. The XOR is performed using FPU registers,
* so make sure FPU state is saved when running this in the kernel.
*/
-.globl gcm_xor_avx
-.type gcm_xor_avx,@function
-.align 32
-gcm_xor_avx:
+ENTRY_ALIGN(gcm_xor_avx, 32)
movdqu (%rdi), %xmm0
movdqu (%rsi), %xmm1
pxor %xmm1, %xmm0
movdqu %xmm0, (%rsi)
- ret
-.size gcm_xor_avx,.-gcm_xor_avx
+ RET
+SET_SIZE(gcm_xor_avx)
/*
* Toggle a boolean_t value atomically and return the new value.
* boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
*/
-.globl atomic_toggle_boolean_nv
-.type atomic_toggle_boolean_nv,@function
-.align 32
-atomic_toggle_boolean_nv:
+ENTRY_ALIGN(atomic_toggle_boolean_nv, 32)
xorl %eax, %eax
lock
xorl $1, (%rdi)
jz 1f
movl $1, %eax
1:
- ret
-.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
+ RET
+SET_SIZE(atomic_toggle_boolean_nv)
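Note: atomic_toggle_boolean_nv flips the word at rdi with a locked xor and recovers the new value from the flags instead of re-reading memory: ZF set means the word became zero, so eax stays 0; otherwise the `movl $1, %eax` runs. A C rendering with compiler builtins, for comparison only (typedef assumed; the routine itself stays in assembly alongside the other GCM helpers):

	#include <stdint.h>

	typedef uint32_t boolean_t;	/* assumed 0/1 representation */

	/* __atomic_fetch_xor returns the old value; new = old ^ 1. */
	static boolean_t
	toggle_boolean_nv(volatile boolean_t *p)
	{
		return (__atomic_fetch_xor(p, 1, __ATOMIC_SEQ_CST) ^ 1);
	}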
+
+SECTION_STATIC
-.align 64
+.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
@@ -1251,7 +1254,7 @@ atomic_toggle_boolean_nv:
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
+.balign 64
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
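Note: the changes to this file are mechanical. Hand-written `.globl`/`.type`/`.align`/`.size` boilerplate and the literal `rep ret` encoding `.byte 0xf3,0xc3` give way to the ENTRY_ALIGN/FUNCTION/SET_SIZE/ENDBR/RET macros from sys/asm_linkage.h, letting one header choose symbol decoration, CET/IBT landing pads, and the return sequence per platform (hence also the new _WIN32/_KERNEL and __APPLE__ underscore guards). Roughly what such macros could expand to on ELF — a simplified, hypothetical sketch, not the actual header:

	/* Hypothetical simplified expansions for illustration only;
	 * the real definitions in sys/asm_linkage.h differ per OS. */
	#define ENTRY_ALIGN(name, a)		\
		.text;				\
		.balign	a;			\
		.globl	name;			\
		.type	name, @function;	\
	name:

	#define SET_SIZE(name)			\
		.size	name, . - name

	#define ENDBR	endbr64			/* IBT landing pad */
	#define RET	ret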
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
index 59edc4c8d56c..dec782fda33e 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -85,9 +85,9 @@
#include <sys/types.h>
-/* ARGSUSED */
void
gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
+ (void) x_in, (void) y, (void) res;
}
#elif defined(HAVE_PCLMULQDQ) /* guard by instruction set */
@@ -101,8 +101,8 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
// static uint8_t byte_swap16_mask[] = {
// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 };
-.data
-.align XMM_ALIGN
+SECTION_STATIC
+.balign XMM_ALIGN
.Lbyte_swap16_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
@@ -244,7 +244,7 @@ ENTRY_NP(gcm_mul_pclmulqdq)
//
// Return
//
- ret
+ RET
SET_SIZE(gcm_mul_pclmulqdq)
#endif /* lint || __lint */
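Note: gcm_mul_pclmulqdq computes one GHASH multiplication in GF(2^128); the PCLMULQDQ path does it with carry-less multiplies plus a fast reduction, and .Lbyte_swap16_mask handles GHASH's reflected byte order. The bitwise reference algorithm it accelerates (NIST SP 800-38D form; illustrative, not this file's bit-level layout):

	#include <stdint.h>

	/* Z = X * Y in GF(2^128), MSB-first bit order, reduction
	 * polynomial x^128 + x^7 + x^2 + x + 1 (the 0xE1 byte). */
	static void
	gf128_mul(const uint8_t x[16], const uint8_t y[16], uint8_t z[16])
	{
		uint8_t v[16], r[16] = { 0 };
		int i, j;

		for (j = 0; j < 16; j++)
			v[j] = y[j];
		for (i = 0; i < 128; i++) {
			if (x[i / 8] & (0x80 >> (i % 8)))
				for (j = 0; j < 16; j++)
					r[j] ^= v[j];
			int lsb = v[15] & 1;
			for (j = 15; j > 0; j--)	/* v >>= 1 */
				v[j] = (v[j] >> 1) | (v[j - 1] << 7);
			v[0] >>= 1;
			if (lsb)
				v[0] ^= 0xE1;
		}
		for (j = 0; j < 16; j++)
			z[j] = r[j];
	}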
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
index 90cc36b43a78..f62e056d4b64 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -97,13 +97,18 @@
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+#define _ASM
+#include <sys/asm_linkage.h>
+
.text
-.globl gcm_gmult_clmul
-.type gcm_gmult_clmul,@function
-.align 16
-gcm_gmult_clmul:
+/* Windows userland links with OpenSSL */
+#if !defined (_WIN32) || defined (_KERNEL)
+ENTRY_ALIGN(gcm_gmult_clmul, 16)
+
.cfi_startproc
+ ENDBR
+
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@@ -149,15 +154,14 @@ gcm_gmult_clmul:
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size gcm_gmult_clmul,.-gcm_gmult_clmul
+SET_SIZE(gcm_gmult_clmul)
+#endif /* !_WIN32 || _KERNEL */
-.globl gcm_init_htab_avx
-.type gcm_init_htab_avx,@function
-.align 32
-gcm_init_htab_avx:
+ENTRY_ALIGN(gcm_init_htab_avx, 32)
.cfi_startproc
+ ENDBR
vzeroupper
vmovdqu (%rsi),%xmm2
@@ -184,7 +188,7 @@ gcm_init_htab_avx:
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
-.align 32
+.balign 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
@@ -262,23 +266,21 @@ gcm_init_htab_avx:
vmovdqu %xmm5,-16(%rdi)
vzeroupper
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size gcm_init_htab_avx,.-gcm_init_htab_avx
+SET_SIZE(gcm_init_htab_avx)
-.globl gcm_gmult_avx
-.type gcm_gmult_avx,@function
-.align 32
-gcm_gmult_avx:
+#if !defined (_WIN32) || defined (_KERNEL)
+ENTRY_ALIGN(gcm_gmult_avx, 32)
.cfi_startproc
+ ENDBR
jmp .L_gmult_clmul
.cfi_endproc
-.size gcm_gmult_avx,.-gcm_gmult_avx
-.globl gcm_ghash_avx
-.type gcm_ghash_avx,@function
-.align 32
-gcm_ghash_avx:
+SET_SIZE(gcm_gmult_avx)
+
+ENTRY_ALIGN(gcm_ghash_avx, 32)
.cfi_startproc
+ ENDBR
vzeroupper
vmovdqu (%rdi),%xmm10
@@ -384,7 +386,7 @@ gcm_ghash_avx:
subq $0x80,%rcx
jmp .Loop8x_avx
-.align 32
+.balign 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
@@ -504,7 +506,7 @@ gcm_ghash_avx:
addq $0x80,%rcx
jmp .Ltail_no_xor_avx
-.align 32
+.balign 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
@@ -608,7 +610,7 @@ gcm_ghash_avx:
subq $0x10,%rcx
jmp .Ltail_avx
-.align 32
+.balign 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
@@ -649,10 +651,14 @@ gcm_ghash_avx:
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size gcm_ghash_avx,.-gcm_ghash_avx
-.align 64
+SET_SIZE(gcm_ghash_avx)
+
+#endif /* !_WIN32 || _KERNEL */
+
+SECTION_STATIC
+.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
@@ -661,14 +667,14 @@ gcm_ghash_avx:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
-.align 64
-.type .Lrem_4bit,@object
+.balign 64
+SET_OBJ(.Lrem_4bit)
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
-.type .Lrem_8bit,@object
+SET_OBJ(.Lrem_8bit)
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
@@ -704,7 +710,7 @@ gcm_ghash_avx:
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
+.balign 64
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
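Note: gcm_init_htab_avx precomputes successive powers of the hash key so gcm_ghash_avx can fold eight blocks per .Loop8x_avx iteration instead of applying Horner's rule one block at a time; all eight products then share a single reduction. The aggregation, sketched on top of a generic gf128_mul as above (shape only; the assembly works on reflected, pre-shuffled data):

	#include <stdint.h>
	#include <string.h>

	void gf128_mul(const uint8_t x[16], const uint8_t y[16],
	    uint8_t z[16]);

	/* Y <- (Y^X0)*H^8 ^ X1*H^7 ^ ... ^ X7*H, htab[i] = H^(8-i). */
	static void
	ghash8(uint8_t y[16], const uint8_t blk[8][16],
	    const uint8_t htab[8][16])
	{
		uint8_t acc[16] = { 0 }, t[16], in[16];

		for (int i = 0; i < 8; i++) {
			memcpy(in, blk[i], 16);
			if (i == 0)
				for (int j = 0; j < 16; j++)
					in[j] ^= y[j];	/* fold running tag */
			gf128_mul(in, htab[i], t);
			for (int j = 0; j < 16; j++)
				acc[j] ^= t[j];
		}
		memcpy(y, acc, 16);
	}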
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
deleted file mode 100644
index fc844cd8c74f..000000000000
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
+++ /dev/null
@@ -1,1369 +0,0 @@
-/*
- * !/usr/bin/env perl
- *
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project. The module is, however, dual licensed under OpenSSL and
- * CRYPTOGAMS licenses depending on where you obtain it. For further
- * details see http://www.openssl.org/~appro/cryptogams/.
- * ====================================================================
- *
- * sha1_block procedure for x86_64.
- *
- * It was brought to my attention that on EM64T compiler-generated code
- * was far behind 32-bit assembler implementation. This is unlike on
- * Opteron where compiler-generated code was only 15% behind 32-bit
- * assembler, which originally made it hard to motivate the effort.
- * There was suggestion to mechanically translate 32-bit code, but I
- * dismissed it, reasoning that x86_64 offers enough register bank
- * capacity to fully utilize SHA-1 parallelism. Therefore this fresh
- * implementation:-) However! While 64-bit code does performs better
- * on Opteron, I failed to beat 32-bit assembler on EM64T core. Well,
- * x86_64 does offer larger *addressable* bank, but out-of-order core
- * reaches for even more registers through dynamic aliasing, and EM64T
- * core must have managed to run-time optimize even 32-bit code just as
- * good as 64-bit one. Performance improvement is summarized in the
- * following table:
- *
- * gcc 3.4 32-bit asm cycles/byte
- * Opteron +45% +20% 6.8
- * Xeon P4 +65% +0% 9.9
- * Core2 +60% +10% 7.0
- *
- *
- * OpenSolaris OS modifications
- *
- * Sun elects to use this software under the BSD license.
- *
- * This source originates from OpenSSL file sha1-x86_64.pl at
- * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- * (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- * 1. Added perl "use strict" and declared variables.
- *
- * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- * assemblers).
- *
- */
-
-/*
- * This file was generated by a perl script (sha1-x86_64.pl). The comments from
- * the original file have been pasted above.
- */
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sys/sha1.h>
-
-
-/* ARGSUSED */
-void
-sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks)
-{
-}
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-ENTRY_NP(sha1_block_data_order)
-.cfi_startproc
- mov %rsp,%rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_offset %rbx,-16
- push %rbp
-.cfi_offset %rbp,-24
- push %r12
-.cfi_offset %r12,-32
- mov %rdi,%r8 # reassigned argument
-.cfi_register %rdi, %r8
- sub $72,%rsp
- mov %rsi,%r9 # reassigned argument
-.cfi_register %rsi, %r9
- and $-64,%rsp
- mov %rdx,%r10 # reassigned argument
-.cfi_register %rdx, %r10
- mov %rax,64(%rsp)
-# echo ".cfi_cfa_expression %rsp+64,deref,+8" |
-# openssl/crypto/perlasm/x86_64-xlate.pl
-.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08
-
- mov 0(%r8),%edx
- mov 4(%r8),%esi
- mov 8(%r8),%edi
- mov 12(%r8),%ebp
- mov 16(%r8),%r11d
-.align 4
-.Lloop:
- mov 0(%r9),%eax
- bswap %eax
- mov %eax,0(%rsp)
- lea 0x5a827999(%eax,%r11d),%r12d
- mov %edi,%ebx
- mov 4(%r9),%eax
- mov %edx,%r11d
- xor %ebp,%ebx
- bswap %eax
- rol $5,%r11d
- and %esi,%ebx
- mov %eax,4(%rsp)
- add %r11d,%r12d
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- lea 0x5a827999(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov 8(%r9),%eax
- mov %r12d,%ebp
- xor %edi,%ebx
- bswap %eax
- rol $5,%ebp
- and %edx,%ebx
- mov %eax,8(%rsp)
- add %ebp,%r11d
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- lea 0x5a827999(%eax,%edi),%ebp
- mov %edx,%ebx
- mov 12(%r9),%eax
- mov %r11d,%edi
- xor %esi,%ebx
- bswap %eax
- rol $5,%edi
- and %r12d,%ebx
- mov %eax,12(%rsp)
- add %edi,%ebp
- xor %esi,%ebx
- rol $30,%r12d
- add %ebx,%ebp
- lea 0x5a827999(%eax,%esi),%edi
- mov %r12d,%ebx
- mov 16(%r9),%eax
- mov %ebp,%esi
- xor %edx,%ebx
- bswap %eax
- rol $5,%esi
- and %r11d,%ebx
- mov %eax,16(%rsp)
- add %esi,%edi
- xor %edx,%ebx
- rol $30,%r11d
- add %ebx,%edi
- lea 0x5a827999(%eax,%edx),%esi
- mov %r11d,%ebx
- mov 20(%r9),%eax
- mov %edi,%edx
- xor %r12d,%ebx
- bswap %eax
- rol $5,%edx
- and %ebp,%ebx
- mov %eax,20(%rsp)
- add %edx,%esi
- xor %r12d,%ebx
- rol $30,%ebp
- add %ebx,%esi
- lea 0x5a827999(%eax,%r12d),%edx
- mov %ebp,%ebx
- mov 24(%r9),%eax
- mov %esi,%r12d
- xor %r11d,%ebx
- bswap %eax
- rol $5,%r12d
- and %edi,%ebx
- mov %eax,24(%rsp)
- add %r12d,%edx
- xor %r11d,%ebx
- rol $30,%edi
- add %ebx,%edx
- lea 0x5a827999(%eax,%r11d),%r12d
- mov %edi,%ebx
- mov 28(%r9),%eax
- mov %edx,%r11d
- xor %ebp,%ebx
- bswap %eax
- rol $5,%r11d
- and %esi,%ebx
- mov %eax,28(%rsp)
- add %r11d,%r12d
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- lea 0x5a827999(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov 32(%r9),%eax
- mov %r12d,%ebp
- xor %edi,%ebx
- bswap %eax
- rol $5,%ebp
- and %edx,%ebx
- mov %eax,32(%rsp)
- add %ebp,%r11d
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- lea 0x5a827999(%eax,%edi),%ebp
- mov %edx,%ebx
- mov 36(%r9),%eax
- mov %r11d,%edi
- xor %esi,%ebx
- bswap %eax
- rol $5,%edi
- and %r12d,%ebx
- mov %eax,36(%rsp)
- add %edi,%ebp
- xor %esi,%ebx
- rol $30,%r12d
- add %ebx,%ebp
- lea 0x5a827999(%eax,%esi),%edi
- mov %r12d,%ebx
- mov 40(%r9),%eax
- mov %ebp,%esi
- xor %edx,%ebx
- bswap %eax
- rol $5,%esi
- and %r11d,%ebx
- mov %eax,40(%rsp)
- add %esi,%edi
- xor %edx,%ebx
- rol $30,%r11d
- add %ebx,%edi
- lea 0x5a827999(%eax,%edx),%esi
- mov %r11d,%ebx
- mov 44(%r9),%eax
- mov %edi,%edx
- xor %r12d,%ebx
- bswap %eax
- rol $5,%edx
- and %ebp,%ebx
- mov %eax,44(%rsp)
- add %edx,%esi
- xor %r12d,%ebx
- rol $30,%ebp
- add %ebx,%esi
- lea 0x5a827999(%eax,%r12d),%edx
- mov %ebp,%ebx
- mov 48(%r9),%eax
- mov %esi,%r12d
- xor %r11d,%ebx
- bswap %eax
- rol $5,%r12d
- and %edi,%ebx
- mov %eax,48(%rsp)
- add %r12d,%edx
- xor %r11d,%ebx
- rol $30,%edi
- add %ebx,%edx
- lea 0x5a827999(%eax,%r11d),%r12d
- mov %edi,%ebx
- mov 52(%r9),%eax
- mov %edx,%r11d
- xor %ebp,%ebx
- bswap %eax
- rol $5,%r11d
- and %esi,%ebx
- mov %eax,52(%rsp)
- add %r11d,%r12d
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- lea 0x5a827999(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov 56(%r9),%eax
- mov %r12d,%ebp
- xor %edi,%ebx
- bswap %eax
- rol $5,%ebp
- and %edx,%ebx
- mov %eax,56(%rsp)
- add %ebp,%r11d
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- lea 0x5a827999(%eax,%edi),%ebp
- mov %edx,%ebx
- mov 60(%r9),%eax
- mov %r11d,%edi
- xor %esi,%ebx
- bswap %eax
- rol $5,%edi
- and %r12d,%ebx
- mov %eax,60(%rsp)
- add %edi,%ebp
- xor %esi,%ebx
- rol $30,%r12d
- add %ebx,%ebp
- lea 0x5a827999(%eax,%esi),%edi
- mov 0(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 8(%rsp),%eax
- xor %edx,%ebx
- rol $5,%esi
- xor 32(%rsp),%eax
- and %r11d,%ebx
- add %esi,%edi
- xor 52(%rsp),%eax
- xor %edx,%ebx
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,0(%rsp)
- lea 0x5a827999(%eax,%edx),%esi
- mov 4(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 12(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edx
- xor 36(%rsp),%eax
- and %ebp,%ebx
- add %edx,%esi
- xor 56(%rsp),%eax
- xor %r12d,%ebx
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,4(%rsp)
- lea 0x5a827999(%eax,%r12d),%edx
- mov 8(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 16(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%r12d
- xor 40(%rsp),%eax
- and %edi,%ebx
- add %r12d,%edx
- xor 60(%rsp),%eax
- xor %r11d,%ebx
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,8(%rsp)
- lea 0x5a827999(%eax,%r11d),%r12d
- mov 12(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 20(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%r11d
- xor 44(%rsp),%eax
- and %esi,%ebx
- add %r11d,%r12d
- xor 0(%rsp),%eax
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,12(%rsp)
- lea 0x5a827999(%eax,%ebp),%r11d
- mov 16(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 24(%rsp),%eax
- xor %edi,%ebx
- rol $5,%ebp
- xor 48(%rsp),%eax
- and %edx,%ebx
- add %ebp,%r11d
- xor 4(%rsp),%eax
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,16(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 20(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 28(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 52(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 8(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,20(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 24(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 32(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 56(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 12(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,24(%rsp)
- lea 0x6ed9eba1(%eax,%edx),%esi
- mov 28(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 36(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 60(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 16(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,28(%rsp)
- lea 0x6ed9eba1(%eax,%r12d),%edx
- mov 32(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 40(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 0(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 20(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,32(%rsp)
- lea 0x6ed9eba1(%eax,%r11d),%r12d
- mov 36(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 44(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 4(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 24(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,36(%rsp)
- lea 0x6ed9eba1(%eax,%ebp),%r11d
- mov 40(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 48(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 8(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 28(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,40(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 44(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 52(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 12(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 32(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,44(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 48(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 56(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 16(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 36(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,48(%rsp)
- lea 0x6ed9eba1(%eax,%edx),%esi
- mov 52(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 60(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 20(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 40(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,52(%rsp)
- lea 0x6ed9eba1(%eax,%r12d),%edx
- mov 56(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 0(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 24(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 44(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,56(%rsp)
- lea 0x6ed9eba1(%eax,%r11d),%r12d
- mov 60(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 4(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 28(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 48(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,60(%rsp)
- lea 0x6ed9eba1(%eax,%ebp),%r11d
- mov 0(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 8(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 32(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 52(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,0(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 4(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 12(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 36(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 56(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,4(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 8(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 16(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 40(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 60(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,8(%rsp)
- lea 0x6ed9eba1(%eax,%edx),%esi
- mov 12(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 20(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 44(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 0(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,12(%rsp)
- lea 0x6ed9eba1(%eax,%r12d),%edx
- mov 16(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 24(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 48(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 4(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,16(%rsp)
- lea 0x6ed9eba1(%eax,%r11d),%r12d
- mov 20(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 28(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 52(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 8(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,20(%rsp)
- lea 0x6ed9eba1(%eax,%ebp),%r11d
- mov 24(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 32(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 56(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 12(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,24(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 28(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 36(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 60(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 16(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,28(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 32(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 40(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 0(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 20(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,32(%rsp)
- lea -0x70e44324(%eax,%edx),%esi
- mov 36(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 44(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 4(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 24(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,36(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 40(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 48(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 8(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 28(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,40(%rsp)
- add %ebx,%edx
- lea -0x70e44324(%eax,%r11d),%r12d
- mov 44(%rsp),%eax
- mov %esi,%ebx
- mov %esi,%ecx
- xor 52(%rsp),%eax
- mov %edx,%r11d
- and %edi,%ebx
- xor 12(%rsp),%eax
- or %edi,%ecx
- rol $5,%r11d
- xor 32(%rsp),%eax
- and %ebp,%ecx
- add %r11d,%r12d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%esi
- mov %eax,44(%rsp)
- add %ebx,%r12d
- lea -0x70e44324(%eax,%ebp),%r11d
- mov 48(%rsp),%eax
- mov %edx,%ebx
- mov %edx,%ecx
- xor 56(%rsp),%eax
- mov %r12d,%ebp
- and %esi,%ebx
- xor 16(%rsp),%eax
- or %esi,%ecx
- rol $5,%ebp
- xor 36(%rsp),%eax
- and %edi,%ecx
- add %ebp,%r11d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edx
- mov %eax,48(%rsp)
- add %ebx,%r11d
- lea -0x70e44324(%eax,%edi),%ebp
- mov 52(%rsp),%eax
- mov %r12d,%ebx
- mov %r12d,%ecx
- xor 60(%rsp),%eax
- mov %r11d,%edi
- and %edx,%ebx
- xor 20(%rsp),%eax
- or %edx,%ecx
- rol $5,%edi
- xor 40(%rsp),%eax
- and %esi,%ecx
- add %edi,%ebp
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r12d
- mov %eax,52(%rsp)
- add %ebx,%ebp
- lea -0x70e44324(%eax,%esi),%edi
- mov 56(%rsp),%eax
- mov %r11d,%ebx
- mov %r11d,%ecx
- xor 0(%rsp),%eax
- mov %ebp,%esi
- and %r12d,%ebx
- xor 24(%rsp),%eax
- or %r12d,%ecx
- rol $5,%esi
- xor 44(%rsp),%eax
- and %edx,%ecx
- add %esi,%edi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r11d
- mov %eax,56(%rsp)
- add %ebx,%edi
- lea -0x70e44324(%eax,%edx),%esi
- mov 60(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 4(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 28(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 48(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,60(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 0(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 8(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 32(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 52(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,0(%rsp)
- add %ebx,%edx
- lea -0x70e44324(%eax,%r11d),%r12d
- mov 4(%rsp),%eax
- mov %esi,%ebx
- mov %esi,%ecx
- xor 12(%rsp),%eax
- mov %edx,%r11d
- and %edi,%ebx
- xor 36(%rsp),%eax
- or %edi,%ecx
- rol $5,%r11d
- xor 56(%rsp),%eax
- and %ebp,%ecx
- add %r11d,%r12d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%esi
- mov %eax,4(%rsp)
- add %ebx,%r12d
- lea -0x70e44324(%eax,%ebp),%r11d
- mov 8(%rsp),%eax
- mov %edx,%ebx
- mov %edx,%ecx
- xor 16(%rsp),%eax
- mov %r12d,%ebp
- and %esi,%ebx
- xor 40(%rsp),%eax
- or %esi,%ecx
- rol $5,%ebp
- xor 60(%rsp),%eax
- and %edi,%ecx
- add %ebp,%r11d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edx
- mov %eax,8(%rsp)
- add %ebx,%r11d
- lea -0x70e44324(%eax,%edi),%ebp
- mov 12(%rsp),%eax
- mov %r12d,%ebx
- mov %r12d,%ecx
- xor 20(%rsp),%eax
- mov %r11d,%edi
- and %edx,%ebx
- xor 44(%rsp),%eax
- or %edx,%ecx
- rol $5,%edi
- xor 0(%rsp),%eax
- and %esi,%ecx
- add %edi,%ebp
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r12d
- mov %eax,12(%rsp)
- add %ebx,%ebp
- lea -0x70e44324(%eax,%esi),%edi
- mov 16(%rsp),%eax
- mov %r11d,%ebx
- mov %r11d,%ecx
- xor 24(%rsp),%eax
- mov %ebp,%esi
- and %r12d,%ebx
- xor 48(%rsp),%eax
- or %r12d,%ecx
- rol $5,%esi
- xor 4(%rsp),%eax
- and %edx,%ecx
- add %esi,%edi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r11d
- mov %eax,16(%rsp)
- add %ebx,%edi
- lea -0x70e44324(%eax,%edx),%esi
- mov 20(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 28(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 52(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 8(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,20(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 24(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 32(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 56(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 12(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,24(%rsp)
- add %ebx,%edx
- lea -0x70e44324(%eax,%r11d),%r12d
- mov 28(%rsp),%eax
- mov %esi,%ebx
- mov %esi,%ecx
- xor 36(%rsp),%eax
- mov %edx,%r11d
- and %edi,%ebx
- xor 60(%rsp),%eax
- or %edi,%ecx
- rol $5,%r11d
- xor 16(%rsp),%eax
- and %ebp,%ecx
- add %r11d,%r12d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%esi
- mov %eax,28(%rsp)
- add %ebx,%r12d
- lea -0x70e44324(%eax,%ebp),%r11d
- mov 32(%rsp),%eax
- mov %edx,%ebx
- mov %edx,%ecx
- xor 40(%rsp),%eax
- mov %r12d,%ebp
- and %esi,%ebx
- xor 0(%rsp),%eax
- or %esi,%ecx
- rol $5,%ebp
- xor 20(%rsp),%eax
- and %edi,%ecx
- add %ebp,%r11d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edx
- mov %eax,32(%rsp)
- add %ebx,%r11d
- lea -0x70e44324(%eax,%edi),%ebp
- mov 36(%rsp),%eax
- mov %r12d,%ebx
- mov %r12d,%ecx
- xor 44(%rsp),%eax
- mov %r11d,%edi
- and %edx,%ebx
- xor 4(%rsp),%eax
- or %edx,%ecx
- rol $5,%edi
- xor 24(%rsp),%eax
- and %esi,%ecx
- add %edi,%ebp
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r12d
- mov %eax,36(%rsp)
- add %ebx,%ebp
- lea -0x70e44324(%eax,%esi),%edi
- mov 40(%rsp),%eax
- mov %r11d,%ebx
- mov %r11d,%ecx
- xor 48(%rsp),%eax
- mov %ebp,%esi
- and %r12d,%ebx
- xor 8(%rsp),%eax
- or %r12d,%ecx
- rol $5,%esi
- xor 28(%rsp),%eax
- and %edx,%ecx
- add %esi,%edi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r11d
- mov %eax,40(%rsp)
- add %ebx,%edi
- lea -0x70e44324(%eax,%edx),%esi
- mov 44(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 52(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 12(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 32(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,44(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 48(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 56(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 16(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 36(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,48(%rsp)
- add %ebx,%edx
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 52(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 60(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 20(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 40(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,52(%rsp)
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov 56(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 0(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 24(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 44(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,56(%rsp)
- lea -0x359d3e2a(%eax,%edi),%ebp
- mov 60(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 4(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 28(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 48(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,60(%rsp)
- lea -0x359d3e2a(%eax,%esi),%edi
- mov 0(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 8(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 32(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 52(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,0(%rsp)
- lea -0x359d3e2a(%eax,%edx),%esi
- mov 4(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 12(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 36(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 56(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,4(%rsp)
- lea -0x359d3e2a(%eax,%r12d),%edx
- mov 8(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 16(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 40(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 60(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,8(%rsp)
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 12(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 20(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 44(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 0(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,12(%rsp)
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov 16(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 24(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 48(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 4(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,16(%rsp)
- lea -0x359d3e2a(%eax,%edi),%ebp
- mov 20(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 28(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 52(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 8(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,20(%rsp)
- lea -0x359d3e2a(%eax,%esi),%edi
- mov 24(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 32(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 56(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 12(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,24(%rsp)
- lea -0x359d3e2a(%eax,%edx),%esi
- mov 28(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 36(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 60(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 16(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,28(%rsp)
- lea -0x359d3e2a(%eax,%r12d),%edx
- mov 32(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 40(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 0(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 20(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,32(%rsp)
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 36(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 44(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 4(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 24(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,36(%rsp)
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov 40(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 48(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 8(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 28(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,40(%rsp)
- lea -0x359d3e2a(%eax,%edi),%ebp
- mov 44(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 52(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 12(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 32(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,44(%rsp)
- lea -0x359d3e2a(%eax,%esi),%edi
- mov 48(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 56(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 16(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 36(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,48(%rsp)
- lea -0x359d3e2a(%eax,%edx),%esi
- mov 52(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 60(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 20(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 40(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- lea -0x359d3e2a(%eax,%r12d),%edx
- mov 56(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 0(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 24(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 44(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 60(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 4(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 28(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 48(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov %r12d,%ebp
- xor %edx,%ebx
- rol $5,%ebp
- xor %edi,%ebx
- add %ebp,%r11d
- rol $30,%edx
- add %ebx,%r11d
- // Update and save state information in SHA-1 context
- add 0(%r8),%r11d
- add 4(%r8),%r12d
- add 8(%r8),%edx
- add 12(%r8),%esi
- add 16(%r8),%edi
- mov %r11d,0(%r8)
- mov %r12d,4(%r8)
- mov %edx,8(%r8)
- mov %esi,12(%r8)
- mov %edi,16(%r8)
-
- xchg %r11d,%edx # mov %r11d,%edx
- xchg %r12d,%esi # mov %r12d,%esi
- xchg %r11d,%edi # mov %edx,%edi
- xchg %r12d,%ebp # mov %esi,%ebp
- # mov %edi,%r11d
- lea 64(%r9),%r9
- sub $1,%r10
- jnz .Lloop
- mov 64(%rsp),%rsp
-.cfi_def_cfa %rsp,8
- movq -24(%rsp),%r12
-.cfi_restore %r12
- movq -16(%rsp),%rbp
-.cfi_restore %rbp
- movq -8(%rsp),%rbx
-.cfi_restore %rbx
- ret
-.cfi_endproc
-SET_SIZE(sha1_block_data_order)
-
-.data
-.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
-
-#endif /* lint || __lint */
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif
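
[Editor's note] The rounds deleted above are the last 20 of the 80 unrolled SHA-1 rounds: the lea displacement -0x359d3e2a is K4 = 0xCA62C1D6 (rounds 60-79, where f(b,c,d) = b ^ c ^ d), and the four-way xor over the 0..60(%rsp) slots followed by rol $1 is the schedule recurrence W[t] = rotl1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) kept in a 16-word circular buffer. A minimal C sketch of one such round, with names of my choosing rather than the file's:

#include <stdint.h>

static inline uint32_t
rotl32(uint32_t x, int n)
{
	return ((x << n) | (x >> (32 - n)));
}

/*
 * One SHA-1 parity round (t = 60..79), as each unrolled block
 * above computes it.  Illustrative sketch only.
 */
static void
sha1_parity_round(uint32_t s[5], uint32_t w[16], int t)
{
	uint32_t wt, tmp;

	/* W[t] = rotl1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) */
	wt = rotl32(w[(t - 3) & 15] ^ w[(t - 8) & 15] ^
	    w[(t - 14) & 15] ^ w[(t - 16) & 15], 1);
	w[t & 15] = wt;

	/* tmp = rotl5(a) + (b ^ c ^ d) + e + K4 + W[t] */
	tmp = rotl32(s[0], 5) + (s[1] ^ s[2] ^ s[3]) + s[4] +
	    wt + 0xCA62C1D6;
	s[4] = s[3];
	s[3] = s[2];
	s[2] = rotl32(s[1], 30);	/* the rol $30,%b step */
	s[1] = s[0];
	s[0] = tmp;
}
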
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S
new file mode 100644
index 000000000000..d3e5e3f0d080
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S
@@ -0,0 +1,5104 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
+#if defined(__x86_64)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+SECTION_STATIC
+
+.balign 64
+SET_OBJ(K256)
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+
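
[Editor's note] K256 above is the usual table of 64 SHA-256 round constants, with each row of four emitted twice so the vector paths can load duplicated 128-bit rows; the scalar code below walks it 4 bytes per round and hops the duplicate row with leaq 20(%rbp),%rbp after every fourth round. The trailing .long rows are the pshufb byte-swap mask and two dword-move masks used by the SSSE3/AVX schedule; the zero byte inside the byte-swap mask doubles as the sentinel tested by cmpb $0,3(%rbp). Each round then performs the standard compression step. A minimal C sketch (names and layout are mine, not the file's) is below; note the assembly builds Sigma1 and Sigma0 from the fused chains rorl $14/$5/$6 and rorl $9/$11/$2, which expand to the same three rotations:

#include <stdint.h>

static inline uint32_t
ror32(uint32_t x, int n)
{
	return ((x >> n) | (x << (32 - n)));
}

/*
 * One SHA-256 compression round: Sigma1 = ror6^ror11^ror25,
 * Sigma0 = ror2^ror13^ror22, Ch and Maj as in FIPS 180-4.
 * Illustrative sketch only.
 */
static void
sha256_round(uint32_t s[8], uint32_t kt, uint32_t wt)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
	uint32_t S0, S1, ch, maj, t1, t2;

	S1 = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	ch = (e & f) ^ (~e & g);
	t1 = h + S1 + ch + kt + wt;
	S0 = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	maj = (a & b) ^ (a & c) ^ (b & c);
	t2 = S0 + maj;

	s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
	s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
}
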
+ENTRY_ALIGN(zfs_sha256_transform_x64, 16)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
+.Lprologue:
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+.balign 16
+.Lloop:
+ movl %ebx,%edi
+ leaq K256(%rip),%rbp
+ xorl %ecx,%edi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 20(%rbp),%rbp
+ addl %r14d,%eax
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 20(%rbp),%rbp
+ jmp .Lrounds_16_xx
+.balign 16
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 36(%rsp),%r12d
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 4(%rbp),%rbp
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 40(%rsp),%r12d
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 4(%rbp),%rbp
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 44(%rsp),%r12d
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 4(%rbp),%rbp
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 48(%rsp),%r12d
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 20(%rbp),%rbp
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 52(%rsp),%r12d
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 4(%rbp),%rbp
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 56(%rsp),%r12d
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 4(%rbp),%rbp
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 60(%rsp),%r12d
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 4(%rbp),%rbp
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 0(%rsp),%r12d
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 20(%rbp),%rbp
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 4(%rsp),%r12d
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 4(%rbp),%rbp
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 8(%rsp),%r12d
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 4(%rbp),%rbp
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 12(%rsp),%r12d
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 4(%rbp),%rbp
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 16(%rsp),%r12d
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 20(%rbp),%rbp
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 20(%rsp),%r12d
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 4(%rbp),%rbp
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 24(%rsp),%r12d
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 4(%rbp),%rbp
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r15d
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 28(%rsp),%r12d
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 4(%rbp),%rbp
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%edi
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 32(%rsp),%r12d
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 20(%rbp),%rbp
+ cmpb $0,3(%rbp)
+ jnz .Lrounds_16_xx
+ movq 64+0(%rsp),%rdi
+ addl %r14d,%eax
+ leaq 64(%rsi),%rsi
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+ cmpq 64+16(%rsp),%rsi
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha256_transform_x64)
+
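[Editor's note] From round 16 on, the .Lrounds_16_xx loop above recomputes the message schedule in place on the sixteen 32-bit stack slots: the rorl $11/$7 plus shrl $3 chain builds sigma0 of W[t-15], the rorl $2/$17 plus shrl $10 chain builds sigma1 of W[t-2], and both are added to W[t-7] and W[t-16]. A hedged C sketch of that update (helper and names are mine):

#include <stdint.h>

static inline uint32_t
ror32(uint32_t x, int n)
{
	return ((x >> n) | (x << (32 - n)));
}

/*
 * In-place schedule update over a 16-word circular buffer, as
 * .Lrounds_16_xx does on 0..60(%rsp):
 *   sigma0 = ror7 ^ ror18 ^ shr3, sigma1 = ror17 ^ ror19 ^ shr10
 *   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
 * Illustrative sketch only.
 */
static uint32_t
sha256_schedule(uint32_t w[16], int t)
{
	uint32_t w15 = w[(t - 15) & 15];
	uint32_t w2 = w[(t - 2) & 15];
	uint32_t s0 = ror32(w15, 7) ^ ror32(w15, 18) ^ (w15 >> 3);
	uint32_t s1 = ror32(w2, 17) ^ ror32(w2, 19) ^ (w2 >> 10);

	w[t & 15] += s0 + s1 + w[(t - 7) & 15];
	return (w[t & 15]);
}
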
+ENTRY_ALIGN(zfs_sha256_transform_shani, 64)
+.cfi_startproc
+ ENDBR
+ leaq K256+128(%rip),%rcx
+ movdqu (%rdi),%xmm1
+ movdqu 16(%rdi),%xmm2
+ movdqa 512-128(%rcx),%xmm7
+
+ pshufd $0x1b,%xmm1,%xmm0
+ pshufd $0xb1,%xmm1,%xmm1
+ pshufd $0x1b,%xmm2,%xmm2
+ movdqa %xmm7,%xmm8
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp .Loop_shani
+
+.balign 16
+.Loop_shani:
+ movdqu (%rsi),%xmm3
+ movdqu 16(%rsi),%xmm4
+ movdqu 32(%rsi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%rsi),%xmm6
+
+ movdqa 0-128(%rcx),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+ movdqa %xmm2,%xmm10
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,%xmm9
+.byte 15,56,203,202
+
+ movdqa 32-128(%rcx),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ leaq 64(%rsi),%rsi
+.byte 15,56,204,220
+.byte 15,56,203,202
+
+ movdqa 64-128(%rcx),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+
+ movdqa 96-128(%rcx),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 128-128(%rcx),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 160-128(%rcx),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 192-128(%rcx),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 224-128(%rcx),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 256-128(%rcx),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 288-128(%rcx),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 320-128(%rcx),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 352-128(%rcx),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 384-128(%rcx),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 416-128(%rcx),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+
+ movdqa 448-128(%rcx),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa %xmm8,%xmm7
+.byte 15,56,203,202
+
+ movdqa 480-128(%rcx),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ decq %rdx
+ nop
+.byte 15,56,203,202
+
+ paddd %xmm10,%xmm2
+ paddd %xmm9,%xmm1
+ jnz .Loop_shani
+
+ pshufd $0xb1,%xmm2,%xmm2
+ pshufd $0x1b,%xmm1,%xmm7
+ pshufd $0xb1,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+
+ movdqu %xmm1,(%rdi)
+ movdqu %xmm2,16(%rdi)
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha256_transform_shani)
+
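[Editor's note] zfs_sha256_transform_shani uses the x86 SHA extensions, emitted as raw .byte sequences so the file assembles even on toolchains that lack the mnemonics: 15,56,203 encodes sha256rnds2, 15,56,204 sha256msg1, 15,56,205 sha256msg2, 102,15,58,15 palignr, and 102,15,56,0 pshufb. The hardware keeps the state as ABEF/CDGH register pairs, which is what the pshufd/punpck fix-ups at entry and exit rearrange. One four-round step in intrinsics form would look roughly like the sketch below; this is my rendering of the standard pattern, not the shipped code, and it needs a compiler targeting the SHA extension:

#include <immintrin.h>

/*
 * Rough intrinsics equivalent of one 4-round step of .Loop_shani:
 * sha256rnds2 retires two rounds per issue using the low two
 * dwords of W+K, and the pshufd $0x0e moves the upper two down
 * for the second issue.  Sketch only.
 */
static inline void
sha256ni_quad(__m128i *state0, __m128i *state1, __m128i k, __m128i w)
{
	__m128i msg;

	msg = _mm_add_epi32(w, k);		/* W[t..t+3] + K[t..t+3] */
	*state1 = _mm_sha256rnds2_epu32(*state1, *state0, msg);
	msg = _mm_shuffle_epi32(msg, 0x0e);	/* upper two W+K down    */
	*state0 = _mm_sha256rnds2_epu32(*state0, *state1, msg);
}
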
+ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ shlq $4,%rdx
+ subq $96,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
+.Lprologue_ssse3:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+
+ jmp .Lloop_ssse3
+.balign 16
+.Lloop_ssse3:
+ movdqa K256+512(%rip),%xmm7
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+.byte 102,15,56,0,199
+ movdqu 48(%rsi),%xmm3
+ leaq K256(%rip),%rbp
+.byte 102,15,56,0,207
+ movdqa 0(%rbp),%xmm4
+ movdqa 32(%rbp),%xmm5
+.byte 102,15,56,0,215
+ paddd %xmm0,%xmm4
+ movdqa 64(%rbp),%xmm6
+.byte 102,15,56,0,223
+ movdqa 96(%rbp),%xmm7
+ paddd %xmm1,%xmm5
+ paddd %xmm2,%xmm6
+ paddd %xmm3,%xmm7
+ movdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ movdqa %xmm5,16(%rsp)
+ movl %ebx,%edi
+ movdqa %xmm6,32(%rsp)
+ xorl %ecx,%edi
+ movdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp .Lssse3_00_47
+
+.balign 16
+.Lssse3_00_47:
+ subq $-128,%rbp
+ rorl $14,%r13d
+ movdqa %xmm1,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm3,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,224,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,250,4
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm3,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm0
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm0
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm0,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 0(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm0,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,0(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm2,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm0,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,225,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,251,4
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm0,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm1
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm1
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm1,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 32(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm1,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,16(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm3,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm1,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,226,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,248,4
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm1,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm2
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm2
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm2,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 64(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm2,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,32(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm0,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm2,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,227,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,249,4
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm2,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm3
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm3
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm3,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 96(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm3,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,48(%rsp)
+ cmpb $0,131(%rbp)
+ jne .Lssse3_00_47
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%rdi
+ movl %r14d,%eax
+
+ addl 0(%rdi),%eax
+ leaq 64(%rsi),%rsi
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop_ssse3
+
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue_ssse3:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha256_transform_ssse3)
+
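[Editor's note] All the SHA-256 entry points in this file appear to share one SysV signature: %rdi points at the eight-word state, %rsi at the input, and %rdx carries the number of 64-byte blocks. The prologue's shlq $4,%rdx / leaq (%rsi,%rdx,4),%rdx turns that count into an end pointer (count * 64 bytes past the input), while the SHA-NI path simply decrements the count. Assumed C-side declarations for illustration only; the real ones live in the OpenZFS sha2 implementation glue, so treat these as hypothetical:

#include <stdint.h>
#include <stddef.h>

/* Assumed prototypes matching the register usage above. */
extern void zfs_sha256_transform_x64(uint32_t state[8],
    const void *data, size_t num_blks);
extern void zfs_sha256_transform_ssse3(uint32_t state[8],
    const void *data, size_t num_blks);

/* Compress all whole 64-byte blocks of a buffer (sketch). */
static void
sha256_blocks(uint32_t state[8], const uint8_t *buf, size_t len)
{
	zfs_sha256_transform_x64(state, buf, len / 64);
}
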
+ENTRY_ALIGN(zfs_sha256_transform_avx, 64)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ shlq $4,%rdx
+ subq $96,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
+.Lprologue_avx:
+
+ vzeroupper
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ vmovdqa K256+512+32(%rip),%xmm8
+ vmovdqa K256+512+64(%rip),%xmm9
+ jmp .Lloop_avx
+.balign 16
+.Lloop_avx:
+ vmovdqa K256+512(%rip),%xmm7
+ vmovdqu 0(%rsi),%xmm0
+ vmovdqu 16(%rsi),%xmm1
+ vmovdqu 32(%rsi),%xmm2
+ vmovdqu 48(%rsi),%xmm3
+ vpshufb %xmm7,%xmm0,%xmm0
+ leaq K256(%rip),%rbp
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd 0(%rbp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 32(%rbp),%xmm1,%xmm5
+ vpaddd 64(%rbp),%xmm2,%xmm6
+ vpaddd 96(%rbp),%xmm3,%xmm7
+ vmovdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ vmovdqa %xmm5,16(%rsp)
+ movl %ebx,%edi
+ vmovdqa %xmm6,32(%rsp)
+ xorl %ecx,%edi
+ vmovdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp .Lavx_00_47
+
+.balign 16
+.Lavx_00_47:
+ subq $-128,%rbp
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ shrdl $9,%r14d,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vpaddd %xmm7,%xmm0,%xmm0
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ xorl %r10d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ebx,%r15d
+ vpslld $14,%xmm4,%xmm5
+ addl %r12d,%r11d
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ vpshufd $250,%xmm3,%xmm7
+ shrdl $2,%r14d,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ vpsrld $11,%xmm6,%xmm6
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ shrdl $9,%r14d,%r14d
+ vpslld $11,%xmm5,%xmm5
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ vpsrlq $17,%xmm7,%xmm7
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ vpaddd %xmm4,%xmm0,%xmm0
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %edx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r13d
+ vpshufb %xmm8,%xmm6,%xmm6
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ vpaddd %xmm6,%xmm0,%xmm0
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ vpshufd $80,%xmm0,%xmm7
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ shrdl $11,%r14d,%r14d
+ vpsrld $10,%xmm7,%xmm6
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %r11d,%edi
+ shrdl $2,%r14d,%r14d
+ addl %r9d,%ebx
+ vpsrlq $2,%xmm7,%xmm7
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ vpshufb %xmm9,%xmm6,%xmm6
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ vpaddd %xmm6,%xmm0,%xmm0
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vpaddd 0(%rbp),%xmm0,%xmm6
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ vmovdqa %xmm6,0(%rsp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ shrdl $9,%r14d,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vpaddd %xmm7,%xmm1,%xmm1
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ xorl %ecx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r9d,%r15d
+ vpslld $14,%xmm4,%xmm5
+ addl %r12d,%edx
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ vpshufd $250,%xmm0,%xmm7
+ shrdl $2,%r14d,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ vpsrld $11,%xmm6,%xmm6
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%edx
+ movl %eax,%r12d
+ shrdl $9,%r14d,%r14d
+ vpslld $11,%xmm5,%xmm5
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ vpsrlq $17,%xmm7,%xmm7
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ vpaddd %xmm4,%xmm1,%xmm1
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %r11d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r13d
+ vpshufb %xmm8,%xmm6,%xmm6
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ vpaddd %xmm6,%xmm1,%xmm1
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ vpshufd $80,%xmm1,%xmm7
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ shrdl $11,%r14d,%r14d
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %edx,%edi
+ shrdl $2,%r14d,%r14d
+ addl %ebx,%r9d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ vpshufb %xmm9,%xmm6,%xmm6
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ vpaddd %xmm6,%xmm1,%xmm1
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpaddd 32(%rbp),%xmm1,%xmm6
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ vmovdqa %xmm6,16(%rsp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ shrdl $9,%r14d,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vpaddd %xmm7,%xmm2,%xmm2
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ xorl %r10d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ebx,%r15d
+ vpslld $14,%xmm4,%xmm5
+ addl %r12d,%r11d
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ vpshufd $250,%xmm1,%xmm7
+ shrdl $2,%r14d,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ vpsrld $11,%xmm6,%xmm6
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ shrdl $9,%r14d,%r14d
+ vpslld $11,%xmm5,%xmm5
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ vpsrlq $17,%xmm7,%xmm7
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ vpaddd %xmm4,%xmm2,%xmm2
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %edx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r13d
+ vpshufb %xmm8,%xmm6,%xmm6
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ vpaddd %xmm6,%xmm2,%xmm2
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ vpshufd $80,%xmm2,%xmm7
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ shrdl $11,%r14d,%r14d
+ vpsrld $10,%xmm7,%xmm6
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %r11d,%edi
+ shrdl $2,%r14d,%r14d
+ addl %r9d,%ebx
+ vpsrlq $2,%xmm7,%xmm7
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ vpshufb %xmm9,%xmm6,%xmm6
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ vpaddd %xmm6,%xmm2,%xmm2
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vpaddd 64(%rbp),%xmm2,%xmm6
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ vmovdqa %xmm6,32(%rsp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ shrdl $9,%r14d,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vpaddd %xmm7,%xmm3,%xmm3
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ xorl %ecx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r9d,%r15d
+ vpslld $14,%xmm4,%xmm5
+ addl %r12d,%edx
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ vpshufd $250,%xmm2,%xmm7
+ shrdl $2,%r14d,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ vpsrld $11,%xmm6,%xmm6
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%edx
+ movl %eax,%r12d
+ shrdl $9,%r14d,%r14d
+ vpslld $11,%xmm5,%xmm5
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ vpsrlq $17,%xmm7,%xmm7
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ vpaddd %xmm4,%xmm3,%xmm3
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %r11d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r13d
+ vpshufb %xmm8,%xmm6,%xmm6
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ vpaddd %xmm6,%xmm3,%xmm3
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ vpshufd $80,%xmm3,%xmm7
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ shrdl $11,%r14d,%r14d
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %edx,%edi
+ shrdl $2,%r14d,%r14d
+ addl %ebx,%r9d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ vpshufb %xmm9,%xmm6,%xmm6
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ vpaddd %xmm6,%xmm3,%xmm3
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpaddd 96(%rbp),%xmm3,%xmm6
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ vmovdqa %xmm6,48(%rsp)
+ cmpb $0,131(%rbp)
+ jne .Lavx_00_47
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ shrdl $2,%r14d,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ shrdl $2,%r14d,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ shrdl $2,%r14d,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ shrdl $2,%r14d,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ shrdl $2,%r14d,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ shrdl $2,%r14d,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ shrdl $2,%r14d,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ shrdl $6,%r13d,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ shrdl $2,%r14d,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ shrdl $6,%r13d,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ shrdl $2,%r14d,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%rdi
+ movl %r14d,%eax
+
+ addl 0(%rdi),%eax
+ leaq 64(%rsi),%rsi
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop_avx
+
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ vzeroupper
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue_avx:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha256_transform_avx)
+
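+/*
+ * Reader's note on the AVX2 routine below: it appears to consume the
+ * input two 64-byte blocks at a time. vinserti128 pairs block n (low
+ * xmm lane) with block n+1 (high lane), so a single set of ymm
+ * sigma0/sigma1 updates computes the message schedule for both blocks
+ * at once, and the scalar rounds switch from shrdl rotates to the
+ * BMI2 rorx/andn forms. The W+K values are retained in a stack area
+ * that grows by 64 bytes per .Lavx2_00_47 pass, so that after the
+ * first block's digest has been folded in, .Lower_avx2 can replay the
+ * 64 rounds for the second block straight from memory (the 16(%rbp)
+ * offsets). When fewer than two blocks remain, cmove points %r12 back
+ * at the stack and the replay pass is skipped.
+ */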
+ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ subq $544,%rsp
+ shlq $4,%rdx
+ andq $-1024,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ addq $448,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
+.Lprologue_avx2:
+
+ vzeroupper
+ subq $-64,%rsi
+ movl 0(%rdi),%eax
+ movq %rsi,%r12
+ movl 4(%rdi),%ebx
+ cmpq %rdx,%rsi
+ movl 8(%rdi),%ecx
+ cmoveq %rsp,%r12
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ vmovdqa K256+512+32(%rip),%ymm8
+ vmovdqa K256+512+64(%rip),%ymm9
+ jmp .Loop_avx2
+.balign 16
+.Loop_avx2:
+ vmovdqa K256+512(%rip),%ymm7
+ vmovdqu -64+0(%rsi),%xmm0
+ vmovdqu -64+16(%rsi),%xmm1
+ vmovdqu -64+32(%rsi),%xmm2
+ vmovdqu -64+48(%rsi),%xmm3
+
+ vinserti128 $1,(%r12),%ymm0,%ymm0
+ vinserti128 $1,16(%r12),%ymm1,%ymm1
+ vpshufb %ymm7,%ymm0,%ymm0
+ vinserti128 $1,32(%r12),%ymm2,%ymm2
+ vpshufb %ymm7,%ymm1,%ymm1
+ vinserti128 $1,48(%r12),%ymm3,%ymm3
+
+ leaq K256(%rip),%rbp
+ vpshufb %ymm7,%ymm2,%ymm2
+ vpaddd 0(%rbp),%ymm0,%ymm4
+ vpshufb %ymm7,%ymm3,%ymm3
+ vpaddd 32(%rbp),%ymm1,%ymm5
+ vpaddd 64(%rbp),%ymm2,%ymm6
+ vpaddd 96(%rbp),%ymm3,%ymm7
+ vmovdqa %ymm4,0(%rsp)
+ xorl %r14d,%r14d
+ vmovdqa %ymm5,32(%rsp)
+
+ movq 88(%rsp),%rdi
+.cfi_def_cfa %rdi,8
+ leaq -64(%rsp),%rsp
+
+
+
+ movq %rdi,-8(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
+ movl %ebx,%edi
+ vmovdqa %ymm6,0(%rsp)
+ xorl %ecx,%edi
+ vmovdqa %ymm7,32(%rsp)
+ movl %r9d,%r12d
+ subq $-32*4,%rbp
+ jmp .Lavx2_00_47
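+
+/*
+ * The leaq -64(%rsp)/pushq 64-8(%rsp) pair at the top of .Lavx2_00_47
+ * lowers %rsp by a net 64 bytes per pass and re-copies the saved
+ * stack-pointer slot downward as it goes, keeping every round's W+K
+ * values live for the later .Lower_avx2 replay while the .cfi_escape
+ * expressions still describe a valid frame; at least, that is what
+ * the unwind annotations suggest.
+ */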
+
+.balign 16
+.Lavx2_00_47:
+ leaq -64(%rsp),%rsp
+.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
+
+ pushq 64-8(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
+ leaq 8(%rsp),%rsp
+.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
+ vpalignr $4,%ymm0,%ymm1,%ymm4
+ addl 0+128(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ vpalignr $4,%ymm2,%ymm3,%ymm7
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ vpsrld $7,%ymm4,%ymm6
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ vpaddd %ymm7,%ymm0,%ymm0
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %ebx,%edi
+ vpshufd $250,%ymm3,%ymm7
+ xorl %r13d,%r14d
+ leal (%r11,%rdi,1),%r11d
+ movl %r8d,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 4+128(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%edx,%edi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ vpslld $11,%ymm5,%ymm5
+ andnl %r9d,%edx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%edx,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%edi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%edi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ vpsrlq $17,%ymm7,%ymm7
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ vpaddd %ymm4,%ymm0,%ymm0
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 8+128(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ vpshufb %ymm8,%ymm6,%ymm6
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ vpaddd %ymm6,%ymm0,%ymm0
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ vpshufd $80,%ymm0,%ymm7
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ vpsrld $10,%ymm7,%ymm6
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r11d,%edi
+ vpsrlq $17,%ymm7,%ymm7
+ xorl %r13d,%r14d
+ leal (%r9,%rdi,1),%r9d
+ movl %ecx,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 12+128(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%ebx,%edi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %edx,%ebx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%ebx,%r14d
+ vpshufb %ymm9,%ymm6,%ymm6
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%edi
+ vpaddd %ymm6,%ymm0,%ymm0
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%edi
+ vpaddd 0(%rbp),%ymm0,%ymm6
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ vmovdqa %ymm6,0(%rsp)
+ vpalignr $4,%ymm1,%ymm2,%ymm4
+ addl 32+128(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ vpalignr $4,%ymm3,%ymm0,%ymm7
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ vpsrld $7,%ymm4,%ymm6
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ vpaddd %ymm7,%ymm1,%ymm1
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r9d,%edi
+ vpshufd $250,%ymm0,%ymm7
+ xorl %r13d,%r14d
+ leal (%rdx,%rdi,1),%edx
+ movl %eax,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 36+128(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%r11d,%edi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ vpslld $11,%ymm5,%ymm5
+ andnl %ebx,%r11d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r11d,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%edi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%edi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ vpsrlq $17,%ymm7,%ymm7
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ vpaddd %ymm4,%ymm1,%ymm1
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 40+128(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ vpshufb %ymm8,%ymm6,%ymm6
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ vpaddd %ymm6,%ymm1,%ymm1
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ vpshufd $80,%ymm1,%ymm7
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ vpsrld $10,%ymm7,%ymm6
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %edx,%edi
+ vpsrlq $17,%ymm7,%ymm7
+ xorl %r13d,%r14d
+ leal (%rbx,%rdi,1),%ebx
+ movl %r10d,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 44+128(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%r9d,%edi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %r11d,%r9d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r9d,%r14d
+ vpshufb %ymm9,%ymm6,%ymm6
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%edi
+ vpaddd %ymm6,%ymm1,%ymm1
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%edi
+ vpaddd 32(%rbp),%ymm1,%ymm6
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ vmovdqa %ymm6,32(%rsp)
+ leaq -64(%rsp),%rsp
+.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
+
+ pushq 64-8(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
+ leaq 8(%rsp),%rsp
+.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
+ vpalignr $4,%ymm2,%ymm3,%ymm4
+ addl 0+128(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ vpalignr $4,%ymm0,%ymm1,%ymm7
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ vpsrld $7,%ymm4,%ymm6
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ vpaddd %ymm7,%ymm2,%ymm2
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %ebx,%edi
+ vpshufd $250,%ymm1,%ymm7
+ xorl %r13d,%r14d
+ leal (%r11,%rdi,1),%r11d
+ movl %r8d,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 4+128(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%edx,%edi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ vpslld $11,%ymm5,%ymm5
+ andnl %r9d,%edx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%edx,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%edi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%edi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ vpsrlq $17,%ymm7,%ymm7
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ vpaddd %ymm4,%ymm2,%ymm2
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 8+128(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ vpshufb %ymm8,%ymm6,%ymm6
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ vpaddd %ymm6,%ymm2,%ymm2
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ vpshufd $80,%ymm2,%ymm7
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ vpsrld $10,%ymm7,%ymm6
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r11d,%edi
+ vpsrlq $17,%ymm7,%ymm7
+ xorl %r13d,%r14d
+ leal (%r9,%rdi,1),%r9d
+ movl %ecx,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 12+128(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%ebx,%edi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %edx,%ebx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%ebx,%r14d
+ vpshufb %ymm9,%ymm6,%ymm6
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%edi
+ vpaddd %ymm6,%ymm2,%ymm2
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%edi
+ vpaddd 64(%rbp),%ymm2,%ymm6
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ vmovdqa %ymm6,0(%rsp)
+ vpalignr $4,%ymm3,%ymm0,%ymm4
+ addl 32+128(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ vpalignr $4,%ymm1,%ymm2,%ymm7
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ vpsrld $7,%ymm4,%ymm6
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ vpaddd %ymm7,%ymm3,%ymm3
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r9d,%edi
+ vpshufd $250,%ymm2,%ymm7
+ xorl %r13d,%r14d
+ leal (%rdx,%rdi,1),%edx
+ movl %eax,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 36+128(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%r11d,%edi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ vpslld $11,%ymm5,%ymm5
+ andnl %ebx,%r11d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r11d,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%edi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%edi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ vpsrlq $17,%ymm7,%ymm7
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ vpaddd %ymm4,%ymm3,%ymm3
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 40+128(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ vpshufb %ymm8,%ymm6,%ymm6
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ vpaddd %ymm6,%ymm3,%ymm3
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ vpshufd $80,%ymm3,%ymm7
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ vpsrld $10,%ymm7,%ymm6
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %edx,%edi
+ vpsrlq $17,%ymm7,%ymm7
+ xorl %r13d,%r14d
+ leal (%rbx,%rdi,1),%ebx
+ movl %r10d,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 44+128(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%r9d,%edi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %r11d,%r9d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r9d,%r14d
+ vpshufb %ymm9,%ymm6,%ymm6
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%edi
+ vpaddd %ymm6,%ymm3,%ymm3
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%edi
+ vpaddd 96(%rbp),%ymm3,%ymm6
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ vmovdqa %ymm6,32(%rsp)
+ leaq 128(%rbp),%rbp
+ cmpb $0,3(%rbp)
+ jne .Lavx2_00_47
+ addl 0+64(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %ebx,%edi
+ xorl %r13d,%r14d
+ leal (%r11,%rdi,1),%r11d
+ movl %r8d,%r12d
+ addl 4+64(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%edi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%edi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%edi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8+64(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r11d,%edi
+ xorl %r13d,%r14d
+ leal (%r9,%rdi,1),%r9d
+ movl %ecx,%r12d
+ addl 12+64(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%edi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%edi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%edi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32+64(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r9d,%edi
+ xorl %r13d,%r14d
+ leal (%rdx,%rdi,1),%edx
+ movl %eax,%r12d
+ addl 36+64(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%edi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%edi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%edi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40+64(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %edx,%edi
+ xorl %r13d,%r14d
+ leal (%rbx,%rdi,1),%ebx
+ movl %r10d,%r12d
+ addl 44+64(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%edi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%edi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%edi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ addl 0(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %ebx,%edi
+ xorl %r13d,%r14d
+ leal (%r11,%rdi,1),%r11d
+ movl %r8d,%r12d
+ addl 4(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%edi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%edi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%edi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r11d,%edi
+ xorl %r13d,%r14d
+ leal (%r9,%rdi,1),%r9d
+ movl %ecx,%r12d
+ addl 12(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%edi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%edi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%edi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r9d,%edi
+ xorl %r13d,%r14d
+ leal (%rdx,%rdi,1),%edx
+ movl %eax,%r12d
+ addl 36(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%edi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%edi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%edi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %edx,%edi
+ xorl %r13d,%r14d
+ leal (%rbx,%rdi,1),%ebx
+ movl %r10d,%r12d
+ addl 44(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%edi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%edi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%edi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ movq 512(%rsp),%rdi
+ addl %r14d,%eax
+
+ leaq 448(%rsp),%rbp
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+
+ cmpq 80(%rbp),%rsi
+ je .Ldone_avx2
+
+ xorl %r14d,%r14d
+ movl %ebx,%edi
+ xorl %ecx,%edi
+ movl %r9d,%r12d
+ jmp .Lower_avx2
+.balign 16
+.Lower_avx2:
+ addl 0+16(%rbp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %ebx,%edi
+ xorl %r13d,%r14d
+ leal (%r11,%rdi,1),%r11d
+ movl %r8d,%r12d
+ addl 4+16(%rbp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%edi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%edi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%edi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8+16(%rbp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r11d,%edi
+ xorl %r13d,%r14d
+ leal (%r9,%rdi,1),%r9d
+ movl %ecx,%r12d
+ addl 12+16(%rbp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%edi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%edi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%edi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32+16(%rbp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %r9d,%edi
+ xorl %r13d,%r14d
+ leal (%rdx,%rdi,1),%edx
+ movl %eax,%r12d
+ addl 36+16(%rbp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%edi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%edi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%edi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40+16(%rbp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%edi
+ xorl %r12d,%r14d
+ xorl %edx,%edi
+ xorl %r13d,%r14d
+ leal (%rbx,%rdi,1),%ebx
+ movl %r10d,%r12d
+ addl 44+16(%rbp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%edi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %edi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%edi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%edi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %edi,%r15d
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ leaq -64(%rbp),%rbp
+ cmpq %rsp,%rbp
+ jae .Lower_avx2
+
+ movq 512(%rsp),%rdi
+ addl %r14d,%eax
+
+ leaq 448(%rsp),%rsp
+
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ leaq 128(%rsi),%rsi
+ addl 24(%rdi),%r10d
+ movq %rsi,%r12
+ addl 28(%rdi),%r11d
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ cmoveq %rsp,%r12
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+
+ jbe .Loop_avx2
+ leaq (%rsp),%rbp
+
+
+.cfi_escape 0x0f,0x06,0x76,0xd8,0x00,0x06,0x23,0x08
+
+.Ldone_avx2:
+ movq 88(%rbp),%rsi
+.cfi_def_cfa %rsi,8
+ vzeroupper
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue_avx2:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha256_transform_avx2)
+
+#if defined(__ELF__)
+ .section .note.GNU-stack,"",%progbits
+#endif
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
deleted file mode 100644
index 28b048d2db24..000000000000
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
+++ /dev/null
@@ -1,2089 +0,0 @@
-/*
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project. Rights for redistribution and usage in source and binary
- * forms are granted according to the OpenSSL license.
- * ====================================================================
- *
- * sha256/512_block procedure for x86_64.
- *
- * 40% improvement over compiler-generated code on Opteron. On EM64T
- * sha256 was observed to run >80% faster and sha512 >40% faster. No
- * magical tricks, just a straight implementation... I really wonder
- * why gcc [even armed with inline assembler] fails to generate code
- * this fast. The only notable thing about this module is that the
- * very same instruction sequence is used for both SHA-256 and
- * SHA-512. In the former case the instructions operate on 32-bit
- * operands, in the latter on 64-bit ones. All I had to do was get one
- * flavor right; the other one passed the test right away :-)
- *
- * sha256_block runs in ~1005 cycles on Opteron, which gives you
- * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
- * frequency in GHz. sha512_block runs in ~1275 cycles, which results
- * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
- * Well, compared to the IA-64 implementation, which keeps X[16] in
- * the register bank[!], sustains close to 4 instructions per CPU
- * clock cycle and runs in 1003 cycles, 1275 is a very good result for
- * the 3-way issue Opteron pipeline with X[16] maintained in memory.
- * So *if* there is a way to improve it, *then* the only way would be
- * to offload the X[16] updates to the SSE unit, but that would
- * require a "deeper" loop unroll, which in turn would naturally cause
- * size blow-up, not to mention increased complexity! And that holds
- * only *if* it is actually possible to noticeably improve the overall
- * instruction-level parallelism (ILP) on a given CPU implementation.
- *
- * A special note on Intel EM64T. While the Opteron CPU exhibits a
- * perfect performance ratio of 1.5 between the 64- and 32-bit flavors
- * [see above], [currently available] EM64T CPUs are apparently far
- * from it. On the contrary, the 64-bit version, sha512_block, is ~30%
- * *slower* than the 32-bit sha256_block :-( This is presumably
- * because the 64-bit shifts/rotates are apparently not hardwired
- * instructions, but are implemented in microcode.
- */
-
-/*
- * OpenSolaris OS modifications
- *
- * Sun elects to use this software under the BSD license.
- *
- * This source originates from OpenSSL file sha512-x86_64.pl at
- * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- * (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- * 1. Added perl "use strict" and declared variables.
- *
- * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- * assemblers). Replaced the .picmeup macro with assembler code.
- *
- * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
- * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
- */
-
-/*
- * This file was generated by a perl script (sha512-x86_64.pl) that was
- * used to generate the sha256 and sha512 variants from the same code
- * base. The comments from the original file have been pasted above.
- */
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sha2/sha2.h>
-
-/* ARGSUSED */
-void
-SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
-{
-}
-
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-
-ENTRY_NP(SHA256TransformBlocks)
-.cfi_startproc
- movq %rsp, %rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_offset %rbx,-16
- push %rbp
-.cfi_offset %rbp,-24
- push %r12
-.cfi_offset %r12,-32
- push %r13
-.cfi_offset %r13,-40
- push %r14
-.cfi_offset %r14,-48
- push %r15
-.cfi_offset %r15,-56
- mov %rsp,%rbp # copy %rsp
- shl $4,%rdx # num*16
- sub $16*4+4*8,%rsp
- lea (%rsi,%rdx,4),%rdx # inp+num*16*4
- and $-64,%rsp # align stack frame
- add $8,%rdi # Skip OpenSolaris field, "algotype"
- mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg
- mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg
- mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg
- mov %rbp,16*4+3*8(%rsp) # save copy of %rsp
-# echo ".cfi_cfa_expression %rsp+88,deref,+56" |
-# openssl/crypto/perlasm/x86_64-xlate.pl
-.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38
-
- #.picmeup %rbp
- # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
- # the address of the "next" instruction into the target register
- # (%rbp). This generates these 2 instructions:
- lea .Llea(%rip),%rbp
- #nop # .picmeup generates a nop for mod 8 alignment--not needed here
-
-.Llea:
- lea K256-.(%rbp),%rbp
-
- mov 4*0(%rdi),%eax
- mov 4*1(%rdi),%ebx
- mov 4*2(%rdi),%ecx
- mov 4*3(%rdi),%edx
- mov 4*4(%rdi),%r8d
- mov 4*5(%rdi),%r9d
- mov 4*6(%rdi),%r10d
- mov 4*7(%rdi),%r11d
- jmp .Lloop
-
-.align 16
-.Lloop:
- xor %rdi,%rdi
- mov 4*0(%rsi),%r12d
- bswap %r12d
- mov %r8d,%r13d
- mov %r8d,%r14d
- mov %r9d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r10d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r8d,%r15d # (f^g)&e
- mov %r12d,0(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r11d,%r12d # T1+=h
-
- mov %eax,%r11d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %eax,%r13d
- mov %eax,%r14d
-
- ror $2,%r11d
- ror $13,%r13d
- mov %eax,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r11d
- ror $9,%r13d
- or %ecx,%r14d # a|c
-
- xor %r13d,%r11d # h=Sigma0(a)
- and %ecx,%r15d # a&c
- add %r12d,%edx # d+=T1
-
- and %ebx,%r14d # (a|c)&b
- add %r12d,%r11d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r11d # h+=Maj(a,b,c)
- mov 4*1(%rsi),%r12d
- bswap %r12d
- mov %edx,%r13d
- mov %edx,%r14d
- mov %r8d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r9d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %edx,%r15d # (f^g)&e
- mov %r12d,4(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r10d,%r12d # T1+=h
-
- mov %r11d,%r10d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r11d,%r13d
- mov %r11d,%r14d
-
- ror $2,%r10d
- ror $13,%r13d
- mov %r11d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r10d
- ror $9,%r13d
- or %ebx,%r14d # a|c
-
- xor %r13d,%r10d # h=Sigma0(a)
- and %ebx,%r15d # a&c
- add %r12d,%ecx # d+=T1
-
- and %eax,%r14d # (a|c)&b
- add %r12d,%r10d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r10d # h+=Maj(a,b,c)
- mov 4*2(%rsi),%r12d
- bswap %r12d
- mov %ecx,%r13d
- mov %ecx,%r14d
- mov %edx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r8d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ecx,%r15d # (f^g)&e
- mov %r12d,8(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r9d,%r12d # T1+=h
-
- mov %r10d,%r9d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r10d,%r13d
- mov %r10d,%r14d
-
- ror $2,%r9d
- ror $13,%r13d
- mov %r10d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r9d
- ror $9,%r13d
- or %eax,%r14d # a|c
-
- xor %r13d,%r9d # h=Sigma0(a)
- and %eax,%r15d # a&c
- add %r12d,%ebx # d+=T1
-
- and %r11d,%r14d # (a|c)&b
- add %r12d,%r9d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r9d # h+=Maj(a,b,c)
- mov 4*3(%rsi),%r12d
- bswap %r12d
- mov %ebx,%r13d
- mov %ebx,%r14d
- mov %ecx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %edx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ebx,%r15d # (f^g)&e
- mov %r12d,12(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r8d,%r12d # T1+=h
-
- mov %r9d,%r8d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r9d,%r13d
- mov %r9d,%r14d
-
- ror $2,%r8d
- ror $13,%r13d
- mov %r9d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r8d
- ror $9,%r13d
- or %r11d,%r14d # a|c
-
- xor %r13d,%r8d # h=Sigma0(a)
- and %r11d,%r15d # a&c
- add %r12d,%eax # d+=T1
-
- and %r10d,%r14d # (a|c)&b
- add %r12d,%r8d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r8d # h+=Maj(a,b,c)
- mov 4*4(%rsi),%r12d
- bswap %r12d
- mov %eax,%r13d
- mov %eax,%r14d
- mov %ebx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ecx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %eax,%r15d # (f^g)&e
- mov %r12d,16(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %edx,%r12d # T1+=h
-
- mov %r8d,%edx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r8d,%r13d
- mov %r8d,%r14d
-
- ror $2,%edx
- ror $13,%r13d
- mov %r8d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%edx
- ror $9,%r13d
- or %r10d,%r14d # a|c
-
- xor %r13d,%edx # h=Sigma0(a)
- and %r10d,%r15d # a&c
- add %r12d,%r11d # d+=T1
-
- and %r9d,%r14d # (a|c)&b
- add %r12d,%edx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%edx # h+=Maj(a,b,c)
- mov 4*5(%rsi),%r12d
- bswap %r12d
- mov %r11d,%r13d
- mov %r11d,%r14d
- mov %eax,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ebx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r11d,%r15d # (f^g)&e
- mov %r12d,20(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ecx,%r12d # T1+=h
-
- mov %edx,%ecx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %edx,%r13d
- mov %edx,%r14d
-
- ror $2,%ecx
- ror $13,%r13d
- mov %edx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ecx
- ror $9,%r13d
- or %r9d,%r14d # a|c
-
- xor %r13d,%ecx # h=Sigma0(a)
- and %r9d,%r15d # a&c
- add %r12d,%r10d # d+=T1
-
- and %r8d,%r14d # (a|c)&b
- add %r12d,%ecx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ecx # h+=Maj(a,b,c)
- mov 4*6(%rsi),%r12d
- bswap %r12d
- mov %r10d,%r13d
- mov %r10d,%r14d
- mov %r11d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %eax,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r10d,%r15d # (f^g)&e
- mov %r12d,24(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ebx,%r12d # T1+=h
-
- mov %ecx,%ebx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ecx,%r13d
- mov %ecx,%r14d
-
- ror $2,%ebx
- ror $13,%r13d
- mov %ecx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ebx
- ror $9,%r13d
- or %r8d,%r14d # a|c
-
- xor %r13d,%ebx # h=Sigma0(a)
- and %r8d,%r15d # a&c
- add %r12d,%r9d # d+=T1
-
- and %edx,%r14d # (a|c)&b
- add %r12d,%ebx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ebx # h+=Maj(a,b,c)
- mov 4*7(%rsi),%r12d
- bswap %r12d
- mov %r9d,%r13d
- mov %r9d,%r14d
- mov %r10d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r11d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r9d,%r15d # (f^g)&e
- mov %r12d,28(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %eax,%r12d # T1+=h
-
- mov %ebx,%eax
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ebx,%r13d
- mov %ebx,%r14d
-
- ror $2,%eax
- ror $13,%r13d
- mov %ebx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%eax
- ror $9,%r13d
- or %edx,%r14d # a|c
-
- xor %r13d,%eax # h=Sigma0(a)
- and %edx,%r15d # a&c
- add %r12d,%r8d # d+=T1
-
- and %ecx,%r14d # (a|c)&b
- add %r12d,%eax # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%eax # h+=Maj(a,b,c)
- mov 4*8(%rsi),%r12d
- bswap %r12d
- mov %r8d,%r13d
- mov %r8d,%r14d
- mov %r9d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r10d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r8d,%r15d # (f^g)&e
- mov %r12d,32(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r11d,%r12d # T1+=h
-
- mov %eax,%r11d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %eax,%r13d
- mov %eax,%r14d
-
- ror $2,%r11d
- ror $13,%r13d
- mov %eax,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r11d
- ror $9,%r13d
- or %ecx,%r14d # a|c
-
- xor %r13d,%r11d # h=Sigma0(a)
- and %ecx,%r15d # a&c
- add %r12d,%edx # d+=T1
-
- and %ebx,%r14d # (a|c)&b
- add %r12d,%r11d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r11d # h+=Maj(a,b,c)
- mov 4*9(%rsi),%r12d
- bswap %r12d
- mov %edx,%r13d
- mov %edx,%r14d
- mov %r8d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r9d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %edx,%r15d # (f^g)&e
- mov %r12d,36(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r10d,%r12d # T1+=h
-
- mov %r11d,%r10d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r11d,%r13d
- mov %r11d,%r14d
-
- ror $2,%r10d
- ror $13,%r13d
- mov %r11d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r10d
- ror $9,%r13d
- or %ebx,%r14d # a|c
-
- xor %r13d,%r10d # h=Sigma0(a)
- and %ebx,%r15d # a&c
- add %r12d,%ecx # d+=T1
-
- and %eax,%r14d # (a|c)&b
- add %r12d,%r10d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r10d # h+=Maj(a,b,c)
- mov 4*10(%rsi),%r12d
- bswap %r12d
- mov %ecx,%r13d
- mov %ecx,%r14d
- mov %edx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r8d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ecx,%r15d # (f^g)&e
- mov %r12d,40(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r9d,%r12d # T1+=h
-
- mov %r10d,%r9d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r10d,%r13d
- mov %r10d,%r14d
-
- ror $2,%r9d
- ror $13,%r13d
- mov %r10d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r9d
- ror $9,%r13d
- or %eax,%r14d # a|c
-
- xor %r13d,%r9d # h=Sigma0(a)
- and %eax,%r15d # a&c
- add %r12d,%ebx # d+=T1
-
- and %r11d,%r14d # (a|c)&b
- add %r12d,%r9d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r9d # h+=Maj(a,b,c)
- mov 4*11(%rsi),%r12d
- bswap %r12d
- mov %ebx,%r13d
- mov %ebx,%r14d
- mov %ecx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %edx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ebx,%r15d # (f^g)&e
- mov %r12d,44(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r8d,%r12d # T1+=h
-
- mov %r9d,%r8d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r9d,%r13d
- mov %r9d,%r14d
-
- ror $2,%r8d
- ror $13,%r13d
- mov %r9d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r8d
- ror $9,%r13d
- or %r11d,%r14d # a|c
-
- xor %r13d,%r8d # h=Sigma0(a)
- and %r11d,%r15d # a&c
- add %r12d,%eax # d+=T1
-
- and %r10d,%r14d # (a|c)&b
- add %r12d,%r8d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r8d # h+=Maj(a,b,c)
- mov 4*12(%rsi),%r12d
- bswap %r12d
- mov %eax,%r13d
- mov %eax,%r14d
- mov %ebx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ecx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %eax,%r15d # (f^g)&e
- mov %r12d,48(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %edx,%r12d # T1+=h
-
- mov %r8d,%edx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r8d,%r13d
- mov %r8d,%r14d
-
- ror $2,%edx
- ror $13,%r13d
- mov %r8d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%edx
- ror $9,%r13d
- or %r10d,%r14d # a|c
-
- xor %r13d,%edx # h=Sigma0(a)
- and %r10d,%r15d # a&c
- add %r12d,%r11d # d+=T1
-
- and %r9d,%r14d # (a|c)&b
- add %r12d,%edx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%edx # h+=Maj(a,b,c)
- mov 4*13(%rsi),%r12d
- bswap %r12d
- mov %r11d,%r13d
- mov %r11d,%r14d
- mov %eax,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ebx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r11d,%r15d # (f^g)&e
- mov %r12d,52(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ecx,%r12d # T1+=h
-
- mov %edx,%ecx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %edx,%r13d
- mov %edx,%r14d
-
- ror $2,%ecx
- ror $13,%r13d
- mov %edx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ecx
- ror $9,%r13d
- or %r9d,%r14d # a|c
-
- xor %r13d,%ecx # h=Sigma0(a)
- and %r9d,%r15d # a&c
- add %r12d,%r10d # d+=T1
-
- and %r8d,%r14d # (a|c)&b
- add %r12d,%ecx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ecx # h+=Maj(a,b,c)
- mov 4*14(%rsi),%r12d
- bswap %r12d
- mov %r10d,%r13d
- mov %r10d,%r14d
- mov %r11d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %eax,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r10d,%r15d # (f^g)&e
- mov %r12d,56(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ebx,%r12d # T1+=h
-
- mov %ecx,%ebx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ecx,%r13d
- mov %ecx,%r14d
-
- ror $2,%ebx
- ror $13,%r13d
- mov %ecx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ebx
- ror $9,%r13d
- or %r8d,%r14d # a|c
-
- xor %r13d,%ebx # h=Sigma0(a)
- and %r8d,%r15d # a&c
- add %r12d,%r9d # d+=T1
-
- and %edx,%r14d # (a|c)&b
- add %r12d,%ebx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ebx # h+=Maj(a,b,c)
- mov 4*15(%rsi),%r12d
- bswap %r12d
- mov %r9d,%r13d
- mov %r9d,%r14d
- mov %r10d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r11d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r9d,%r15d # (f^g)&e
- mov %r12d,60(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %eax,%r12d # T1+=h
-
- mov %ebx,%eax
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ebx,%r13d
- mov %ebx,%r14d
-
- ror $2,%eax
- ror $13,%r13d
- mov %ebx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%eax
- ror $9,%r13d
- or %edx,%r14d # a|c
-
- xor %r13d,%eax # h=Sigma0(a)
- and %edx,%r15d # a&c
- add %r12d,%r8d # d+=T1
-
- and %ecx,%r14d # (a|c)&b
- add %r12d,%eax # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%eax # h+=Maj(a,b,c)
- jmp .Lrounds_16_xx
-.align 16
-.Lrounds_16_xx:
- mov 4(%rsp),%r13d
- mov 56(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 36(%rsp),%r12d
-
- add 0(%rsp),%r12d
- mov %r8d,%r13d
- mov %r8d,%r14d
- mov %r9d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r10d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r8d,%r15d # (f^g)&e
- mov %r12d,0(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r11d,%r12d # T1+=h
-
- mov %eax,%r11d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %eax,%r13d
- mov %eax,%r14d
-
- ror $2,%r11d
- ror $13,%r13d
- mov %eax,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r11d
- ror $9,%r13d
- or %ecx,%r14d # a|c
-
- xor %r13d,%r11d # h=Sigma0(a)
- and %ecx,%r15d # a&c
- add %r12d,%edx # d+=T1
-
- and %ebx,%r14d # (a|c)&b
- add %r12d,%r11d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r11d # h+=Maj(a,b,c)
- mov 8(%rsp),%r13d
- mov 60(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 40(%rsp),%r12d
-
- add 4(%rsp),%r12d
- mov %edx,%r13d
- mov %edx,%r14d
- mov %r8d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r9d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %edx,%r15d # (f^g)&e
- mov %r12d,4(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r10d,%r12d # T1+=h
-
- mov %r11d,%r10d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r11d,%r13d
- mov %r11d,%r14d
-
- ror $2,%r10d
- ror $13,%r13d
- mov %r11d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r10d
- ror $9,%r13d
- or %ebx,%r14d # a|c
-
- xor %r13d,%r10d # h=Sigma0(a)
- and %ebx,%r15d # a&c
- add %r12d,%ecx # d+=T1
-
- and %eax,%r14d # (a|c)&b
- add %r12d,%r10d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r10d # h+=Maj(a,b,c)
- mov 12(%rsp),%r13d
- mov 0(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 44(%rsp),%r12d
-
- add 8(%rsp),%r12d
- mov %ecx,%r13d
- mov %ecx,%r14d
- mov %edx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r8d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ecx,%r15d # (f^g)&e
- mov %r12d,8(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r9d,%r12d # T1+=h
-
- mov %r10d,%r9d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r10d,%r13d
- mov %r10d,%r14d
-
- ror $2,%r9d
- ror $13,%r13d
- mov %r10d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r9d
- ror $9,%r13d
- or %eax,%r14d # a|c
-
- xor %r13d,%r9d # h=Sigma0(a)
- and %eax,%r15d # a&c
- add %r12d,%ebx # d+=T1
-
- and %r11d,%r14d # (a|c)&b
- add %r12d,%r9d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r9d # h+=Maj(a,b,c)
- mov 16(%rsp),%r13d
- mov 4(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 48(%rsp),%r12d
-
- add 12(%rsp),%r12d
- mov %ebx,%r13d
- mov %ebx,%r14d
- mov %ecx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %edx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ebx,%r15d # (f^g)&e
- mov %r12d,12(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r8d,%r12d # T1+=h
-
- mov %r9d,%r8d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r9d,%r13d
- mov %r9d,%r14d
-
- ror $2,%r8d
- ror $13,%r13d
- mov %r9d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r8d
- ror $9,%r13d
- or %r11d,%r14d # a|c
-
- xor %r13d,%r8d # h=Sigma0(a)
- and %r11d,%r15d # a&c
- add %r12d,%eax # d+=T1
-
- and %r10d,%r14d # (a|c)&b
- add %r12d,%r8d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r8d # h+=Maj(a,b,c)
- mov 20(%rsp),%r13d
- mov 8(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 52(%rsp),%r12d
-
- add 16(%rsp),%r12d
- mov %eax,%r13d
- mov %eax,%r14d
- mov %ebx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ecx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %eax,%r15d # (f^g)&e
- mov %r12d,16(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %edx,%r12d # T1+=h
-
- mov %r8d,%edx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r8d,%r13d
- mov %r8d,%r14d
-
- ror $2,%edx
- ror $13,%r13d
- mov %r8d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%edx
- ror $9,%r13d
- or %r10d,%r14d # a|c
-
- xor %r13d,%edx # h=Sigma0(a)
- and %r10d,%r15d # a&c
- add %r12d,%r11d # d+=T1
-
- and %r9d,%r14d # (a|c)&b
- add %r12d,%edx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%edx # h+=Maj(a,b,c)
- mov 24(%rsp),%r13d
- mov 12(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 56(%rsp),%r12d
-
- add 20(%rsp),%r12d
- mov %r11d,%r13d
- mov %r11d,%r14d
- mov %eax,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ebx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r11d,%r15d # (f^g)&e
- mov %r12d,20(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ecx,%r12d # T1+=h
-
- mov %edx,%ecx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %edx,%r13d
- mov %edx,%r14d
-
- ror $2,%ecx
- ror $13,%r13d
- mov %edx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ecx
- ror $9,%r13d
- or %r9d,%r14d # a|c
-
- xor %r13d,%ecx # h=Sigma0(a)
- and %r9d,%r15d # a&c
- add %r12d,%r10d # d+=T1
-
- and %r8d,%r14d # (a|c)&b
- add %r12d,%ecx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ecx # h+=Maj(a,b,c)
- mov 28(%rsp),%r13d
- mov 16(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 60(%rsp),%r12d
-
- add 24(%rsp),%r12d
- mov %r10d,%r13d
- mov %r10d,%r14d
- mov %r11d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %eax,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r10d,%r15d # (f^g)&e
- mov %r12d,24(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ebx,%r12d # T1+=h
-
- mov %ecx,%ebx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ecx,%r13d
- mov %ecx,%r14d
-
- ror $2,%ebx
- ror $13,%r13d
- mov %ecx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ebx
- ror $9,%r13d
- or %r8d,%r14d # a|c
-
- xor %r13d,%ebx # h=Sigma0(a)
- and %r8d,%r15d # a&c
- add %r12d,%r9d # d+=T1
-
- and %edx,%r14d # (a|c)&b
- add %r12d,%ebx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ebx # h+=Maj(a,b,c)
- mov 32(%rsp),%r13d
- mov 20(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 0(%rsp),%r12d
-
- add 28(%rsp),%r12d
- mov %r9d,%r13d
- mov %r9d,%r14d
- mov %r10d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r11d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r9d,%r15d # (f^g)&e
- mov %r12d,28(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %eax,%r12d # T1+=h
-
- mov %ebx,%eax
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ebx,%r13d
- mov %ebx,%r14d
-
- ror $2,%eax
- ror $13,%r13d
- mov %ebx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%eax
- ror $9,%r13d
- or %edx,%r14d # a|c
-
- xor %r13d,%eax # h=Sigma0(a)
- and %edx,%r15d # a&c
- add %r12d,%r8d # d+=T1
-
- and %ecx,%r14d # (a|c)&b
- add %r12d,%eax # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%eax # h+=Maj(a,b,c)
- mov 36(%rsp),%r13d
- mov 24(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 4(%rsp),%r12d
-
- add 32(%rsp),%r12d
- mov %r8d,%r13d
- mov %r8d,%r14d
- mov %r9d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r10d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r8d,%r15d # (f^g)&e
- mov %r12d,32(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r11d,%r12d # T1+=h
-
- mov %eax,%r11d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %eax,%r13d
- mov %eax,%r14d
-
- ror $2,%r11d
- ror $13,%r13d
- mov %eax,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r11d
- ror $9,%r13d
- or %ecx,%r14d # a|c
-
- xor %r13d,%r11d # h=Sigma0(a)
- and %ecx,%r15d # a&c
- add %r12d,%edx # d+=T1
-
- and %ebx,%r14d # (a|c)&b
- add %r12d,%r11d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r11d # h+=Maj(a,b,c)
- mov 40(%rsp),%r13d
- mov 28(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 8(%rsp),%r12d
-
- add 36(%rsp),%r12d
- mov %edx,%r13d
- mov %edx,%r14d
- mov %r8d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r9d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %edx,%r15d # (f^g)&e
- mov %r12d,36(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r10d,%r12d # T1+=h
-
- mov %r11d,%r10d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r11d,%r13d
- mov %r11d,%r14d
-
- ror $2,%r10d
- ror $13,%r13d
- mov %r11d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r10d
- ror $9,%r13d
- or %ebx,%r14d # a|c
-
- xor %r13d,%r10d # h=Sigma0(a)
- and %ebx,%r15d # a&c
- add %r12d,%ecx # d+=T1
-
- and %eax,%r14d # (a|c)&b
- add %r12d,%r10d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r10d # h+=Maj(a,b,c)
- mov 44(%rsp),%r13d
- mov 32(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 12(%rsp),%r12d
-
- add 40(%rsp),%r12d
- mov %ecx,%r13d
- mov %ecx,%r14d
- mov %edx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r8d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ecx,%r15d # (f^g)&e
- mov %r12d,40(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r9d,%r12d # T1+=h
-
- mov %r10d,%r9d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r10d,%r13d
- mov %r10d,%r14d
-
- ror $2,%r9d
- ror $13,%r13d
- mov %r10d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r9d
- ror $9,%r13d
- or %eax,%r14d # a|c
-
- xor %r13d,%r9d # h=Sigma0(a)
- and %eax,%r15d # a&c
- add %r12d,%ebx # d+=T1
-
- and %r11d,%r14d # (a|c)&b
- add %r12d,%r9d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r9d # h+=Maj(a,b,c)
- mov 48(%rsp),%r13d
- mov 36(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 16(%rsp),%r12d
-
- add 44(%rsp),%r12d
- mov %ebx,%r13d
- mov %ebx,%r14d
- mov %ecx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %edx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %ebx,%r15d # (f^g)&e
- mov %r12d,44(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %r8d,%r12d # T1+=h
-
- mov %r9d,%r8d
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r9d,%r13d
- mov %r9d,%r14d
-
- ror $2,%r8d
- ror $13,%r13d
- mov %r9d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%r8d
- ror $9,%r13d
- or %r11d,%r14d # a|c
-
- xor %r13d,%r8d # h=Sigma0(a)
- and %r11d,%r15d # a&c
- add %r12d,%eax # d+=T1
-
- and %r10d,%r14d # (a|c)&b
- add %r12d,%r8d # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%r8d # h+=Maj(a,b,c)
- mov 52(%rsp),%r13d
- mov 40(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 20(%rsp),%r12d
-
- add 48(%rsp),%r12d
- mov %eax,%r13d
- mov %eax,%r14d
- mov %ebx,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ecx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %eax,%r15d # (f^g)&e
- mov %r12d,48(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %edx,%r12d # T1+=h
-
- mov %r8d,%edx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %r8d,%r13d
- mov %r8d,%r14d
-
- ror $2,%edx
- ror $13,%r13d
- mov %r8d,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%edx
- ror $9,%r13d
- or %r10d,%r14d # a|c
-
- xor %r13d,%edx # h=Sigma0(a)
- and %r10d,%r15d # a&c
- add %r12d,%r11d # d+=T1
-
- and %r9d,%r14d # (a|c)&b
- add %r12d,%edx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%edx # h+=Maj(a,b,c)
- mov 56(%rsp),%r13d
- mov 44(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 24(%rsp),%r12d
-
- add 52(%rsp),%r12d
- mov %r11d,%r13d
- mov %r11d,%r14d
- mov %eax,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %ebx,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r11d,%r15d # (f^g)&e
- mov %r12d,52(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ecx,%r12d # T1+=h
-
- mov %edx,%ecx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %edx,%r13d
- mov %edx,%r14d
-
- ror $2,%ecx
- ror $13,%r13d
- mov %edx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ecx
- ror $9,%r13d
- or %r9d,%r14d # a|c
-
- xor %r13d,%ecx # h=Sigma0(a)
- and %r9d,%r15d # a&c
- add %r12d,%r10d # d+=T1
-
- and %r8d,%r14d # (a|c)&b
- add %r12d,%ecx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ecx # h+=Maj(a,b,c)
- mov 60(%rsp),%r13d
- mov 48(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 28(%rsp),%r12d
-
- add 56(%rsp),%r12d
- mov %r10d,%r13d
- mov %r10d,%r14d
- mov %r11d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %eax,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r10d,%r15d # (f^g)&e
- mov %r12d,56(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %ebx,%r12d # T1+=h
-
- mov %ecx,%ebx
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ecx,%r13d
- mov %ecx,%r14d
-
- ror $2,%ebx
- ror $13,%r13d
- mov %ecx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%ebx
- ror $9,%r13d
- or %r8d,%r14d # a|c
-
- xor %r13d,%ebx # h=Sigma0(a)
- and %r8d,%r15d # a&c
- add %r12d,%r9d # d+=T1
-
- and %edx,%r14d # (a|c)&b
- add %r12d,%ebx # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%ebx # h+=Maj(a,b,c)
- mov 0(%rsp),%r13d
- mov 52(%rsp),%r12d
-
- mov %r13d,%r15d
-
- shr $3,%r13d
- ror $7,%r15d
-
- xor %r15d,%r13d
- ror $11,%r15d
-
- xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
- mov %r12d,%r14d
-
- shr $10,%r12d
- ror $17,%r14d
-
- xor %r14d,%r12d
- ror $2,%r14d
-
- xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
-
- add %r13d,%r12d
-
- add 32(%rsp),%r12d
-
- add 60(%rsp),%r12d
- mov %r9d,%r13d
- mov %r9d,%r14d
- mov %r10d,%r15d
-
- ror $6,%r13d
- ror $11,%r14d
- xor %r11d,%r15d # f^g
-
- xor %r14d,%r13d
- ror $14,%r14d
- and %r9d,%r15d # (f^g)&e
- mov %r12d,60(%rsp)
-
- xor %r14d,%r13d # Sigma1(e)
- xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
- add %eax,%r12d # T1+=h
-
- mov %ebx,%eax
- add %r13d,%r12d # T1+=Sigma1(e)
-
- add %r15d,%r12d # T1+=Ch(e,f,g)
- mov %ebx,%r13d
- mov %ebx,%r14d
-
- ror $2,%eax
- ror $13,%r13d
- mov %ebx,%r15d
- add (%rbp,%rdi,4),%r12d # T1+=K[round]
-
- xor %r13d,%eax
- ror $9,%r13d
- or %edx,%r14d # a|c
-
- xor %r13d,%eax # h=Sigma0(a)
- and %edx,%r15d # a&c
- add %r12d,%r8d # d+=T1
-
- and %ecx,%r14d # (a|c)&b
- add %r12d,%eax # h+=T1
-
- or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14d,%eax # h+=Maj(a,b,c)
- cmp $64,%rdi
- jb .Lrounds_16_xx
-
- mov 16*4+0*8(%rsp),%rdi
- lea 16*4(%rsi),%rsi
-
- add 4*0(%rdi),%eax
- add 4*1(%rdi),%ebx
- add 4*2(%rdi),%ecx
- add 4*3(%rdi),%edx
- add 4*4(%rdi),%r8d
- add 4*5(%rdi),%r9d
- add 4*6(%rdi),%r10d
- add 4*7(%rdi),%r11d
-
- cmp 16*4+2*8(%rsp),%rsi
-
- mov %eax,4*0(%rdi)
- mov %ebx,4*1(%rdi)
- mov %ecx,4*2(%rdi)
- mov %edx,4*3(%rdi)
- mov %r8d,4*4(%rdi)
- mov %r9d,4*5(%rdi)
- mov %r10d,4*6(%rdi)
- mov %r11d,4*7(%rdi)
- jb .Lloop
-
- mov 16*4+3*8(%rsp),%rsp
-.cfi_def_cfa %rsp,56
- pop %r15
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r15
- pop %r14
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r14
- pop %r13
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r13
- pop %r12
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r12
- pop %rbp
-.cfi_adjust_cfa_offset -8
-.cfi_restore %rbp
- pop %rbx
-.cfi_adjust_cfa_offset -8
-.cfi_restore %rbx
-
- ret
-.cfi_endproc
-SET_SIZE(SHA256TransformBlocks)
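
For orientation, here is a minimal C model of the round logic the deleted
assembly unrolls, transcribed from its own inline comments (Ch, Maj,
Sigma1(e), "T1+=K[round]", "d+=T1", "h+=Maj(a,b,c)") and from the rotate
counts in the code (6/11/25 and 2/13/22 for the big sigmas; 7/18 with >>3
and 17/19 with >>10 for the schedule). It is an illustrative sketch with
hypothetical helper names, not part of the patch; after all 64 rounds the
working variables are folded back into the context, which is what the
add/mov pairs against (%rdi) above do.

    #include <stdint.h>

    /* Hypothetical helper, illustrative only. */
    static inline uint32_t
    rotr32(uint32_t x, int n)
    {
        return ((x >> n) | (x << (32 - n)));
    }

    /* Exactly the forms named in the assembly comments. */
    #define Ch(e, f, g)   ((((f) ^ (g)) & (e)) ^ (g))
    #define Maj(a, b, c)  ((((a) | (c)) & (b)) | ((a) & (c)))
    #define Sigma1(e)     (rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25))
    #define Sigma0(a)     (rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22))
    #define sigma0(x)     (rotr32(x, 7) ^ rotr32(x, 18) ^ ((x) >> 3))
    #define sigma1(x)     (rotr32(x, 17) ^ rotr32(x, 19) ^ ((x) >> 10))

    /*
     * One round; X[] models the 16-word schedule the assembly keeps at
     * 0(%rsp)..60(%rsp), and s[] holds the working variables a..h.
     */
    static void
    sha256_round(uint32_t s[8], uint32_t X[16], const uint32_t K[64], int i)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
        uint32_t T1;

        if (i >= 16)    /* the .Lrounds_16_xx steps, done in place */
            X[i & 0xf] += sigma1(X[(i + 14) & 0xf]) +
                X[(i + 9) & 0xf] + sigma0(X[(i + 1) & 0xf]);

        T1 = h + Sigma1(e) + Ch(e, f, g) + K[i] + X[i & 0xf];
        s[7] = g; s[6] = f; s[5] = e;
        s[4] = d + T1;                          /* d += T1 */
        s[3] = c; s[2] = b; s[1] = a;
        s[0] = T1 + Sigma0(a) + Maj(a, b, c);   /* h = Sigma0(a)+T1+Maj */
    }
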
-
-.data
-.align 64
-.type K256,@object
-K256:
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-#endif /* !lint && !__lint */
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S
new file mode 100644
index 000000000000..fbbcca650d10
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S
@@ -0,0 +1,4011 @@
+/*
+ * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ * - modified assembly to fit into OpenZFS
+ */
+
+#if defined(__x86_64)
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+SECTION_STATIC
+
+.balign 64
+SET_OBJ(K512)
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.quad 0x0001020304050607,0x08090a0b0c0d0e0f
+.quad 0x0001020304050607,0x08090a0b0c0d0e0f
+
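
Two details of the K512 table above are worth noting. First, every 16-byte
pair of constants is emitted twice in a row: the scalar loop below skips the
duplicates with its alternating "leaq 8(%rbp)" / "leaq 24(%rbp)" stride,
while the 128-bit vector setup in .Lloop_avx reads the table in 32-byte
strides; the duplication presumably exists so that wider vector forms can
keep the same constant pair in both halves of one aligned load (an
assumption based on the layout, not confirmed by the visible code). Second,
the trailing pair of quads is not a round constant: it sits at offset 1280
(40 constant pairs, each stored twice, at 16 bytes apiece) and is the
vpshufb control loaded as "K512+1280(%rip)" that byte-swaps each 64-bit
lane, the vector counterpart of the scalar bswapq. It doubles as the scalar
loop's sentinel: every real constant has a nonzero most-significant byte,
so the "cmpb $0,7(%rbp)" test below fails only once this entry is reached.
A small C model of the shuffle (hypothetical helper, not in the source):

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper, illustrative only. */
    static void
    bswap_lanes_like_vpshufb(uint8_t v[16])
    {
        /* Memory image of .quad 0x0001020304050607,0x08090a0b0c0d0e0f */
        static const uint8_t mask[16] = {
            7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
        };
        uint8_t out[16];
        int i;

        for (i = 0; i < 16; i++)
            out[i] = v[mask[i]];    /* vpshufb: dst[i] = src[mask[i]] */
        memcpy(v, out, sizeof (out));
    }
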
+ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
+.Lprologue:
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Lloop
+.balign 16
+.Lloop:
+ movq %rbx,%rdi
+ leaq K512(%rip),%rbp
+ xorq %rcx,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ xorq %r8,%r13
+ rorq $5,%r14
+ xorq %r10,%r15
+ movq %r12,0(%rsp)
+ xorq %rax,%r14
+ andq %r8,%r15
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %r10,%r15
+ rorq $6,%r14
+ xorq %r8,%r13
+ addq %r15,%r12
+ movq %rax,%r15
+ addq (%rbp),%r12
+ xorq %rax,%r14
+ xorq %rbx,%r15
+ rorq $14,%r13
+ movq %rbx,%r11
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r11
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 8(%rbp),%rbp
+ addq %r14,%r11
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%rdi
+ xorq %rdx,%r13
+ rorq $5,%r14
+ xorq %r9,%rdi
+ movq %r12,8(%rsp)
+ xorq %r11,%r14
+ andq %rdx,%rdi
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r9,%rdi
+ rorq $6,%r14
+ xorq %rdx,%r13
+ addq %rdi,%r12
+ movq %r11,%rdi
+ addq (%rbp),%r12
+ xorq %r11,%r14
+ xorq %rax,%rdi
+ rorq $14,%r13
+ movq %rax,%r10
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r10
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 24(%rbp),%rbp
+ addq %r14,%r10
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ xorq %rcx,%r13
+ rorq $5,%r14
+ xorq %r8,%r15
+ movq %r12,16(%rsp)
+ xorq %r10,%r14
+ andq %rcx,%r15
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r8,%r15
+ rorq $6,%r14
+ xorq %rcx,%r13
+ addq %r15,%r12
+ movq %r10,%r15
+ addq (%rbp),%r12
+ xorq %r10,%r14
+ xorq %r11,%r15
+ rorq $14,%r13
+ movq %r11,%r9
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r9
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 8(%rbp),%rbp
+ addq %r14,%r9
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%rdi
+ xorq %rbx,%r13
+ rorq $5,%r14
+ xorq %rdx,%rdi
+ movq %r12,24(%rsp)
+ xorq %r9,%r14
+ andq %rbx,%rdi
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %rdx,%rdi
+ rorq $6,%r14
+ xorq %rbx,%r13
+ addq %rdi,%r12
+ movq %r9,%rdi
+ addq (%rbp),%r12
+ xorq %r9,%r14
+ xorq %r10,%rdi
+ rorq $14,%r13
+ movq %r10,%r8
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r8
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 24(%rbp),%rbp
+ addq %r14,%r8
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ xorq %rax,%r13
+ rorq $5,%r14
+ xorq %rcx,%r15
+ movq %r12,32(%rsp)
+ xorq %r8,%r14
+ andq %rax,%r15
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %rcx,%r15
+ rorq $6,%r14
+ xorq %rax,%r13
+ addq %r15,%r12
+ movq %r8,%r15
+ addq (%rbp),%r12
+ xorq %r8,%r14
+ xorq %r9,%r15
+ rorq $14,%r13
+ movq %r9,%rdx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rdx
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 8(%rbp),%rbp
+ addq %r14,%rdx
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%rdi
+ xorq %r11,%r13
+ rorq $5,%r14
+ xorq %rbx,%rdi
+ movq %r12,40(%rsp)
+ xorq %rdx,%r14
+ andq %r11,%rdi
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rbx,%rdi
+ rorq $6,%r14
+ xorq %r11,%r13
+ addq %rdi,%r12
+ movq %rdx,%rdi
+ addq (%rbp),%r12
+ xorq %rdx,%r14
+ xorq %r8,%rdi
+ rorq $14,%r13
+ movq %r8,%rcx
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rcx
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 24(%rbp),%rbp
+ addq %r14,%rcx
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ xorq %r10,%r13
+ rorq $5,%r14
+ xorq %rax,%r15
+ movq %r12,48(%rsp)
+ xorq %rcx,%r14
+ andq %r10,%r15
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rax,%r15
+ rorq $6,%r14
+ xorq %r10,%r13
+ addq %r15,%r12
+ movq %rcx,%r15
+ addq (%rbp),%r12
+ xorq %rcx,%r14
+ xorq %rdx,%r15
+ rorq $14,%r13
+ movq %rdx,%rbx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rbx
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 8(%rbp),%rbp
+ addq %r14,%rbx
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%rdi
+ xorq %r9,%r13
+ rorq $5,%r14
+ xorq %r11,%rdi
+ movq %r12,56(%rsp)
+ xorq %rbx,%r14
+ andq %r9,%rdi
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %r11,%rdi
+ rorq $6,%r14
+ xorq %r9,%r13
+ addq %rdi,%r12
+ movq %rbx,%rdi
+ addq (%rbp),%r12
+ xorq %rbx,%r14
+ xorq %rcx,%rdi
+ rorq $14,%r13
+ movq %rcx,%rax
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rax
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 24(%rbp),%rbp
+ addq %r14,%rax
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ xorq %r8,%r13
+ rorq $5,%r14
+ xorq %r10,%r15
+ movq %r12,64(%rsp)
+ xorq %rax,%r14
+ andq %r8,%r15
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %r10,%r15
+ rorq $6,%r14
+ xorq %r8,%r13
+ addq %r15,%r12
+ movq %rax,%r15
+ addq (%rbp),%r12
+ xorq %rax,%r14
+ xorq %rbx,%r15
+ rorq $14,%r13
+ movq %rbx,%r11
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r11
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 8(%rbp),%rbp
+ addq %r14,%r11
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%rdi
+ xorq %rdx,%r13
+ rorq $5,%r14
+ xorq %r9,%rdi
+ movq %r12,72(%rsp)
+ xorq %r11,%r14
+ andq %rdx,%rdi
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r9,%rdi
+ rorq $6,%r14
+ xorq %rdx,%r13
+ addq %rdi,%r12
+ movq %r11,%rdi
+ addq (%rbp),%r12
+ xorq %r11,%r14
+ xorq %rax,%rdi
+ rorq $14,%r13
+ movq %rax,%r10
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r10
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 24(%rbp),%rbp
+ addq %r14,%r10
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ xorq %rcx,%r13
+ rorq $5,%r14
+ xorq %r8,%r15
+ movq %r12,80(%rsp)
+ xorq %r10,%r14
+ andq %rcx,%r15
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r8,%r15
+ rorq $6,%r14
+ xorq %rcx,%r13
+ addq %r15,%r12
+ movq %r10,%r15
+ addq (%rbp),%r12
+ xorq %r10,%r14
+ xorq %r11,%r15
+ rorq $14,%r13
+ movq %r11,%r9
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r9
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 8(%rbp),%rbp
+ addq %r14,%r9
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%rdi
+ xorq %rbx,%r13
+ rorq $5,%r14
+ xorq %rdx,%rdi
+ movq %r12,88(%rsp)
+ xorq %r9,%r14
+ andq %rbx,%rdi
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %rdx,%rdi
+ rorq $6,%r14
+ xorq %rbx,%r13
+ addq %rdi,%r12
+ movq %r9,%rdi
+ addq (%rbp),%r12
+ xorq %r9,%r14
+ xorq %r10,%rdi
+ rorq $14,%r13
+ movq %r10,%r8
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r8
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 24(%rbp),%rbp
+ addq %r14,%r8
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ xorq %rax,%r13
+ rorq $5,%r14
+ xorq %rcx,%r15
+ movq %r12,96(%rsp)
+ xorq %r8,%r14
+ andq %rax,%r15
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %rcx,%r15
+ rorq $6,%r14
+ xorq %rax,%r13
+ addq %r15,%r12
+ movq %r8,%r15
+ addq (%rbp),%r12
+ xorq %r8,%r14
+ xorq %r9,%r15
+ rorq $14,%r13
+ movq %r9,%rdx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rdx
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 8(%rbp),%rbp
+ addq %r14,%rdx
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%rdi
+ xorq %r11,%r13
+ rorq $5,%r14
+ xorq %rbx,%rdi
+ movq %r12,104(%rsp)
+ xorq %rdx,%r14
+ andq %r11,%rdi
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rbx,%rdi
+ rorq $6,%r14
+ xorq %r11,%r13
+ addq %rdi,%r12
+ movq %rdx,%rdi
+ addq (%rbp),%r12
+ xorq %rdx,%r14
+ xorq %r8,%rdi
+ rorq $14,%r13
+ movq %r8,%rcx
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rcx
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 24(%rbp),%rbp
+ addq %r14,%rcx
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ xorq %r10,%r13
+ rorq $5,%r14
+ xorq %rax,%r15
+ movq %r12,112(%rsp)
+ xorq %rcx,%r14
+ andq %r10,%r15
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rax,%r15
+ rorq $6,%r14
+ xorq %r10,%r13
+ addq %r15,%r12
+ movq %rcx,%r15
+ addq (%rbp),%r12
+ xorq %rcx,%r14
+ xorq %rdx,%r15
+ rorq $14,%r13
+ movq %rdx,%rbx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rbx
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 8(%rbp),%rbp
+ addq %r14,%rbx
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%rdi
+ xorq %r9,%r13
+ rorq $5,%r14
+ xorq %r11,%rdi
+ movq %r12,120(%rsp)
+ xorq %rbx,%r14
+ andq %r9,%rdi
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %r11,%rdi
+ rorq $6,%r14
+ xorq %r9,%r13
+ addq %rdi,%r12
+ movq %rbx,%rdi
+ addq (%rbp),%r12
+ xorq %rbx,%r14
+ xorq %rcx,%rdi
+ rorq $14,%r13
+ movq %rcx,%rax
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rax
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 24(%rbp),%rbp
+ jmp .Lrounds_16_xx
+.balign 16
+.Lrounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rax
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 72(%rsp),%r12
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r15,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ xorq %r8,%r13
+ rorq $5,%r14
+ xorq %r10,%r15
+ movq %r12,0(%rsp)
+ xorq %rax,%r14
+ andq %r8,%r15
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %r10,%r15
+ rorq $6,%r14
+ xorq %r8,%r13
+ addq %r15,%r12
+ movq %rax,%r15
+ addq (%rbp),%r12
+ xorq %rax,%r14
+ xorq %rbx,%r15
+ rorq $14,%r13
+ movq %rbx,%r11
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r11
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 8(%rbp),%rbp
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r11
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 80(%rsp),%r12
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %rdi,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%rdi
+ xorq %rdx,%r13
+ rorq $5,%r14
+ xorq %r9,%rdi
+ movq %r12,8(%rsp)
+ xorq %r11,%r14
+ andq %rdx,%rdi
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r9,%rdi
+ rorq $6,%r14
+ xorq %rdx,%r13
+ addq %rdi,%r12
+ movq %r11,%rdi
+ addq (%rbp),%r12
+ xorq %r11,%r14
+ xorq %rax,%rdi
+ rorq $14,%r13
+ movq %rax,%r10
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r10
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 24(%rbp),%rbp
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r10
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 88(%rsp),%r12
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r15,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ xorq %rcx,%r13
+ rorq $5,%r14
+ xorq %r8,%r15
+ movq %r12,16(%rsp)
+ xorq %r10,%r14
+ andq %rcx,%r15
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r8,%r15
+ rorq $6,%r14
+ xorq %rcx,%r13
+ addq %r15,%r12
+ movq %r10,%r15
+ addq (%rbp),%r12
+ xorq %r10,%r14
+ xorq %r11,%r15
+ rorq $14,%r13
+ movq %r11,%r9
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r9
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 8(%rbp),%rbp
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r9
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 96(%rsp),%r12
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %rdi,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%rdi
+ xorq %rbx,%r13
+ rorq $5,%r14
+ xorq %rdx,%rdi
+ movq %r12,24(%rsp)
+ xorq %r9,%r14
+ andq %rbx,%rdi
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %rdx,%rdi
+ rorq $6,%r14
+ xorq %rbx,%r13
+ addq %rdi,%r12
+ movq %r9,%rdi
+ addq (%rbp),%r12
+ xorq %r9,%r14
+ xorq %r10,%rdi
+ rorq $14,%r13
+ movq %r10,%r8
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r8
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 24(%rbp),%rbp
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r8
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 104(%rsp),%r12
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r15,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ xorq %rax,%r13
+ rorq $5,%r14
+ xorq %rcx,%r15
+ movq %r12,32(%rsp)
+ xorq %r8,%r14
+ andq %rax,%r15
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %rcx,%r15
+ rorq $6,%r14
+ xorq %rax,%r13
+ addq %r15,%r12
+ movq %r8,%r15
+ addq (%rbp),%r12
+ xorq %r8,%r14
+ xorq %r9,%r15
+ rorq $14,%r13
+ movq %r9,%rdx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rdx
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 8(%rbp),%rbp
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rdx
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 112(%rsp),%r12
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %rdi,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%rdi
+ xorq %r11,%r13
+ rorq $5,%r14
+ xorq %rbx,%rdi
+ movq %r12,40(%rsp)
+ xorq %rdx,%r14
+ andq %r11,%rdi
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rbx,%rdi
+ rorq $6,%r14
+ xorq %r11,%r13
+ addq %rdi,%r12
+ movq %rdx,%rdi
+ addq (%rbp),%r12
+ xorq %rdx,%r14
+ xorq %r8,%rdi
+ rorq $14,%r13
+ movq %r8,%rcx
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rcx
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 24(%rbp),%rbp
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rcx
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 120(%rsp),%r12
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r15,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ xorq %r10,%r13
+ rorq $5,%r14
+ xorq %rax,%r15
+ movq %r12,48(%rsp)
+ xorq %rcx,%r14
+ andq %r10,%r15
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rax,%r15
+ rorq $6,%r14
+ xorq %r10,%r13
+ addq %r15,%r12
+ movq %rcx,%r15
+ addq (%rbp),%r12
+ xorq %rcx,%r14
+ xorq %rdx,%r15
+ rorq $14,%r13
+ movq %rdx,%rbx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rbx
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 8(%rbp),%rbp
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rbx
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 0(%rsp),%r12
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %rdi,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%rdi
+ xorq %r9,%r13
+ rorq $5,%r14
+ xorq %r11,%rdi
+ movq %r12,56(%rsp)
+ xorq %rbx,%r14
+ andq %r9,%rdi
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %r11,%rdi
+ rorq $6,%r14
+ xorq %r9,%r13
+ addq %rdi,%r12
+ movq %rbx,%rdi
+ addq (%rbp),%r12
+ xorq %rbx,%r14
+ xorq %rcx,%rdi
+ rorq $14,%r13
+ movq %rcx,%rax
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rax
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 24(%rbp),%rbp
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rax
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 8(%rsp),%r12
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r15,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ xorq %r8,%r13
+ rorq $5,%r14
+ xorq %r10,%r15
+ movq %r12,64(%rsp)
+ xorq %rax,%r14
+ andq %r8,%r15
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %r10,%r15
+ rorq $6,%r14
+ xorq %r8,%r13
+ addq %r15,%r12
+ movq %rax,%r15
+ addq (%rbp),%r12
+ xorq %rax,%r14
+ xorq %rbx,%r15
+ rorq $14,%r13
+ movq %rbx,%r11
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r11
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 8(%rbp),%rbp
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r11
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 16(%rsp),%r12
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %rdi,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%rdi
+ xorq %rdx,%r13
+ rorq $5,%r14
+ xorq %r9,%rdi
+ movq %r12,72(%rsp)
+ xorq %r11,%r14
+ andq %rdx,%rdi
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r9,%rdi
+ rorq $6,%r14
+ xorq %rdx,%r13
+ addq %rdi,%r12
+ movq %r11,%rdi
+ addq (%rbp),%r12
+ xorq %r11,%r14
+ xorq %rax,%rdi
+ rorq $14,%r13
+ movq %rax,%r10
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r10
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 24(%rbp),%rbp
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r10
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 24(%rsp),%r12
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r15,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ xorq %rcx,%r13
+ rorq $5,%r14
+ xorq %r8,%r15
+ movq %r12,80(%rsp)
+ xorq %r10,%r14
+ andq %rcx,%r15
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r8,%r15
+ rorq $6,%r14
+ xorq %rcx,%r13
+ addq %r15,%r12
+ movq %r10,%r15
+ addq (%rbp),%r12
+ xorq %r10,%r14
+ xorq %r11,%r15
+ rorq $14,%r13
+ movq %r11,%r9
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%r9
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 8(%rbp),%rbp
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r9
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 32(%rsp),%r12
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %rdi,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%rdi
+ xorq %rbx,%r13
+ rorq $5,%r14
+ xorq %rdx,%rdi
+ movq %r12,88(%rsp)
+ xorq %r9,%r14
+ andq %rbx,%rdi
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %rdx,%rdi
+ rorq $6,%r14
+ xorq %rbx,%r13
+ addq %rdi,%r12
+ movq %r9,%rdi
+ addq (%rbp),%r12
+ xorq %r9,%r14
+ xorq %r10,%rdi
+ rorq $14,%r13
+ movq %r10,%r8
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%r8
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 24(%rbp),%rbp
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%r8
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 40(%rsp),%r12
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r15,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ xorq %rax,%r13
+ rorq $5,%r14
+ xorq %rcx,%r15
+ movq %r12,96(%rsp)
+ xorq %r8,%r14
+ andq %rax,%r15
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %rcx,%r15
+ rorq $6,%r14
+ xorq %rax,%r13
+ addq %r15,%r12
+ movq %r8,%r15
+ addq (%rbp),%r12
+ xorq %r8,%r14
+ xorq %r9,%r15
+ rorq $14,%r13
+ movq %r9,%rdx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rdx
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 8(%rbp),%rbp
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rdx
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 48(%rsp),%r12
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %rdi,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%rdi
+ xorq %r11,%r13
+ rorq $5,%r14
+ xorq %rbx,%rdi
+ movq %r12,104(%rsp)
+ xorq %rdx,%r14
+ andq %r11,%rdi
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rbx,%rdi
+ rorq $6,%r14
+ xorq %r11,%r13
+ addq %rdi,%r12
+ movq %rdx,%rdi
+ addq (%rbp),%r12
+ xorq %rdx,%r14
+ xorq %r8,%rdi
+ rorq $14,%r13
+ movq %r8,%rcx
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rcx
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 24(%rbp),%rbp
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r15
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rcx
+ movq %r15,%r14
+ rorq $42,%r15
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%r15
+ shrq $6,%r14
+ rorq $19,%r15
+ xorq %r13,%r12
+ xorq %r14,%r15
+ addq 56(%rsp),%r12
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r15,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ xorq %r10,%r13
+ rorq $5,%r14
+ xorq %rax,%r15
+ movq %r12,112(%rsp)
+ xorq %rcx,%r14
+ andq %r10,%r15
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rax,%r15
+ rorq $6,%r14
+ xorq %r10,%r13
+ addq %r15,%r12
+ movq %rcx,%r15
+ addq (%rbp),%r12
+ xorq %rcx,%r14
+ xorq %rdx,%r15
+ rorq $14,%r13
+ movq %rdx,%rbx
+ andq %r15,%rdi
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %rdi,%rbx
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 8(%rbp),%rbp
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%rdi
+ movq %r13,%r12
+ rorq $7,%r13
+ addq %r14,%rbx
+ movq %rdi,%r14
+ rorq $42,%rdi
+ xorq %r12,%r13
+ shrq $7,%r12
+ rorq $1,%r13
+ xorq %r14,%rdi
+ shrq $6,%r14
+ rorq $19,%rdi
+ xorq %r13,%r12
+ xorq %r14,%rdi
+ addq 64(%rsp),%r12
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %rdi,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%rdi
+ xorq %r9,%r13
+ rorq $5,%r14
+ xorq %r11,%rdi
+ movq %r12,120(%rsp)
+ xorq %rbx,%r14
+ andq %r9,%rdi
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %r11,%rdi
+ rorq $6,%r14
+ xorq %r9,%r13
+ addq %rdi,%r12
+ movq %rbx,%rdi
+ addq (%rbp),%r12
+ xorq %rbx,%r14
+ xorq %rcx,%rdi
+ rorq $14,%r13
+ movq %rcx,%rax
+ andq %rdi,%r15
+ rorq $28,%r14
+ addq %r13,%r12
+ xorq %r15,%rax
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 24(%rbp),%rbp
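+	# Byte 7 of the next table entry is nonzero for all 80 round
+	# constants and zero only for the byte-swap mask that follows
+	# them, so this check ends the 16-round groups.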
+ cmpb $0,7(%rbp)
+ jnz .Lrounds_16_xx
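+	# All 80 rounds done: reload the ctx pointer, fold the working
+	# variables back into the hash state, and loop while input remains.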
+ movq 128+0(%rsp),%rdi
+ addq %r14,%rax
+ leaq 128(%rsi),%rsi
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+ cmpq 128+16(%rsp),%rsi
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb .Lloop
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha512_transform_x64)
+
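+# The AVX variant keeps the same scalar round function (using shrd in
+# place of ror) but interleaves it with 128-bit vector code that expands
+# the message schedule two 64-bit words at a time and pre-adds the round
+# constants into the 128-byte stack frame.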
+ENTRY_ALIGN(zfs_sha512_transform_avx, 64)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ shlq $4,%rdx
+ subq $160,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
+.Lprologue_avx:
+
+ vzeroupper
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Lloop_avx
+.balign 16
+.Lloop_avx:
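+	# Load one 128-byte block, byte-swap the sixteen big-endian message
+	# words with vpshufb, and stash W[i]+K[i] for rounds 0..15 on the stack.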
+ vmovdqa K512+1280(%rip),%xmm11
+ vmovdqu 0(%rsi),%xmm0
+ leaq K512+128(%rip),%rbp
+ vmovdqu 16(%rsi),%xmm1
+ vmovdqu 32(%rsi),%xmm2
+ vpshufb %xmm11,%xmm0,%xmm0
+ vmovdqu 48(%rsi),%xmm3
+ vpshufb %xmm11,%xmm1,%xmm1
+ vmovdqu 64(%rsi),%xmm4
+ vpshufb %xmm11,%xmm2,%xmm2
+ vmovdqu 80(%rsi),%xmm5
+ vpshufb %xmm11,%xmm3,%xmm3
+ vmovdqu 96(%rsi),%xmm6
+ vpshufb %xmm11,%xmm4,%xmm4
+ vmovdqu 112(%rsi),%xmm7
+ vpshufb %xmm11,%xmm5,%xmm5
+ vpaddq -128(%rbp),%xmm0,%xmm8
+ vpshufb %xmm11,%xmm6,%xmm6
+ vpaddq -96(%rbp),%xmm1,%xmm9
+ vpshufb %xmm11,%xmm7,%xmm7
+ vpaddq -64(%rbp),%xmm2,%xmm10
+ vpaddq -32(%rbp),%xmm3,%xmm11
+ vmovdqa %xmm8,0(%rsp)
+ vpaddq 0(%rbp),%xmm4,%xmm8
+ vmovdqa %xmm9,16(%rsp)
+ vpaddq 32(%rbp),%xmm5,%xmm9
+ vmovdqa %xmm10,32(%rsp)
+ vpaddq 64(%rbp),%xmm6,%xmm10
+ vmovdqa %xmm11,48(%rsp)
+ vpaddq 96(%rbp),%xmm7,%xmm11
+ vmovdqa %xmm8,64(%rsp)
+ movq %rax,%r14
+ vmovdqa %xmm9,80(%rsp)
+ movq %rbx,%rdi
+ vmovdqa %xmm10,96(%rsp)
+ xorq %rcx,%rdi
+ vmovdqa %xmm11,112(%rsp)
+ movq %r8,%r13
+ jmp .Lavx_00_47
+
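+	# Each pass below performs 16 rounds while the interleaved vector
+	# code expands the next 16 schedule words (sigma0/sigma1) and
+	# pre-adds their round constants.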
+.balign 16
+.Lavx_00_47:
+ addq $256,%rbp
+ vpalignr $8,%xmm0,%xmm1,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%rax
+ vpalignr $8,%xmm4,%xmm5,%xmm11
+ movq %r9,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %r8,%r13
+ xorq %r10,%r12
+ vpaddq %xmm11,%xmm0,%xmm0
+ shrdq $4,%r13,%r13
+ xorq %rax,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %r8,%r12
+ xorq %r8,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 0(%rsp),%r11
+ movq %rax,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %r10,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %rbx,%r15
+ addq %r12,%r11
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %rax,%r14
+ addq %r13,%r11
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %rbx,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm7,%xmm11
+ addq %r11,%rdx
+ addq %rdi,%r11
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %rdx,%r13
+ addq %r11,%r14
+ vpsllq $3,%xmm7,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%r11
+ vpaddq %xmm8,%xmm0,%xmm0
+ movq %r8,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm7,%xmm9
+ xorq %rdx,%r13
+ xorq %r9,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %r11,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %rdx,%r12
+ xorq %rdx,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 8(%rsp),%r10
+ movq %r11,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %r9,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %rax,%rdi
+ addq %r12,%r10
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm0,%xmm0
+ xorq %r11,%r14
+ addq %r13,%r10
+ vpaddq -128(%rbp),%xmm0,%xmm10
+ xorq %rax,%r15
+ shrdq $28,%r14,%r14
+ addq %r10,%rcx
+ addq %r15,%r10
+ movq %rcx,%r13
+ addq %r10,%r14
+ vmovdqa %xmm10,0(%rsp)
+ vpalignr $8,%xmm1,%xmm2,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%r10
+ vpalignr $8,%xmm5,%xmm6,%xmm11
+ movq %rdx,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %rcx,%r13
+ xorq %r8,%r12
+ vpaddq %xmm11,%xmm1,%xmm1
+ shrdq $4,%r13,%r13
+ xorq %r10,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %rcx,%r12
+ xorq %rcx,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 16(%rsp),%r9
+ movq %r10,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %r8,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %r11,%r15
+ addq %r12,%r9
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %r10,%r14
+ addq %r13,%r9
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %r11,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm0,%xmm11
+ addq %r9,%rbx
+ addq %rdi,%r9
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %rbx,%r13
+ addq %r9,%r14
+ vpsllq $3,%xmm0,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%r9
+ vpaddq %xmm8,%xmm1,%xmm1
+ movq %rcx,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm0,%xmm9
+ xorq %rbx,%r13
+ xorq %rdx,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %r9,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %rbx,%r12
+ xorq %rbx,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 24(%rsp),%r8
+ movq %r9,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %rdx,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %r10,%rdi
+ addq %r12,%r8
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm1,%xmm1
+ xorq %r9,%r14
+ addq %r13,%r8
+ vpaddq -96(%rbp),%xmm1,%xmm10
+ xorq %r10,%r15
+ shrdq $28,%r14,%r14
+ addq %r8,%rax
+ addq %r15,%r8
+ movq %rax,%r13
+ addq %r8,%r14
+ vmovdqa %xmm10,16(%rsp)
+ vpalignr $8,%xmm2,%xmm3,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%r8
+ vpalignr $8,%xmm6,%xmm7,%xmm11
+ movq %rbx,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %rax,%r13
+ xorq %rcx,%r12
+ vpaddq %xmm11,%xmm2,%xmm2
+ shrdq $4,%r13,%r13
+ xorq %r8,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %rax,%r12
+ xorq %rax,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 32(%rsp),%rdx
+ movq %r8,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %rcx,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %r9,%r15
+ addq %r12,%rdx
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %r8,%r14
+ addq %r13,%rdx
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %r9,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm1,%xmm11
+ addq %rdx,%r11
+ addq %rdi,%rdx
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %r11,%r13
+ addq %rdx,%r14
+ vpsllq $3,%xmm1,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%rdx
+ vpaddq %xmm8,%xmm2,%xmm2
+ movq %rax,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm1,%xmm9
+ xorq %r11,%r13
+ xorq %rbx,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %rdx,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %r11,%r12
+ xorq %r11,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 40(%rsp),%rcx
+ movq %rdx,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %rbx,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %r8,%rdi
+ addq %r12,%rcx
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm2,%xmm2
+ xorq %rdx,%r14
+ addq %r13,%rcx
+ vpaddq -64(%rbp),%xmm2,%xmm10
+ xorq %r8,%r15
+ shrdq $28,%r14,%r14
+ addq %rcx,%r10
+ addq %r15,%rcx
+ movq %r10,%r13
+ addq %rcx,%r14
+ vmovdqa %xmm10,32(%rsp)
+ vpalignr $8,%xmm3,%xmm4,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%rcx
+ vpalignr $8,%xmm7,%xmm0,%xmm11
+ movq %r11,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %r10,%r13
+ xorq %rax,%r12
+ vpaddq %xmm11,%xmm3,%xmm3
+ shrdq $4,%r13,%r13
+ xorq %rcx,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %r10,%r12
+ xorq %r10,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 48(%rsp),%rbx
+ movq %rcx,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %rax,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %rdx,%r15
+ addq %r12,%rbx
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %rcx,%r14
+ addq %r13,%rbx
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %rdx,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm2,%xmm11
+ addq %rbx,%r9
+ addq %rdi,%rbx
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %r9,%r13
+ addq %rbx,%r14
+ vpsllq $3,%xmm2,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%rbx
+ vpaddq %xmm8,%xmm3,%xmm3
+ movq %r10,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm2,%xmm9
+ xorq %r9,%r13
+ xorq %r11,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %rbx,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %r9,%r12
+ xorq %r9,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 56(%rsp),%rax
+ movq %rbx,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %r11,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %rcx,%rdi
+ addq %r12,%rax
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm3,%xmm3
+ xorq %rbx,%r14
+ addq %r13,%rax
+ vpaddq -32(%rbp),%xmm3,%xmm10
+ xorq %rcx,%r15
+ shrdq $28,%r14,%r14
+ addq %rax,%r8
+ addq %r15,%rax
+ movq %r8,%r13
+ addq %rax,%r14
+ vmovdqa %xmm10,48(%rsp)
+ vpalignr $8,%xmm4,%xmm5,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%rax
+ vpalignr $8,%xmm0,%xmm1,%xmm11
+ movq %r9,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %r8,%r13
+ xorq %r10,%r12
+ vpaddq %xmm11,%xmm4,%xmm4
+ shrdq $4,%r13,%r13
+ xorq %rax,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %r8,%r12
+ xorq %r8,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 64(%rsp),%r11
+ movq %rax,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %r10,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %rbx,%r15
+ addq %r12,%r11
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %rax,%r14
+ addq %r13,%r11
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %rbx,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm3,%xmm11
+ addq %r11,%rdx
+ addq %rdi,%r11
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %rdx,%r13
+ addq %r11,%r14
+ vpsllq $3,%xmm3,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%r11
+ vpaddq %xmm8,%xmm4,%xmm4
+ movq %r8,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm3,%xmm9
+ xorq %rdx,%r13
+ xorq %r9,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %r11,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %rdx,%r12
+ xorq %rdx,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 72(%rsp),%r10
+ movq %r11,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %r9,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %rax,%rdi
+ addq %r12,%r10
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm4,%xmm4
+ xorq %r11,%r14
+ addq %r13,%r10
+ vpaddq 0(%rbp),%xmm4,%xmm10
+ xorq %rax,%r15
+ shrdq $28,%r14,%r14
+ addq %r10,%rcx
+ addq %r15,%r10
+ movq %rcx,%r13
+ addq %r10,%r14
+ vmovdqa %xmm10,64(%rsp)
+ vpalignr $8,%xmm5,%xmm6,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%r10
+ vpalignr $8,%xmm1,%xmm2,%xmm11
+ movq %rdx,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %rcx,%r13
+ xorq %r8,%r12
+ vpaddq %xmm11,%xmm5,%xmm5
+ shrdq $4,%r13,%r13
+ xorq %r10,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %rcx,%r12
+ xorq %rcx,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 80(%rsp),%r9
+ movq %r10,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %r8,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %r11,%r15
+ addq %r12,%r9
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %r10,%r14
+ addq %r13,%r9
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %r11,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm4,%xmm11
+ addq %r9,%rbx
+ addq %rdi,%r9
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %rbx,%r13
+ addq %r9,%r14
+ vpsllq $3,%xmm4,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%r9
+ vpaddq %xmm8,%xmm5,%xmm5
+ movq %rcx,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm4,%xmm9
+ xorq %rbx,%r13
+ xorq %rdx,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %r9,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %rbx,%r12
+ xorq %rbx,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 88(%rsp),%r8
+ movq %r9,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %rdx,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %r10,%rdi
+ addq %r12,%r8
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm5,%xmm5
+ xorq %r9,%r14
+ addq %r13,%r8
+ vpaddq 32(%rbp),%xmm5,%xmm10
+ xorq %r10,%r15
+ shrdq $28,%r14,%r14
+ addq %r8,%rax
+ addq %r15,%r8
+ movq %rax,%r13
+ addq %r8,%r14
+ vmovdqa %xmm10,80(%rsp)
+ vpalignr $8,%xmm6,%xmm7,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%r8
+ vpalignr $8,%xmm2,%xmm3,%xmm11
+ movq %rbx,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %rax,%r13
+ xorq %rcx,%r12
+ vpaddq %xmm11,%xmm6,%xmm6
+ shrdq $4,%r13,%r13
+ xorq %r8,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %rax,%r12
+ xorq %rax,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 96(%rsp),%rdx
+ movq %r8,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %rcx,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %r9,%r15
+ addq %r12,%rdx
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %r8,%r14
+ addq %r13,%rdx
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %r9,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm5,%xmm11
+ addq %rdx,%r11
+ addq %rdi,%rdx
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %r11,%r13
+ addq %rdx,%r14
+ vpsllq $3,%xmm5,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%rdx
+ vpaddq %xmm8,%xmm6,%xmm6
+ movq %rax,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm5,%xmm9
+ xorq %r11,%r13
+ xorq %rbx,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %rdx,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %r11,%r12
+ xorq %r11,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 104(%rsp),%rcx
+ movq %rdx,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %rbx,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %r8,%rdi
+ addq %r12,%rcx
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm6,%xmm6
+ xorq %rdx,%r14
+ addq %r13,%rcx
+ vpaddq 64(%rbp),%xmm6,%xmm10
+ xorq %r8,%r15
+ shrdq $28,%r14,%r14
+ addq %rcx,%r10
+ addq %r15,%rcx
+ movq %r10,%r13
+ addq %rcx,%r14
+ vmovdqa %xmm10,96(%rsp)
+ vpalignr $8,%xmm7,%xmm0,%xmm8
+ shrdq $23,%r13,%r13
+ movq %r14,%rcx
+ vpalignr $8,%xmm3,%xmm4,%xmm11
+ movq %r11,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $1,%xmm8,%xmm10
+ xorq %r10,%r13
+ xorq %rax,%r12
+ vpaddq %xmm11,%xmm7,%xmm7
+ shrdq $4,%r13,%r13
+ xorq %rcx,%r14
+ vpsrlq $7,%xmm8,%xmm11
+ andq %r10,%r12
+ xorq %r10,%r13
+ vpsllq $56,%xmm8,%xmm9
+ addq 112(%rsp),%rbx
+ movq %rcx,%r15
+ vpxor %xmm10,%xmm11,%xmm8
+ xorq %rax,%r12
+ shrdq $6,%r14,%r14
+ vpsrlq $7,%xmm10,%xmm10
+ xorq %rdx,%r15
+ addq %r12,%rbx
+ vpxor %xmm9,%xmm8,%xmm8
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ vpsllq $7,%xmm9,%xmm9
+ xorq %rcx,%r14
+ addq %r13,%rbx
+ vpxor %xmm10,%xmm8,%xmm8
+ xorq %rdx,%rdi
+ shrdq $28,%r14,%r14
+ vpsrlq $6,%xmm6,%xmm11
+ addq %rbx,%r9
+ addq %rdi,%rbx
+ vpxor %xmm9,%xmm8,%xmm8
+ movq %r9,%r13
+ addq %rbx,%r14
+ vpsllq $3,%xmm6,%xmm10
+ shrdq $23,%r13,%r13
+ movq %r14,%rbx
+ vpaddq %xmm8,%xmm7,%xmm7
+ movq %r10,%r12
+ shrdq $5,%r14,%r14
+ vpsrlq $19,%xmm6,%xmm9
+ xorq %r9,%r13
+ xorq %r11,%r12
+ vpxor %xmm10,%xmm11,%xmm11
+ shrdq $4,%r13,%r13
+ xorq %rbx,%r14
+ vpsllq $42,%xmm10,%xmm10
+ andq %r9,%r12
+ xorq %r9,%r13
+ vpxor %xmm9,%xmm11,%xmm11
+ addq 120(%rsp),%rax
+ movq %rbx,%rdi
+ vpsrlq $42,%xmm9,%xmm9
+ xorq %r11,%r12
+ shrdq $6,%r14,%r14
+ vpxor %xmm10,%xmm11,%xmm11
+ xorq %rcx,%rdi
+ addq %r12,%rax
+ vpxor %xmm9,%xmm11,%xmm11
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ vpaddq %xmm11,%xmm7,%xmm7
+ xorq %rbx,%r14
+ addq %r13,%rax
+ vpaddq 96(%rbp),%xmm7,%xmm10
+ xorq %rcx,%r15
+ shrdq $28,%r14,%r14
+ addq %rax,%r8
+ addq %r15,%rax
+ movq %r8,%r13
+ addq %rax,%r14
+ vmovdqa %xmm10,112(%rsp)
+ cmpb $0,135(%rbp)
+ jne .Lavx_00_47
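+	# Schedule expansion is finished; the last 16 rounds consume the
+	# W+K values already stored on the stack.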
+ shrdq $23,%r13,%r13
+ movq %r14,%rax
+ movq %r9,%r12
+ shrdq $5,%r14,%r14
+ xorq %r8,%r13
+ xorq %r10,%r12
+ shrdq $4,%r13,%r13
+ xorq %rax,%r14
+ andq %r8,%r12
+ xorq %r8,%r13
+ addq 0(%rsp),%r11
+ movq %rax,%r15
+ xorq %r10,%r12
+ shrdq $6,%r14,%r14
+ xorq %rbx,%r15
+ addq %r12,%r11
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %rax,%r14
+ addq %r13,%r11
+ xorq %rbx,%rdi
+ shrdq $28,%r14,%r14
+ addq %r11,%rdx
+ addq %rdi,%r11
+ movq %rdx,%r13
+ addq %r11,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r11
+ movq %r8,%r12
+ shrdq $5,%r14,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r12
+ shrdq $4,%r13,%r13
+ xorq %r11,%r14
+ andq %rdx,%r12
+ xorq %rdx,%r13
+ addq 8(%rsp),%r10
+ movq %r11,%rdi
+ xorq %r9,%r12
+ shrdq $6,%r14,%r14
+ xorq %rax,%rdi
+ addq %r12,%r10
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %r11,%r14
+ addq %r13,%r10
+ xorq %rax,%r15
+ shrdq $28,%r14,%r14
+ addq %r10,%rcx
+ addq %r15,%r10
+ movq %rcx,%r13
+ addq %r10,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r10
+ movq %rdx,%r12
+ shrdq $5,%r14,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r12
+ shrdq $4,%r13,%r13
+ xorq %r10,%r14
+ andq %rcx,%r12
+ xorq %rcx,%r13
+ addq 16(%rsp),%r9
+ movq %r10,%r15
+ xorq %r8,%r12
+ shrdq $6,%r14,%r14
+ xorq %r11,%r15
+ addq %r12,%r9
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %r10,%r14
+ addq %r13,%r9
+ xorq %r11,%rdi
+ shrdq $28,%r14,%r14
+ addq %r9,%rbx
+ addq %rdi,%r9
+ movq %rbx,%r13
+ addq %r9,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r9
+ movq %rcx,%r12
+ shrdq $5,%r14,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r12
+ shrdq $4,%r13,%r13
+ xorq %r9,%r14
+ andq %rbx,%r12
+ xorq %rbx,%r13
+ addq 24(%rsp),%r8
+ movq %r9,%rdi
+ xorq %rdx,%r12
+ shrdq $6,%r14,%r14
+ xorq %r10,%rdi
+ addq %r12,%r8
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %r9,%r14
+ addq %r13,%r8
+ xorq %r10,%r15
+ shrdq $28,%r14,%r14
+ addq %r8,%rax
+ addq %r15,%r8
+ movq %rax,%r13
+ addq %r8,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r8
+ movq %rbx,%r12
+ shrdq $5,%r14,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r12
+ shrdq $4,%r13,%r13
+ xorq %r8,%r14
+ andq %rax,%r12
+ xorq %rax,%r13
+ addq 32(%rsp),%rdx
+ movq %r8,%r15
+ xorq %rcx,%r12
+ shrdq $6,%r14,%r14
+ xorq %r9,%r15
+ addq %r12,%rdx
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %r8,%r14
+ addq %r13,%rdx
+ xorq %r9,%rdi
+ shrdq $28,%r14,%r14
+ addq %rdx,%r11
+ addq %rdi,%rdx
+ movq %r11,%r13
+ addq %rdx,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rdx
+ movq %rax,%r12
+ shrdq $5,%r14,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r12
+ shrdq $4,%r13,%r13
+ xorq %rdx,%r14
+ andq %r11,%r12
+ xorq %r11,%r13
+ addq 40(%rsp),%rcx
+ movq %rdx,%rdi
+ xorq %rbx,%r12
+ shrdq $6,%r14,%r14
+ xorq %r8,%rdi
+ addq %r12,%rcx
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %rdx,%r14
+ addq %r13,%rcx
+ xorq %r8,%r15
+ shrdq $28,%r14,%r14
+ addq %rcx,%r10
+ addq %r15,%rcx
+ movq %r10,%r13
+ addq %rcx,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rcx
+ movq %r11,%r12
+ shrdq $5,%r14,%r14
+ xorq %r10,%r13
+ xorq %rax,%r12
+ shrdq $4,%r13,%r13
+ xorq %rcx,%r14
+ andq %r10,%r12
+ xorq %r10,%r13
+ addq 48(%rsp),%rbx
+ movq %rcx,%r15
+ xorq %rax,%r12
+ shrdq $6,%r14,%r14
+ xorq %rdx,%r15
+ addq %r12,%rbx
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %rcx,%r14
+ addq %r13,%rbx
+ xorq %rdx,%rdi
+ shrdq $28,%r14,%r14
+ addq %rbx,%r9
+ addq %rdi,%rbx
+ movq %r9,%r13
+ addq %rbx,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rbx
+ movq %r10,%r12
+ shrdq $5,%r14,%r14
+ xorq %r9,%r13
+ xorq %r11,%r12
+ shrdq $4,%r13,%r13
+ xorq %rbx,%r14
+ andq %r9,%r12
+ xorq %r9,%r13
+ addq 56(%rsp),%rax
+ movq %rbx,%rdi
+ xorq %r11,%r12
+ shrdq $6,%r14,%r14
+ xorq %rcx,%rdi
+ addq %r12,%rax
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %rbx,%r14
+ addq %r13,%rax
+ xorq %rcx,%r15
+ shrdq $28,%r14,%r14
+ addq %rax,%r8
+ addq %r15,%rax
+ movq %r8,%r13
+ addq %rax,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rax
+ movq %r9,%r12
+ shrdq $5,%r14,%r14
+ xorq %r8,%r13
+ xorq %r10,%r12
+ shrdq $4,%r13,%r13
+ xorq %rax,%r14
+ andq %r8,%r12
+ xorq %r8,%r13
+ addq 64(%rsp),%r11
+ movq %rax,%r15
+ xorq %r10,%r12
+ shrdq $6,%r14,%r14
+ xorq %rbx,%r15
+ addq %r12,%r11
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %rax,%r14
+ addq %r13,%r11
+ xorq %rbx,%rdi
+ shrdq $28,%r14,%r14
+ addq %r11,%rdx
+ addq %rdi,%r11
+ movq %rdx,%r13
+ addq %r11,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r11
+ movq %r8,%r12
+ shrdq $5,%r14,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r12
+ shrdq $4,%r13,%r13
+ xorq %r11,%r14
+ andq %rdx,%r12
+ xorq %rdx,%r13
+ addq 72(%rsp),%r10
+ movq %r11,%rdi
+ xorq %r9,%r12
+ shrdq $6,%r14,%r14
+ xorq %rax,%rdi
+ addq %r12,%r10
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %r11,%r14
+ addq %r13,%r10
+ xorq %rax,%r15
+ shrdq $28,%r14,%r14
+ addq %r10,%rcx
+ addq %r15,%r10
+ movq %rcx,%r13
+ addq %r10,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r10
+ movq %rdx,%r12
+ shrdq $5,%r14,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r12
+ shrdq $4,%r13,%r13
+ xorq %r10,%r14
+ andq %rcx,%r12
+ xorq %rcx,%r13
+ addq 80(%rsp),%r9
+ movq %r10,%r15
+ xorq %r8,%r12
+ shrdq $6,%r14,%r14
+ xorq %r11,%r15
+ addq %r12,%r9
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %r10,%r14
+ addq %r13,%r9
+ xorq %r11,%rdi
+ shrdq $28,%r14,%r14
+ addq %r9,%rbx
+ addq %rdi,%r9
+ movq %rbx,%r13
+ addq %r9,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r9
+ movq %rcx,%r12
+ shrdq $5,%r14,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r12
+ shrdq $4,%r13,%r13
+ xorq %r9,%r14
+ andq %rbx,%r12
+ xorq %rbx,%r13
+ addq 88(%rsp),%r8
+ movq %r9,%rdi
+ xorq %rdx,%r12
+ shrdq $6,%r14,%r14
+ xorq %r10,%rdi
+ addq %r12,%r8
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %r9,%r14
+ addq %r13,%r8
+ xorq %r10,%r15
+ shrdq $28,%r14,%r14
+ addq %r8,%rax
+ addq %r15,%r8
+ movq %rax,%r13
+ addq %r8,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%r8
+ movq %rbx,%r12
+ shrdq $5,%r14,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r12
+ shrdq $4,%r13,%r13
+ xorq %r8,%r14
+ andq %rax,%r12
+ xorq %rax,%r13
+ addq 96(%rsp),%rdx
+ movq %r8,%r15
+ xorq %rcx,%r12
+ shrdq $6,%r14,%r14
+ xorq %r9,%r15
+ addq %r12,%rdx
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %r8,%r14
+ addq %r13,%rdx
+ xorq %r9,%rdi
+ shrdq $28,%r14,%r14
+ addq %rdx,%r11
+ addq %rdi,%rdx
+ movq %r11,%r13
+ addq %rdx,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rdx
+ movq %rax,%r12
+ shrdq $5,%r14,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r12
+ shrdq $4,%r13,%r13
+ xorq %rdx,%r14
+ andq %r11,%r12
+ xorq %r11,%r13
+ addq 104(%rsp),%rcx
+ movq %rdx,%rdi
+ xorq %rbx,%r12
+ shrdq $6,%r14,%r14
+ xorq %r8,%rdi
+ addq %r12,%rcx
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %rdx,%r14
+ addq %r13,%rcx
+ xorq %r8,%r15
+ shrdq $28,%r14,%r14
+ addq %rcx,%r10
+ addq %r15,%rcx
+ movq %r10,%r13
+ addq %rcx,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rcx
+ movq %r11,%r12
+ shrdq $5,%r14,%r14
+ xorq %r10,%r13
+ xorq %rax,%r12
+ shrdq $4,%r13,%r13
+ xorq %rcx,%r14
+ andq %r10,%r12
+ xorq %r10,%r13
+ addq 112(%rsp),%rbx
+ movq %rcx,%r15
+ xorq %rax,%r12
+ shrdq $6,%r14,%r14
+ xorq %rdx,%r15
+ addq %r12,%rbx
+ shrdq $14,%r13,%r13
+ andq %r15,%rdi
+ xorq %rcx,%r14
+ addq %r13,%rbx
+ xorq %rdx,%rdi
+ shrdq $28,%r14,%r14
+ addq %rbx,%r9
+ addq %rdi,%rbx
+ movq %r9,%r13
+ addq %rbx,%r14
+ shrdq $23,%r13,%r13
+ movq %r14,%rbx
+ movq %r10,%r12
+ shrdq $5,%r14,%r14
+ xorq %r9,%r13
+ xorq %r11,%r12
+ shrdq $4,%r13,%r13
+ xorq %rbx,%r14
+ andq %r9,%r12
+ xorq %r9,%r13
+ addq 120(%rsp),%rax
+ movq %rbx,%rdi
+ xorq %r11,%r12
+ shrdq $6,%r14,%r14
+ xorq %rcx,%rdi
+ addq %r12,%rax
+ shrdq $14,%r13,%r13
+ andq %rdi,%r15
+ xorq %rbx,%r14
+ addq %r13,%rax
+ xorq %rcx,%r15
+ shrdq $28,%r14,%r14
+ addq %rax,%r8
+ addq %r15,%rax
+ movq %r8,%r13
+ addq %rax,%r14
+ movq 128+0(%rsp),%rdi
+ movq %r14,%rax
+
+ addq 0(%rdi),%rax
+ leaq 128(%rsi),%rsi
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb .Lloop_avx
+
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ vzeroupper
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue_avx:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha512_transform_avx)
+
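+# The AVX2 variant processes two 128-byte blocks at once: the message
+# schedules of both blocks travel together in the 256-bit ymm registers,
+# while the scalar rounds use the BMI2 rorx/andn instructions. The
+# second block's rounds are replayed afterwards in .Lower_avx2 from the
+# saved upper lanes.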
+ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
+.cfi_startproc
+ ENDBR
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
+ pushq %rbx
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_offset %r15,-56
+ subq $1312,%rsp
+ shlq $4,%rdx
+ andq $-2048,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ addq $1152,%rsp
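+	# Carve out a 2KB-aligned scratch frame; %rsp later slides down in
+	# 128-byte steps so each group of saved W+K vectors stays addressable
+	# at fixed offsets.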
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
+.Lprologue_avx2:
+
+ vzeroupper
+ subq $-128,%rsi
+ movq 0(%rdi),%rax
+ movq %rsi,%r12
+ movq 8(%rdi),%rbx
+ cmpq %rdx,%rsi
+ movq 16(%rdi),%rcx
+ cmoveq %rsp,%r12
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Loop_avx2
+.balign 16
+.Loop_avx2:
+ vmovdqu -128(%rsi),%xmm0
+ vmovdqu -128+16(%rsi),%xmm1
+ vmovdqu -128+32(%rsi),%xmm2
+ leaq K512+128(%rip),%rbp
+ vmovdqu -128+48(%rsi),%xmm3
+ vmovdqu -128+64(%rsi),%xmm4
+ vmovdqu -128+80(%rsi),%xmm5
+ vmovdqu -128+96(%rsi),%xmm6
+ vmovdqu -128+112(%rsi),%xmm7
+
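+	# The low xmm lanes already hold this block; vinserti128 pulls the
+	# next block (via %r12, which the cmove above redirects when no
+	# second block remains) into the high lanes before byte-swapping.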
+ vmovdqa 1152(%rbp),%ymm10
+ vinserti128 $1,(%r12),%ymm0,%ymm0
+ vinserti128 $1,16(%r12),%ymm1,%ymm1
+ vpshufb %ymm10,%ymm0,%ymm0
+ vinserti128 $1,32(%r12),%ymm2,%ymm2
+ vpshufb %ymm10,%ymm1,%ymm1
+ vinserti128 $1,48(%r12),%ymm3,%ymm3
+ vpshufb %ymm10,%ymm2,%ymm2
+ vinserti128 $1,64(%r12),%ymm4,%ymm4
+ vpshufb %ymm10,%ymm3,%ymm3
+ vinserti128 $1,80(%r12),%ymm5,%ymm5
+ vpshufb %ymm10,%ymm4,%ymm4
+ vinserti128 $1,96(%r12),%ymm6,%ymm6
+ vpshufb %ymm10,%ymm5,%ymm5
+ vinserti128 $1,112(%r12),%ymm7,%ymm7
+
+ vpaddq -128(%rbp),%ymm0,%ymm8
+ vpshufb %ymm10,%ymm6,%ymm6
+ vpaddq -96(%rbp),%ymm1,%ymm9
+ vpshufb %ymm10,%ymm7,%ymm7
+ vpaddq -64(%rbp),%ymm2,%ymm10
+ vpaddq -32(%rbp),%ymm3,%ymm11
+ vmovdqa %ymm8,0(%rsp)
+ vpaddq 0(%rbp),%ymm4,%ymm8
+ vmovdqa %ymm9,32(%rsp)
+ vpaddq 32(%rbp),%ymm5,%ymm9
+ vmovdqa %ymm10,64(%rsp)
+ vpaddq 64(%rbp),%ymm6,%ymm10
+ vmovdqa %ymm11,96(%rsp)
+
+ movq 152(%rsp),%rdi
+.cfi_def_cfa %rdi,8
+ leaq -128(%rsp),%rsp
+
+
+
+ movq %rdi,-8(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
+ vpaddq 96(%rbp),%ymm7,%ymm11
+ vmovdqa %ymm8,0(%rsp)
+ xorq %r14,%r14
+ vmovdqa %ymm9,32(%rsp)
+ movq %rbx,%rdi
+ vmovdqa %ymm10,64(%rsp)
+ xorq %rcx,%rdi
+ vmovdqa %ymm11,96(%rsp)
+ movq %r9,%r12
+ addq $32*8,%rbp
+ jmp .Lavx2_00_47
+
+.balign 16
+.Lavx2_00_47:
+ leaq -128(%rsp),%rsp
+.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
+
+ pushq 128-8(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
+ leaq 8(%rsp),%rsp
+.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
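+	# Copy the saved frame pointer down so it remains at -8(%rsp) after
+	# the 128-byte slide above.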
+ vpalignr $8,%ymm0,%ymm1,%ymm8
+ addq 0+256(%rsp),%r11
+ andq %r8,%r12
+ rorxq $41,%r8,%r13
+ vpalignr $8,%ymm4,%ymm5,%ymm11
+ rorxq $18,%r8,%r15
+ leaq (%rax,%r14,1),%rax
+ leaq (%r11,%r12,1),%r11
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %r10,%r8,%r12
+ xorq %r15,%r13
+ rorxq $14,%r8,%r14
+ vpaddq %ymm11,%ymm0,%ymm0
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%r11,%r12,1),%r11
+ xorq %r14,%r13
+ movq %rax,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%rax,%r12
+ leaq (%r11,%r13,1),%r11
+ xorq %rbx,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%rax,%r14
+ rorxq $28,%rax,%r13
+ leaq (%rdx,%r11,1),%rdx
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rbx,%rdi
+ vpsrlq $6,%ymm7,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%r11,%rdi,1),%r11
+ movq %r8,%r12
+ vpsllq $3,%ymm7,%ymm10
+ vpaddq %ymm8,%ymm0,%ymm0
+ addq 8+256(%rsp),%r10
+ andq %rdx,%r12
+ rorxq $41,%rdx,%r13
+ vpsrlq $19,%ymm7,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%rdx,%rdi
+ leaq (%r11,%r14,1),%r11
+ leaq (%r10,%r12,1),%r10
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %r9,%rdx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rdx,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%r10,%r12,1),%r10
+ xorq %r14,%r13
+ movq %r11,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%r11,%r12
+ leaq (%r10,%r13,1),%r10
+ xorq %rax,%rdi
+ vpaddq %ymm11,%ymm0,%ymm0
+ rorxq $34,%r11,%r14
+ rorxq $28,%r11,%r13
+ leaq (%rcx,%r10,1),%rcx
+ vpaddq -128(%rbp),%ymm0,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rax,%r15
+ xorq %r13,%r14
+ leaq (%r10,%r15,1),%r10
+ movq %rdx,%r12
+ vmovdqa %ymm10,0(%rsp)
+ vpalignr $8,%ymm1,%ymm2,%ymm8
+ addq 32+256(%rsp),%r9
+ andq %rcx,%r12
+ rorxq $41,%rcx,%r13
+ vpalignr $8,%ymm5,%ymm6,%ymm11
+ rorxq $18,%rcx,%r15
+ leaq (%r10,%r14,1),%r10
+ leaq (%r9,%r12,1),%r9
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %r8,%rcx,%r12
+ xorq %r15,%r13
+ rorxq $14,%rcx,%r14
+ vpaddq %ymm11,%ymm1,%ymm1
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%r9,%r12,1),%r9
+ xorq %r14,%r13
+ movq %r10,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%r10,%r12
+ leaq (%r9,%r13,1),%r9
+ xorq %r11,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%r10,%r14
+ rorxq $28,%r10,%r13
+ leaq (%rbx,%r9,1),%rbx
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r11,%rdi
+ vpsrlq $6,%ymm0,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%r9,%rdi,1),%r9
+ movq %rcx,%r12
+ vpsllq $3,%ymm0,%ymm10
+ vpaddq %ymm8,%ymm1,%ymm1
+ addq 40+256(%rsp),%r8
+ andq %rbx,%r12
+ rorxq $41,%rbx,%r13
+ vpsrlq $19,%ymm0,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%rbx,%rdi
+ leaq (%r9,%r14,1),%r9
+ leaq (%r8,%r12,1),%r8
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %rdx,%rbx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rbx,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%r8,%r12,1),%r8
+ xorq %r14,%r13
+ movq %r9,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%r9,%r12
+ leaq (%r8,%r13,1),%r8
+ xorq %r10,%rdi
+ vpaddq %ymm11,%ymm1,%ymm1
+ rorxq $34,%r9,%r14
+ rorxq $28,%r9,%r13
+ leaq (%rax,%r8,1),%rax
+ vpaddq -96(%rbp),%ymm1,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r10,%r15
+ xorq %r13,%r14
+ leaq (%r8,%r15,1),%r8
+ movq %rbx,%r12
+ vmovdqa %ymm10,32(%rsp)
+ vpalignr $8,%ymm2,%ymm3,%ymm8
+ addq 64+256(%rsp),%rdx
+ andq %rax,%r12
+ rorxq $41,%rax,%r13
+ vpalignr $8,%ymm6,%ymm7,%ymm11
+ rorxq $18,%rax,%r15
+ leaq (%r8,%r14,1),%r8
+ leaq (%rdx,%r12,1),%rdx
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %rcx,%rax,%r12
+ xorq %r15,%r13
+ rorxq $14,%rax,%r14
+ vpaddq %ymm11,%ymm2,%ymm2
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%rdx,%r12,1),%rdx
+ xorq %r14,%r13
+ movq %r8,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%r8,%r12
+ leaq (%rdx,%r13,1),%rdx
+ xorq %r9,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%r8,%r14
+ rorxq $28,%r8,%r13
+ leaq (%r11,%rdx,1),%r11
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r9,%rdi
+ vpsrlq $6,%ymm1,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%rdx,%rdi,1),%rdx
+ movq %rax,%r12
+ vpsllq $3,%ymm1,%ymm10
+ vpaddq %ymm8,%ymm2,%ymm2
+ addq 72+256(%rsp),%rcx
+ andq %r11,%r12
+ rorxq $41,%r11,%r13
+ vpsrlq $19,%ymm1,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%r11,%rdi
+ leaq (%rdx,%r14,1),%rdx
+ leaq (%rcx,%r12,1),%rcx
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %rbx,%r11,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r11,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%rcx,%r12,1),%rcx
+ xorq %r14,%r13
+ movq %rdx,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%rdx,%r12
+ leaq (%rcx,%r13,1),%rcx
+ xorq %r8,%rdi
+ vpaddq %ymm11,%ymm2,%ymm2
+ rorxq $34,%rdx,%r14
+ rorxq $28,%rdx,%r13
+ leaq (%r10,%rcx,1),%r10
+ vpaddq -64(%rbp),%ymm2,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r8,%r15
+ xorq %r13,%r14
+ leaq (%rcx,%r15,1),%rcx
+ movq %r11,%r12
+ vmovdqa %ymm10,64(%rsp)
+ vpalignr $8,%ymm3,%ymm4,%ymm8
+ addq 96+256(%rsp),%rbx
+ andq %r10,%r12
+ rorxq $41,%r10,%r13
+ vpalignr $8,%ymm7,%ymm0,%ymm11
+ rorxq $18,%r10,%r15
+ leaq (%rcx,%r14,1),%rcx
+ leaq (%rbx,%r12,1),%rbx
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %rax,%r10,%r12
+ xorq %r15,%r13
+ rorxq $14,%r10,%r14
+ vpaddq %ymm11,%ymm3,%ymm3
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%rbx,%r12,1),%rbx
+ xorq %r14,%r13
+ movq %rcx,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%rcx,%r12
+ leaq (%rbx,%r13,1),%rbx
+ xorq %rdx,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%rcx,%r14
+ rorxq $28,%rcx,%r13
+ leaq (%r9,%rbx,1),%r9
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rdx,%rdi
+ vpsrlq $6,%ymm2,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%rbx,%rdi,1),%rbx
+ movq %r10,%r12
+ vpsllq $3,%ymm2,%ymm10
+ vpaddq %ymm8,%ymm3,%ymm3
+ addq 104+256(%rsp),%rax
+ andq %r9,%r12
+ rorxq $41,%r9,%r13
+ vpsrlq $19,%ymm2,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%r9,%rdi
+ leaq (%rbx,%r14,1),%rbx
+ leaq (%rax,%r12,1),%rax
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %r11,%r9,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r9,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%rax,%r12,1),%rax
+ xorq %r14,%r13
+ movq %rbx,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%rbx,%r12
+ leaq (%rax,%r13,1),%rax
+ xorq %rcx,%rdi
+ vpaddq %ymm11,%ymm3,%ymm3
+ rorxq $34,%rbx,%r14
+ rorxq $28,%rbx,%r13
+ leaq (%r8,%rax,1),%r8
+ vpaddq -32(%rbp),%ymm3,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rcx,%r15
+ xorq %r13,%r14
+ leaq (%rax,%r15,1),%rax
+ movq %r9,%r12
+ vmovdqa %ymm10,96(%rsp)
+ leaq -128(%rsp),%rsp
+.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
+
+ pushq 128-8(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
+ leaq 8(%rsp),%rsp
+.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
+ vpalignr $8,%ymm4,%ymm5,%ymm8
+ addq 0+256(%rsp),%r11
+ andq %r8,%r12
+ rorxq $41,%r8,%r13
+ vpalignr $8,%ymm0,%ymm1,%ymm11
+ rorxq $18,%r8,%r15
+ leaq (%rax,%r14,1),%rax
+ leaq (%r11,%r12,1),%r11
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %r10,%r8,%r12
+ xorq %r15,%r13
+ rorxq $14,%r8,%r14
+ vpaddq %ymm11,%ymm4,%ymm4
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%r11,%r12,1),%r11
+ xorq %r14,%r13
+ movq %rax,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%rax,%r12
+ leaq (%r11,%r13,1),%r11
+ xorq %rbx,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%rax,%r14
+ rorxq $28,%rax,%r13
+ leaq (%rdx,%r11,1),%rdx
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rbx,%rdi
+ vpsrlq $6,%ymm3,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%r11,%rdi,1),%r11
+ movq %r8,%r12
+ vpsllq $3,%ymm3,%ymm10
+ vpaddq %ymm8,%ymm4,%ymm4
+ addq 8+256(%rsp),%r10
+ andq %rdx,%r12
+ rorxq $41,%rdx,%r13
+ vpsrlq $19,%ymm3,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%rdx,%rdi
+ leaq (%r11,%r14,1),%r11
+ leaq (%r10,%r12,1),%r10
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %r9,%rdx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rdx,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%r10,%r12,1),%r10
+ xorq %r14,%r13
+ movq %r11,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%r11,%r12
+ leaq (%r10,%r13,1),%r10
+ xorq %rax,%rdi
+ vpaddq %ymm11,%ymm4,%ymm4
+ rorxq $34,%r11,%r14
+ rorxq $28,%r11,%r13
+ leaq (%rcx,%r10,1),%rcx
+ vpaddq 0(%rbp),%ymm4,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rax,%r15
+ xorq %r13,%r14
+ leaq (%r10,%r15,1),%r10
+ movq %rdx,%r12
+ vmovdqa %ymm10,0(%rsp)
+ vpalignr $8,%ymm5,%ymm6,%ymm8
+ addq 32+256(%rsp),%r9
+ andq %rcx,%r12
+ rorxq $41,%rcx,%r13
+ vpalignr $8,%ymm1,%ymm2,%ymm11
+ rorxq $18,%rcx,%r15
+ leaq (%r10,%r14,1),%r10
+ leaq (%r9,%r12,1),%r9
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %r8,%rcx,%r12
+ xorq %r15,%r13
+ rorxq $14,%rcx,%r14
+ vpaddq %ymm11,%ymm5,%ymm5
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%r9,%r12,1),%r9
+ xorq %r14,%r13
+ movq %r10,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%r10,%r12
+ leaq (%r9,%r13,1),%r9
+ xorq %r11,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%r10,%r14
+ rorxq $28,%r10,%r13
+ leaq (%rbx,%r9,1),%rbx
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r11,%rdi
+ vpsrlq $6,%ymm4,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%r9,%rdi,1),%r9
+ movq %rcx,%r12
+ vpsllq $3,%ymm4,%ymm10
+ vpaddq %ymm8,%ymm5,%ymm5
+ addq 40+256(%rsp),%r8
+ andq %rbx,%r12
+ rorxq $41,%rbx,%r13
+ vpsrlq $19,%ymm4,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%rbx,%rdi
+ leaq (%r9,%r14,1),%r9
+ leaq (%r8,%r12,1),%r8
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %rdx,%rbx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rbx,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%r8,%r12,1),%r8
+ xorq %r14,%r13
+ movq %r9,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%r9,%r12
+ leaq (%r8,%r13,1),%r8
+ xorq %r10,%rdi
+ vpaddq %ymm11,%ymm5,%ymm5
+ rorxq $34,%r9,%r14
+ rorxq $28,%r9,%r13
+ leaq (%rax,%r8,1),%rax
+ vpaddq 32(%rbp),%ymm5,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r10,%r15
+ xorq %r13,%r14
+ leaq (%r8,%r15,1),%r8
+ movq %rbx,%r12
+ vmovdqa %ymm10,32(%rsp)
+ vpalignr $8,%ymm6,%ymm7,%ymm8
+ addq 64+256(%rsp),%rdx
+ andq %rax,%r12
+ rorxq $41,%rax,%r13
+ vpalignr $8,%ymm2,%ymm3,%ymm11
+ rorxq $18,%rax,%r15
+ leaq (%r8,%r14,1),%r8
+ leaq (%rdx,%r12,1),%rdx
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %rcx,%rax,%r12
+ xorq %r15,%r13
+ rorxq $14,%rax,%r14
+ vpaddq %ymm11,%ymm6,%ymm6
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%rdx,%r12,1),%rdx
+ xorq %r14,%r13
+ movq %r8,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%r8,%r12
+ leaq (%rdx,%r13,1),%rdx
+ xorq %r9,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%r8,%r14
+ rorxq $28,%r8,%r13
+ leaq (%r11,%rdx,1),%r11
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r9,%rdi
+ vpsrlq $6,%ymm5,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%rdx,%rdi,1),%rdx
+ movq %rax,%r12
+ vpsllq $3,%ymm5,%ymm10
+ vpaddq %ymm8,%ymm6,%ymm6
+ addq 72+256(%rsp),%rcx
+ andq %r11,%r12
+ rorxq $41,%r11,%r13
+ vpsrlq $19,%ymm5,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%r11,%rdi
+ leaq (%rdx,%r14,1),%rdx
+ leaq (%rcx,%r12,1),%rcx
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %rbx,%r11,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r11,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%rcx,%r12,1),%rcx
+ xorq %r14,%r13
+ movq %rdx,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%rdx,%r12
+ leaq (%rcx,%r13,1),%rcx
+ xorq %r8,%rdi
+ vpaddq %ymm11,%ymm6,%ymm6
+ rorxq $34,%rdx,%r14
+ rorxq $28,%rdx,%r13
+ leaq (%r10,%rcx,1),%r10
+ vpaddq 64(%rbp),%ymm6,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r8,%r15
+ xorq %r13,%r14
+ leaq (%rcx,%r15,1),%rcx
+ movq %r11,%r12
+ vmovdqa %ymm10,64(%rsp)
+ vpalignr $8,%ymm7,%ymm0,%ymm8
+ addq 96+256(%rsp),%rbx
+ andq %r10,%r12
+ rorxq $41,%r10,%r13
+ vpalignr $8,%ymm3,%ymm4,%ymm11
+ rorxq $18,%r10,%r15
+ leaq (%rcx,%r14,1),%rcx
+ leaq (%rbx,%r12,1),%rbx
+ vpsrlq $1,%ymm8,%ymm10
+ andnq %rax,%r10,%r12
+ xorq %r15,%r13
+ rorxq $14,%r10,%r14
+ vpaddq %ymm11,%ymm7,%ymm7
+ vpsrlq $7,%ymm8,%ymm11
+ leaq (%rbx,%r12,1),%rbx
+ xorq %r14,%r13
+ movq %rcx,%r15
+ vpsllq $56,%ymm8,%ymm9
+ vpxor %ymm10,%ymm11,%ymm8
+ rorxq $39,%rcx,%r12
+ leaq (%rbx,%r13,1),%rbx
+ xorq %rdx,%r15
+ vpsrlq $7,%ymm10,%ymm10
+ vpxor %ymm9,%ymm8,%ymm8
+ rorxq $34,%rcx,%r14
+ rorxq $28,%rcx,%r13
+ leaq (%r9,%rbx,1),%r9
+ vpsllq $7,%ymm9,%ymm9
+ vpxor %ymm10,%ymm8,%ymm8
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rdx,%rdi
+ vpsrlq $6,%ymm6,%ymm11
+ vpxor %ymm9,%ymm8,%ymm8
+ xorq %r13,%r14
+ leaq (%rbx,%rdi,1),%rbx
+ movq %r10,%r12
+ vpsllq $3,%ymm6,%ymm10
+ vpaddq %ymm8,%ymm7,%ymm7
+ addq 104+256(%rsp),%rax
+ andq %r9,%r12
+ rorxq $41,%r9,%r13
+ vpsrlq $19,%ymm6,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ rorxq $18,%r9,%rdi
+ leaq (%rbx,%r14,1),%rbx
+ leaq (%rax,%r12,1),%rax
+ vpsllq $42,%ymm10,%ymm10
+ vpxor %ymm9,%ymm11,%ymm11
+ andnq %r11,%r9,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r9,%r14
+ vpsrlq $42,%ymm9,%ymm9
+ vpxor %ymm10,%ymm11,%ymm11
+ leaq (%rax,%r12,1),%rax
+ xorq %r14,%r13
+ movq %rbx,%rdi
+ vpxor %ymm9,%ymm11,%ymm11
+ rorxq $39,%rbx,%r12
+ leaq (%rax,%r13,1),%rax
+ xorq %rcx,%rdi
+ vpaddq %ymm11,%ymm7,%ymm7
+ rorxq $34,%rbx,%r14
+ rorxq $28,%rbx,%r13
+ leaq (%r8,%rax,1),%r8
+ vpaddq 96(%rbp),%ymm7,%ymm10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rcx,%r15
+ xorq %r13,%r14
+ leaq (%rax,%r15,1),%rax
+ movq %r9,%r12
+ vmovdqa %ymm10,96(%rsp)
+ leaq 256(%rbp),%rbp
+ cmpb $0,-121(%rbp)
+ jne .Lavx2_00_47
+ addq 0+128(%rsp),%r11
+ andq %r8,%r12
+ rorxq $41,%r8,%r13
+ rorxq $18,%r8,%r15
+ leaq (%rax,%r14,1),%rax
+ leaq (%r11,%r12,1),%r11
+ andnq %r10,%r8,%r12
+ xorq %r15,%r13
+ rorxq $14,%r8,%r14
+ leaq (%r11,%r12,1),%r11
+ xorq %r14,%r13
+ movq %rax,%r15
+ rorxq $39,%rax,%r12
+ leaq (%r11,%r13,1),%r11
+ xorq %rbx,%r15
+ rorxq $34,%rax,%r14
+ rorxq $28,%rax,%r13
+ leaq (%rdx,%r11,1),%rdx
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rbx,%rdi
+ xorq %r13,%r14
+ leaq (%r11,%rdi,1),%r11
+ movq %r8,%r12
+ addq 8+128(%rsp),%r10
+ andq %rdx,%r12
+ rorxq $41,%rdx,%r13
+ rorxq $18,%rdx,%rdi
+ leaq (%r11,%r14,1),%r11
+ leaq (%r10,%r12,1),%r10
+ andnq %r9,%rdx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rdx,%r14
+ leaq (%r10,%r12,1),%r10
+ xorq %r14,%r13
+ movq %r11,%rdi
+ rorxq $39,%r11,%r12
+ leaq (%r10,%r13,1),%r10
+ xorq %rax,%rdi
+ rorxq $34,%r11,%r14
+ rorxq $28,%r11,%r13
+ leaq (%rcx,%r10,1),%rcx
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rax,%r15
+ xorq %r13,%r14
+ leaq (%r10,%r15,1),%r10
+ movq %rdx,%r12
+ addq 32+128(%rsp),%r9
+ andq %rcx,%r12
+ rorxq $41,%rcx,%r13
+ rorxq $18,%rcx,%r15
+ leaq (%r10,%r14,1),%r10
+ leaq (%r9,%r12,1),%r9
+ andnq %r8,%rcx,%r12
+ xorq %r15,%r13
+ rorxq $14,%rcx,%r14
+ leaq (%r9,%r12,1),%r9
+ xorq %r14,%r13
+ movq %r10,%r15
+ rorxq $39,%r10,%r12
+ leaq (%r9,%r13,1),%r9
+ xorq %r11,%r15
+ rorxq $34,%r10,%r14
+ rorxq $28,%r10,%r13
+ leaq (%rbx,%r9,1),%rbx
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r11,%rdi
+ xorq %r13,%r14
+ leaq (%r9,%rdi,1),%r9
+ movq %rcx,%r12
+ addq 40+128(%rsp),%r8
+ andq %rbx,%r12
+ rorxq $41,%rbx,%r13
+ rorxq $18,%rbx,%rdi
+ leaq (%r9,%r14,1),%r9
+ leaq (%r8,%r12,1),%r8
+ andnq %rdx,%rbx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rbx,%r14
+ leaq (%r8,%r12,1),%r8
+ xorq %r14,%r13
+ movq %r9,%rdi
+ rorxq $39,%r9,%r12
+ leaq (%r8,%r13,1),%r8
+ xorq %r10,%rdi
+ rorxq $34,%r9,%r14
+ rorxq $28,%r9,%r13
+ leaq (%rax,%r8,1),%rax
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r10,%r15
+ xorq %r13,%r14
+ leaq (%r8,%r15,1),%r8
+ movq %rbx,%r12
+ addq 64+128(%rsp),%rdx
+ andq %rax,%r12
+ rorxq $41,%rax,%r13
+ rorxq $18,%rax,%r15
+ leaq (%r8,%r14,1),%r8
+ leaq (%rdx,%r12,1),%rdx
+ andnq %rcx,%rax,%r12
+ xorq %r15,%r13
+ rorxq $14,%rax,%r14
+ leaq (%rdx,%r12,1),%rdx
+ xorq %r14,%r13
+ movq %r8,%r15
+ rorxq $39,%r8,%r12
+ leaq (%rdx,%r13,1),%rdx
+ xorq %r9,%r15
+ rorxq $34,%r8,%r14
+ rorxq $28,%r8,%r13
+ leaq (%r11,%rdx,1),%r11
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r9,%rdi
+ xorq %r13,%r14
+ leaq (%rdx,%rdi,1),%rdx
+ movq %rax,%r12
+ addq 72+128(%rsp),%rcx
+ andq %r11,%r12
+ rorxq $41,%r11,%r13
+ rorxq $18,%r11,%rdi
+ leaq (%rdx,%r14,1),%rdx
+ leaq (%rcx,%r12,1),%rcx
+ andnq %rbx,%r11,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r11,%r14
+ leaq (%rcx,%r12,1),%rcx
+ xorq %r14,%r13
+ movq %rdx,%rdi
+ rorxq $39,%rdx,%r12
+ leaq (%rcx,%r13,1),%rcx
+ xorq %r8,%rdi
+ rorxq $34,%rdx,%r14
+ rorxq $28,%rdx,%r13
+ leaq (%r10,%rcx,1),%r10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r8,%r15
+ xorq %r13,%r14
+ leaq (%rcx,%r15,1),%rcx
+ movq %r11,%r12
+ addq 96+128(%rsp),%rbx
+ andq %r10,%r12
+ rorxq $41,%r10,%r13
+ rorxq $18,%r10,%r15
+ leaq (%rcx,%r14,1),%rcx
+ leaq (%rbx,%r12,1),%rbx
+ andnq %rax,%r10,%r12
+ xorq %r15,%r13
+ rorxq $14,%r10,%r14
+ leaq (%rbx,%r12,1),%rbx
+ xorq %r14,%r13
+ movq %rcx,%r15
+ rorxq $39,%rcx,%r12
+ leaq (%rbx,%r13,1),%rbx
+ xorq %rdx,%r15
+ rorxq $34,%rcx,%r14
+ rorxq $28,%rcx,%r13
+ leaq (%r9,%rbx,1),%r9
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rdx,%rdi
+ xorq %r13,%r14
+ leaq (%rbx,%rdi,1),%rbx
+ movq %r10,%r12
+ addq 104+128(%rsp),%rax
+ andq %r9,%r12
+ rorxq $41,%r9,%r13
+ rorxq $18,%r9,%rdi
+ leaq (%rbx,%r14,1),%rbx
+ leaq (%rax,%r12,1),%rax
+ andnq %r11,%r9,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r9,%r14
+ leaq (%rax,%r12,1),%rax
+ xorq %r14,%r13
+ movq %rbx,%rdi
+ rorxq $39,%rbx,%r12
+ leaq (%rax,%r13,1),%rax
+ xorq %rcx,%rdi
+ rorxq $34,%rbx,%r14
+ rorxq $28,%rbx,%r13
+ leaq (%r8,%rax,1),%r8
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rcx,%r15
+ xorq %r13,%r14
+ leaq (%rax,%r15,1),%rax
+ movq %r9,%r12
+ addq 0(%rsp),%r11
+ andq %r8,%r12
+ rorxq $41,%r8,%r13
+ rorxq $18,%r8,%r15
+ leaq (%rax,%r14,1),%rax
+ leaq (%r11,%r12,1),%r11
+ andnq %r10,%r8,%r12
+ xorq %r15,%r13
+ rorxq $14,%r8,%r14
+ leaq (%r11,%r12,1),%r11
+ xorq %r14,%r13
+ movq %rax,%r15
+ rorxq $39,%rax,%r12
+ leaq (%r11,%r13,1),%r11
+ xorq %rbx,%r15
+ rorxq $34,%rax,%r14
+ rorxq $28,%rax,%r13
+ leaq (%rdx,%r11,1),%rdx
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rbx,%rdi
+ xorq %r13,%r14
+ leaq (%r11,%rdi,1),%r11
+ movq %r8,%r12
+ addq 8(%rsp),%r10
+ andq %rdx,%r12
+ rorxq $41,%rdx,%r13
+ rorxq $18,%rdx,%rdi
+ leaq (%r11,%r14,1),%r11
+ leaq (%r10,%r12,1),%r10
+ andnq %r9,%rdx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rdx,%r14
+ leaq (%r10,%r12,1),%r10
+ xorq %r14,%r13
+ movq %r11,%rdi
+ rorxq $39,%r11,%r12
+ leaq (%r10,%r13,1),%r10
+ xorq %rax,%rdi
+ rorxq $34,%r11,%r14
+ rorxq $28,%r11,%r13
+ leaq (%rcx,%r10,1),%rcx
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rax,%r15
+ xorq %r13,%r14
+ leaq (%r10,%r15,1),%r10
+ movq %rdx,%r12
+ addq 32(%rsp),%r9
+ andq %rcx,%r12
+ rorxq $41,%rcx,%r13
+ rorxq $18,%rcx,%r15
+ leaq (%r10,%r14,1),%r10
+ leaq (%r9,%r12,1),%r9
+ andnq %r8,%rcx,%r12
+ xorq %r15,%r13
+ rorxq $14,%rcx,%r14
+ leaq (%r9,%r12,1),%r9
+ xorq %r14,%r13
+ movq %r10,%r15
+ rorxq $39,%r10,%r12
+ leaq (%r9,%r13,1),%r9
+ xorq %r11,%r15
+ rorxq $34,%r10,%r14
+ rorxq $28,%r10,%r13
+ leaq (%rbx,%r9,1),%rbx
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r11,%rdi
+ xorq %r13,%r14
+ leaq (%r9,%rdi,1),%r9
+ movq %rcx,%r12
+ addq 40(%rsp),%r8
+ andq %rbx,%r12
+ rorxq $41,%rbx,%r13
+ rorxq $18,%rbx,%rdi
+ leaq (%r9,%r14,1),%r9
+ leaq (%r8,%r12,1),%r8
+ andnq %rdx,%rbx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rbx,%r14
+ leaq (%r8,%r12,1),%r8
+ xorq %r14,%r13
+ movq %r9,%rdi
+ rorxq $39,%r9,%r12
+ leaq (%r8,%r13,1),%r8
+ xorq %r10,%rdi
+ rorxq $34,%r9,%r14
+ rorxq $28,%r9,%r13
+ leaq (%rax,%r8,1),%rax
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r10,%r15
+ xorq %r13,%r14
+ leaq (%r8,%r15,1),%r8
+ movq %rbx,%r12
+ addq 64(%rsp),%rdx
+ andq %rax,%r12
+ rorxq $41,%rax,%r13
+ rorxq $18,%rax,%r15
+ leaq (%r8,%r14,1),%r8
+ leaq (%rdx,%r12,1),%rdx
+ andnq %rcx,%rax,%r12
+ xorq %r15,%r13
+ rorxq $14,%rax,%r14
+ leaq (%rdx,%r12,1),%rdx
+ xorq %r14,%r13
+ movq %r8,%r15
+ rorxq $39,%r8,%r12
+ leaq (%rdx,%r13,1),%rdx
+ xorq %r9,%r15
+ rorxq $34,%r8,%r14
+ rorxq $28,%r8,%r13
+ leaq (%r11,%rdx,1),%r11
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r9,%rdi
+ xorq %r13,%r14
+ leaq (%rdx,%rdi,1),%rdx
+ movq %rax,%r12
+ addq 72(%rsp),%rcx
+ andq %r11,%r12
+ rorxq $41,%r11,%r13
+ rorxq $18,%r11,%rdi
+ leaq (%rdx,%r14,1),%rdx
+ leaq (%rcx,%r12,1),%rcx
+ andnq %rbx,%r11,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r11,%r14
+ leaq (%rcx,%r12,1),%rcx
+ xorq %r14,%r13
+ movq %rdx,%rdi
+ rorxq $39,%rdx,%r12
+ leaq (%rcx,%r13,1),%rcx
+ xorq %r8,%rdi
+ rorxq $34,%rdx,%r14
+ rorxq $28,%rdx,%r13
+ leaq (%r10,%rcx,1),%r10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r8,%r15
+ xorq %r13,%r14
+ leaq (%rcx,%r15,1),%rcx
+ movq %r11,%r12
+ addq 96(%rsp),%rbx
+ andq %r10,%r12
+ rorxq $41,%r10,%r13
+ rorxq $18,%r10,%r15
+ leaq (%rcx,%r14,1),%rcx
+ leaq (%rbx,%r12,1),%rbx
+ andnq %rax,%r10,%r12
+ xorq %r15,%r13
+ rorxq $14,%r10,%r14
+ leaq (%rbx,%r12,1),%rbx
+ xorq %r14,%r13
+ movq %rcx,%r15
+ rorxq $39,%rcx,%r12
+ leaq (%rbx,%r13,1),%rbx
+ xorq %rdx,%r15
+ rorxq $34,%rcx,%r14
+ rorxq $28,%rcx,%r13
+ leaq (%r9,%rbx,1),%r9
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rdx,%rdi
+ xorq %r13,%r14
+ leaq (%rbx,%rdi,1),%rbx
+ movq %r10,%r12
+ addq 104(%rsp),%rax
+ andq %r9,%r12
+ rorxq $41,%r9,%r13
+ rorxq $18,%r9,%rdi
+ leaq (%rbx,%r14,1),%rbx
+ leaq (%rax,%r12,1),%rax
+ andnq %r11,%r9,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r9,%r14
+ leaq (%rax,%r12,1),%rax
+ xorq %r14,%r13
+ movq %rbx,%rdi
+ rorxq $39,%rbx,%r12
+ leaq (%rax,%r13,1),%rax
+ xorq %rcx,%rdi
+ rorxq $34,%rbx,%r14
+ rorxq $28,%rbx,%r13
+ leaq (%r8,%rax,1),%r8
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rcx,%r15
+ xorq %r13,%r14
+ leaq (%rax,%r15,1),%rax
+ movq %r9,%r12
+ movq 1280(%rsp),%rdi
+ addq %r14,%rax
+
+ leaq 1152(%rsp),%rbp
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+
+ cmpq 144(%rbp),%rsi
+ je .Ldone_avx2
+
+ xorq %r14,%r14
+ movq %rbx,%rdi
+ xorq %rcx,%rdi
+ movq %r9,%r12
+ jmp .Lower_avx2
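+	# Replay all 80 rounds for the second interleaved block, reading its
+	# precomputed W+K values from the upper 128-bit lane of each saved
+	# ymm slot (the +16 offsets below).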
+.balign 16
+.Lower_avx2:
+ addq 0+16(%rbp),%r11
+ andq %r8,%r12
+ rorxq $41,%r8,%r13
+ rorxq $18,%r8,%r15
+ leaq (%rax,%r14,1),%rax
+ leaq (%r11,%r12,1),%r11
+ andnq %r10,%r8,%r12
+ xorq %r15,%r13
+ rorxq $14,%r8,%r14
+ leaq (%r11,%r12,1),%r11
+ xorq %r14,%r13
+ movq %rax,%r15
+ rorxq $39,%rax,%r12
+ leaq (%r11,%r13,1),%r11
+ xorq %rbx,%r15
+ rorxq $34,%rax,%r14
+ rorxq $28,%rax,%r13
+ leaq (%rdx,%r11,1),%rdx
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rbx,%rdi
+ xorq %r13,%r14
+ leaq (%r11,%rdi,1),%r11
+ movq %r8,%r12
+ addq 8+16(%rbp),%r10
+ andq %rdx,%r12
+ rorxq $41,%rdx,%r13
+ rorxq $18,%rdx,%rdi
+ leaq (%r11,%r14,1),%r11
+ leaq (%r10,%r12,1),%r10
+ andnq %r9,%rdx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rdx,%r14
+ leaq (%r10,%r12,1),%r10
+ xorq %r14,%r13
+ movq %r11,%rdi
+ rorxq $39,%r11,%r12
+ leaq (%r10,%r13,1),%r10
+ xorq %rax,%rdi
+ rorxq $34,%r11,%r14
+ rorxq $28,%r11,%r13
+ leaq (%rcx,%r10,1),%rcx
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rax,%r15
+ xorq %r13,%r14
+ leaq (%r10,%r15,1),%r10
+ movq %rdx,%r12
+ addq 32+16(%rbp),%r9
+ andq %rcx,%r12
+ rorxq $41,%rcx,%r13
+ rorxq $18,%rcx,%r15
+ leaq (%r10,%r14,1),%r10
+ leaq (%r9,%r12,1),%r9
+ andnq %r8,%rcx,%r12
+ xorq %r15,%r13
+ rorxq $14,%rcx,%r14
+ leaq (%r9,%r12,1),%r9
+ xorq %r14,%r13
+ movq %r10,%r15
+ rorxq $39,%r10,%r12
+ leaq (%r9,%r13,1),%r9
+ xorq %r11,%r15
+ rorxq $34,%r10,%r14
+ rorxq $28,%r10,%r13
+ leaq (%rbx,%r9,1),%rbx
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r11,%rdi
+ xorq %r13,%r14
+ leaq (%r9,%rdi,1),%r9
+ movq %rcx,%r12
+ addq 40+16(%rbp),%r8
+ andq %rbx,%r12
+ rorxq $41,%rbx,%r13
+ rorxq $18,%rbx,%rdi
+ leaq (%r9,%r14,1),%r9
+ leaq (%r8,%r12,1),%r8
+ andnq %rdx,%rbx,%r12
+ xorq %rdi,%r13
+ rorxq $14,%rbx,%r14
+ leaq (%r8,%r12,1),%r8
+ xorq %r14,%r13
+ movq %r9,%rdi
+ rorxq $39,%r9,%r12
+ leaq (%r8,%r13,1),%r8
+ xorq %r10,%rdi
+ rorxq $34,%r9,%r14
+ rorxq $28,%r9,%r13
+ leaq (%rax,%r8,1),%rax
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r10,%r15
+ xorq %r13,%r14
+ leaq (%r8,%r15,1),%r8
+ movq %rbx,%r12
+ addq 64+16(%rbp),%rdx
+ andq %rax,%r12
+ rorxq $41,%rax,%r13
+ rorxq $18,%rax,%r15
+ leaq (%r8,%r14,1),%r8
+ leaq (%rdx,%r12,1),%rdx
+ andnq %rcx,%rax,%r12
+ xorq %r15,%r13
+ rorxq $14,%rax,%r14
+ leaq (%rdx,%r12,1),%rdx
+ xorq %r14,%r13
+ movq %r8,%r15
+ rorxq $39,%r8,%r12
+ leaq (%rdx,%r13,1),%rdx
+ xorq %r9,%r15
+ rorxq $34,%r8,%r14
+ rorxq $28,%r8,%r13
+ leaq (%r11,%rdx,1),%r11
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %r9,%rdi
+ xorq %r13,%r14
+ leaq (%rdx,%rdi,1),%rdx
+ movq %rax,%r12
+ addq 72+16(%rbp),%rcx
+ andq %r11,%r12
+ rorxq $41,%r11,%r13
+ rorxq $18,%r11,%rdi
+ leaq (%rdx,%r14,1),%rdx
+ leaq (%rcx,%r12,1),%rcx
+ andnq %rbx,%r11,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r11,%r14
+ leaq (%rcx,%r12,1),%rcx
+ xorq %r14,%r13
+ movq %rdx,%rdi
+ rorxq $39,%rdx,%r12
+ leaq (%rcx,%r13,1),%rcx
+ xorq %r8,%rdi
+ rorxq $34,%rdx,%r14
+ rorxq $28,%rdx,%r13
+ leaq (%r10,%rcx,1),%r10
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %r8,%r15
+ xorq %r13,%r14
+ leaq (%rcx,%r15,1),%rcx
+ movq %r11,%r12
+ addq 96+16(%rbp),%rbx
+ andq %r10,%r12
+ rorxq $41,%r10,%r13
+ rorxq $18,%r10,%r15
+ leaq (%rcx,%r14,1),%rcx
+ leaq (%rbx,%r12,1),%rbx
+ andnq %rax,%r10,%r12
+ xorq %r15,%r13
+ rorxq $14,%r10,%r14
+ leaq (%rbx,%r12,1),%rbx
+ xorq %r14,%r13
+ movq %rcx,%r15
+ rorxq $39,%rcx,%r12
+ leaq (%rbx,%r13,1),%rbx
+ xorq %rdx,%r15
+ rorxq $34,%rcx,%r14
+ rorxq $28,%rcx,%r13
+ leaq (%r9,%rbx,1),%r9
+ andq %r15,%rdi
+ xorq %r12,%r14
+ xorq %rdx,%rdi
+ xorq %r13,%r14
+ leaq (%rbx,%rdi,1),%rbx
+ movq %r10,%r12
+ addq 104+16(%rbp),%rax
+ andq %r9,%r12
+ rorxq $41,%r9,%r13
+ rorxq $18,%r9,%rdi
+ leaq (%rbx,%r14,1),%rbx
+ leaq (%rax,%r12,1),%rax
+ andnq %r11,%r9,%r12
+ xorq %rdi,%r13
+ rorxq $14,%r9,%r14
+ leaq (%rax,%r12,1),%rax
+ xorq %r14,%r13
+ movq %rbx,%rdi
+ rorxq $39,%rbx,%r12
+ leaq (%rax,%r13,1),%rax
+ xorq %rcx,%rdi
+ rorxq $34,%rbx,%r14
+ rorxq $28,%rbx,%r13
+ leaq (%r8,%rax,1),%r8
+ andq %rdi,%r15
+ xorq %r12,%r14
+ xorq %rcx,%r15
+ xorq %r13,%r14
+ leaq (%rax,%r15,1),%rax
+ movq %r9,%r12
+ leaq -128(%rbp),%rbp
+ cmpq %rsp,%rbp
+ jae .Lower_avx2
+
+ movq 1280(%rsp),%rdi
+ addq %r14,%rax
+
+ leaq 1152(%rsp),%rsp
+
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ leaq 256(%rsi),%rsi
+ addq 48(%rdi),%r10
+ movq %rsi,%r12
+ addq 56(%rdi),%r11
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ cmoveq %rsp,%r12
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+
+ jbe .Loop_avx2
+ leaq (%rsp),%rbp
+
+.cfi_escape 0x0f,0x06,0x76,0x98,0x01,0x06,0x23,0x08
+
+.Ldone_avx2:
+ movq 152(%rbp),%rsi
+.cfi_def_cfa %rsi,8
+ vzeroupper
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue_avx2:
+ RET
+.cfi_endproc
+SET_SIZE(zfs_sha512_transform_avx2)
+
+#if defined(__ELF__)
+ .section .note.GNU-stack,"",%progbits
+#endif
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
deleted file mode 100644
index 746c85a98566..000000000000
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
+++ /dev/null
@@ -1,2114 +0,0 @@
-/*
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project. Rights for redistribution and usage in source and binary
- * forms are granted according to the OpenSSL license.
- * ====================================================================
- *
- * sha256/512_block procedure for x86_64.
- *
- * 40% improvement over compiler-generated code on Opteron. On EM64T
- * sha256 was observed to run >80% faster and sha512 - >40%. No magical
- * tricks, just straight implementation... I really wonder why gcc
- * [being armed with inline assembler] fails to generate as fast code.
- * The only thing which is cool about this module is that it's very
- * same instruction sequence used for both SHA-256 and SHA-512. In
- * former case the instructions operate on 32-bit operands, while in
- * latter - on 64-bit ones. All I had to do is to get one flavor right,
- * the other one passed the test right away:-)
- *
- * sha256_block runs in ~1005 cycles on Opteron, which gives you
- * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
- * frequency in GHz. sha512_block runs in ~1275 cycles, which results
- * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
- * Well, compared to the IA-64 implementation, which maintains
- * X[16] in the register bank[!], sustains close to 4 instructions per
- * CPU clock cycle and runs in 1003 cycles, 1275 is a very good result
- * for the 3-way issue Opteron pipeline with X[16] maintained in
- * memory. So *if* there is a way to improve it, *then* the only way
- * would be to offload the X[16] updates to the SSE unit, but that
- * would require a "deeper" loop unroll, which in turn would cause a
- * size blow-up, not to mention added complexity -- and again, only
- * *if* it is actually possible to noticeably improve overall ILP
- * (instruction-level parallelism) on the CPU implementation at hand.
- *
- * Special note on Intel EM64T. While the Opteron CPU exhibits a
- * perfect performance ratio of 1.5 between the 64- and 32-bit flavors
- * [see above], [currently available] EM64T CPUs are apparently far
- * from it. On the contrary, the 64-bit version, sha512_block, is ~30%
- * *slower* than the 32-bit sha256_block:-( This is presumably because
- * 64-bit shifts/rotates are not single atomic instructions, but are
- * implemented in microcode.
- */
-
-/*
- * OpenSolaris OS modifications
- *
- * Sun elects to use this software under the BSD license.
- *
- * This source originates from OpenSSL file sha512-x86_64.pl at
- * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- * (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- * 1. Added perl "use strict" and declared variables.
- *
- * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- * assemblers). Replaced the .picmeup macro with assembler code.
- *
- * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
- * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
- */
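
A rough C sketch of the SHA2_CTX layout that change 4 assumes (field names are illustrative; the real OpenSolaris definition lives in sha2.h). The 4-byte algotype plus its alignment padding is why the code below adds 8 to %rdi before indexing the state words:

#include <stdint.h>

/*
 * Illustrative layout only: OpenSolaris SHA2_CTX begins with a 32-bit
 * algotype selector, and the 8-byte-aligned hash state follows, so 4
 * bytes of implicit padding sit between them.  Adding 8 to the ctx
 * pointer (%rdi) therefore lands on the chaining values.
 */
typedef struct {
	uint32_t algotype;	/* SHA256/SHA384/SHA512 selector */
	/* 4 bytes of implicit padding for 8-byte alignment */
	uint64_t state[8];	/* a..h chaining values at offset 8 */
	/* ... bit count, message buffer, etc. ... */
} sha2_ctx_sketch_t;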
-
-/*
- * This file was generated by a perl script (sha512-x86_64.pl) that was
- * used to generate the sha256 and sha512 variants from the same code base.
- * The comments from the original file have been pasted above.
- */
-
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sha2/sha2.h>
-
-/* ARGSUSED */
-void
-SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
-{
-}
-
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-
-ENTRY_NP(SHA512TransformBlocks)
-.cfi_startproc
- movq %rsp, %rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_offset %rbx,-16
- push %rbp
-.cfi_offset %rbp,-24
- push %r12
-.cfi_offset %r12,-32
- push %r13
-.cfi_offset %r13,-40
- push %r14
-.cfi_offset %r14,-48
- push %r15
-.cfi_offset %r15,-56
- mov %rsp,%rbp # copy %rsp
- shl $4,%rdx # num*16
- sub $16*8+4*8,%rsp
- lea (%rsi,%rdx,8),%rdx # inp+num*16*8
- and $-64,%rsp # align stack frame
- add $8,%rdi # Skip OpenSolaris field, "algotype"
- mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg
- mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg
- mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg
- mov %rbp,16*8+3*8(%rsp) # save copy of %rsp
-# echo ".cfi_cfa_expression %rsp+152,deref,+56" |
-# openssl/crypto/perlasm/x86_64-xlate.pl
-.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38
-
- #.picmeup %rbp
- # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
- # the address of the "next" instruction into the target register
- # (%rbp). This generates these 2 instructions:
- lea .Llea(%rip),%rbp
- #nop # .picmeup generates a nop for mod 8 alignment--not needed here
-
-.Llea:
- lea K512-.(%rbp),%rbp
-
- mov 8*0(%rdi),%rax
- mov 8*1(%rdi),%rbx
- mov 8*2(%rdi),%rcx
- mov 8*3(%rdi),%rdx
- mov 8*4(%rdi),%r8
- mov 8*5(%rdi),%r9
- mov 8*6(%rdi),%r10
- mov 8*7(%rdi),%r11
- jmp .Lloop
-
-.align 16
-.Lloop:
- xor %rdi,%rdi
- mov 8*0(%rsi),%r12
- bswap %r12
- mov %r8,%r13
- mov %r8,%r14
- mov %r9,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r10,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r8,%r15 # (f^g)&e
- mov %r12,0(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r11,%r12 # T1+=h
-
- mov %rax,%r11
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rax,%r13
- mov %rax,%r14
-
- ror $28,%r11
- ror $34,%r13
- mov %rax,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r11
- ror $5,%r13
- or %rcx,%r14 # a|c
-
- xor %r13,%r11 # h=Sigma0(a)
- and %rcx,%r15 # a&c
- add %r12,%rdx # d+=T1
-
- and %rbx,%r14 # (a|c)&b
- add %r12,%r11 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r11 # h+=Maj(a,b,c)
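
That is one full unrolled round. For reference, here is a hedged C rendering of what the register comments describe -- the ror chains sum to the SHA-512 rotation counts (ror $14, $18, then $23 more gives 14/18/41 for Sigma1; ror $28, $34, then $5 more gives 28/34/39 for Sigma0), and Maj uses an or/and form equivalent to the textbook xor form:

#include <stdint.h>

#define ROTR64(x, n)	(((x) >> (n)) | ((x) << (64 - (n))))

/* One SHA-512 round, written to mirror the register comments above. */
static inline void
sha512_round(uint64_t w[8], uint64_t Kt, uint64_t Wt)
{
	uint64_t a = w[0], b = w[1], c = w[2], d = w[3];
	uint64_t e = w[4], f = w[5], g = w[6], h = w[7];

	/* Sigma1(e): the ror $14 / ror $18 / (18+23 =) ror $41 chain */
	uint64_t S1 = ROTR64(e, 14) ^ ROTR64(e, 18) ^ ROTR64(e, 41);
	/* Ch(e,f,g) = ((f^g)&e)^g, exactly as in the comments */
	uint64_t ch = ((f ^ g) & e) ^ g;
	uint64_t T1 = h + S1 + ch + Kt + Wt;

	/* Sigma0(a): ror $28 / ror $34 / (34+5 =) ror $39 */
	uint64_t S0 = ROTR64(a, 28) ^ ROTR64(a, 34) ^ ROTR64(a, 39);
	/*
	 * Maj(a,b,c) = ((a|c)&b)|(a&c): same truth table as the
	 * textbook (a&b)^(a&c)^(b&c), with one fewer live temporary.
	 */
	uint64_t maj = ((a | c) & b) | (a & c);

	w[7] = g; w[6] = f; w[5] = e; w[4] = d + T1;	/* d+=T1 */
	w[3] = c; w[2] = b; w[1] = a;
	w[0] = T1 + S0 + maj;				/* h+=T1+Sigma0+Maj */
}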
- mov 8*1(%rsi),%r12
- bswap %r12
- mov %rdx,%r13
- mov %rdx,%r14
- mov %r8,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r9,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rdx,%r15 # (f^g)&e
- mov %r12,8(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r10,%r12 # T1+=h
-
- mov %r11,%r10
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r11,%r13
- mov %r11,%r14
-
- ror $28,%r10
- ror $34,%r13
- mov %r11,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r10
- ror $5,%r13
- or %rbx,%r14 # a|c
-
- xor %r13,%r10 # h=Sigma0(a)
- and %rbx,%r15 # a&c
- add %r12,%rcx # d+=T1
-
- and %rax,%r14 # (a|c)&b
- add %r12,%r10 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r10 # h+=Maj(a,b,c)
- mov 8*2(%rsi),%r12
- bswap %r12
- mov %rcx,%r13
- mov %rcx,%r14
- mov %rdx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r8,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rcx,%r15 # (f^g)&e
- mov %r12,16(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r9,%r12 # T1+=h
-
- mov %r10,%r9
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r10,%r13
- mov %r10,%r14
-
- ror $28,%r9
- ror $34,%r13
- mov %r10,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r9
- ror $5,%r13
- or %rax,%r14 # a|c
-
- xor %r13,%r9 # h=Sigma0(a)
- and %rax,%r15 # a&c
- add %r12,%rbx # d+=T1
-
- and %r11,%r14 # (a|c)&b
- add %r12,%r9 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r9 # h+=Maj(a,b,c)
- mov 8*3(%rsi),%r12
- bswap %r12
- mov %rbx,%r13
- mov %rbx,%r14
- mov %rcx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rdx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rbx,%r15 # (f^g)&e
- mov %r12,24(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r8,%r12 # T1+=h
-
- mov %r9,%r8
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r9,%r13
- mov %r9,%r14
-
- ror $28,%r8
- ror $34,%r13
- mov %r9,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r8
- ror $5,%r13
- or %r11,%r14 # a|c
-
- xor %r13,%r8 # h=Sigma0(a)
- and %r11,%r15 # a&c
- add %r12,%rax # d+=T1
-
- and %r10,%r14 # (a|c)&b
- add %r12,%r8 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r8 # h+=Maj(a,b,c)
- mov 8*4(%rsi),%r12
- bswap %r12
- mov %rax,%r13
- mov %rax,%r14
- mov %rbx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rcx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rax,%r15 # (f^g)&e
- mov %r12,32(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rdx,%r12 # T1+=h
-
- mov %r8,%rdx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r8,%r13
- mov %r8,%r14
-
- ror $28,%rdx
- ror $34,%r13
- mov %r8,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rdx
- ror $5,%r13
- or %r10,%r14 # a|c
-
- xor %r13,%rdx # h=Sigma0(a)
- and %r10,%r15 # a&c
- add %r12,%r11 # d+=T1
-
- and %r9,%r14 # (a|c)&b
- add %r12,%rdx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rdx # h+=Maj(a,b,c)
- mov 8*5(%rsi),%r12
- bswap %r12
- mov %r11,%r13
- mov %r11,%r14
- mov %rax,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rbx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r11,%r15 # (f^g)&e
- mov %r12,40(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rcx,%r12 # T1+=h
-
- mov %rdx,%rcx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rdx,%r13
- mov %rdx,%r14
-
- ror $28,%rcx
- ror $34,%r13
- mov %rdx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rcx
- ror $5,%r13
- or %r9,%r14 # a|c
-
- xor %r13,%rcx # h=Sigma0(a)
- and %r9,%r15 # a&c
- add %r12,%r10 # d+=T1
-
- and %r8,%r14 # (a|c)&b
- add %r12,%rcx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rcx # h+=Maj(a,b,c)
- mov 8*6(%rsi),%r12
- bswap %r12
- mov %r10,%r13
- mov %r10,%r14
- mov %r11,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rax,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r10,%r15 # (f^g)&e
- mov %r12,48(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rbx,%r12 # T1+=h
-
- mov %rcx,%rbx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rcx,%r13
- mov %rcx,%r14
-
- ror $28,%rbx
- ror $34,%r13
- mov %rcx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rbx
- ror $5,%r13
- or %r8,%r14 # a|c
-
- xor %r13,%rbx # h=Sigma0(a)
- and %r8,%r15 # a&c
- add %r12,%r9 # d+=T1
-
- and %rdx,%r14 # (a|c)&b
- add %r12,%rbx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rbx # h+=Maj(a,b,c)
- mov 8*7(%rsi),%r12
- bswap %r12
- mov %r9,%r13
- mov %r9,%r14
- mov %r10,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r11,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r9,%r15 # (f^g)&e
- mov %r12,56(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rax,%r12 # T1+=h
-
- mov %rbx,%rax
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rbx,%r13
- mov %rbx,%r14
-
- ror $28,%rax
- ror $34,%r13
- mov %rbx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rax
- ror $5,%r13
- or %rdx,%r14 # a|c
-
- xor %r13,%rax # h=Sigma0(a)
- and %rdx,%r15 # a&c
- add %r12,%r8 # d+=T1
-
- and %rcx,%r14 # (a|c)&b
- add %r12,%rax # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rax # h+=Maj(a,b,c)
- mov 8*8(%rsi),%r12
- bswap %r12
- mov %r8,%r13
- mov %r8,%r14
- mov %r9,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r10,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r8,%r15 # (f^g)&e
- mov %r12,64(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r11,%r12 # T1+=h
-
- mov %rax,%r11
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rax,%r13
- mov %rax,%r14
-
- ror $28,%r11
- ror $34,%r13
- mov %rax,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r11
- ror $5,%r13
- or %rcx,%r14 # a|c
-
- xor %r13,%r11 # h=Sigma0(a)
- and %rcx,%r15 # a&c
- add %r12,%rdx # d+=T1
-
- and %rbx,%r14 # (a|c)&b
- add %r12,%r11 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r11 # h+=Maj(a,b,c)
- mov 8*9(%rsi),%r12
- bswap %r12
- mov %rdx,%r13
- mov %rdx,%r14
- mov %r8,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r9,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rdx,%r15 # (f^g)&e
- mov %r12,72(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r10,%r12 # T1+=h
-
- mov %r11,%r10
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r11,%r13
- mov %r11,%r14
-
- ror $28,%r10
- ror $34,%r13
- mov %r11,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r10
- ror $5,%r13
- or %rbx,%r14 # a|c
-
- xor %r13,%r10 # h=Sigma0(a)
- and %rbx,%r15 # a&c
- add %r12,%rcx # d+=T1
-
- and %rax,%r14 # (a|c)&b
- add %r12,%r10 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r10 # h+=Maj(a,b,c)
- mov 8*10(%rsi),%r12
- bswap %r12
- mov %rcx,%r13
- mov %rcx,%r14
- mov %rdx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r8,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rcx,%r15 # (f^g)&e
- mov %r12,80(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r9,%r12 # T1+=h
-
- mov %r10,%r9
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r10,%r13
- mov %r10,%r14
-
- ror $28,%r9
- ror $34,%r13
- mov %r10,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r9
- ror $5,%r13
- or %rax,%r14 # a|c
-
- xor %r13,%r9 # h=Sigma0(a)
- and %rax,%r15 # a&c
- add %r12,%rbx # d+=T1
-
- and %r11,%r14 # (a|c)&b
- add %r12,%r9 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r9 # h+=Maj(a,b,c)
- mov 8*11(%rsi),%r12
- bswap %r12
- mov %rbx,%r13
- mov %rbx,%r14
- mov %rcx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rdx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rbx,%r15 # (f^g)&e
- mov %r12,88(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r8,%r12 # T1+=h
-
- mov %r9,%r8
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r9,%r13
- mov %r9,%r14
-
- ror $28,%r8
- ror $34,%r13
- mov %r9,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r8
- ror $5,%r13
- or %r11,%r14 # a|c
-
- xor %r13,%r8 # h=Sigma0(a)
- and %r11,%r15 # a&c
- add %r12,%rax # d+=T1
-
- and %r10,%r14 # (a|c)&b
- add %r12,%r8 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r8 # h+=Maj(a,b,c)
- mov 8*12(%rsi),%r12
- bswap %r12
- mov %rax,%r13
- mov %rax,%r14
- mov %rbx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rcx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rax,%r15 # (f^g)&e
- mov %r12,96(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rdx,%r12 # T1+=h
-
- mov %r8,%rdx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r8,%r13
- mov %r8,%r14
-
- ror $28,%rdx
- ror $34,%r13
- mov %r8,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rdx
- ror $5,%r13
- or %r10,%r14 # a|c
-
- xor %r13,%rdx # h=Sigma0(a)
- and %r10,%r15 # a&c
- add %r12,%r11 # d+=T1
-
- and %r9,%r14 # (a|c)&b
- add %r12,%rdx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rdx # h+=Maj(a,b,c)
- mov 8*13(%rsi),%r12
- bswap %r12
- mov %r11,%r13
- mov %r11,%r14
- mov %rax,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rbx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r11,%r15 # (f^g)&e
- mov %r12,104(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rcx,%r12 # T1+=h
-
- mov %rdx,%rcx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rdx,%r13
- mov %rdx,%r14
-
- ror $28,%rcx
- ror $34,%r13
- mov %rdx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rcx
- ror $5,%r13
- or %r9,%r14 # a|c
-
- xor %r13,%rcx # h=Sigma0(a)
- and %r9,%r15 # a&c
- add %r12,%r10 # d+=T1
-
- and %r8,%r14 # (a|c)&b
- add %r12,%rcx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rcx # h+=Maj(a,b,c)
- mov 8*14(%rsi),%r12
- bswap %r12
- mov %r10,%r13
- mov %r10,%r14
- mov %r11,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rax,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r10,%r15 # (f^g)&e
- mov %r12,112(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rbx,%r12 # T1+=h
-
- mov %rcx,%rbx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rcx,%r13
- mov %rcx,%r14
-
- ror $28,%rbx
- ror $34,%r13
- mov %rcx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rbx
- ror $5,%r13
- or %r8,%r14 # a|c
-
- xor %r13,%rbx # h=Sigma0(a)
- and %r8,%r15 # a&c
- add %r12,%r9 # d+=T1
-
- and %rdx,%r14 # (a|c)&b
- add %r12,%rbx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rbx # h+=Maj(a,b,c)
- mov 8*15(%rsi),%r12
- bswap %r12
- mov %r9,%r13
- mov %r9,%r14
- mov %r10,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r11,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r9,%r15 # (f^g)&e
- mov %r12,120(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rax,%r12 # T1+=h
-
- mov %rbx,%rax
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rbx,%r13
- mov %rbx,%r14
-
- ror $28,%rax
- ror $34,%r13
- mov %rbx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rax
- ror $5,%r13
- or %rdx,%r14 # a|c
-
- xor %r13,%rax # h=Sigma0(a)
- and %rdx,%r15 # a&c
- add %r12,%r8 # d+=T1
-
- and %rcx,%r14 # (a|c)&b
- add %r12,%rax # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rax # h+=Maj(a,b,c)
- jmp .Lrounds_16_xx
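
The .Lrounds_16_xx loop below recomputes the message schedule in place over the 16 stack slots. A hedged C equivalent of the sigma chains in its comments (shr $7 with ror $1 and a further ror $7 gives sigma0's 1/8/7 mix; shr $6 with ror $19 and a further ror $42 gives sigma1's 19/61/6 mix):

#include <stdint.h>

#define ROTR64(x, n)	(((x) >> (n)) | ((x) << (64 - (n))))

/* sigma0: the shr $7 / ror $1 / (1+7 =) ror $8 chain */
#define sigma0(x)	(ROTR64(x, 1) ^ ROTR64(x, 8) ^ ((x) >> 7))
/* sigma1: the shr $6 / ror $19 / (19+42 =) ror $61 chain */
#define sigma1(x)	(ROTR64(x, 19) ^ ROTR64(x, 61) ^ ((x) >> 6))

/*
 * In-place schedule update over a 16-word window, matching the
 * X[(i+1)&0xf] and X[(i+14)&0xf] comments: for rounds 16..79,
 *   W[i & 15] += sigma1(W[(i-2) & 15]) + W[(i-7) & 15]
 *              + sigma0(W[(i-15) & 15])
 */
static inline uint64_t
sha512_schedule(uint64_t W[16], int i)
{
	W[i & 15] += sigma1(W[(i + 14) & 15]) + W[(i + 9) & 15] +
	    sigma0(W[(i + 1) & 15]);
	return (W[i & 15]);
}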
-.align 16
-.Lrounds_16_xx:
- mov 8(%rsp),%r13
- mov 112(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 72(%rsp),%r12
-
- add 0(%rsp),%r12
- mov %r8,%r13
- mov %r8,%r14
- mov %r9,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r10,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r8,%r15 # (f^g)&e
- mov %r12,0(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r11,%r12 # T1+=h
-
- mov %rax,%r11
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rax,%r13
- mov %rax,%r14
-
- ror $28,%r11
- ror $34,%r13
- mov %rax,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r11
- ror $5,%r13
- or %rcx,%r14 # a|c
-
- xor %r13,%r11 # h=Sigma0(a)
- and %rcx,%r15 # a&c
- add %r12,%rdx # d+=T1
-
- and %rbx,%r14 # (a|c)&b
- add %r12,%r11 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r11 # h+=Maj(a,b,c)
- mov 16(%rsp),%r13
- mov 120(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 80(%rsp),%r12
-
- add 8(%rsp),%r12
- mov %rdx,%r13
- mov %rdx,%r14
- mov %r8,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r9,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rdx,%r15 # (f^g)&e
- mov %r12,8(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r10,%r12 # T1+=h
-
- mov %r11,%r10
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r11,%r13
- mov %r11,%r14
-
- ror $28,%r10
- ror $34,%r13
- mov %r11,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r10
- ror $5,%r13
- or %rbx,%r14 # a|c
-
- xor %r13,%r10 # h=Sigma0(a)
- and %rbx,%r15 # a&c
- add %r12,%rcx # d+=T1
-
- and %rax,%r14 # (a|c)&b
- add %r12,%r10 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r10 # h+=Maj(a,b,c)
- mov 24(%rsp),%r13
- mov 0(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 88(%rsp),%r12
-
- add 16(%rsp),%r12
- mov %rcx,%r13
- mov %rcx,%r14
- mov %rdx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r8,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rcx,%r15 # (f^g)&e
- mov %r12,16(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r9,%r12 # T1+=h
-
- mov %r10,%r9
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r10,%r13
- mov %r10,%r14
-
- ror $28,%r9
- ror $34,%r13
- mov %r10,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r9
- ror $5,%r13
- or %rax,%r14 # a|c
-
- xor %r13,%r9 # h=Sigma0(a)
- and %rax,%r15 # a&c
- add %r12,%rbx # d+=T1
-
- and %r11,%r14 # (a|c)&b
- add %r12,%r9 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r9 # h+=Maj(a,b,c)
- mov 32(%rsp),%r13
- mov 8(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 96(%rsp),%r12
-
- add 24(%rsp),%r12
- mov %rbx,%r13
- mov %rbx,%r14
- mov %rcx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rdx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rbx,%r15 # (f^g)&e
- mov %r12,24(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r8,%r12 # T1+=h
-
- mov %r9,%r8
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r9,%r13
- mov %r9,%r14
-
- ror $28,%r8
- ror $34,%r13
- mov %r9,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r8
- ror $5,%r13
- or %r11,%r14 # a|c
-
- xor %r13,%r8 # h=Sigma0(a)
- and %r11,%r15 # a&c
- add %r12,%rax # d+=T1
-
- and %r10,%r14 # (a|c)&b
- add %r12,%r8 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r8 # h+=Maj(a,b,c)
- mov 40(%rsp),%r13
- mov 16(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 104(%rsp),%r12
-
- add 32(%rsp),%r12
- mov %rax,%r13
- mov %rax,%r14
- mov %rbx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rcx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rax,%r15 # (f^g)&e
- mov %r12,32(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rdx,%r12 # T1+=h
-
- mov %r8,%rdx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r8,%r13
- mov %r8,%r14
-
- ror $28,%rdx
- ror $34,%r13
- mov %r8,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rdx
- ror $5,%r13
- or %r10,%r14 # a|c
-
- xor %r13,%rdx # h=Sigma0(a)
- and %r10,%r15 # a&c
- add %r12,%r11 # d+=T1
-
- and %r9,%r14 # (a|c)&b
- add %r12,%rdx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rdx # h+=Maj(a,b,c)
- mov 48(%rsp),%r13
- mov 24(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 112(%rsp),%r12
-
- add 40(%rsp),%r12
- mov %r11,%r13
- mov %r11,%r14
- mov %rax,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rbx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r11,%r15 # (f^g)&e
- mov %r12,40(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rcx,%r12 # T1+=h
-
- mov %rdx,%rcx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rdx,%r13
- mov %rdx,%r14
-
- ror $28,%rcx
- ror $34,%r13
- mov %rdx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rcx
- ror $5,%r13
- or %r9,%r14 # a|c
-
- xor %r13,%rcx # h=Sigma0(a)
- and %r9,%r15 # a&c
- add %r12,%r10 # d+=T1
-
- and %r8,%r14 # (a|c)&b
- add %r12,%rcx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rcx # h+=Maj(a,b,c)
- mov 56(%rsp),%r13
- mov 32(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 120(%rsp),%r12
-
- add 48(%rsp),%r12
- mov %r10,%r13
- mov %r10,%r14
- mov %r11,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rax,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r10,%r15 # (f^g)&e
- mov %r12,48(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rbx,%r12 # T1+=h
-
- mov %rcx,%rbx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rcx,%r13
- mov %rcx,%r14
-
- ror $28,%rbx
- ror $34,%r13
- mov %rcx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rbx
- ror $5,%r13
- or %r8,%r14 # a|c
-
- xor %r13,%rbx # h=Sigma0(a)
- and %r8,%r15 # a&c
- add %r12,%r9 # d+=T1
-
- and %rdx,%r14 # (a|c)&b
- add %r12,%rbx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rbx # h+=Maj(a,b,c)
- mov 64(%rsp),%r13
- mov 40(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 0(%rsp),%r12
-
- add 56(%rsp),%r12
- mov %r9,%r13
- mov %r9,%r14
- mov %r10,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r11,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r9,%r15 # (f^g)&e
- mov %r12,56(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rax,%r12 # T1+=h
-
- mov %rbx,%rax
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rbx,%r13
- mov %rbx,%r14
-
- ror $28,%rax
- ror $34,%r13
- mov %rbx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rax
- ror $5,%r13
- or %rdx,%r14 # a|c
-
- xor %r13,%rax # h=Sigma0(a)
- and %rdx,%r15 # a&c
- add %r12,%r8 # d+=T1
-
- and %rcx,%r14 # (a|c)&b
- add %r12,%rax # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rax # h+=Maj(a,b,c)
- mov 72(%rsp),%r13
- mov 48(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 8(%rsp),%r12
-
- add 64(%rsp),%r12
- mov %r8,%r13
- mov %r8,%r14
- mov %r9,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r10,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r8,%r15 # (f^g)&e
- mov %r12,64(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r11,%r12 # T1+=h
-
- mov %rax,%r11
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rax,%r13
- mov %rax,%r14
-
- ror $28,%r11
- ror $34,%r13
- mov %rax,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r11
- ror $5,%r13
- or %rcx,%r14 # a|c
-
- xor %r13,%r11 # h=Sigma0(a)
- and %rcx,%r15 # a&c
- add %r12,%rdx # d+=T1
-
- and %rbx,%r14 # (a|c)&b
- add %r12,%r11 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r11 # h+=Maj(a,b,c)
- mov 80(%rsp),%r13
- mov 56(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 16(%rsp),%r12
-
- add 72(%rsp),%r12
- mov %rdx,%r13
- mov %rdx,%r14
- mov %r8,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r9,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rdx,%r15 # (f^g)&e
- mov %r12,72(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r10,%r12 # T1+=h
-
- mov %r11,%r10
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r11,%r13
- mov %r11,%r14
-
- ror $28,%r10
- ror $34,%r13
- mov %r11,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r10
- ror $5,%r13
- or %rbx,%r14 # a|c
-
- xor %r13,%r10 # h=Sigma0(a)
- and %rbx,%r15 # a&c
- add %r12,%rcx # d+=T1
-
- and %rax,%r14 # (a|c)&b
- add %r12,%r10 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r10 # h+=Maj(a,b,c)
- mov 88(%rsp),%r13
- mov 64(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 24(%rsp),%r12
-
- add 80(%rsp),%r12
- mov %rcx,%r13
- mov %rcx,%r14
- mov %rdx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r8,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rcx,%r15 # (f^g)&e
- mov %r12,80(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r9,%r12 # T1+=h
-
- mov %r10,%r9
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r10,%r13
- mov %r10,%r14
-
- ror $28,%r9
- ror $34,%r13
- mov %r10,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r9
- ror $5,%r13
- or %rax,%r14 # a|c
-
- xor %r13,%r9 # h=Sigma0(a)
- and %rax,%r15 # a&c
- add %r12,%rbx # d+=T1
-
- and %r11,%r14 # (a|c)&b
- add %r12,%r9 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r9 # h+=Maj(a,b,c)
- mov 96(%rsp),%r13
- mov 72(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 32(%rsp),%r12
-
- add 88(%rsp),%r12
- mov %rbx,%r13
- mov %rbx,%r14
- mov %rcx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rdx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rbx,%r15 # (f^g)&e
- mov %r12,88(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %r8,%r12 # T1+=h
-
- mov %r9,%r8
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r9,%r13
- mov %r9,%r14
-
- ror $28,%r8
- ror $34,%r13
- mov %r9,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%r8
- ror $5,%r13
- or %r11,%r14 # a|c
-
- xor %r13,%r8 # h=Sigma0(a)
- and %r11,%r15 # a&c
- add %r12,%rax # d+=T1
-
- and %r10,%r14 # (a|c)&b
- add %r12,%r8 # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%r8 # h+=Maj(a,b,c)
- mov 104(%rsp),%r13
- mov 80(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 40(%rsp),%r12
-
- add 96(%rsp),%r12
- mov %rax,%r13
- mov %rax,%r14
- mov %rbx,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rcx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %rax,%r15 # (f^g)&e
- mov %r12,96(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rdx,%r12 # T1+=h
-
- mov %r8,%rdx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %r8,%r13
- mov %r8,%r14
-
- ror $28,%rdx
- ror $34,%r13
- mov %r8,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rdx
- ror $5,%r13
- or %r10,%r14 # a|c
-
- xor %r13,%rdx # h=Sigma0(a)
- and %r10,%r15 # a&c
- add %r12,%r11 # d+=T1
-
- and %r9,%r14 # (a|c)&b
- add %r12,%rdx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rdx # h+=Maj(a,b,c)
- mov 112(%rsp),%r13
- mov 88(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 48(%rsp),%r12
-
- add 104(%rsp),%r12
- mov %r11,%r13
- mov %r11,%r14
- mov %rax,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rbx,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r11,%r15 # (f^g)&e
- mov %r12,104(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rcx,%r12 # T1+=h
-
- mov %rdx,%rcx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rdx,%r13
- mov %rdx,%r14
-
- ror $28,%rcx
- ror $34,%r13
- mov %rdx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rcx
- ror $5,%r13
- or %r9,%r14 # a|c
-
- xor %r13,%rcx # h=Sigma0(a)
- and %r9,%r15 # a&c
- add %r12,%r10 # d+=T1
-
- and %r8,%r14 # (a|c)&b
- add %r12,%rcx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rcx # h+=Maj(a,b,c)
- mov 120(%rsp),%r13
- mov 96(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 56(%rsp),%r12
-
- add 112(%rsp),%r12
- mov %r10,%r13
- mov %r10,%r14
- mov %r11,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %rax,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r10,%r15 # (f^g)&e
- mov %r12,112(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rbx,%r12 # T1+=h
-
- mov %rcx,%rbx
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rcx,%r13
- mov %rcx,%r14
-
- ror $28,%rbx
- ror $34,%r13
- mov %rcx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rbx
- ror $5,%r13
- or %r8,%r14 # a|c
-
- xor %r13,%rbx # h=Sigma0(a)
- and %r8,%r15 # a&c
- add %r12,%r9 # d+=T1
-
- and %rdx,%r14 # (a|c)&b
- add %r12,%rbx # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rbx # h+=Maj(a,b,c)
- mov 0(%rsp),%r13
- mov 104(%rsp),%r12
-
- mov %r13,%r15
-
- shr $7,%r13
- ror $1,%r15
-
- xor %r15,%r13
- ror $7,%r15
-
- xor %r15,%r13 # sigma0(X[(i+1)&0xf])
- mov %r12,%r14
-
- shr $6,%r12
- ror $19,%r14
-
- xor %r14,%r12
- ror $42,%r14
-
- xor %r14,%r12 # sigma1(X[(i+14)&0xf])
-
- add %r13,%r12
-
- add 64(%rsp),%r12
-
- add 120(%rsp),%r12
- mov %r9,%r13
- mov %r9,%r14
- mov %r10,%r15
-
- ror $14,%r13
- ror $18,%r14
- xor %r11,%r15 # f^g
-
- xor %r14,%r13
- ror $23,%r14
- and %r9,%r15 # (f^g)&e
- mov %r12,120(%rsp)
-
- xor %r14,%r13 # Sigma1(e)
- xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
- add %rax,%r12 # T1+=h
-
- mov %rbx,%rax
- add %r13,%r12 # T1+=Sigma1(e)
-
- add %r15,%r12 # T1+=Ch(e,f,g)
- mov %rbx,%r13
- mov %rbx,%r14
-
- ror $28,%rax
- ror $34,%r13
- mov %rbx,%r15
- add (%rbp,%rdi,8),%r12 # T1+=K[round]
-
- xor %r13,%rax
- ror $5,%r13
- or %rdx,%r14 # a|c
-
- xor %r13,%rax # h=Sigma0(a)
- and %rdx,%r15 # a&c
- add %r12,%r8 # d+=T1
-
- and %rcx,%r14 # (a|c)&b
- add %r12,%rax # h+=T1
-
- or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
- lea 1(%rdi),%rdi # round++
-
- add %r14,%rax # h+=Maj(a,b,c)
- cmp $80,%rdi
- jb .Lrounds_16_xx
-
- mov 16*8+0*8(%rsp),%rdi
- lea 16*8(%rsi),%rsi
-
- add 8*0(%rdi),%rax
- add 8*1(%rdi),%rbx
- add 8*2(%rdi),%rcx
- add 8*3(%rdi),%rdx
- add 8*4(%rdi),%r8
- add 8*5(%rdi),%r9
- add 8*6(%rdi),%r10
- add 8*7(%rdi),%r11
-
- cmp 16*8+2*8(%rsp),%rsi
-
- mov %rax,8*0(%rdi)
- mov %rbx,8*1(%rdi)
- mov %rcx,8*2(%rdi)
- mov %rdx,8*3(%rdi)
- mov %r8,8*4(%rdi)
- mov %r9,8*5(%rdi)
- mov %r10,8*6(%rdi)
- mov %r11,8*7(%rdi)
- jb .Lloop
-
- mov 16*8+3*8(%rsp),%rsp
-.cfi_def_cfa %rsp,56
- pop %r15
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r15
- pop %r14
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r14
- pop %r13
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r13
- pop %r12
-.cfi_adjust_cfa_offset -8
-.cfi_restore %r12
- pop %rbp
-.cfi_adjust_cfa_offset -8
-.cfi_restore %rbp
- pop %rbx
-.cfi_adjust_cfa_offset -8
-.cfi_restore %rbx
-
- ret
-.cfi_endproc
-SET_SIZE(SHA512TransformBlocks)
-
-.data
-.align 64
-.type K512,@object
-K512:
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
-#endif /* !lint && !__lint */
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif
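
The K512 table in the file removed above holds the standard FIPS 180-4 round constants: the first 64 fractional bits of the cube roots of the first eighty primes. A quick sanity check is possible even in plain C; long double cannot reproduce all 64 bits, so this sketch (illustrative only, compile with -lm) verifies just the top 32 bits of the first two entries against the table:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Sanity check (not a generator): K512[i] is the first 64 fractional
 * bits of cbrt(prime_i).  Compare only the top 32 bits of the first
 * two entries, 0x428a2f98d728ae22 and 0x7137449123ef65cd.
 */
int
main(void)
{
	const uint32_t expect[] = { 0x428a2f98, 0x71374491 };
	const int primes[] = { 2, 3 };

	for (int i = 0; i < 2; i++) {
		long double f = cbrtl((long double)primes[i]);
		f -= floorl(f);				/* fractional part */
		uint32_t hi = (uint32_t)ldexpl(f, 32);	/* top 32 bits */
		printf("%d: %08x expect %08x\n", primes[i],
		    (unsigned)hi, (unsigned)expect[i]);
	}
	return (0);
}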
diff --git a/sys/contrib/openzfs/module/icp/core/kcf_callprov.c b/sys/contrib/openzfs/module/icp/core/kcf_callprov.c
index fd2f7e1aac3d..b1822dd5b878 100644
--- a/sys/contrib/openzfs/module/icp/core/kcf_callprov.c
+++ b/sys/contrib/openzfs/module/icp/core/kcf_callprov.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -27,9 +27,6 @@
#include <sys/crypto/impl.h>
#include <sys/crypto/sched_impl.h>
-static int kcf_emulate_dual(kcf_provider_desc_t *, crypto_ctx_t *,
- kcf_req_params_t *);
-
void
kcf_free_triedlist(kcf_prov_tried_t *list)
{
@@ -66,171 +63,9 @@ is_in_triedlist(kcf_provider_desc_t *pd, kcf_prov_tried_t *triedl)
if (triedl->pt_pd == pd)
return (B_TRUE);
triedl = triedl->pt_next;
- };
-
- return (B_FALSE);
-}
-
-/*
- * Search a mech entry's hardware provider list for the specified
- * provider. Return true if found.
- */
-static boolean_t
-is_valid_provider_for_mech(kcf_provider_desc_t *pd, kcf_mech_entry_t *me,
- crypto_func_group_t fg)
-{
- kcf_prov_mech_desc_t *prov_chain;
-
- prov_chain = me->me_hw_prov_chain;
- if (prov_chain != NULL) {
- ASSERT(me->me_num_hwprov > 0);
- for (; prov_chain != NULL; prov_chain = prov_chain->pm_next) {
- if (prov_chain->pm_prov_desc == pd &&
- IS_FG_SUPPORTED(prov_chain, fg)) {
- return (B_TRUE);
- }
- }
- }
- return (B_FALSE);
-}
-
-/*
- * This routine, given a logical provider, returns the least loaded
- * provider belonging to the logical provider. The provider must be
- * able to do the specified mechanism, i.e. check that the mechanism
- * hasn't been disabled. In addition, just in case providers are not
- * entirely equivalent, the provider's entry point is checked for
- * non-nullness. This is accomplished by having the caller pass, as
- * arguments, the offset of the function group (offset_1), and the
- * offset of the function within the function group (offset_2).
- * Returns NULL if no provider can be found.
- */
-int
-kcf_get_hardware_provider(crypto_mech_type_t mech_type_1,
- crypto_mech_type_t mech_type_2, boolean_t call_restrict,
- kcf_provider_desc_t *old, kcf_provider_desc_t **new, crypto_func_group_t fg)
-{
- kcf_provider_desc_t *provider, *real_pd = old;
- kcf_provider_desc_t *gpd = NULL; /* good provider */
- kcf_provider_desc_t *bpd = NULL; /* busy provider */
- kcf_provider_list_t *p;
- kcf_ops_class_t class;
- kcf_mech_entry_t *me;
- kcf_mech_entry_tab_t *me_tab;
- int index, len, gqlen = INT_MAX, rv = CRYPTO_SUCCESS;
-
- /* get the mech entry for the specified mechanism */
- class = KCF_MECH2CLASS(mech_type_1);
- if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) {
- return (CRYPTO_MECHANISM_INVALID);
- }
-
- me_tab = &kcf_mech_tabs_tab[class];
- index = KCF_MECH2INDEX(mech_type_1);
- if ((index < 0) || (index >= me_tab->met_size)) {
- return (CRYPTO_MECHANISM_INVALID);
}
- me = &((me_tab->met_tab)[index]);
- mutex_enter(&me->me_mutex);
-
- /*
- * We assume the provider descriptor will not go away because
- * it is being held somewhere, i.e. its reference count has been
- * incremented. In the case of the crypto module, the provider
- * descriptor is held by the session structure.
- */
- if (old->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- if (old->pd_provider_list == NULL) {
- real_pd = NULL;
- rv = CRYPTO_DEVICE_ERROR;
- goto out;
- }
- /*
- * Find the least loaded real provider. KCF_PROV_LOAD gives
- * the load (number of pending requests) of the provider.
- */
- mutex_enter(&old->pd_lock);
- p = old->pd_provider_list;
- while (p != NULL) {
- provider = p->pl_provider;
-
- ASSERT(provider->pd_prov_type !=
- CRYPTO_LOGICAL_PROVIDER);
-
- if (call_restrict &&
- (provider->pd_flags & KCF_PROV_RESTRICTED)) {
- p = p->pl_next;
- continue;
- }
-
- if (!is_valid_provider_for_mech(provider, me, fg)) {
- p = p->pl_next;
- continue;
- }
-
- /* provider does second mech */
- if (mech_type_2 != CRYPTO_MECH_INVALID) {
- int i;
-
- i = KCF_TO_PROV_MECH_INDX(provider,
- mech_type_2);
- if (i == KCF_INVALID_INDX) {
- p = p->pl_next;
- continue;
- }
- }
-
- if (provider->pd_state != KCF_PROV_READY) {
- /* choose BUSY if no READY providers */
- if (provider->pd_state == KCF_PROV_BUSY)
- bpd = provider;
- p = p->pl_next;
- continue;
- }
-
- len = KCF_PROV_LOAD(provider);
- if (len < gqlen) {
- gqlen = len;
- gpd = provider;
- }
-
- p = p->pl_next;
- }
-
- if (gpd != NULL) {
- real_pd = gpd;
- KCF_PROV_REFHOLD(real_pd);
- } else if (bpd != NULL) {
- real_pd = bpd;
- KCF_PROV_REFHOLD(real_pd);
- } else {
- /* can't find provider */
- real_pd = NULL;
- rv = CRYPTO_MECHANISM_INVALID;
- }
- mutex_exit(&old->pd_lock);
-
- } else {
- if (!KCF_IS_PROV_USABLE(old) ||
- (call_restrict && (old->pd_flags & KCF_PROV_RESTRICTED))) {
- real_pd = NULL;
- rv = CRYPTO_DEVICE_ERROR;
- goto out;
- }
-
- if (!is_valid_provider_for_mech(old, me, fg)) {
- real_pd = NULL;
- rv = CRYPTO_MECHANISM_INVALID;
- goto out;
- }
-
- KCF_PROV_REFHOLD(real_pd);
- }
-out:
- mutex_exit(&me->me_mutex);
- *new = real_pd;
- return (rv);
+ return (B_FALSE);
}
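
The kcf_get_hardware_provider() scan deleted above reduces to a least-loaded selection with a BUSY fallback. A hedged sketch of that policy with the ICP types simplified away (struct and field names are illustrative):

#include <limits.h>
#include <stddef.h>

/* Simplified stand-ins for the ICP provider types; illustrative only. */
struct prov {
	struct prov	*next;
	int		state;		/* PROV_READY or PROV_BUSY */
	int		load;		/* pending requests (KCF_PROV_LOAD) */
	int		usable;		/* mech enabled, fg supported */
};
enum { PROV_READY, PROV_BUSY };

/*
 * Pick the least-loaded READY provider; fall back to a BUSY one only
 * when nothing is READY -- the same policy as the deleted scan.
 */
static struct prov *
least_loaded(struct prov *list)
{
	struct prov *best = NULL, *busy = NULL;
	int best_load = INT_MAX;

	for (struct prov *p = list; p != NULL; p = p->next) {
		if (!p->usable)
			continue;
		if (p->state == PROV_BUSY) {
			busy = p;	/* remember as last resort */
			continue;
		}
		if (p->state == PROV_READY && p->load < best_load) {
			best_load = p->load;
			best = p;
		}
	}
	return (best != NULL ? best : busy);
}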
/*
@@ -243,22 +78,17 @@ out:
 * search to find one. This is fine as we assume there are only a small
 * number of providers in this list. If this assumption ever changes,
* we should revisit this.
- *
- * call_restrict represents if the caller should not be allowed to
- * use restricted providers.
*/
kcf_provider_desc_t *
kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp,
- int *error, kcf_prov_tried_t *triedl, crypto_func_group_t fg,
- boolean_t call_restrict, size_t data_size)
+ int *error, kcf_prov_tried_t *triedl, crypto_func_group_t fg)
{
- kcf_provider_desc_t *pd = NULL, *gpd = NULL;
- kcf_prov_mech_desc_t *prov_chain, *mdesc;
- int len, gqlen = INT_MAX;
+ kcf_provider_desc_t *pd = NULL;
+ kcf_prov_mech_desc_t *mdesc;
kcf_ops_class_t class;
int index;
kcf_mech_entry_t *me;
- kcf_mech_entry_tab_t *me_tab;
+ const kcf_mech_entry_tab_t *me_tab;
class = KCF_MECH2CLASS(mech_type);
if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) {
@@ -277,58 +107,12 @@ kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp,
if (mepp != NULL)
*mepp = me;
- mutex_enter(&me->me_mutex);
-
- prov_chain = me->me_hw_prov_chain;
-
- /*
- * We check for the threshold for using a hardware provider for
- * this amount of data. If there is no software provider available
- * for the mechanism, then the threshold is ignored.
- */
- if ((prov_chain != NULL) &&
- ((data_size == 0) || (me->me_threshold == 0) ||
- (data_size >= me->me_threshold) ||
- ((mdesc = me->me_sw_prov) == NULL) ||
- (!IS_FG_SUPPORTED(mdesc, fg)) ||
- (!KCF_IS_PROV_USABLE(mdesc->pm_prov_desc)))) {
- ASSERT(me->me_num_hwprov > 0);
- /* there is at least one provider */
-
- /*
- * Find the least loaded real provider. KCF_PROV_LOAD gives
- * the load (number of pending requests) of the provider.
- */
- while (prov_chain != NULL) {
- pd = prov_chain->pm_prov_desc;
-
- if (!IS_FG_SUPPORTED(prov_chain, fg) ||
- !KCF_IS_PROV_USABLE(pd) ||
- IS_PROVIDER_TRIED(pd, triedl) ||
- (call_restrict &&
- (pd->pd_flags & KCF_PROV_RESTRICTED))) {
- prov_chain = prov_chain->pm_next;
- continue;
- }
-
- if ((len = KCF_PROV_LOAD(pd)) < gqlen) {
- gqlen = len;
- gpd = pd;
- }
-
- prov_chain = prov_chain->pm_next;
- }
-
- pd = gpd;
- }
-
- /* No HW provider for this mech, is there a SW provider? */
+ /* Is there a provider? */
if (pd == NULL && (mdesc = me->me_sw_prov) != NULL) {
pd = mdesc->pm_prov_desc;
if (!IS_FG_SUPPORTED(mdesc, fg) ||
!KCF_IS_PROV_USABLE(pd) ||
- IS_PROVIDER_TRIED(pd, triedl) ||
- (call_restrict && (pd->pd_flags & KCF_PROV_RESTRICTED)))
+ IS_PROVIDER_TRIED(pd, triedl))
pd = NULL;
}
@@ -344,1224 +128,5 @@ kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp,
} else
KCF_PROV_REFHOLD(pd);
- mutex_exit(&me->me_mutex);
return (pd);
}
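
After this change, kcf_get_mech_provider() only ever resolves to the (sole) software provider for a mechanism. A hypothetical caller, sketched from nothing more than the new signature (the helper name and surrounding logic are illustrative; CRYPTO_FG_DIGEST_ATOMIC and KCF_PROV_REFRELE are existing KCF names):

/*
 * Illustrative caller: mechanism type in, optional mech-entry and
 * error out, a tried-list to skip providers that already failed, and
 * the function group the caller needs.
 */
static int
do_digest_example(crypto_mech_type_t mech)
{
	kcf_mech_entry_t *me = NULL;
	kcf_prov_tried_t *tried = NULL;
	int error = 0;

	kcf_provider_desc_t *pd = kcf_get_mech_provider(mech, &me,
	    &error, tried, CRYPTO_FG_DIGEST_ATOMIC);
	if (pd == NULL)
		return (error);	/* e.g. CRYPTO_MECH_NOT_SUPPORTED */

	/* ... submit the request to pd, then drop the reference ... */
	KCF_PROV_REFRELE(pd);
	return (0);
}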
-
-/*
- * Very similar to kcf_get_mech_provider(). Finds the best provider capable of
- * a dual operation with both me1 and me2.
- * When no dual-ops capable providers are available, return the best provider
- * for me1 only, and sets *prov_mt2 to CRYPTO_INVALID_MECHID;
- * We assume/expect that a slower HW capable of the dual is still
- * faster than the 2 fastest providers capable of the individual ops
- * separately.
- */
-kcf_provider_desc_t *
-kcf_get_dual_provider(crypto_mechanism_t *mech1, crypto_mechanism_t *mech2,
- kcf_mech_entry_t **mepp, crypto_mech_type_t *prov_mt1,
- crypto_mech_type_t *prov_mt2, int *error, kcf_prov_tried_t *triedl,
- crypto_func_group_t fg1, crypto_func_group_t fg2, boolean_t call_restrict,
- size_t data_size)
-{
- kcf_provider_desc_t *pd = NULL, *pdm1 = NULL, *pdm1m2 = NULL;
- kcf_prov_mech_desc_t *prov_chain, *mdesc;
- int len, gqlen = INT_MAX, dgqlen = INT_MAX;
- crypto_mech_info_list_t *mil;
- crypto_mech_type_t m2id = mech2->cm_type;
- kcf_mech_entry_t *me;
-
- /* when mech is a valid mechanism, me will be its mech_entry */
- if (kcf_get_mech_entry(mech1->cm_type, &me) != KCF_SUCCESS) {
- *error = CRYPTO_MECHANISM_INVALID;
- return (NULL);
- }
-
- *prov_mt2 = CRYPTO_MECH_INVALID;
-
- if (mepp != NULL)
- *mepp = me;
- mutex_enter(&me->me_mutex);
-
- prov_chain = me->me_hw_prov_chain;
- /*
- * We check the threshold for using a hardware provider for
- * this amount of data. If there is no software provider available
- * for the first mechanism, then the threshold is ignored.
- */
- if ((prov_chain != NULL) &&
- ((data_size == 0) || (me->me_threshold == 0) ||
- (data_size >= me->me_threshold) ||
- ((mdesc = me->me_sw_prov) == NULL) ||
- (!IS_FG_SUPPORTED(mdesc, fg1)) ||
- (!KCF_IS_PROV_USABLE(mdesc->pm_prov_desc)))) {
- /* there is at least one provider */
- ASSERT(me->me_num_hwprov > 0);
-
- /*
- * Find the least loaded provider capable of the combo
- * me1 + me2, and save a pointer to the least loaded
- * provider capable of me1 only.
- */
- while (prov_chain != NULL) {
- pd = prov_chain->pm_prov_desc;
- len = KCF_PROV_LOAD(pd);
-
- if (!IS_FG_SUPPORTED(prov_chain, fg1) ||
- !KCF_IS_PROV_USABLE(pd) ||
- IS_PROVIDER_TRIED(pd, triedl) ||
- (call_restrict &&
- (pd->pd_flags & KCF_PROV_RESTRICTED))) {
- prov_chain = prov_chain->pm_next;
- continue;
- }
-
- /* Save the best provider capable of m1 */
- if (len < gqlen) {
- *prov_mt1 =
- prov_chain->pm_mech_info.cm_mech_number;
- gqlen = len;
- pdm1 = pd;
- }
-
- /* See if pd can do me2 too */
- for (mil = prov_chain->pm_mi_list;
- mil != NULL; mil = mil->ml_next) {
- if ((mil->ml_mech_info.cm_func_group_mask &
- fg2) == 0)
- continue;
-
- if ((mil->ml_kcf_mechid == m2id) &&
- (len < dgqlen)) {
- /* Bingo! */
- dgqlen = len;
- pdm1m2 = pd;
- *prov_mt2 =
- mil->ml_mech_info.cm_mech_number;
- *prov_mt1 = prov_chain->
- pm_mech_info.cm_mech_number;
- break;
- }
- }
-
- prov_chain = prov_chain->pm_next;
- }
-
- pd = (pdm1m2 != NULL) ? pdm1m2 : pdm1;
- }
-
- /* no HW provider for this mech, is there a SW provider? */
- if (pd == NULL && (mdesc = me->me_sw_prov) != NULL) {
- pd = mdesc->pm_prov_desc;
- if (!IS_FG_SUPPORTED(mdesc, fg1) ||
- !KCF_IS_PROV_USABLE(pd) ||
- IS_PROVIDER_TRIED(pd, triedl) ||
- (call_restrict && (pd->pd_flags & KCF_PROV_RESTRICTED)))
- pd = NULL;
- else {
- /* See if pd can do me2 too */
- for (mil = me->me_sw_prov->pm_mi_list;
- mil != NULL; mil = mil->ml_next) {
- if ((mil->ml_mech_info.cm_func_group_mask &
- fg2) == 0)
- continue;
-
- if (mil->ml_kcf_mechid == m2id) {
- /* Bingo! */
- *prov_mt2 =
- mil->ml_mech_info.cm_mech_number;
- break;
- }
- }
- *prov_mt1 = me->me_sw_prov->pm_mech_info.cm_mech_number;
- }
- }
-
- if (pd == NULL)
- *error = CRYPTO_MECH_NOT_SUPPORTED;
- else
- KCF_PROV_REFHOLD(pd);
-
- mutex_exit(&me->me_mutex);
- return (pd);
-}
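
The policy inside the deleted kcf_get_dual_provider() is worth keeping in mind when reading old callers: prefer the least-loaded provider capable of both mechanisms, even over a less-loaded provider capable of only the first. A simplified sketch (types and names illustrative):

#include <limits.h>
#include <stddef.h>

/* Illustrative types only; not the ICP definitions. */
struct dprov {
	struct dprov	*next;
	int		load;
	int		does_m1;	/* supports mech1 with fg1 */
	int		does_m2;	/* also supports mech2 with fg2 */
};

/*
 * Track the least-loaded provider capable of mech1 alone and of
 * mech1+mech2; return the dual-capable one when any exists.
 */
static struct dprov *
pick_dual(struct dprov *list, int *dual_found)
{
	struct dprov *m1 = NULL, *m1m2 = NULL;
	int m1_load = INT_MAX, m1m2_load = INT_MAX;

	for (struct dprov *p = list; p != NULL; p = p->next) {
		if (!p->does_m1)
			continue;
		if (p->load < m1_load) {
			m1_load = p->load;
			m1 = p;		/* best for mech1 only */
		}
		if (p->does_m2 && p->load < m1m2_load) {
			m1m2_load = p->load;
			m1m2 = p;	/* best for the combo */
		}
	}
	*dual_found = (m1m2 != NULL);
	return (m1m2 != NULL ? m1m2 : m1);
}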
-
-/*
- * Do the actual work of calling the provider routines.
- *
- * pd - Provider structure
- * ctx - Context for this operation
- * params - Parameters for this operation
- * rhndl - Request handle to use for notification
- *
- * The return values are the same as that of the respective SPI.
- */
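
Every op group handled in this function follows the same shape: pick the matching member of the rp_u parameter union, translate the framework mechanism number to the provider's own with KCF_SET_PROVIDER_MECHNUM where a mechanism is involved, then fan out on the op type, with atomic ops asserting ctx == NULL. A bare skeleton of that pattern (names illustrative, not the ICP API):

/* Skeleton of the per-group dispatch; enum and names illustrative. */
enum op { OP_INIT, OP_SINGLE, OP_UPDATE, OP_FINAL, OP_ATOMIC };

static int
dispatch_group(enum op optype)
{
	int err = -1;	/* stands in for CRYPTO_ARGUMENTS_BAD */

	switch (optype) {
	case OP_INIT:
		/* translate mech number for this provider, then init */
		break;
	case OP_SINGLE:
	case OP_UPDATE:
	case OP_FINAL:
		/* streaming entry points share the saved context */
		break;
	case OP_ATOMIC:
		/* no context: mech translated and op done in one call */
		break;
	default:
		break;	/* unknown op type: err stays "bad arguments" */
	}
	return (err);
}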
-int
-common_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
- kcf_req_params_t *params, crypto_req_handle_t rhndl)
-{
- int err = CRYPTO_ARGUMENTS_BAD;
- kcf_op_type_t optype;
-
- optype = params->rp_optype;
-
- switch (params->rp_opgrp) {
- case KCF_OG_DIGEST: {
- kcf_digest_ops_params_t *dops = &params->rp_u.digest_params;
-
- switch (optype) {
- case KCF_OP_INIT:
- /*
- * We should do this only here and not in KCF_WRAP_*
- * macros. This is because we may want to try other
- * providers, in case we recover from a failure.
- */
- KCF_SET_PROVIDER_MECHNUM(dops->do_framework_mechtype,
- pd, &dops->do_mech);
-
- err = KCF_PROV_DIGEST_INIT(pd, ctx, &dops->do_mech,
- rhndl);
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_DIGEST(pd, ctx, dops->do_data,
- dops->do_digest, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- err = KCF_PROV_DIGEST_UPDATE(pd, ctx,
- dops->do_data, rhndl);
- break;
-
- case KCF_OP_FINAL:
- err = KCF_PROV_DIGEST_FINAL(pd, ctx,
- dops->do_digest, rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(dops->do_framework_mechtype,
- pd, &dops->do_mech);
- err = KCF_PROV_DIGEST_ATOMIC(pd, dops->do_sid,
- &dops->do_mech, dops->do_data, dops->do_digest,
- rhndl);
- break;
-
- case KCF_OP_DIGEST_KEY:
- err = KCF_PROV_DIGEST_KEY(pd, ctx, dops->do_digest_key,
- rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_MAC: {
- kcf_mac_ops_params_t *mops = &params->rp_u.mac_params;
-
- switch (optype) {
- case KCF_OP_INIT:
- KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype,
- pd, &mops->mo_mech);
-
- err = KCF_PROV_MAC_INIT(pd, ctx, &mops->mo_mech,
- mops->mo_key, mops->mo_templ, rhndl);
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_MAC(pd, ctx, mops->mo_data,
- mops->mo_mac, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- err = KCF_PROV_MAC_UPDATE(pd, ctx, mops->mo_data,
- rhndl);
- break;
-
- case KCF_OP_FINAL:
- err = KCF_PROV_MAC_FINAL(pd, ctx, mops->mo_mac, rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype,
- pd, &mops->mo_mech);
-
- err = KCF_PROV_MAC_ATOMIC(pd, mops->mo_sid,
- &mops->mo_mech, mops->mo_key, mops->mo_data,
- mops->mo_mac, mops->mo_templ, rhndl);
- break;
-
- case KCF_OP_MAC_VERIFY_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype,
- pd, &mops->mo_mech);
-
- err = KCF_PROV_MAC_VERIFY_ATOMIC(pd, mops->mo_sid,
- &mops->mo_mech, mops->mo_key, mops->mo_data,
- mops->mo_mac, mops->mo_templ, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_ENCRYPT: {
- kcf_encrypt_ops_params_t *eops = &params->rp_u.encrypt_params;
-
- switch (optype) {
- case KCF_OP_INIT:
- KCF_SET_PROVIDER_MECHNUM(eops->eo_framework_mechtype,
- pd, &eops->eo_mech);
-
- err = KCF_PROV_ENCRYPT_INIT(pd, ctx, &eops->eo_mech,
- eops->eo_key, eops->eo_templ, rhndl);
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_ENCRYPT(pd, ctx, eops->eo_plaintext,
- eops->eo_ciphertext, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- err = KCF_PROV_ENCRYPT_UPDATE(pd, ctx,
- eops->eo_plaintext, eops->eo_ciphertext, rhndl);
- break;
-
- case KCF_OP_FINAL:
- err = KCF_PROV_ENCRYPT_FINAL(pd, ctx,
- eops->eo_ciphertext, rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(eops->eo_framework_mechtype,
- pd, &eops->eo_mech);
-
- err = KCF_PROV_ENCRYPT_ATOMIC(pd, eops->eo_sid,
- &eops->eo_mech, eops->eo_key, eops->eo_plaintext,
- eops->eo_ciphertext, eops->eo_templ, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_DECRYPT: {
- kcf_decrypt_ops_params_t *dcrops = &params->rp_u.decrypt_params;
-
- switch (optype) {
- case KCF_OP_INIT:
- KCF_SET_PROVIDER_MECHNUM(dcrops->dop_framework_mechtype,
- pd, &dcrops->dop_mech);
-
- err = KCF_PROV_DECRYPT_INIT(pd, ctx, &dcrops->dop_mech,
- dcrops->dop_key, dcrops->dop_templ, rhndl);
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_DECRYPT(pd, ctx, dcrops->dop_ciphertext,
- dcrops->dop_plaintext, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- err = KCF_PROV_DECRYPT_UPDATE(pd, ctx,
- dcrops->dop_ciphertext, dcrops->dop_plaintext,
- rhndl);
- break;
-
- case KCF_OP_FINAL:
- err = KCF_PROV_DECRYPT_FINAL(pd, ctx,
- dcrops->dop_plaintext, rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(dcrops->dop_framework_mechtype,
- pd, &dcrops->dop_mech);
-
- err = KCF_PROV_DECRYPT_ATOMIC(pd, dcrops->dop_sid,
- &dcrops->dop_mech, dcrops->dop_key,
- dcrops->dop_ciphertext, dcrops->dop_plaintext,
- dcrops->dop_templ, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_SIGN: {
- kcf_sign_ops_params_t *sops = &params->rp_u.sign_params;
-
- switch (optype) {
- case KCF_OP_INIT:
- KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
- pd, &sops->so_mech);
-
- err = KCF_PROV_SIGN_INIT(pd, ctx, &sops->so_mech,
- sops->so_key, sops->so_templ, rhndl);
- break;
-
- case KCF_OP_SIGN_RECOVER_INIT:
- KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
- pd, &sops->so_mech);
-
- err = KCF_PROV_SIGN_RECOVER_INIT(pd, ctx,
- &sops->so_mech, sops->so_key, sops->so_templ,
- rhndl);
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_SIGN(pd, ctx, sops->so_data,
- sops->so_signature, rhndl);
- break;
-
- case KCF_OP_SIGN_RECOVER:
- err = KCF_PROV_SIGN_RECOVER(pd, ctx,
- sops->so_data, sops->so_signature, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- err = KCF_PROV_SIGN_UPDATE(pd, ctx, sops->so_data,
- rhndl);
- break;
-
- case KCF_OP_FINAL:
- err = KCF_PROV_SIGN_FINAL(pd, ctx, sops->so_signature,
- rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
- pd, &sops->so_mech);
-
- err = KCF_PROV_SIGN_ATOMIC(pd, sops->so_sid,
- &sops->so_mech, sops->so_key, sops->so_data,
- sops->so_templ, sops->so_signature, rhndl);
- break;
-
- case KCF_OP_SIGN_RECOVER_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
- pd, &sops->so_mech);
-
- err = KCF_PROV_SIGN_RECOVER_ATOMIC(pd, sops->so_sid,
- &sops->so_mech, sops->so_key, sops->so_data,
- sops->so_templ, sops->so_signature, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_VERIFY: {
- kcf_verify_ops_params_t *vops = &params->rp_u.verify_params;
-
- switch (optype) {
- case KCF_OP_INIT:
- KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
- pd, &vops->vo_mech);
-
- err = KCF_PROV_VERIFY_INIT(pd, ctx, &vops->vo_mech,
- vops->vo_key, vops->vo_templ, rhndl);
- break;
-
- case KCF_OP_VERIFY_RECOVER_INIT:
- KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
- pd, &vops->vo_mech);
-
- err = KCF_PROV_VERIFY_RECOVER_INIT(pd, ctx,
- &vops->vo_mech, vops->vo_key, vops->vo_templ,
- rhndl);
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_VERIFY(pd, ctx, vops->vo_data,
- vops->vo_signature, rhndl);
- break;
-
- case KCF_OP_VERIFY_RECOVER:
- err = KCF_PROV_VERIFY_RECOVER(pd, ctx,
- vops->vo_signature, vops->vo_data, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- err = KCF_PROV_VERIFY_UPDATE(pd, ctx, vops->vo_data,
- rhndl);
- break;
-
- case KCF_OP_FINAL:
- err = KCF_PROV_VERIFY_FINAL(pd, ctx, vops->vo_signature,
- rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
- pd, &vops->vo_mech);
-
- err = KCF_PROV_VERIFY_ATOMIC(pd, vops->vo_sid,
- &vops->vo_mech, vops->vo_key, vops->vo_data,
- vops->vo_templ, vops->vo_signature, rhndl);
- break;
-
- case KCF_OP_VERIFY_RECOVER_ATOMIC:
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
- pd, &vops->vo_mech);
-
- err = KCF_PROV_VERIFY_RECOVER_ATOMIC(pd, vops->vo_sid,
- &vops->vo_mech, vops->vo_key, vops->vo_signature,
- vops->vo_templ, vops->vo_data, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_ENCRYPT_MAC: {
- kcf_encrypt_mac_ops_params_t *eops =
- &params->rp_u.encrypt_mac_params;
- kcf_context_t *kcf_secondctx;
-
- switch (optype) {
- case KCF_OP_INIT:
- kcf_secondctx = ((kcf_context_t *)
- (ctx->cc_framework_private))->kc_secondctx;
-
- if (kcf_secondctx != NULL) {
- err = kcf_emulate_dual(pd, ctx, params);
- break;
- }
- KCF_SET_PROVIDER_MECHNUM(
- eops->em_framework_encr_mechtype,
- pd, &eops->em_encr_mech);
-
- KCF_SET_PROVIDER_MECHNUM(
- eops->em_framework_mac_mechtype,
- pd, &eops->em_mac_mech);
-
- err = KCF_PROV_ENCRYPT_MAC_INIT(pd, ctx,
- &eops->em_encr_mech, eops->em_encr_key,
- &eops->em_mac_mech, eops->em_mac_key,
- eops->em_encr_templ, eops->em_mac_templ,
- rhndl);
-
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_ENCRYPT_MAC(pd, ctx,
- eops->em_plaintext, eops->em_ciphertext,
- eops->em_mac, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- kcf_secondctx = ((kcf_context_t *)
- (ctx->cc_framework_private))->kc_secondctx;
- if (kcf_secondctx != NULL) {
- err = kcf_emulate_dual(pd, ctx, params);
- break;
- }
- err = KCF_PROV_ENCRYPT_MAC_UPDATE(pd, ctx,
- eops->em_plaintext, eops->em_ciphertext, rhndl);
- break;
-
- case KCF_OP_FINAL:
- kcf_secondctx = ((kcf_context_t *)
- (ctx->cc_framework_private))->kc_secondctx;
- if (kcf_secondctx != NULL) {
- err = kcf_emulate_dual(pd, ctx, params);
- break;
- }
- err = KCF_PROV_ENCRYPT_MAC_FINAL(pd, ctx,
- eops->em_ciphertext, eops->em_mac, rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
-
- KCF_SET_PROVIDER_MECHNUM(
- eops->em_framework_encr_mechtype,
- pd, &eops->em_encr_mech);
-
- KCF_SET_PROVIDER_MECHNUM(
- eops->em_framework_mac_mechtype,
- pd, &eops->em_mac_mech);
-
- err = KCF_PROV_ENCRYPT_MAC_ATOMIC(pd, eops->em_sid,
- &eops->em_encr_mech, eops->em_encr_key,
- &eops->em_mac_mech, eops->em_mac_key,
- eops->em_plaintext, eops->em_ciphertext,
- eops->em_mac,
- eops->em_encr_templ, eops->em_mac_templ,
- rhndl);
-
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_MAC_DECRYPT: {
- kcf_mac_decrypt_ops_params_t *dops =
- &params->rp_u.mac_decrypt_params;
- kcf_context_t *kcf_secondctx;
-
- switch (optype) {
- case KCF_OP_INIT:
- kcf_secondctx = ((kcf_context_t *)
- (ctx->cc_framework_private))->kc_secondctx;
-
- if (kcf_secondctx != NULL) {
- err = kcf_emulate_dual(pd, ctx, params);
- break;
- }
- KCF_SET_PROVIDER_MECHNUM(
- dops->md_framework_mac_mechtype,
- pd, &dops->md_mac_mech);
-
- KCF_SET_PROVIDER_MECHNUM(
- dops->md_framework_decr_mechtype,
- pd, &dops->md_decr_mech);
-
- err = KCF_PROV_MAC_DECRYPT_INIT(pd, ctx,
- &dops->md_mac_mech, dops->md_mac_key,
- &dops->md_decr_mech, dops->md_decr_key,
- dops->md_mac_templ, dops->md_decr_templ,
- rhndl);
-
- break;
-
- case KCF_OP_SINGLE:
- err = KCF_PROV_MAC_DECRYPT(pd, ctx,
- dops->md_ciphertext, dops->md_mac,
- dops->md_plaintext, rhndl);
- break;
-
- case KCF_OP_UPDATE:
- kcf_secondctx = ((kcf_context_t *)
- (ctx->cc_framework_private))->kc_secondctx;
- if (kcf_secondctx != NULL) {
- err = kcf_emulate_dual(pd, ctx, params);
- break;
- }
- err = KCF_PROV_MAC_DECRYPT_UPDATE(pd, ctx,
- dops->md_ciphertext, dops->md_plaintext, rhndl);
- break;
-
- case KCF_OP_FINAL:
- kcf_secondctx = ((kcf_context_t *)
- (ctx->cc_framework_private))->kc_secondctx;
- if (kcf_secondctx != NULL) {
- err = kcf_emulate_dual(pd, ctx, params);
- break;
- }
- err = KCF_PROV_MAC_DECRYPT_FINAL(pd, ctx,
- dops->md_mac, dops->md_plaintext, rhndl);
- break;
-
- case KCF_OP_ATOMIC:
- ASSERT(ctx == NULL);
-
- KCF_SET_PROVIDER_MECHNUM(
- dops->md_framework_mac_mechtype,
- pd, &dops->md_mac_mech);
-
- KCF_SET_PROVIDER_MECHNUM(
- dops->md_framework_decr_mechtype,
- pd, &dops->md_decr_mech);
-
- err = KCF_PROV_MAC_DECRYPT_ATOMIC(pd, dops->md_sid,
- &dops->md_mac_mech, dops->md_mac_key,
- &dops->md_decr_mech, dops->md_decr_key,
- dops->md_ciphertext, dops->md_mac,
- dops->md_plaintext,
- dops->md_mac_templ, dops->md_decr_templ,
- rhndl);
-
- break;
-
- case KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC:
- ASSERT(ctx == NULL);
-
- KCF_SET_PROVIDER_MECHNUM(
- dops->md_framework_mac_mechtype,
- pd, &dops->md_mac_mech);
-
- KCF_SET_PROVIDER_MECHNUM(
- dops->md_framework_decr_mechtype,
- pd, &dops->md_decr_mech);
-
- err = KCF_PROV_MAC_VERIFY_DECRYPT_ATOMIC(pd,
- dops->md_sid, &dops->md_mac_mech, dops->md_mac_key,
- &dops->md_decr_mech, dops->md_decr_key,
- dops->md_ciphertext, dops->md_mac,
- dops->md_plaintext,
- dops->md_mac_templ, dops->md_decr_templ,
- rhndl);
-
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_KEY: {
- kcf_key_ops_params_t *kops = &params->rp_u.key_params;
-
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(kops->ko_framework_mechtype, pd,
- &kops->ko_mech);
-
- switch (optype) {
- case KCF_OP_KEY_GENERATE:
- err = KCF_PROV_KEY_GENERATE(pd, kops->ko_sid,
- &kops->ko_mech,
- kops->ko_key_template, kops->ko_key_attribute_count,
- kops->ko_key_object_id_ptr, rhndl);
- break;
-
- case KCF_OP_KEY_GENERATE_PAIR:
- err = KCF_PROV_KEY_GENERATE_PAIR(pd, kops->ko_sid,
- &kops->ko_mech,
- kops->ko_key_template, kops->ko_key_attribute_count,
- kops->ko_private_key_template,
- kops->ko_private_key_attribute_count,
- kops->ko_key_object_id_ptr,
- kops->ko_private_key_object_id_ptr, rhndl);
- break;
-
- case KCF_OP_KEY_WRAP:
- err = KCF_PROV_KEY_WRAP(pd, kops->ko_sid,
- &kops->ko_mech,
- kops->ko_key, kops->ko_key_object_id_ptr,
- kops->ko_wrapped_key, kops->ko_wrapped_key_len_ptr,
- rhndl);
- break;
-
- case KCF_OP_KEY_UNWRAP:
- err = KCF_PROV_KEY_UNWRAP(pd, kops->ko_sid,
- &kops->ko_mech,
- kops->ko_key, kops->ko_wrapped_key,
- kops->ko_wrapped_key_len_ptr,
- kops->ko_key_template, kops->ko_key_attribute_count,
- kops->ko_key_object_id_ptr, rhndl);
- break;
-
- case KCF_OP_KEY_DERIVE:
- err = KCF_PROV_KEY_DERIVE(pd, kops->ko_sid,
- &kops->ko_mech,
- kops->ko_key, kops->ko_key_template,
- kops->ko_key_attribute_count,
- kops->ko_key_object_id_ptr, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_RANDOM: {
- kcf_random_number_ops_params_t *rops =
- &params->rp_u.random_number_params;
-
- ASSERT(ctx == NULL);
-
- switch (optype) {
- case KCF_OP_RANDOM_SEED:
- err = KCF_PROV_SEED_RANDOM(pd, rops->rn_sid,
- rops->rn_buf, rops->rn_buflen, rops->rn_entropy_est,
- rops->rn_flags, rhndl);
- break;
-
- case KCF_OP_RANDOM_GENERATE:
- err = KCF_PROV_GENERATE_RANDOM(pd, rops->rn_sid,
- rops->rn_buf, rops->rn_buflen, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_SESSION: {
- kcf_session_ops_params_t *sops = &params->rp_u.session_params;
-
- ASSERT(ctx == NULL);
- switch (optype) {
- case KCF_OP_SESSION_OPEN:
- /*
- * so_pd may be a logical provider, in which case
- * we need to check whether it has been removed.
- */
- if (KCF_IS_PROV_REMOVED(sops->so_pd)) {
- err = CRYPTO_DEVICE_ERROR;
- break;
- }
- err = KCF_PROV_SESSION_OPEN(pd, sops->so_sid_ptr,
- rhndl, sops->so_pd);
- break;
-
- case KCF_OP_SESSION_CLOSE:
- /*
- * so_pd may be a logical provider, in which case
- * we need to check whether it has been removed.
- */
- if (KCF_IS_PROV_REMOVED(sops->so_pd)) {
- err = CRYPTO_DEVICE_ERROR;
- break;
- }
- err = KCF_PROV_SESSION_CLOSE(pd, sops->so_sid,
- rhndl, sops->so_pd);
- break;
-
- case KCF_OP_SESSION_LOGIN:
- err = KCF_PROV_SESSION_LOGIN(pd, sops->so_sid,
- sops->so_user_type, sops->so_pin,
- sops->so_pin_len, rhndl);
- break;
-
- case KCF_OP_SESSION_LOGOUT:
- err = KCF_PROV_SESSION_LOGOUT(pd, sops->so_sid, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_OBJECT: {
- kcf_object_ops_params_t *jops = &params->rp_u.object_params;
-
- ASSERT(ctx == NULL);
- switch (optype) {
- case KCF_OP_OBJECT_CREATE:
- err = KCF_PROV_OBJECT_CREATE(pd, jops->oo_sid,
- jops->oo_template, jops->oo_attribute_count,
- jops->oo_object_id_ptr, rhndl);
- break;
-
- case KCF_OP_OBJECT_COPY:
- err = KCF_PROV_OBJECT_COPY(pd, jops->oo_sid,
- jops->oo_object_id,
- jops->oo_template, jops->oo_attribute_count,
- jops->oo_object_id_ptr, rhndl);
- break;
-
- case KCF_OP_OBJECT_DESTROY:
- err = KCF_PROV_OBJECT_DESTROY(pd, jops->oo_sid,
- jops->oo_object_id, rhndl);
- break;
-
- case KCF_OP_OBJECT_GET_SIZE:
- err = KCF_PROV_OBJECT_GET_SIZE(pd, jops->oo_sid,
- jops->oo_object_id, jops->oo_object_size, rhndl);
- break;
-
- case KCF_OP_OBJECT_GET_ATTRIBUTE_VALUE:
- err = KCF_PROV_OBJECT_GET_ATTRIBUTE_VALUE(pd,
- jops->oo_sid, jops->oo_object_id,
- jops->oo_template, jops->oo_attribute_count, rhndl);
- break;
-
- case KCF_OP_OBJECT_SET_ATTRIBUTE_VALUE:
- err = KCF_PROV_OBJECT_SET_ATTRIBUTE_VALUE(pd,
- jops->oo_sid, jops->oo_object_id,
- jops->oo_template, jops->oo_attribute_count, rhndl);
- break;
-
- case KCF_OP_OBJECT_FIND_INIT:
- err = KCF_PROV_OBJECT_FIND_INIT(pd, jops->oo_sid,
- jops->oo_template, jops->oo_attribute_count,
- jops->oo_find_init_pp_ptr, rhndl);
- break;
-
- case KCF_OP_OBJECT_FIND:
- err = KCF_PROV_OBJECT_FIND(pd, jops->oo_find_pp,
- jops->oo_object_id_ptr, jops->oo_max_object_count,
- jops->oo_object_count_ptr, rhndl);
- break;
-
- case KCF_OP_OBJECT_FIND_FINAL:
- err = KCF_PROV_OBJECT_FIND_FINAL(pd, jops->oo_find_pp,
- rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_PROVMGMT: {
- kcf_provmgmt_ops_params_t *pops = &params->rp_u.provmgmt_params;
-
- ASSERT(ctx == NULL);
- switch (optype) {
- case KCF_OP_MGMT_EXTINFO:
- /*
- * po_pd may be a logical provider, in which case
- * we need to check whether it has been removed.
- */
- if (KCF_IS_PROV_REMOVED(pops->po_pd)) {
- err = CRYPTO_DEVICE_ERROR;
- break;
- }
- err = KCF_PROV_EXT_INFO(pd, pops->po_ext_info, rhndl,
- pops->po_pd);
- break;
-
- case KCF_OP_MGMT_INITTOKEN:
- err = KCF_PROV_INIT_TOKEN(pd, pops->po_pin,
- pops->po_pin_len, pops->po_label, rhndl);
- break;
-
- case KCF_OP_MGMT_INITPIN:
- err = KCF_PROV_INIT_PIN(pd, pops->po_sid, pops->po_pin,
- pops->po_pin_len, rhndl);
- break;
-
- case KCF_OP_MGMT_SETPIN:
- err = KCF_PROV_SET_PIN(pd, pops->po_sid,
- pops->po_old_pin, pops->po_old_pin_len,
- pops->po_pin, pops->po_pin_len, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
-
- case KCF_OG_NOSTORE_KEY: {
- kcf_key_ops_params_t *kops = &params->rp_u.key_params;
-
- ASSERT(ctx == NULL);
- KCF_SET_PROVIDER_MECHNUM(kops->ko_framework_mechtype, pd,
- &kops->ko_mech);
-
- switch (optype) {
- case KCF_OP_KEY_GENERATE:
- err = KCF_PROV_NOSTORE_KEY_GENERATE(pd, kops->ko_sid,
- &kops->ko_mech, kops->ko_key_template,
- kops->ko_key_attribute_count,
- kops->ko_out_template1,
- kops->ko_out_attribute_count1, rhndl);
- break;
-
- case KCF_OP_KEY_GENERATE_PAIR:
- err = KCF_PROV_NOSTORE_KEY_GENERATE_PAIR(pd,
- kops->ko_sid, &kops->ko_mech,
- kops->ko_key_template, kops->ko_key_attribute_count,
- kops->ko_private_key_template,
- kops->ko_private_key_attribute_count,
- kops->ko_out_template1,
- kops->ko_out_attribute_count1,
- kops->ko_out_template2,
- kops->ko_out_attribute_count2,
- rhndl);
- break;
-
- case KCF_OP_KEY_DERIVE:
- err = KCF_PROV_NOSTORE_KEY_DERIVE(pd, kops->ko_sid,
- &kops->ko_mech, kops->ko_key,
- kops->ko_key_template,
- kops->ko_key_attribute_count,
- kops->ko_out_template1,
- kops->ko_out_attribute_count1, rhndl);
- break;
-
- default:
- break;
- }
- break;
- }
- default:
- break;
- } /* end of switch(params->rp_opgrp) */
-
- KCF_PROV_INCRSTATS(pd, err);
- return (err);
-}
-
-
-/*
- * Emulate the call for a multipart dual ops with 2 single steps.
- * This routine is always called in the context of a working thread
- * running kcf_svc_do_run().
- * The single steps are submitted in a pure synchronous way (blocking).
- * When this routine returns, kcf_svc_do_run() will call kcf_aop_done()
- * so the originating consumer's callback gets invoked. kcf_aop_done()
- * takes care of freeing the operation context. So, this routine does
- * not free the operation context.
- *
- * The provider descriptor is assumed held by the callers.
- */
-static int
-kcf_emulate_dual(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
- kcf_req_params_t *params)
-{
- int err = CRYPTO_ARGUMENTS_BAD;
- kcf_op_type_t optype;
- size_t save_len;
- off_t save_offset;
-
- optype = params->rp_optype;
-
- switch (params->rp_opgrp) {
- case KCF_OG_ENCRYPT_MAC: {
- kcf_encrypt_mac_ops_params_t *cmops =
- &params->rp_u.encrypt_mac_params;
- kcf_context_t *encr_kcf_ctx;
- crypto_ctx_t *mac_ctx;
- kcf_req_params_t encr_params;
-
- encr_kcf_ctx = (kcf_context_t *)(ctx->cc_framework_private);
-
- switch (optype) {
- case KCF_OP_INIT: {
- encr_kcf_ctx->kc_secondctx = NULL;
-
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_INIT,
- pd->pd_sid, &cmops->em_encr_mech,
- cmops->em_encr_key, NULL, NULL,
- cmops->em_encr_templ);
-
- err = kcf_submit_request(pd, ctx, NULL, &encr_params,
- B_FALSE);
-
- /* It can't be CRYPTO_QUEUED */
- if (err != CRYPTO_SUCCESS) {
- break;
- }
-
- err = crypto_mac_init(&cmops->em_mac_mech,
- cmops->em_mac_key, cmops->em_mac_templ,
- (crypto_context_t *)&mac_ctx, NULL);
-
- if (err == CRYPTO_SUCCESS) {
- encr_kcf_ctx->kc_secondctx = (kcf_context_t *)
- mac_ctx->cc_framework_private;
- KCF_CONTEXT_REFHOLD((kcf_context_t *)
- mac_ctx->cc_framework_private);
- }
-
- break;
-
- }
- case KCF_OP_UPDATE: {
- crypto_dual_data_t *ct = cmops->em_ciphertext;
- crypto_data_t *pt = cmops->em_plaintext;
- kcf_context_t *mac_kcf_ctx = encr_kcf_ctx->kc_secondctx;
- crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
-
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_UPDATE,
- pd->pd_sid, NULL, NULL, pt, (crypto_data_t *)ct,
- NULL);
-
- err = kcf_submit_request(pd, ctx, NULL, &encr_params,
- B_FALSE);
-
- /* It can't be CRYPTO_QUEUED */
- if (err != CRYPTO_SUCCESS) {
- break;
- }
-
- save_offset = ct->dd_offset1;
- save_len = ct->dd_len1;
- if (ct->dd_len2 == 0) {
- /*
- * The previous encrypt step was an
- * accumulation only and didn't produce any
- * partial output
- */
- if (ct->dd_len1 == 0)
- break;
-
- } else {
- ct->dd_offset1 = ct->dd_offset2;
- ct->dd_len1 = ct->dd_len2;
- }
- err = crypto_mac_update((crypto_context_t)mac_ctx,
- (crypto_data_t *)ct, NULL);
-
- ct->dd_offset1 = save_offset;
- ct->dd_len1 = save_len;
-
- break;
- }
- case KCF_OP_FINAL: {
- crypto_dual_data_t *ct = cmops->em_ciphertext;
- crypto_data_t *mac = cmops->em_mac;
- kcf_context_t *mac_kcf_ctx = encr_kcf_ctx->kc_secondctx;
- crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
- crypto_context_t mac_context = mac_ctx;
-
- KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_FINAL,
- pd->pd_sid, NULL, NULL, NULL, (crypto_data_t *)ct,
- NULL);
-
- err = kcf_submit_request(pd, ctx, NULL, &encr_params,
- B_FALSE);
-
- /* It can't be CRYPTO_QUEUED */
- if (err != CRYPTO_SUCCESS) {
- crypto_cancel_ctx(mac_context);
- break;
- }
-
- if (ct->dd_len2 > 0) {
- save_offset = ct->dd_offset1;
- save_len = ct->dd_len1;
- ct->dd_offset1 = ct->dd_offset2;
- ct->dd_len1 = ct->dd_len2;
-
- err = crypto_mac_update(mac_context,
- (crypto_data_t *)ct, NULL);
-
- ct->dd_offset1 = save_offset;
- ct->dd_len1 = save_len;
-
- if (err != CRYPTO_SUCCESS) {
- crypto_cancel_ctx(mac_context);
- return (err);
- }
- }
-
- /* and finally, collect the MAC */
- err = crypto_mac_final(mac_context, mac, NULL);
- break;
- }
-
- default:
- break;
- }
- KCF_PROV_INCRSTATS(pd, err);
- break;
- }
- case KCF_OG_MAC_DECRYPT: {
- kcf_mac_decrypt_ops_params_t *mdops =
- &params->rp_u.mac_decrypt_params;
- kcf_context_t *decr_kcf_ctx;
- crypto_ctx_t *mac_ctx;
- kcf_req_params_t decr_params;
-
- decr_kcf_ctx = (kcf_context_t *)(ctx->cc_framework_private);
-
- switch (optype) {
- case KCF_OP_INIT: {
- decr_kcf_ctx->kc_secondctx = NULL;
-
- err = crypto_mac_init(&mdops->md_mac_mech,
- mdops->md_mac_key, mdops->md_mac_templ,
- (crypto_context_t *)&mac_ctx, NULL);
-
- /* It can't be CRYPTO_QUEUED */
- if (err != CRYPTO_SUCCESS) {
- break;
- }
-
- KCF_WRAP_DECRYPT_OPS_PARAMS(&decr_params, KCF_OP_INIT,
- pd->pd_sid, &mdops->md_decr_mech,
- mdops->md_decr_key, NULL, NULL,
- mdops->md_decr_templ);
-
- err = kcf_submit_request(pd, ctx, NULL, &decr_params,
- B_FALSE);
-
- /* It can't be CRYPTO_QUEUED */
- if (err != CRYPTO_SUCCESS) {
- crypto_cancel_ctx((crypto_context_t)mac_ctx);
- break;
- }
-
- decr_kcf_ctx->kc_secondctx = (kcf_context_t *)
- mac_ctx->cc_framework_private;
- KCF_CONTEXT_REFHOLD((kcf_context_t *)
- mac_ctx->cc_framework_private);
-
- break;
- default:
- break;
-
- }
- case KCF_OP_UPDATE: {
- crypto_dual_data_t *ct = mdops->md_ciphertext;
- crypto_data_t *pt = mdops->md_plaintext;
- kcf_context_t *mac_kcf_ctx = decr_kcf_ctx->kc_secondctx;
- crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
-
- err = crypto_mac_update((crypto_context_t)mac_ctx,
- (crypto_data_t *)ct, NULL);
-
- if (err != CRYPTO_SUCCESS)
- break;
-
- save_offset = ct->dd_offset1;
- save_len = ct->dd_len1;
-
- /* zero ct->dd_len2 means decrypt everything */
- if (ct->dd_len2 > 0) {
- ct->dd_offset1 = ct->dd_offset2;
- ct->dd_len1 = ct->dd_len2;
- }
-
- err = crypto_decrypt_update((crypto_context_t)ctx,
- (crypto_data_t *)ct, pt, NULL);
-
- ct->dd_offset1 = save_offset;
- ct->dd_len1 = save_len;
-
- break;
- }
- case KCF_OP_FINAL: {
- crypto_data_t *pt = mdops->md_plaintext;
- crypto_data_t *mac = mdops->md_mac;
- kcf_context_t *mac_kcf_ctx = decr_kcf_ctx->kc_secondctx;
- crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
-
- err = crypto_mac_final((crypto_context_t)mac_ctx,
- mac, NULL);
-
- if (err != CRYPTO_SUCCESS) {
- crypto_cancel_ctx(ctx);
- break;
- }
-
- /* Get the last chunk of plaintext */
- KCF_CONTEXT_REFHOLD(decr_kcf_ctx);
- err = crypto_decrypt_final((crypto_context_t)ctx, pt,
- NULL);
-
- break;
- }
- }
- break;
- }
- default:
-
- break;
- } /* end of switch(params->rp_opgrp) */
-
- return (err);
-}
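
The removed emulation is easier to follow as a shape: each multipart dual step
is one synchronous single-operation submission, followed by a hand-off of its
output to the secondary context. A minimal sketch of the encrypt+MAC update
path, reusing the helpers the deleted code called (the dd_offset2/dd_len2
bookkeeping is omitted; this is an illustration, not the verbatim source):

	static int
	emulate_encrypt_mac_update(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
	    crypto_data_t *pt, crypto_dual_data_t *ct, crypto_context_t mac_ctx)
	{
		kcf_req_params_t encr_params;
		int err;

		/* Step 1: synchronous single-step encrypt, pt -> ct. */
		KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_UPDATE,
		    pd->pd_sid, NULL, NULL, pt, (crypto_data_t *)ct, NULL);
		err = kcf_submit_request(pd, ctx, NULL, &encr_params, B_FALSE);
		if (err != CRYPTO_SUCCESS)
			return (err);

		/* Step 2: MAC the ciphertext the encrypt step just produced. */
		return (crypto_mac_update(mac_ctx, (crypto_data_t *)ct, NULL));
	}
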
diff --git a/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c b/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c
index 2642b317d698..41705e84bc4b 100644
--- a/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c
+++ b/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -27,7 +27,6 @@
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <sys/crypto/impl.h>
-#include <sys/modhash.h>
/* Cryptographic mechanisms tables and their access functions */
@@ -55,9 +54,6 @@
/*
* Locking conventions:
* --------------------
- * A global mutex, kcf_mech_tabs_lock, serializes writes to the
- * mechanism table via kcf_create_mech_entry().
- *
* A mutex is associated with every entry of the tables.
* The mutex is acquired whenever the entry is accessed for
* 1) retrieving the mech_id (comparing the mech name)
@@ -72,9 +68,6 @@
* long enough to justify the cost of using rwlocks, so the per-mechanism
* entry mutex won't be very *hot*.
*
- * When both kcf_mech_tabs_lock and a mech_entry mutex need to be held,
- * kcf_mech_tabs_lock must always be acquired first.
- *
*/
/* Mechanisms tables */
@@ -82,79 +75,33 @@
/* RFE 4687834 Will deal with the extensibility of these tables later */
-kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST];
-kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER];
-kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC];
-kcf_mech_entry_t kcf_sign_mechs_tab[KCF_MAXSIGN];
-kcf_mech_entry_t kcf_keyops_mechs_tab[KCF_MAXKEYOPS];
-kcf_mech_entry_t kcf_misc_mechs_tab[KCF_MAXMISC];
+static kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST];
+static kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER];
+static kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC];
-kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = {
+const kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = {
{0, NULL}, /* No class zero */
{KCF_MAXDIGEST, kcf_digest_mechs_tab},
{KCF_MAXCIPHER, kcf_cipher_mechs_tab},
{KCF_MAXMAC, kcf_mac_mechs_tab},
- {KCF_MAXSIGN, kcf_sign_mechs_tab},
- {KCF_MAXKEYOPS, kcf_keyops_mechs_tab},
- {KCF_MAXMISC, kcf_misc_mechs_tab}
};
-/*
- * Per-algorithm internal thresholds for the minimum input size of before
- * offloading to hardware provider.
- * Dispatching a crypto operation to a hardware provider entails paying the
- * cost of an additional context switch. Measurements with Sun Accelerator 4000
- * shows that 512-byte jobs or smaller are better handled in software.
- * There is room for refinement here.
- *
- */
-int kcf_md5_threshold = 512;
-int kcf_sha1_threshold = 512;
-int kcf_des_threshold = 512;
-int kcf_des3_threshold = 512;
-int kcf_aes_threshold = 512;
-int kcf_bf_threshold = 512;
-int kcf_rc4_threshold = 512;
-
-kmutex_t kcf_mech_tabs_lock;
-static uint32_t kcf_gen_swprov = 0;
-
-int kcf_mech_hash_size = 256;
-mod_hash_t *kcf_mech_hash; /* mech name to id hash */
-
-static crypto_mech_type_t
-kcf_mech_hash_find(char *mechname)
-{
- mod_hash_val_t hv;
- crypto_mech_type_t mt;
-
- mt = CRYPTO_MECH_INVALID;
- if (mod_hash_find(kcf_mech_hash, (mod_hash_key_t)mechname, &hv) == 0) {
- mt = *(crypto_mech_type_t *)hv;
- ASSERT(mt != CRYPTO_MECH_INVALID);
- }
+static avl_tree_t kcf_mech_hash;
- return (mt);
+static int
+kcf_mech_hash_compar(const void *lhs, const void *rhs)
+{
+ const kcf_mech_entry_t *l = lhs, *r = rhs;
+ int cmp = strncmp(l->me_name, r->me_name, CRYPTO_MAX_MECH_NAME);
+ return ((0 < cmp) - (cmp < 0));
}
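
The (0 < cmp) - (cmp < 0) expression clamps strncmp()'s unspecified-magnitude
result to exactly -1, 0, or 1, which is what the illumos-derived AVL code
requires of a comparator. The idiom in isolation:

	#include <string.h>

	/* Branchless sign of an int: -1 if negative, 0 if zero, 1 if positive. */
	static int
	sign_of(int cmp)
	{
		return ((0 < cmp) - (cmp < 0));
	}

	/*
	 * sign_of(strcmp("aes", "sha")) == -1
	 * sign_of(strcmp("aes", "aes")) ==  0
	 * sign_of(strcmp("sha", "aes")) ==  1
	 */
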
void
kcf_destroy_mech_tabs(void)
{
- int i, max;
- kcf_ops_class_t class;
- kcf_mech_entry_t *me_tab;
-
- if (kcf_mech_hash)
- mod_hash_destroy_hash(kcf_mech_hash);
-
- mutex_destroy(&kcf_mech_tabs_lock);
-
- for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) {
- max = kcf_mech_tabs_tab[class].met_size;
- me_tab = kcf_mech_tabs_tab[class].met_tab;
- for (i = 0; i < max; i++)
- mutex_destroy(&(me_tab[i].me_mutex));
- }
+ for (void *cookie = NULL; avl_destroy_nodes(&kcf_mech_hash, &cookie); )
+ ;
+ avl_destroy(&kcf_mech_hash);
}
/*
@@ -166,102 +113,8 @@ kcf_destroy_mech_tabs(void)
void
kcf_init_mech_tabs(void)
{
- int i, max;
- kcf_ops_class_t class;
- kcf_mech_entry_t *me_tab;
-
- /* Initializes the mutex locks. */
-
- mutex_init(&kcf_mech_tabs_lock, NULL, MUTEX_DEFAULT, NULL);
-
- /* Then the pre-defined mechanism entries */
-
- /* Two digests */
- (void) strncpy(kcf_digest_mechs_tab[0].me_name, SUN_CKM_MD5,
- CRYPTO_MAX_MECH_NAME);
- kcf_digest_mechs_tab[0].me_threshold = kcf_md5_threshold;
-
- (void) strncpy(kcf_digest_mechs_tab[1].me_name, SUN_CKM_SHA1,
- CRYPTO_MAX_MECH_NAME);
- kcf_digest_mechs_tab[1].me_threshold = kcf_sha1_threshold;
-
- /* The symmetric ciphers in various modes */
- (void) strncpy(kcf_cipher_mechs_tab[0].me_name, SUN_CKM_DES_CBC,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[0].me_threshold = kcf_des_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[1].me_name, SUN_CKM_DES3_CBC,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[1].me_threshold = kcf_des3_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[2].me_name, SUN_CKM_DES_ECB,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[2].me_threshold = kcf_des_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[3].me_name, SUN_CKM_DES3_ECB,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[3].me_threshold = kcf_des3_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[4].me_name, SUN_CKM_BLOWFISH_CBC,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[4].me_threshold = kcf_bf_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[5].me_name, SUN_CKM_BLOWFISH_ECB,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[5].me_threshold = kcf_bf_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[6].me_name, SUN_CKM_AES_CBC,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[6].me_threshold = kcf_aes_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[7].me_name, SUN_CKM_AES_ECB,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[7].me_threshold = kcf_aes_threshold;
-
- (void) strncpy(kcf_cipher_mechs_tab[8].me_name, SUN_CKM_RC4,
- CRYPTO_MAX_MECH_NAME);
- kcf_cipher_mechs_tab[8].me_threshold = kcf_rc4_threshold;
-
-
- /* 4 HMACs */
- (void) strncpy(kcf_mac_mechs_tab[0].me_name, SUN_CKM_MD5_HMAC,
- CRYPTO_MAX_MECH_NAME);
- kcf_mac_mechs_tab[0].me_threshold = kcf_md5_threshold;
-
- (void) strncpy(kcf_mac_mechs_tab[1].me_name, SUN_CKM_MD5_HMAC_GENERAL,
- CRYPTO_MAX_MECH_NAME);
- kcf_mac_mechs_tab[1].me_threshold = kcf_md5_threshold;
-
- (void) strncpy(kcf_mac_mechs_tab[2].me_name, SUN_CKM_SHA1_HMAC,
- CRYPTO_MAX_MECH_NAME);
- kcf_mac_mechs_tab[2].me_threshold = kcf_sha1_threshold;
-
- (void) strncpy(kcf_mac_mechs_tab[3].me_name, SUN_CKM_SHA1_HMAC_GENERAL,
- CRYPTO_MAX_MECH_NAME);
- kcf_mac_mechs_tab[3].me_threshold = kcf_sha1_threshold;
-
-
- /* 1 random number generation pseudo mechanism */
- (void) strncpy(kcf_misc_mechs_tab[0].me_name, SUN_RANDOM,
- CRYPTO_MAX_MECH_NAME);
-
- kcf_mech_hash = mod_hash_create_strhash_nodtr("kcf mech2id hash",
- kcf_mech_hash_size, mod_hash_null_valdtor);
-
- for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) {
- max = kcf_mech_tabs_tab[class].met_size;
- me_tab = kcf_mech_tabs_tab[class].met_tab;
- for (i = 0; i < max; i++) {
- mutex_init(&(me_tab[i].me_mutex), NULL,
- MUTEX_DEFAULT, NULL);
- if (me_tab[i].me_name[0] != 0) {
- me_tab[i].me_mechid = KCF_MECHID(class, i);
- (void) mod_hash_insert(kcf_mech_hash,
- (mod_hash_key_t)me_tab[i].me_name,
- (mod_hash_val_t)&(me_tab[i].me_mechid));
- }
- }
- }
+ avl_create(&kcf_mech_hash, kcf_mech_hash_compar,
+ sizeof (kcf_mech_entry_t), offsetof(kcf_mech_entry_t, me_node));
}
/*
@@ -291,12 +144,8 @@ kcf_init_mech_tabs(void)
* KCF_SUCCESS otherwise.
*/
static int
-kcf_create_mech_entry(kcf_ops_class_t class, char *mechname)
+kcf_create_mech_entry(kcf_ops_class_t class, const char *mechname)
{
- crypto_mech_type_t mt;
- kcf_mech_entry_t *me_tab;
- int i = 0, size;
-
if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS))
return (KCF_INVALID_MECH_CLASS);
@@ -306,49 +155,28 @@ kcf_create_mech_entry(kcf_ops_class_t class, char *mechname)
* First check if the mechanism is already in one of the tables.
* The mech_entry could be in another class.
*/
- mutex_enter(&kcf_mech_tabs_lock);
- mt = kcf_mech_hash_find(mechname);
- if (mt != CRYPTO_MECH_INVALID) {
- /* Nothing to do, regardless the suggested class. */
- mutex_exit(&kcf_mech_tabs_lock);
+ avl_index_t where = 0;
+ kcf_mech_entry_t tmptab;
+ strlcpy(tmptab.me_name, mechname, CRYPTO_MAX_MECH_NAME);
+ if (avl_find(&kcf_mech_hash, &tmptab, &where) != NULL)
return (KCF_SUCCESS);
- }
/* Now take the next unused mech entry in the class's tab */
- me_tab = kcf_mech_tabs_tab[class].met_tab;
- size = kcf_mech_tabs_tab[class].met_size;
+ kcf_mech_entry_t *me_tab = kcf_mech_tabs_tab[class].met_tab;
+ int size = kcf_mech_tabs_tab[class].met_size;
- while (i < size) {
- mutex_enter(&(me_tab[i].me_mutex));
+ for (int i = 0; i < size; ++i)
if (me_tab[i].me_name[0] == 0) {
/* Found an empty spot */
- (void) strlcpy(me_tab[i].me_name, mechname,
+ strlcpy(me_tab[i].me_name, mechname,
CRYPTO_MAX_MECH_NAME);
- me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0';
me_tab[i].me_mechid = KCF_MECHID(class, i);
- /*
- * No a-priori information about the new mechanism, so
- * the threshold is set to zero.
- */
- me_tab[i].me_threshold = 0;
- mutex_exit(&(me_tab[i].me_mutex));
/* Add the new mechanism to the hash table */
- (void) mod_hash_insert(kcf_mech_hash,
- (mod_hash_key_t)me_tab[i].me_name,
- (mod_hash_val_t)&(me_tab[i].me_mechid));
- break;
+ avl_insert(&kcf_mech_hash, &me_tab[i], where);
+ return (KCF_SUCCESS);
}
- mutex_exit(&(me_tab[i].me_mutex));
- i++;
- }
- mutex_exit(&kcf_mech_tabs_lock);
-
- if (i == size) {
- return (KCF_MECH_TAB_FULL);
- }
-
- return (KCF_SUCCESS);
+ return (KCF_MECH_TAB_FULL);
}
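
The avl_index_t dance above is the standard find-or-insert idiom: on a miss,
avl_find() records the would-be insertion point in 'where', so avl_insert()
can link the new node without a second search. It is only valid while the
tree stays unmodified between the two calls. A hypothetical caller (the
mechanism name and the entry pointer 'me' are placeholders):

	kcf_mech_entry_t key;
	avl_index_t where;

	(void) strlcpy(key.me_name, "CKM_EXAMPLE", CRYPTO_MAX_MECH_NAME);
	if (avl_find(&kcf_mech_hash, &key, &where) == NULL) {
		/* 'me' points at a free table slot with me_name filled in. */
		avl_insert(&kcf_mech_hash, me, where);
	}
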
/*
@@ -376,16 +204,9 @@ kcf_add_mech_provider(short mech_indx,
{
int error;
kcf_mech_entry_t *mech_entry = NULL;
- crypto_mech_info_t *mech_info;
- crypto_mech_type_t kcf_mech_type, mt;
- kcf_prov_mech_desc_t *prov_mech, *prov_mech2;
- crypto_func_group_t simple_fg_mask, dual_fg_mask;
- crypto_mech_info_t *dmi;
- crypto_mech_info_list_t *mil, *mil2;
- kcf_mech_entry_t *me;
- int i;
-
- ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+ const crypto_mech_info_t *mech_info;
+ crypto_mech_type_t kcf_mech_type;
+ kcf_prov_mech_desc_t *prov_mech;
mech_info = &prov_desc->pd_mechanisms[mech_indx];
@@ -394,7 +215,7 @@ kcf_add_mech_provider(short mech_indx,
* Find the class corresponding to the function group flag of
* the mechanism.
*/
- kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name);
+ kcf_mech_type = crypto_mech2id(mech_info->cm_mech_name);
if (kcf_mech_type == CRYPTO_MECH_INVALID) {
crypto_func_group_t fg = mech_info->cm_func_group_mask;
kcf_ops_class_t class;
@@ -407,19 +228,8 @@ kcf_add_mech_provider(short mech_indx,
class = KCF_CIPHER_CLASS;
else if (fg & CRYPTO_FG_MAC || fg & CRYPTO_FG_MAC_ATOMIC)
class = KCF_MAC_CLASS;
- else if (fg & CRYPTO_FG_SIGN || fg & CRYPTO_FG_VERIFY ||
- fg & CRYPTO_FG_SIGN_ATOMIC ||
- fg & CRYPTO_FG_VERIFY_ATOMIC ||
- fg & CRYPTO_FG_SIGN_RECOVER ||
- fg & CRYPTO_FG_VERIFY_RECOVER)
- class = KCF_SIGN_CLASS;
- else if (fg & CRYPTO_FG_GENERATE ||
- fg & CRYPTO_FG_GENERATE_KEY_PAIR ||
- fg & CRYPTO_FG_WRAP || fg & CRYPTO_FG_UNWRAP ||
- fg & CRYPTO_FG_DERIVE)
- class = KCF_KEYOPS_CLASS;
else
- class = KCF_MISC_CLASS;
+ __builtin_unreachable();
/*
* Attempt to create a new mech_entry for the specified
@@ -431,7 +241,7 @@ kcf_add_mech_provider(short mech_indx,
return (error);
}
/* get the KCF mech type that was assigned to the mechanism */
- kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name);
+ kcf_mech_type = crypto_mech2id(mech_info->cm_mech_name);
ASSERT(kcf_mech_type != CRYPTO_MECH_INVALID);
}
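
With the sign, keyops, and misc classes gone, a registering mechanism must
advertise digest, cipher, or MAC capability, and __builtin_unreachable()
encodes that contract for the compiler: unlike the old silent KCF_MISC_CLASS
fallback, actually reaching it is undefined behavior, so the optimizer may
drop the final branch entirely. A self-contained illustration (the flag
values are made up):

	/* The caller guarantees at least one known bit is set in fg. */
	static int
	classify(unsigned int fg)
	{
		if (fg & 0x1)
			return (1);	/* digest-like */
		else if (fg & 0x2)
			return (2);	/* cipher-like */
		else if (fg & 0x4)
			return (3);	/* MAC-like */
		else
			__builtin_unreachable();	/* UB if the contract breaks */
	}
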
@@ -440,7 +250,8 @@ kcf_add_mech_provider(short mech_indx,
/* allocate and initialize new kcf_prov_mech_desc */
prov_mech = kmem_zalloc(sizeof (kcf_prov_mech_desc_t), KM_SLEEP);
- bcopy(mech_info, &prov_mech->pm_mech_info, sizeof (crypto_mech_info_t));
+ memcpy(&prov_mech->pm_mech_info, mech_info,
+ sizeof (crypto_mech_info_t));
prov_mech->pm_prov_desc = prov_desc;
prov_desc->pd_mech_indx[KCF_MECH2CLASS(kcf_mech_type)]
[KCF_MECH2INDEX(kcf_mech_type)] = mech_indx;
@@ -448,142 +259,32 @@ kcf_add_mech_provider(short mech_indx,
KCF_PROV_REFHOLD(prov_desc);
KCF_PROV_IREFHOLD(prov_desc);
- dual_fg_mask = mech_info->cm_func_group_mask & CRYPTO_FG_DUAL_MASK;
-
- if (dual_fg_mask == ((crypto_func_group_t)0))
- goto add_entry;
-
- simple_fg_mask = (mech_info->cm_func_group_mask &
- CRYPTO_FG_SIMPLEOP_MASK) | CRYPTO_FG_RANDOM;
-
- for (i = 0; i < prov_desc->pd_mech_list_count; i++) {
- dmi = &prov_desc->pd_mechanisms[i];
-
- /* skip self */
- if (dmi->cm_mech_number == mech_info->cm_mech_number)
- continue;
-
- /* skip if not a dual operation mechanism */
- if (!(dmi->cm_func_group_mask & dual_fg_mask) ||
- (dmi->cm_func_group_mask & simple_fg_mask))
- continue;
-
- mt = kcf_mech_hash_find(dmi->cm_mech_name);
- if (mt == CRYPTO_MECH_INVALID)
- continue;
-
- if (kcf_get_mech_entry(mt, &me) != KCF_SUCCESS)
- continue;
-
- mil = kmem_zalloc(sizeof (*mil), KM_SLEEP);
- mil2 = kmem_zalloc(sizeof (*mil2), KM_SLEEP);
-
- /*
- * Ignore hard-coded entries in the mech table
- * if the provider hasn't registered.
- */
- mutex_enter(&me->me_mutex);
- if (me->me_hw_prov_chain == NULL && me->me_sw_prov == NULL) {
- mutex_exit(&me->me_mutex);
- kmem_free(mil, sizeof (*mil));
- kmem_free(mil2, sizeof (*mil2));
- continue;
- }
-
- /*
- * Add other dual mechanisms that have registered
- * with the framework to this mechanism's
- * cross-reference list.
- */
- mil->ml_mech_info = *dmi; /* struct assignment */
- mil->ml_kcf_mechid = mt;
-
- /* add to head of list */
- mil->ml_next = prov_mech->pm_mi_list;
- prov_mech->pm_mi_list = mil;
-
- if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
- prov_mech2 = me->me_hw_prov_chain;
- else
- prov_mech2 = me->me_sw_prov;
-
- if (prov_mech2 == NULL) {
- kmem_free(mil2, sizeof (*mil2));
- mutex_exit(&me->me_mutex);
- continue;
- }
-
- /*
- * Update all other cross-reference lists by
- * adding this new mechanism.
- */
- while (prov_mech2 != NULL) {
- if (prov_mech2->pm_prov_desc == prov_desc) {
- /* struct assignment */
- mil2->ml_mech_info = *mech_info;
- mil2->ml_kcf_mechid = kcf_mech_type;
-
- /* add to head of list */
- mil2->ml_next = prov_mech2->pm_mi_list;
- prov_mech2->pm_mi_list = mil2;
- break;
- }
- prov_mech2 = prov_mech2->pm_next;
- }
- if (prov_mech2 == NULL)
- kmem_free(mil2, sizeof (*mil2));
-
- mutex_exit(&me->me_mutex);
- }
-
-add_entry:
/*
* Add new kcf_prov_mech_desc at the front of HW providers
* chain.
*/
- switch (prov_desc->pd_prov_type) {
-
- case CRYPTO_HW_PROVIDER:
- mutex_enter(&mech_entry->me_mutex);
- prov_mech->pm_me = mech_entry;
- prov_mech->pm_next = mech_entry->me_hw_prov_chain;
- mech_entry->me_hw_prov_chain = prov_mech;
- mech_entry->me_num_hwprov++;
- mutex_exit(&mech_entry->me_mutex);
- break;
-
- case CRYPTO_SW_PROVIDER:
- mutex_enter(&mech_entry->me_mutex);
- if (mech_entry->me_sw_prov != NULL) {
- /*
- * There is already a SW provider for this mechanism.
- * Since we allow only one SW provider per mechanism,
- * report this condition.
- */
- cmn_err(CE_WARN, "The cryptographic software provider "
- "\"%s\" will not be used for %s. The provider "
- "\"%s\" will be used for this mechanism "
- "instead.", prov_desc->pd_description,
- mech_info->cm_mech_name,
- mech_entry->me_sw_prov->pm_prov_desc->
- pd_description);
- KCF_PROV_REFRELE(prov_desc);
- kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
- prov_mech = NULL;
- } else {
- /*
- * Set the provider as the software provider for
- * this mechanism.
- */
- mech_entry->me_sw_prov = prov_mech;
-
- /* We'll wrap around after 4 billion registrations! */
- mech_entry->me_gen_swprov = kcf_gen_swprov++;
- }
- mutex_exit(&mech_entry->me_mutex);
- break;
- default:
- break;
+ if (mech_entry->me_sw_prov != NULL) {
+ /*
+ * There is already a provider for this mechanism.
+ * Since we allow only one provider per mechanism,
+ * report this condition.
+ */
+ cmn_err(CE_WARN, "The cryptographic provider "
+ "\"%s\" will not be used for %s. The provider "
+ "\"%s\" will be used for this mechanism "
+ "instead.", prov_desc->pd_description,
+ mech_info->cm_mech_name,
+ mech_entry->me_sw_prov->pm_prov_desc->
+ pd_description);
+ KCF_PROV_REFRELE(prov_desc);
+ kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
+ prov_mech = NULL;
+ } else {
+ /*
+		 * Record this provider as the one serving
+		 * this mechanism.
+ */
+ mech_entry->me_sw_prov = prov_mech;
}
*pmdpp = prov_mech;
@@ -607,18 +308,14 @@ add_entry:
* User context only.
*/
void
-kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc)
+kcf_remove_mech_provider(const char *mech_name, kcf_provider_desc_t *prov_desc)
{
crypto_mech_type_t mech_type;
- kcf_prov_mech_desc_t *prov_mech = NULL, *prov_chain;
- kcf_prov_mech_desc_t **prev_entry_next;
+ kcf_prov_mech_desc_t *prov_mech = NULL;
kcf_mech_entry_t *mech_entry;
- crypto_mech_info_list_t *mil, *mil2, *next, **prev_next;
-
- ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
/* get the KCF mech type that was assigned to the mechanism */
- if ((mech_type = kcf_mech_hash_find(mech_name)) ==
+ if ((mech_type = crypto_mech2id(mech_name)) ==
CRYPTO_MECH_INVALID) {
/*
* Provider was not allowed for this mech due to policy or
@@ -636,92 +333,17 @@ kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc)
return;
}
- mutex_enter(&mech_entry->me_mutex);
-
- switch (prov_desc->pd_prov_type) {
-
- case CRYPTO_HW_PROVIDER:
- /* find the provider in the mech_entry chain */
- prev_entry_next = &mech_entry->me_hw_prov_chain;
- prov_mech = mech_entry->me_hw_prov_chain;
- while (prov_mech != NULL &&
- prov_mech->pm_prov_desc != prov_desc) {
- prev_entry_next = &prov_mech->pm_next;
- prov_mech = prov_mech->pm_next;
- }
-
- if (prov_mech == NULL) {
- /* entry not found, simply return */
- mutex_exit(&mech_entry->me_mutex);
- return;
- }
-
- /* remove provider entry from mech_entry chain */
- *prev_entry_next = prov_mech->pm_next;
- ASSERT(mech_entry->me_num_hwprov > 0);
- mech_entry->me_num_hwprov--;
- break;
-
- case CRYPTO_SW_PROVIDER:
- if (mech_entry->me_sw_prov == NULL ||
- mech_entry->me_sw_prov->pm_prov_desc != prov_desc) {
- /* not the software provider for this mechanism */
- mutex_exit(&mech_entry->me_mutex);
- return;
- }
- prov_mech = mech_entry->me_sw_prov;
- mech_entry->me_sw_prov = NULL;
- break;
- default:
- /* unexpected crypto_provider_type_t */
- mutex_exit(&mech_entry->me_mutex);
+ if (mech_entry->me_sw_prov == NULL ||
+ mech_entry->me_sw_prov->pm_prov_desc != prov_desc) {
+ /* not the provider for this mechanism */
return;
}
-
- mutex_exit(&mech_entry->me_mutex);
-
- /* Free the dual ops cross-reference lists */
- mil = prov_mech->pm_mi_list;
- while (mil != NULL) {
- next = mil->ml_next;
- if (kcf_get_mech_entry(mil->ml_kcf_mechid,
- &mech_entry) != KCF_SUCCESS) {
- mil = next;
- continue;
- }
-
- mutex_enter(&mech_entry->me_mutex);
- if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
- prov_chain = mech_entry->me_hw_prov_chain;
- else
- prov_chain = mech_entry->me_sw_prov;
-
- while (prov_chain != NULL) {
- if (prov_chain->pm_prov_desc == prov_desc) {
- prev_next = &prov_chain->pm_mi_list;
- mil2 = prov_chain->pm_mi_list;
- while (mil2 != NULL &&
- mil2->ml_kcf_mechid != mech_type) {
- prev_next = &mil2->ml_next;
- mil2 = mil2->ml_next;
- }
- if (mil2 != NULL) {
- *prev_next = mil2->ml_next;
- kmem_free(mil2, sizeof (*mil2));
- }
- break;
- }
- prov_chain = prov_chain->pm_next;
- }
-
- mutex_exit(&mech_entry->me_mutex);
- kmem_free(mil, sizeof (crypto_mech_info_list_t));
- mil = next;
- }
+ prov_mech = mech_entry->me_sw_prov;
+ mech_entry->me_sw_prov = NULL;
/* free entry */
- KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
KCF_PROV_IREFRELE(prov_mech->pm_prov_desc);
+ KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
}
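
The reordered releases just above read as a use-after-free guard:
KCF_PROV_REFRELE() can be the call that frees the descriptor (via
kcf_provider_zero_refcnt()), so the internal hold is plausibly dropped
first, while the memory is still guaranteed valid:

	/* Drop the internal hold while the descriptor is still alive ... */
	KCF_PROV_IREFRELE(prov_mech->pm_prov_desc);
	/* ... then the regular one, which may free it. */
	KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
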
@@ -747,7 +369,7 @@ kcf_get_mech_entry(crypto_mech_type_t mech_type, kcf_mech_entry_t **mep)
{
kcf_ops_class_t class;
int index;
- kcf_mech_entry_tab_t *me_tab;
+ const kcf_mech_entry_tab_t *me_tab;
ASSERT(mep != NULL);
@@ -770,22 +392,44 @@ kcf_get_mech_entry(crypto_mech_type_t mech_type, kcf_mech_entry_t **mep)
return (KCF_SUCCESS);
}
-/* CURRENTLY UNSUPPORTED: attempting to load the module if it isn't found */
+/*
+ * crypto_mech2id()
+ *
+ * Arguments:
+ * . mechname: A null-terminated string identifying the mechanism name.
+ *
+ * Description:
+ * Walks the mechanisms tables, looking for an entry that matches the
+ *	mechname. Once it finds it, it builds the 64-bit mech_type and returns
+ * it.
+ *
+ * Context:
+ * Process and interruption.
+ *
+ * Returns:
+ * The unique mechanism identified by 'mechname', if found.
+ * CRYPTO_MECH_INVALID otherwise.
+ */
/*
* Lookup the hash table for an entry that matches the mechname.
- * If there are no hardware or software providers for the mechanism,
- * but there is an unloaded software provider, this routine will attempt
+ * If there are no providers for the mechanism,
+ * but there is an unloaded provider, this routine will attempt
* to load it.
- *
- * If the MOD_NOAUTOUNLOAD flag is not set, a software provider is
- * in constant danger of being unloaded. For consumers that call
- * crypto_mech2id() only once, the provider will not be reloaded
- * if it becomes unloaded. If a provider gets loaded elsewhere
- * without the MOD_NOAUTOUNLOAD flag being set, we set it now.
*/
crypto_mech_type_t
-crypto_mech2id_common(char *mechname, boolean_t load_module)
+crypto_mech2id(const char *mechname)
{
- crypto_mech_type_t mt = kcf_mech_hash_find(mechname);
- return (mt);
+ kcf_mech_entry_t tmptab, *found;
+ strlcpy(tmptab.me_name, mechname, CRYPTO_MAX_MECH_NAME);
+
+ if ((found = avl_find(&kcf_mech_hash, &tmptab, NULL))) {
+ ASSERT(found->me_mechid != CRYPTO_MECH_INVALID);
+ return (found->me_mechid);
+ }
+
+ return (CRYPTO_MECH_INVALID);
}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(crypto_mech2id);
+#endif
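
A typical consumer call, following the contract documented above (the
mechanism name is illustrative; SUN_CKM_SHA512_HMAC is assumed to be among
the names the ICP registers):

	crypto_mech_type_t mt;

	mt = crypto_mech2id(SUN_CKM_SHA512_HMAC);
	if (mt == CRYPTO_MECH_INVALID)
		return (CRYPTO_MECH_NOT_SUPPORTED);
	/* mt now encodes (class, index) and can be passed to other KCF APIs. */
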
diff --git a/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c b/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c
index 1b115d976232..9dca3882e174 100644
--- a/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c
+++ b/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -33,12 +33,11 @@
*/
/*
- * Utility routine to apply the command, 'cmd', to the
+ * Utility routine to apply the command COPY_TO_DATA to the
* data in the uio structure.
*/
-int
-crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
- void *digest_ctx, void (*update)(void))
+static int
+crypto_uio_copy_to_data(crypto_data_t *data, uchar_t *buf, int len)
{
zfs_uio_t *uiop = data->cd_uio;
off_t offset = data->cd_offset;
@@ -71,26 +70,8 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
offset, length);
datap = (uchar_t *)(zfs_uio_iovbase(uiop, vec_idx) + offset);
- switch (cmd) {
- case COPY_FROM_DATA:
- bcopy(datap, buf, cur_len);
- buf += cur_len;
- break;
- case COPY_TO_DATA:
- bcopy(buf, datap, cur_len);
- buf += cur_len;
- break;
- case COMPARE_TO_DATA:
- if (bcmp(datap, buf, cur_len))
- return (CRYPTO_SIGNATURE_INVALID);
- buf += cur_len;
- break;
- case MD5_DIGEST_DATA:
- case SHA1_DIGEST_DATA:
- case SHA2_DIGEST_DATA:
- case GHASH_DATA:
- return (CRYPTO_ARGUMENTS_BAD);
- }
+ memcpy(datap, buf, cur_len);
+ buf += cur_len;
length -= cur_len;
vec_idx++;
@@ -99,16 +80,11 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
/*
- * The end of the specified iovec's was reached but
+ * The end of the specified iovecs was reached but
* the length requested could not be processed.
*/
- switch (cmd) {
- case COPY_TO_DATA:
- data->cd_length = len;
- return (CRYPTO_BUFFER_TOO_SMALL);
- default:
- return (CRYPTO_DATA_LEN_RANGE);
- }
+ data->cd_length = len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
}
return (CRYPTO_SUCCESS);
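
The surviving loop is the usual scatter-walk over a zfs_uio_t: cd_offset
first selects the starting iovec, then each pass copies min(space left in
this iovec, bytes left overall) and resets the intra-iovec offset. A
condensed sketch of the body, using the same locals as the function above
(zfs_uio_iovlen() is assumed, matching the accessors already in use):

	while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) {
		cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) - offset, length);
		memcpy((uchar_t *)zfs_uio_iovbase(uiop, vec_idx) + offset,
		    buf, cur_len);
		buf += cur_len;
		length -= cur_len;
		vec_idx++;
		offset = 0;	/* only the first iovec starts mid-buffer */
	}
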
@@ -123,13 +99,12 @@ crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len)
output->cd_length = len;
return (CRYPTO_BUFFER_TOO_SMALL);
}
- bcopy(buf, (uchar_t *)(output->cd_raw.iov_base +
- output->cd_offset), len);
+ memcpy((uchar_t *)(output->cd_raw.iov_base +
+ output->cd_offset), buf, len);
break;
case CRYPTO_DATA_UIO:
- return (crypto_uio_data(output, buf, len,
- COPY_TO_DATA, NULL, NULL));
+ return (crypto_uio_copy_to_data(output, buf, len));
default:
return (CRYPTO_ARGUMENTS_BAD);
}
@@ -139,33 +114,21 @@ crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len)
int
crypto_update_iov(void *ctx, crypto_data_t *input, crypto_data_t *output,
- int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
- void (*copy_block)(uint8_t *, uint64_t *))
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *))
{
- common_ctx_t *common_ctx = ctx;
- int rv;
-
ASSERT(input != output);
- if (input->cd_miscdata != NULL) {
- copy_block((uint8_t *)input->cd_miscdata,
- &common_ctx->cc_iv[0]);
- }
if (input->cd_raw.iov_len < input->cd_length)
return (CRYPTO_ARGUMENTS_BAD);
- rv = (cipher)(ctx, input->cd_raw.iov_base + input->cd_offset,
- input->cd_length, output);
-
- return (rv);
+ return ((cipher)(ctx, input->cd_raw.iov_base + input->cd_offset,
+ input->cd_length, output));
}
int
crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
- int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
- void (*copy_block)(uint8_t *, uint64_t *))
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *))
{
- common_ctx_t *common_ctx = ctx;
zfs_uio_t *uiop = input->cd_uio;
off_t offset = input->cd_offset;
size_t length = input->cd_length;
@@ -173,10 +136,6 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
size_t cur_len;
ASSERT(input != output);
- if (input->cd_miscdata != NULL) {
- copy_block((uint8_t *)input->cd_miscdata,
- &common_ctx->cc_iv[0]);
- }
if (zfs_uio_segflg(input->cd_uio) != UIO_SYSSPACE) {
return (CRYPTO_ARGUMENTS_BAD);
diff --git a/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c b/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c
index 9d303d022517..93af61a235d0 100644
--- a/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c
+++ b/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -45,7 +45,7 @@
#include <sys/crypto/sched_impl.h>
#include <sys/crypto/spi.h>
-#define KCF_MAX_PROVIDERS 512 /* max number of providers */
+#define KCF_MAX_PROVIDERS 8 /* max number of providers */
/*
* Prov_tab is an array of providers which is updated when
@@ -59,33 +59,25 @@
*
* prov_tab entries are not updated from kcf.conf or by cryptoadm(1M).
*/
-static kcf_provider_desc_t **prov_tab = NULL;
+static kcf_provider_desc_t *prov_tab[KCF_MAX_PROVIDERS];
static kmutex_t prov_tab_mutex; /* ensure exclusive access to the table */
static uint_t prov_tab_num = 0; /* number of providers in table */
-static uint_t prov_tab_max = KCF_MAX_PROVIDERS;
void
kcf_prov_tab_destroy(void)
{
mutex_destroy(&prov_tab_mutex);
-
- if (prov_tab)
- kmem_free(prov_tab, prov_tab_max *
- sizeof (kcf_provider_desc_t *));
}
/*
* Initialize a mutex and the KCF providers table, prov_tab.
- * The providers table is dynamically allocated with prov_tab_max entries.
+ * The providers table is a static array of KCF_MAX_PROVIDERS entries.
* Called from kcf module _init().
*/
void
kcf_prov_tab_init(void)
{
mutex_init(&prov_tab_mutex, NULL, MUTEX_DEFAULT, NULL);
-
- prov_tab = kmem_zalloc(prov_tab_max * sizeof (kcf_provider_desc_t *),
- KM_SLEEP);
}
/*
@@ -101,8 +93,6 @@ kcf_prov_tab_add_provider(kcf_provider_desc_t *prov_desc)
{
uint_t i;
- ASSERT(prov_tab != NULL);
-
mutex_enter(&prov_tab_mutex);
/* find free slot in providers table */
@@ -146,9 +136,6 @@ kcf_prov_tab_rem_provider(crypto_provider_id_t prov_id)
{
kcf_provider_desc_t *prov_desc;
- ASSERT(prov_tab != NULL);
- ASSERT(prov_tab_num >= 0);
-
/*
* Validate provider id, since it can be specified by a 3rd-party
* provider.
@@ -171,8 +158,8 @@ kcf_prov_tab_rem_provider(crypto_provider_id_t prov_id)
* at that time.
*/
- KCF_PROV_REFRELE(prov_desc);
KCF_PROV_IREFRELE(prov_desc);
+ KCF_PROV_REFRELE(prov_desc);
return (CRYPTO_SUCCESS);
}
@@ -204,91 +191,6 @@ kcf_prov_tab_lookup(crypto_provider_id_t prov_id)
return (prov_desc);
}
-static void
-allocate_ops_v1(crypto_ops_t *src, crypto_ops_t *dst, uint_t *mech_list_count)
-{
- if (src->co_control_ops != NULL)
- dst->co_control_ops = kmem_alloc(sizeof (crypto_control_ops_t),
- KM_SLEEP);
-
- if (src->co_digest_ops != NULL)
- dst->co_digest_ops = kmem_alloc(sizeof (crypto_digest_ops_t),
- KM_SLEEP);
-
- if (src->co_cipher_ops != NULL)
- dst->co_cipher_ops = kmem_alloc(sizeof (crypto_cipher_ops_t),
- KM_SLEEP);
-
- if (src->co_mac_ops != NULL)
- dst->co_mac_ops = kmem_alloc(sizeof (crypto_mac_ops_t),
- KM_SLEEP);
-
- if (src->co_sign_ops != NULL)
- dst->co_sign_ops = kmem_alloc(sizeof (crypto_sign_ops_t),
- KM_SLEEP);
-
- if (src->co_verify_ops != NULL)
- dst->co_verify_ops = kmem_alloc(sizeof (crypto_verify_ops_t),
- KM_SLEEP);
-
- if (src->co_dual_ops != NULL)
- dst->co_dual_ops = kmem_alloc(sizeof (crypto_dual_ops_t),
- KM_SLEEP);
-
- if (src->co_dual_cipher_mac_ops != NULL)
- dst->co_dual_cipher_mac_ops = kmem_alloc(
- sizeof (crypto_dual_cipher_mac_ops_t), KM_SLEEP);
-
- if (src->co_random_ops != NULL) {
- dst->co_random_ops = kmem_alloc(
- sizeof (crypto_random_number_ops_t), KM_SLEEP);
-
- /*
- * Allocate storage to store the array of supported mechanisms
- * specified by provider. We allocate extra mechanism storage
- * if the provider has random_ops since we keep an internal
- * mechanism, SUN_RANDOM, in this case.
- */
- (*mech_list_count)++;
- }
-
- if (src->co_session_ops != NULL)
- dst->co_session_ops = kmem_alloc(sizeof (crypto_session_ops_t),
- KM_SLEEP);
-
- if (src->co_object_ops != NULL)
- dst->co_object_ops = kmem_alloc(sizeof (crypto_object_ops_t),
- KM_SLEEP);
-
- if (src->co_key_ops != NULL)
- dst->co_key_ops = kmem_alloc(sizeof (crypto_key_ops_t),
- KM_SLEEP);
-
- if (src->co_provider_ops != NULL)
- dst->co_provider_ops = kmem_alloc(
- sizeof (crypto_provider_management_ops_t), KM_SLEEP);
-
- if (src->co_ctx_ops != NULL)
- dst->co_ctx_ops = kmem_alloc(sizeof (crypto_ctx_ops_t),
- KM_SLEEP);
-}
-
-static void
-allocate_ops_v2(crypto_ops_t *src, crypto_ops_t *dst)
-{
- if (src->co_mech_ops != NULL)
- dst->co_mech_ops = kmem_alloc(sizeof (crypto_mech_ops_t),
- KM_SLEEP);
-}
-
-static void
-allocate_ops_v3(crypto_ops_t *src, crypto_ops_t *dst)
-{
- if (src->co_nostore_key_ops != NULL)
- dst->co_nostore_key_ops =
- kmem_alloc(sizeof (crypto_nostore_key_ops_t), KM_SLEEP);
-}
-
/*
* Allocate a provider descriptor. mech_list_count specifies the
* number of mechanisms supported by the providers, and is used
@@ -297,61 +199,19 @@ allocate_ops_v3(crypto_ops_t *src, crypto_ops_t *dst)
* since it is invoked from user context during provider registration.
*/
kcf_provider_desc_t *
-kcf_alloc_provider_desc(crypto_provider_info_t *info)
+kcf_alloc_provider_desc(void)
{
- int i, j;
- kcf_provider_desc_t *desc;
- uint_t mech_list_count = info->pi_mech_list_count;
- crypto_ops_t *src_ops = info->pi_ops_vector;
-
- desc = kmem_zalloc(sizeof (kcf_provider_desc_t), KM_SLEEP);
+ kcf_provider_desc_t *desc =
+ kmem_zalloc(sizeof (kcf_provider_desc_t), KM_SLEEP);
- /*
- * pd_description serves two purposes
- * - Appears as a blank padded PKCS#11 style string, that will be
- * returned to applications in CK_SLOT_INFO.slotDescription.
- * This means that we should not have a null character in the
- * first CRYPTO_PROVIDER_DESCR_MAX_LEN bytes.
- * - Appears as a null-terminated string that can be used by
- * other kcf routines.
- *
- * So, we allocate enough room for one extra null terminator
- * which keeps every one happy.
- */
- desc->pd_description = kmem_alloc(CRYPTO_PROVIDER_DESCR_MAX_LEN + 1,
- KM_SLEEP);
- (void) memset(desc->pd_description, ' ',
- CRYPTO_PROVIDER_DESCR_MAX_LEN);
- desc->pd_description[CRYPTO_PROVIDER_DESCR_MAX_LEN] = '\0';
-
- /*
- * Since the framework does not require the ops vector specified
- * by the providers during registration to be persistent,
- * KCF needs to allocate storage where copies of the ops
- * vectors are copied.
- */
- desc->pd_ops_vector = kmem_zalloc(sizeof (crypto_ops_t), KM_SLEEP);
-
- if (info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) {
- allocate_ops_v1(src_ops, desc->pd_ops_vector, &mech_list_count);
- if (info->pi_interface_version >= CRYPTO_SPI_VERSION_2)
- allocate_ops_v2(src_ops, desc->pd_ops_vector);
- if (info->pi_interface_version == CRYPTO_SPI_VERSION_3)
- allocate_ops_v3(src_ops, desc->pd_ops_vector);
- }
-
- desc->pd_mech_list_count = mech_list_count;
- desc->pd_mechanisms = kmem_zalloc(sizeof (crypto_mech_info_t) *
- mech_list_count, KM_SLEEP);
- for (i = 0; i < KCF_OPS_CLASSSIZE; i++)
- for (j = 0; j < KCF_MAXMECHTAB; j++)
+ for (int i = 0; i < KCF_OPS_CLASSSIZE; i++)
+ for (int j = 0; j < KCF_MAXMECHTAB; j++)
desc->pd_mech_indx[i][j] = KCF_INVALID_INDX;
desc->pd_prov_id = KCF_PROVID_INVALID;
desc->pd_state = KCF_PROV_ALLOCATED;
mutex_init(&desc->pd_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&desc->pd_resume_cv, NULL, CV_DEFAULT, NULL);
cv_init(&desc->pd_remove_cv, NULL, CV_DEFAULT, NULL);
return (desc);
@@ -360,7 +220,7 @@ kcf_alloc_provider_desc(crypto_provider_info_t *info)
/*
* Called by KCF_PROV_REFRELE when a provider's reference count drops
* to zero. We free the descriptor when the last reference is released.
- * However, for software providers, we do not free it when there is an
+ * However, we do not free it when there is an
* unregister thread waiting. We signal that thread in this case and
* that thread is responsible for freeing the descriptor.
*/
@@ -368,22 +228,16 @@ void
kcf_provider_zero_refcnt(kcf_provider_desc_t *desc)
{
mutex_enter(&desc->pd_lock);
- switch (desc->pd_prov_type) {
- case CRYPTO_SW_PROVIDER:
- if (desc->pd_state == KCF_PROV_REMOVED ||
- desc->pd_state == KCF_PROV_DISABLED) {
- desc->pd_state = KCF_PROV_FREED;
- cv_broadcast(&desc->pd_remove_cv);
- mutex_exit(&desc->pd_lock);
- break;
- }
- fallthrough;
-
- case CRYPTO_HW_PROVIDER:
- case CRYPTO_LOGICAL_PROVIDER:
+ if (desc->pd_state == KCF_PROV_REMOVED ||
+ desc->pd_state == KCF_PROV_DISABLED) {
+ desc->pd_state = KCF_PROV_FREED;
+ cv_broadcast(&desc->pd_remove_cv);
mutex_exit(&desc->pd_lock);
- kcf_free_provider_desc(desc);
+ return;
}
+
+ mutex_exit(&desc->pd_lock);
+ kcf_free_provider_desc(desc);
}
/*
@@ -406,202 +260,15 @@ kcf_free_provider_desc(kcf_provider_desc_t *desc)
/* free the kernel memory associated with the provider descriptor */
- if (desc->pd_description != NULL)
- kmem_free(desc->pd_description,
- CRYPTO_PROVIDER_DESCR_MAX_LEN + 1);
-
- if (desc->pd_ops_vector != NULL) {
-
- if (desc->pd_ops_vector->co_control_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_control_ops,
- sizeof (crypto_control_ops_t));
-
- if (desc->pd_ops_vector->co_digest_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_digest_ops,
- sizeof (crypto_digest_ops_t));
-
- if (desc->pd_ops_vector->co_cipher_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_cipher_ops,
- sizeof (crypto_cipher_ops_t));
-
- if (desc->pd_ops_vector->co_mac_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_mac_ops,
- sizeof (crypto_mac_ops_t));
-
- if (desc->pd_ops_vector->co_sign_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_sign_ops,
- sizeof (crypto_sign_ops_t));
-
- if (desc->pd_ops_vector->co_verify_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_verify_ops,
- sizeof (crypto_verify_ops_t));
-
- if (desc->pd_ops_vector->co_dual_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_dual_ops,
- sizeof (crypto_dual_ops_t));
-
- if (desc->pd_ops_vector->co_dual_cipher_mac_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_dual_cipher_mac_ops,
- sizeof (crypto_dual_cipher_mac_ops_t));
-
- if (desc->pd_ops_vector->co_random_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_random_ops,
- sizeof (crypto_random_number_ops_t));
-
- if (desc->pd_ops_vector->co_session_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_session_ops,
- sizeof (crypto_session_ops_t));
-
- if (desc->pd_ops_vector->co_object_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_object_ops,
- sizeof (crypto_object_ops_t));
-
- if (desc->pd_ops_vector->co_key_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_key_ops,
- sizeof (crypto_key_ops_t));
-
- if (desc->pd_ops_vector->co_provider_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_provider_ops,
- sizeof (crypto_provider_management_ops_t));
-
- if (desc->pd_ops_vector->co_ctx_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_ctx_ops,
- sizeof (crypto_ctx_ops_t));
-
- if (desc->pd_ops_vector->co_mech_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_mech_ops,
- sizeof (crypto_mech_ops_t));
-
- if (desc->pd_ops_vector->co_nostore_key_ops != NULL)
- kmem_free(desc->pd_ops_vector->co_nostore_key_ops,
- sizeof (crypto_nostore_key_ops_t));
-
- kmem_free(desc->pd_ops_vector, sizeof (crypto_ops_t));
- }
-
- if (desc->pd_mechanisms != NULL)
- /* free the memory associated with the mechanism info's */
- kmem_free(desc->pd_mechanisms, sizeof (crypto_mech_info_t) *
- desc->pd_mech_list_count);
-
- if (desc->pd_sched_info.ks_taskq != NULL)
- taskq_destroy(desc->pd_sched_info.ks_taskq);
-
mutex_destroy(&desc->pd_lock);
- cv_destroy(&desc->pd_resume_cv);
cv_destroy(&desc->pd_remove_cv);
kmem_free(desc, sizeof (kcf_provider_desc_t));
}
/*
- * Returns an array of hardware and logical provider descriptors,
- * a.k.a the PKCS#11 slot list. A REFHOLD is done on each descriptor
- * before the array is returned. The entire table can be freed by
- * calling kcf_free_provider_tab().
- */
-int
-kcf_get_slot_list(uint_t *count, kcf_provider_desc_t ***array,
- boolean_t unverified)
-{
- kcf_provider_desc_t *prov_desc;
- kcf_provider_desc_t **p = NULL;
- char *last;
- uint_t cnt = 0;
- uint_t i, j;
- int rval = CRYPTO_SUCCESS;
- size_t n, final_size;
-
- /* count the providers */
- mutex_enter(&prov_tab_mutex);
- for (i = 0; i < KCF_MAX_PROVIDERS; i++) {
- if ((prov_desc = prov_tab[i]) != NULL &&
- ((prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (prov_desc->pd_flags & CRYPTO_HIDE_PROVIDER) == 0) ||
- prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)) {
- if (KCF_IS_PROV_USABLE(prov_desc) ||
- (unverified && KCF_IS_PROV_UNVERIFIED(prov_desc))) {
- cnt++;
- }
- }
- }
- mutex_exit(&prov_tab_mutex);
-
- if (cnt == 0)
- goto out;
-
- n = cnt * sizeof (kcf_provider_desc_t *);
-again:
- p = kmem_zalloc(n, KM_SLEEP);
-
- /* pointer to last entry in the array */
- last = (char *)&p[cnt-1];
-
- mutex_enter(&prov_tab_mutex);
- /* fill the slot list */
- for (i = 0, j = 0; i < KCF_MAX_PROVIDERS; i++) {
- if ((prov_desc = prov_tab[i]) != NULL &&
- ((prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER &&
- (prov_desc->pd_flags & CRYPTO_HIDE_PROVIDER) == 0) ||
- prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)) {
- if (KCF_IS_PROV_USABLE(prov_desc) ||
- (unverified && KCF_IS_PROV_UNVERIFIED(prov_desc))) {
- if ((char *)&p[j] > last) {
- mutex_exit(&prov_tab_mutex);
- kcf_free_provider_tab(cnt, p);
- n = n << 1;
- cnt = cnt << 1;
- goto again;
- }
- p[j++] = prov_desc;
- KCF_PROV_REFHOLD(prov_desc);
- }
- }
- }
- mutex_exit(&prov_tab_mutex);
-
- final_size = j * sizeof (kcf_provider_desc_t *);
- cnt = j;
- ASSERT(final_size <= n);
-
- /* check if buffer we allocated is too large */
- if (final_size < n) {
- char *final_buffer = NULL;
-
- if (final_size > 0) {
- final_buffer = kmem_alloc(final_size, KM_SLEEP);
- bcopy(p, final_buffer, final_size);
- }
- kmem_free(p, n);
- p = (kcf_provider_desc_t **)final_buffer;
- }
-out:
- *count = cnt;
- *array = p;
- return (rval);
-}
-
-/*
- * Free an array of hardware provider descriptors. A REFRELE
- * is done on each descriptor before the table is freed.
- */
-void
-kcf_free_provider_tab(uint_t count, kcf_provider_desc_t **array)
-{
- kcf_provider_desc_t *prov_desc;
- int i;
-
- for (i = 0; i < count; i++) {
- if ((prov_desc = array[i]) != NULL) {
- KCF_PROV_REFRELE(prov_desc);
- }
- }
- kmem_free(array, count * sizeof (kcf_provider_desc_t *));
-}
-
-/*
* Returns in the location pointed to by pd a pointer to the descriptor
- * for the software provider for the specified mechanism.
+ * for the provider of the specified mechanism.
* The provider descriptor is returned held and it is the caller's
* responsibility to release it when done. The mechanism entry
* is returned if the optional argument mep is non NULL.
@@ -619,24 +286,17 @@ kcf_get_sw_prov(crypto_mech_type_t mech_type, kcf_provider_desc_t **pd,
if (kcf_get_mech_entry(mech_type, &me) != KCF_SUCCESS)
return (CRYPTO_MECHANISM_INVALID);
- /*
- * Get the software provider for this mechanism.
- * Lock the mech_entry until we grab the 'pd'.
- */
- mutex_enter(&me->me_mutex);
-
+ /* Get the provider for this mechanism. */
if (me->me_sw_prov == NULL ||
(*pd = me->me_sw_prov->pm_prov_desc) == NULL) {
- /* no SW provider for this mechanism */
+ /* no provider for this mechanism */
if (log_warn)
- cmn_err(CE_WARN, "no SW provider for \"%s\"\n",
+ cmn_err(CE_WARN, "no provider for \"%s\"\n",
me->me_name);
- mutex_exit(&me->me_mutex);
return (CRYPTO_MECH_NOT_SUPPORTED);
}
KCF_PROV_REFHOLD(*pd);
- mutex_exit(&me->me_mutex);
if (mep != NULL)
*mep = me;
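
The hold/release contract described in the comment above is easy to get
wrong, so here is a minimal caller-side sketch. It assumes the remaining
parameters of kcf_get_sw_prov() are the mechanism-entry out-pointer and the
warning flag seen in the hunk body; use_mechanism() itself is a hypothetical
consumer, not part of the patch.

/*
 * Hypothetical caller of kcf_get_sw_prov(): the descriptor is returned
 * held, so the caller must drop the hold with KCF_PROV_REFRELE().
 */
static int
use_mechanism(crypto_mech_type_t mech_type)
{
	kcf_provider_desc_t *pd;
	kcf_mech_entry_t *me;
	int rv;

	rv = kcf_get_sw_prov(mech_type, &pd, &me, B_TRUE);
	if (rv != CRYPTO_SUCCESS)
		return (rv);	/* invalid mechanism or no provider */

	/* ... submit the operation to pd ... */

	KCF_PROV_REFRELE(pd);	/* drop the hold taken above */
	return (CRYPTO_SUCCESS);
}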
diff --git a/sys/contrib/openzfs/module/icp/core/kcf_sched.c b/sys/contrib/openzfs/module/icp/core/kcf_sched.c
index 81fd15f8ea26..360ecfb2be19 100644
--- a/sys/contrib/openzfs/module/icp/core/kcf_sched.c
+++ b/sys/contrib/openzfs/module/icp/core/kcf_sched.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -35,774 +35,36 @@
#include <sys/crypto/sched_impl.h>
#include <sys/crypto/api.h>
-kcf_global_swq_t *gswq; /* Global software queue */
-
-/* Thread pool related variables */
-static kcf_pool_t *kcfpool; /* Thread pool of kcfd LWPs */
-int kcf_maxthreads = 2;
-int kcf_minthreads = 1;
-int kcf_thr_multiple = 2; /* Boot-time tunable for experimentation */
-static ulong_t kcf_idlethr_timeout;
-#define KCF_DEFAULT_THRTIMEOUT 60000000 /* 60 seconds */
-
/* kmem caches used by the scheduler */
-static kmem_cache_t *kcf_sreq_cache;
-static kmem_cache_t *kcf_areq_cache;
static kmem_cache_t *kcf_context_cache;
-/* Global request ID table */
-static kcf_reqid_table_t *kcf_reqid_table[REQID_TABLES];
-
-/* KCF stats. Not protected. */
-static kcf_stats_t kcf_ksdata = {
- { "total threads in pool", KSTAT_DATA_UINT32},
- { "idle threads in pool", KSTAT_DATA_UINT32},
- { "min threads in pool", KSTAT_DATA_UINT32},
- { "max threads in pool", KSTAT_DATA_UINT32},
- { "requests in gswq", KSTAT_DATA_UINT32},
- { "max requests in gswq", KSTAT_DATA_UINT32},
- { "threads for HW taskq", KSTAT_DATA_UINT32},
- { "minalloc for HW taskq", KSTAT_DATA_UINT32},
- { "maxalloc for HW taskq", KSTAT_DATA_UINT32}
-};
-
-static kstat_t *kcf_misc_kstat = NULL;
-ulong_t kcf_swprov_hndl = 0;
-
-static kcf_areq_node_t *kcf_areqnode_alloc(kcf_provider_desc_t *,
- kcf_context_t *, crypto_call_req_t *, kcf_req_params_t *, boolean_t);
-static int kcf_disp_sw_request(kcf_areq_node_t *);
-static void process_req_hwp(void *);
-static int kcf_enqueue(kcf_areq_node_t *);
-static void kcfpool_alloc(void);
-static void kcf_reqid_delete(kcf_areq_node_t *areq);
-static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq);
-static int kcf_misc_kstat_update(kstat_t *ksp, int rw);
-
/*
* Create a new context.
*/
crypto_ctx_t *
-kcf_new_ctx(crypto_call_req_t *crq, kcf_provider_desc_t *pd,
- crypto_session_id_t sid)
+kcf_new_ctx(kcf_provider_desc_t *pd)
{
crypto_ctx_t *ctx;
kcf_context_t *kcf_ctx;
- kcf_ctx = kmem_cache_alloc(kcf_context_cache,
- (crq == NULL) ? KM_SLEEP : KM_NOSLEEP);
+ kcf_ctx = kmem_cache_alloc(kcf_context_cache, KM_SLEEP);
if (kcf_ctx == NULL)
return (NULL);
/* initialize the context for the consumer */
kcf_ctx->kc_refcnt = 1;
- kcf_ctx->kc_req_chain_first = NULL;
- kcf_ctx->kc_req_chain_last = NULL;
- kcf_ctx->kc_secondctx = NULL;
KCF_PROV_REFHOLD(pd);
kcf_ctx->kc_prov_desc = pd;
kcf_ctx->kc_sw_prov_desc = NULL;
- kcf_ctx->kc_mech = NULL;
ctx = &kcf_ctx->kc_glbl_ctx;
- ctx->cc_provider = pd->pd_prov_handle;
- ctx->cc_session = sid;
ctx->cc_provider_private = NULL;
ctx->cc_framework_private = (void *)kcf_ctx;
- ctx->cc_flags = 0;
- ctx->cc_opstate = NULL;
return (ctx);
}
/*
- * Allocate a new async request node.
- *
- * ictx - Framework private context pointer
- * crq - Has callback function and argument. Should be non NULL.
- * req - The parameters to pass to the SPI
- */
-static kcf_areq_node_t *
-kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx,
- crypto_call_req_t *crq, kcf_req_params_t *req, boolean_t isdual)
-{
- kcf_areq_node_t *arptr, *areq;
-
- ASSERT(crq != NULL);
- arptr = kmem_cache_alloc(kcf_areq_cache, KM_NOSLEEP);
- if (arptr == NULL)
- return (NULL);
-
- arptr->an_state = REQ_ALLOCATED;
- arptr->an_reqarg = *crq;
- arptr->an_params = *req;
- arptr->an_context = ictx;
- arptr->an_isdual = isdual;
-
- arptr->an_next = arptr->an_prev = NULL;
- KCF_PROV_REFHOLD(pd);
- arptr->an_provider = pd;
- arptr->an_tried_plist = NULL;
- arptr->an_refcnt = 1;
- arptr->an_idnext = arptr->an_idprev = NULL;
-
- /*
- * Requests for context-less operations do not use the
- * fields - an_is_my_turn, and an_ctxchain_next.
- */
- if (ictx == NULL)
- return (arptr);
-
- KCF_CONTEXT_REFHOLD(ictx);
- /*
- * Chain this request to the context.
- */
- mutex_enter(&ictx->kc_in_use_lock);
- arptr->an_ctxchain_next = NULL;
- if ((areq = ictx->kc_req_chain_last) == NULL) {
- arptr->an_is_my_turn = B_TRUE;
- ictx->kc_req_chain_last =
- ictx->kc_req_chain_first = arptr;
- } else {
- ASSERT(ictx->kc_req_chain_first != NULL);
- arptr->an_is_my_turn = B_FALSE;
- /* Insert the new request to the end of the chain. */
- areq->an_ctxchain_next = arptr;
- ictx->kc_req_chain_last = arptr;
- }
- mutex_exit(&ictx->kc_in_use_lock);
-
- return (arptr);
-}
-
-/*
- * Queue the request node and do one of the following:
- * - If there is an idle thread signal it to run.
- * - If there is no idle thread and max running threads is not
- * reached, signal the creator thread for more threads.
- *
- * If the two conditions above are not met, we don't need to do
- * anything. The request will be picked up by one of the
- * worker threads when it becomes available.
- */
-static int
-kcf_disp_sw_request(kcf_areq_node_t *areq)
-{
- int err;
- int cnt = 0;
-
- if ((err = kcf_enqueue(areq)) != 0)
- return (err);
-
- if (kcfpool->kp_idlethreads > 0) {
- /* Signal an idle thread to run */
- mutex_enter(&gswq->gs_lock);
- cv_signal(&gswq->gs_cv);
- mutex_exit(&gswq->gs_lock);
-
- return (CRYPTO_QUEUED);
- }
-
- /*
- * We keep the number of running threads to be at
- * kcf_minthreads to reduce gs_lock contention.
- */
- cnt = kcf_minthreads -
- (kcfpool->kp_threads - kcfpool->kp_blockedthreads);
- if (cnt > 0) {
- /*
- * The following ensures the number of threads in pool
- * does not exceed kcf_maxthreads.
- */
- cnt = MIN(cnt, kcf_maxthreads - (int)kcfpool->kp_threads);
- if (cnt > 0) {
- /* Signal the creator thread for more threads */
- mutex_enter(&kcfpool->kp_user_lock);
- if (!kcfpool->kp_signal_create_thread) {
- kcfpool->kp_signal_create_thread = B_TRUE;
- kcfpool->kp_nthrs = cnt;
- cv_signal(&kcfpool->kp_user_cv);
- }
- mutex_exit(&kcfpool->kp_user_lock);
- }
- }
-
- return (CRYPTO_QUEUED);
-}
-
-/*
- * This routine is called by the taskq associated with
- * each hardware provider. We notify the kernel consumer
- * via the callback routine in case of CRYPTO_SUCCESS or
- * a failure.
- *
- * A request can be of type kcf_areq_node_t or of type
- * kcf_sreq_node_t.
- */
-static void
-process_req_hwp(void *ireq)
-{
- int error = 0;
- crypto_ctx_t *ctx;
- kcf_call_type_t ctype;
- kcf_provider_desc_t *pd;
- kcf_areq_node_t *areq = (kcf_areq_node_t *)ireq;
- kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)ireq;
-
- pd = ((ctype = GET_REQ_TYPE(ireq)) == CRYPTO_SYNCH) ?
- sreq->sn_provider : areq->an_provider;
-
- /*
- * Wait if flow control is in effect for the provider. A
- * CRYPTO_PROVIDER_READY or CRYPTO_PROVIDER_FAILED
- * notification will signal us. We also get signaled if
- * the provider is unregistering.
- */
- if (pd->pd_state == KCF_PROV_BUSY) {
- mutex_enter(&pd->pd_lock);
- while (pd->pd_state == KCF_PROV_BUSY)
- cv_wait(&pd->pd_resume_cv, &pd->pd_lock);
- mutex_exit(&pd->pd_lock);
- }
-
- /*
- * Bump the internal reference count while the request is being
- * processed. This is how we know when it's safe to unregister
- * a provider. This step must precede the pd_state check below.
- */
- KCF_PROV_IREFHOLD(pd);
-
- /*
- * Fail the request if the provider has failed. We return a
- * recoverable error and the notified clients attempt any
- * recovery. For async clients this is done in kcf_aop_done()
- * and for sync clients it is done in the k-api routines.
- */
- if (pd->pd_state >= KCF_PROV_FAILED) {
- error = CRYPTO_DEVICE_ERROR;
- goto bail;
- }
-
- if (ctype == CRYPTO_SYNCH) {
- mutex_enter(&sreq->sn_lock);
- sreq->sn_state = REQ_INPROGRESS;
- mutex_exit(&sreq->sn_lock);
-
- ctx = sreq->sn_context ? &sreq->sn_context->kc_glbl_ctx : NULL;
- error = common_submit_request(sreq->sn_provider, ctx,
- sreq->sn_params, sreq);
- } else {
- kcf_context_t *ictx;
- ASSERT(ctype == CRYPTO_ASYNCH);
-
- /*
- * We are in the per-hardware provider thread context and
- * hence can sleep. Note that the caller would have done
- * a taskq_dispatch(..., TQ_NOSLEEP) and would have returned.
- */
- ctx = (ictx = areq->an_context) ? &ictx->kc_glbl_ctx : NULL;
-
- mutex_enter(&areq->an_lock);
- /*
- * We need to maintain ordering for multi-part requests.
- * an_is_my_turn is set to B_TRUE initially for a request
- * when it is enqueued and there are no other requests
- * for that context. It is set later from kcf_aop_done() when
- * the request before us in the chain of requests for the
- * context completes. We get signaled at that point.
- */
- if (ictx != NULL) {
- ASSERT(ictx->kc_prov_desc == areq->an_provider);
-
- while (areq->an_is_my_turn == B_FALSE) {
- cv_wait(&areq->an_turn_cv, &areq->an_lock);
- }
- }
- areq->an_state = REQ_INPROGRESS;
- mutex_exit(&areq->an_lock);
-
- error = common_submit_request(areq->an_provider, ctx,
- &areq->an_params, areq);
- }
-
-bail:
- if (error == CRYPTO_QUEUED) {
- /*
- * The request is queued by the provider and we should
- * get a crypto_op_notification() from the provider later.
- * We notify the consumer at that time.
- */
- return;
- } else { /* CRYPTO_SUCCESS or other failure */
- KCF_PROV_IREFRELE(pd);
- if (ctype == CRYPTO_SYNCH)
- kcf_sop_done(sreq, error);
- else
- kcf_aop_done(areq, error);
- }
-}
-
-/*
- * This routine checks if a request can be retried on another
- * provider. If true, mech1 is initialized to point to the mechanism
- * structure. mech2 is also initialized in case of a dual operation. fg
- * is initialized to the correct crypto_func_group_t bit flag. They are
- * initialized by this routine, so that the caller can pass them to a
- * kcf_get_mech_provider() or kcf_get_dual_provider() with no further change.
- *
- * We check that the request is for a init or atomic routine and that
- * it is for one of the operation groups used from k-api .
- */
-static boolean_t
-can_resubmit(kcf_areq_node_t *areq, crypto_mechanism_t **mech1,
- crypto_mechanism_t **mech2, crypto_func_group_t *fg)
-{
- kcf_req_params_t *params;
- kcf_op_type_t optype;
-
- params = &areq->an_params;
- optype = params->rp_optype;
-
- if (!(IS_INIT_OP(optype) || IS_ATOMIC_OP(optype)))
- return (B_FALSE);
-
- switch (params->rp_opgrp) {
- case KCF_OG_DIGEST: {
- kcf_digest_ops_params_t *dops = &params->rp_u.digest_params;
-
- dops->do_mech.cm_type = dops->do_framework_mechtype;
- *mech1 = &dops->do_mech;
- *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_DIGEST :
- CRYPTO_FG_DIGEST_ATOMIC;
- break;
- }
-
- case KCF_OG_MAC: {
- kcf_mac_ops_params_t *mops = &params->rp_u.mac_params;
-
- mops->mo_mech.cm_type = mops->mo_framework_mechtype;
- *mech1 = &mops->mo_mech;
- *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC :
- CRYPTO_FG_MAC_ATOMIC;
- break;
- }
-
- case KCF_OG_SIGN: {
- kcf_sign_ops_params_t *sops = &params->rp_u.sign_params;
-
- sops->so_mech.cm_type = sops->so_framework_mechtype;
- *mech1 = &sops->so_mech;
- switch (optype) {
- case KCF_OP_INIT:
- *fg = CRYPTO_FG_SIGN;
- break;
- case KCF_OP_ATOMIC:
- *fg = CRYPTO_FG_SIGN_ATOMIC;
- break;
- default:
- ASSERT(optype == KCF_OP_SIGN_RECOVER_ATOMIC);
- *fg = CRYPTO_FG_SIGN_RECOVER_ATOMIC;
- }
- break;
- }
-
- case KCF_OG_VERIFY: {
- kcf_verify_ops_params_t *vops = &params->rp_u.verify_params;
-
- vops->vo_mech.cm_type = vops->vo_framework_mechtype;
- *mech1 = &vops->vo_mech;
- switch (optype) {
- case KCF_OP_INIT:
- *fg = CRYPTO_FG_VERIFY;
- break;
- case KCF_OP_ATOMIC:
- *fg = CRYPTO_FG_VERIFY_ATOMIC;
- break;
- default:
- ASSERT(optype == KCF_OP_VERIFY_RECOVER_ATOMIC);
- *fg = CRYPTO_FG_VERIFY_RECOVER_ATOMIC;
- }
- break;
- }
-
- case KCF_OG_ENCRYPT: {
- kcf_encrypt_ops_params_t *eops = &params->rp_u.encrypt_params;
-
- eops->eo_mech.cm_type = eops->eo_framework_mechtype;
- *mech1 = &eops->eo_mech;
- *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT :
- CRYPTO_FG_ENCRYPT_ATOMIC;
- break;
- }
-
- case KCF_OG_DECRYPT: {
- kcf_decrypt_ops_params_t *dcrops = &params->rp_u.decrypt_params;
-
- dcrops->dop_mech.cm_type = dcrops->dop_framework_mechtype;
- *mech1 = &dcrops->dop_mech;
- *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_DECRYPT :
- CRYPTO_FG_DECRYPT_ATOMIC;
- break;
- }
-
- case KCF_OG_ENCRYPT_MAC: {
- kcf_encrypt_mac_ops_params_t *eops =
- &params->rp_u.encrypt_mac_params;
-
- eops->em_encr_mech.cm_type = eops->em_framework_encr_mechtype;
- *mech1 = &eops->em_encr_mech;
- eops->em_mac_mech.cm_type = eops->em_framework_mac_mechtype;
- *mech2 = &eops->em_mac_mech;
- *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT_MAC :
- CRYPTO_FG_ENCRYPT_MAC_ATOMIC;
- break;
- }
-
- case KCF_OG_MAC_DECRYPT: {
- kcf_mac_decrypt_ops_params_t *dops =
- &params->rp_u.mac_decrypt_params;
-
- dops->md_mac_mech.cm_type = dops->md_framework_mac_mechtype;
- *mech1 = &dops->md_mac_mech;
- dops->md_decr_mech.cm_type = dops->md_framework_decr_mechtype;
- *mech2 = &dops->md_decr_mech;
- *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC_DECRYPT :
- CRYPTO_FG_MAC_DECRYPT_ATOMIC;
- break;
- }
-
- default:
- return (B_FALSE);
- }
-
- return (B_TRUE);
-}
-
-/*
- * This routine is called when a request to a provider has failed
- * with a recoverable error. This routine tries to find another provider
- * and dispatches the request to the new provider, if one is available.
- * We reuse the request structure.
- *
- * A return value of NULL from kcf_get_mech_provider() indicates
- * we have tried the last provider.
- */
-static int
-kcf_resubmit_request(kcf_areq_node_t *areq)
-{
- int error = CRYPTO_FAILED;
- kcf_context_t *ictx;
- kcf_provider_desc_t *old_pd;
- kcf_provider_desc_t *new_pd;
- crypto_mechanism_t *mech1 = NULL, *mech2 = NULL;
- crypto_mech_type_t prov_mt1, prov_mt2;
- crypto_func_group_t fg = 0;
-
- if (!can_resubmit(areq, &mech1, &mech2, &fg))
- return (error);
-
- old_pd = areq->an_provider;
- /*
- * Add old_pd to the list of providers already tried. We release
- * the hold on old_pd (from the earlier kcf_get_mech_provider()) in
- * kcf_free_triedlist().
- */
- if (kcf_insert_triedlist(&areq->an_tried_plist, old_pd,
- KM_NOSLEEP) == NULL)
- return (error);
-
- if (mech1 && !mech2) {
- new_pd = kcf_get_mech_provider(mech1->cm_type, NULL, &error,
- areq->an_tried_plist, fg,
- (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0);
- } else {
- ASSERT(mech1 != NULL && mech2 != NULL);
-
- new_pd = kcf_get_dual_provider(mech1, mech2, NULL, &prov_mt1,
- &prov_mt2, &error, areq->an_tried_plist, fg, fg,
- (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0);
- }
-
- if (new_pd == NULL)
- return (error);
-
- /*
- * We reuse the old context by resetting provider specific
- * fields in it.
- */
- if ((ictx = areq->an_context) != NULL) {
- crypto_ctx_t *ctx;
-
- ASSERT(old_pd == ictx->kc_prov_desc);
- KCF_PROV_REFRELE(ictx->kc_prov_desc);
- KCF_PROV_REFHOLD(new_pd);
- ictx->kc_prov_desc = new_pd;
-
- ctx = &ictx->kc_glbl_ctx;
- ctx->cc_provider = new_pd->pd_prov_handle;
- ctx->cc_session = new_pd->pd_sid;
- ctx->cc_provider_private = NULL;
- }
-
- /* We reuse areq. by resetting the provider and context fields. */
- KCF_PROV_REFRELE(old_pd);
- KCF_PROV_REFHOLD(new_pd);
- areq->an_provider = new_pd;
- mutex_enter(&areq->an_lock);
- areq->an_state = REQ_WAITING;
- mutex_exit(&areq->an_lock);
-
- switch (new_pd->pd_prov_type) {
- case CRYPTO_SW_PROVIDER:
- error = kcf_disp_sw_request(areq);
- break;
-
- case CRYPTO_HW_PROVIDER: {
- taskq_t *taskq = new_pd->pd_sched_info.ks_taskq;
-
- if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) ==
- TASKQID_INVALID) {
- error = CRYPTO_HOST_MEMORY;
- } else {
- error = CRYPTO_QUEUED;
- }
-
- break;
- default:
- break;
- }
- }
-
- return (error);
-}
-
-static inline int EMPTY_TASKQ(taskq_t *tq)
-{
-#ifdef _KERNEL
- return (tq->tq_lowest_id == tq->tq_next_id);
-#else
- return (tq->tq_task.tqent_next == &tq->tq_task || tq->tq_active == 0);
-#endif
-}
-
-/*
- * Routine called by both ioctl and k-api. The consumer should
- * bundle the parameters into a kcf_req_params_t structure. A bunch
- * of macros are available in ops_impl.h for this bundling. They are:
- *
- * KCF_WRAP_DIGEST_OPS_PARAMS()
- * KCF_WRAP_MAC_OPS_PARAMS()
- * KCF_WRAP_ENCRYPT_OPS_PARAMS()
- * KCF_WRAP_DECRYPT_OPS_PARAMS() ... etc.
- *
- * It is the caller's responsibility to free the ctx argument when
- * appropriate. See the KCF_CONTEXT_COND_RELEASE macro for details.
- */
-int
-kcf_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
- crypto_call_req_t *crq, kcf_req_params_t *params, boolean_t cont)
-{
- int error = CRYPTO_SUCCESS;
- kcf_areq_node_t *areq;
- kcf_sreq_node_t *sreq;
- kcf_context_t *kcf_ctx;
- taskq_t *taskq = pd->pd_sched_info.ks_taskq;
-
- kcf_ctx = ctx ? (kcf_context_t *)ctx->cc_framework_private : NULL;
-
- /* Synchronous cases */
- if (crq == NULL) {
- switch (pd->pd_prov_type) {
- case CRYPTO_SW_PROVIDER:
- error = common_submit_request(pd, ctx, params,
- KCF_RHNDL(KM_SLEEP));
- break;
-
- case CRYPTO_HW_PROVIDER:
- /*
- * Special case for CRYPTO_SYNCHRONOUS providers that
- * never return a CRYPTO_QUEUED error. We skip any
- * request allocation and call the SPI directly.
- */
- if ((pd->pd_flags & CRYPTO_SYNCHRONOUS) &&
- EMPTY_TASKQ(taskq)) {
- KCF_PROV_IREFHOLD(pd);
- if (pd->pd_state == KCF_PROV_READY) {
- error = common_submit_request(pd, ctx,
- params, KCF_RHNDL(KM_SLEEP));
- KCF_PROV_IREFRELE(pd);
- ASSERT(error != CRYPTO_QUEUED);
- break;
- }
- KCF_PROV_IREFRELE(pd);
- }
-
- sreq = kmem_cache_alloc(kcf_sreq_cache, KM_SLEEP);
- sreq->sn_state = REQ_ALLOCATED;
- sreq->sn_rv = CRYPTO_FAILED;
- sreq->sn_params = params;
-
- /*
- * Note that we do not need to hold the context
- * for synchronous case as the context will never
- * become invalid underneath us. We do not need to hold
- * the provider here either as the caller has a hold.
- */
- sreq->sn_context = kcf_ctx;
- ASSERT(KCF_PROV_REFHELD(pd));
- sreq->sn_provider = pd;
-
- ASSERT(taskq != NULL);
- /*
- * Call the SPI directly if the taskq is empty and the
- * provider is not busy, else dispatch to the taskq.
- * Calling directly is fine as this is the synchronous
- * case. This is unlike the asynchronous case where we
- * must always dispatch to the taskq.
- */
- if (EMPTY_TASKQ(taskq) &&
- pd->pd_state == KCF_PROV_READY) {
- process_req_hwp(sreq);
- } else {
- /*
- * We can not tell from taskq_dispatch() return
- * value if we exceeded maxalloc. Hence the
- * check here. Since we are allowed to wait in
- * the synchronous case, we wait for the taskq
- * to become empty.
- */
- if (taskq->tq_nalloc >= crypto_taskq_maxalloc) {
- taskq_wait(taskq);
- }
-
- (void) taskq_dispatch(taskq, process_req_hwp,
- sreq, TQ_SLEEP);
- }
-
- /*
- * Wait for the notification to arrive,
- * if the operation is not done yet.
- * Bug# 4722589 will make the wait a cv_wait_sig().
- */
- mutex_enter(&sreq->sn_lock);
- while (sreq->sn_state < REQ_DONE)
- cv_wait(&sreq->sn_cv, &sreq->sn_lock);
- mutex_exit(&sreq->sn_lock);
-
- error = sreq->sn_rv;
- kmem_cache_free(kcf_sreq_cache, sreq);
-
- break;
-
- default:
- error = CRYPTO_FAILED;
- break;
- }
-
- } else { /* Asynchronous cases */
- switch (pd->pd_prov_type) {
- case CRYPTO_SW_PROVIDER:
- if (!(crq->cr_flag & CRYPTO_ALWAYS_QUEUE)) {
- /*
- * This case has less overhead since there is
- * no switching of context.
- */
- error = common_submit_request(pd, ctx, params,
- KCF_RHNDL(KM_NOSLEEP));
- } else {
- /*
- * CRYPTO_ALWAYS_QUEUE is set. We need to
- * queue the request and return.
- */
- areq = kcf_areqnode_alloc(pd, kcf_ctx, crq,
- params, cont);
- if (areq == NULL)
- error = CRYPTO_HOST_MEMORY;
- else {
- if (!(crq->cr_flag
- & CRYPTO_SKIP_REQID)) {
- /*
- * Set the request handle. This handle
- * is used for any crypto_cancel_req(9f)
- * calls from the consumer. We have to
- * do this before dispatching the
- * request.
- */
- crq->cr_reqid = kcf_reqid_insert(areq);
- }
-
- error = kcf_disp_sw_request(areq);
- /*
- * There is an error processing this
- * request. Remove the handle and
- * release the request structure.
- */
- if (error != CRYPTO_QUEUED) {
- if (!(crq->cr_flag
- & CRYPTO_SKIP_REQID))
- kcf_reqid_delete(areq);
- KCF_AREQ_REFRELE(areq);
- }
- }
- }
- break;
-
- case CRYPTO_HW_PROVIDER:
- /*
- * We need to queue the request and return.
- */
- areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params,
- cont);
- if (areq == NULL) {
- error = CRYPTO_HOST_MEMORY;
- goto done;
- }
-
- ASSERT(taskq != NULL);
- /*
- * We can not tell from taskq_dispatch() return
- * value if we exceeded maxalloc. Hence the check
- * here.
- */
- if (taskq->tq_nalloc >= crypto_taskq_maxalloc) {
- error = CRYPTO_BUSY;
- KCF_AREQ_REFRELE(areq);
- goto done;
- }
-
- if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) {
- /*
- * Set the request handle. This handle is used
- * for any crypto_cancel_req(9f) calls from the
- * consumer. We have to do this before dispatching
- * the request.
- */
- crq->cr_reqid = kcf_reqid_insert(areq);
- }
-
- if (taskq_dispatch(taskq,
- process_req_hwp, areq, TQ_NOSLEEP) ==
- TASKQID_INVALID) {
- error = CRYPTO_HOST_MEMORY;
- if (!(crq->cr_flag & CRYPTO_SKIP_REQID))
- kcf_reqid_delete(areq);
- KCF_AREQ_REFRELE(areq);
- } else {
- error = CRYPTO_QUEUED;
- }
- break;
-
- default:
- error = CRYPTO_FAILED;
- break;
- }
- }
-
-done:
- return (error);
-}
-
-/*
* We're done with this framework context, so free it. Note that freeing
* framework context (kcf_context) frees the global context (crypto_ctx).
*
@@ -817,12 +79,6 @@ kcf_free_context(kcf_context_t *kcf_ctx)
{
kcf_provider_desc_t *pd = kcf_ctx->kc_prov_desc;
crypto_ctx_t *gctx = &kcf_ctx->kc_glbl_ctx;
- kcf_context_t *kcf_secondctx = kcf_ctx->kc_secondctx;
-
- /* Release the second context, if any */
-
- if (kcf_secondctx != NULL)
- KCF_CONTEXT_REFRELE(kcf_secondctx);
if (gctx->cc_provider_private != NULL) {
mutex_enter(&pd->pd_lock);
@@ -844,250 +100,37 @@ kcf_free_context(kcf_context_t *kcf_ctx)
/* kcf_ctx->kc_prov_desc has a hold on pd */
KCF_PROV_REFRELE(kcf_ctx->kc_prov_desc);
- /* check if this context is shared with a software provider */
- if ((gctx->cc_flags & CRYPTO_INIT_OPSTATE) &&
- kcf_ctx->kc_sw_prov_desc != NULL) {
- KCF_PROV_REFRELE(kcf_ctx->kc_sw_prov_desc);
- }
-
kmem_cache_free(kcf_context_cache, kcf_ctx);
}
/*
- * Free the request after releasing all the holds.
- */
-void
-kcf_free_req(kcf_areq_node_t *areq)
-{
- KCF_PROV_REFRELE(areq->an_provider);
- if (areq->an_context != NULL)
- KCF_CONTEXT_REFRELE(areq->an_context);
-
- if (areq->an_tried_plist != NULL)
- kcf_free_triedlist(areq->an_tried_plist);
- kmem_cache_free(kcf_areq_cache, areq);
-}
-
-/*
- * Utility routine to remove a request from the chain of requests
- * hanging off a context.
- */
-static void
-kcf_removereq_in_ctxchain(kcf_context_t *ictx, kcf_areq_node_t *areq)
-{
- kcf_areq_node_t *cur, *prev;
-
- /*
- * Get context lock, search for areq in the chain and remove it.
- */
- ASSERT(ictx != NULL);
- mutex_enter(&ictx->kc_in_use_lock);
- prev = cur = ictx->kc_req_chain_first;
-
- while (cur != NULL) {
- if (cur == areq) {
- if (prev == cur) {
- if ((ictx->kc_req_chain_first =
- cur->an_ctxchain_next) == NULL)
- ictx->kc_req_chain_last = NULL;
- } else {
- if (cur == ictx->kc_req_chain_last)
- ictx->kc_req_chain_last = prev;
- prev->an_ctxchain_next = cur->an_ctxchain_next;
- }
-
- break;
- }
- prev = cur;
- cur = cur->an_ctxchain_next;
- }
- mutex_exit(&ictx->kc_in_use_lock);
-}
-
-/*
- * Remove the specified node from the global software queue.
- *
- * The caller must hold the queue lock and request lock (an_lock).
- */
-static void
-kcf_remove_node(kcf_areq_node_t *node)
-{
- kcf_areq_node_t *nextp = node->an_next;
- kcf_areq_node_t *prevp = node->an_prev;
-
- if (nextp != NULL)
- nextp->an_prev = prevp;
- else
- gswq->gs_last = prevp;
-
- if (prevp != NULL)
- prevp->an_next = nextp;
- else
- gswq->gs_first = nextp;
-
- node->an_state = REQ_CANCELED;
-}
-
-/*
- * Add the request node to the end of the global software queue.
- *
- * The caller should not hold the queue lock. Returns 0 if the
- * request is successfully queued. Returns CRYPTO_BUSY if the limit
- * on the number of jobs is exceeded.
- */
-static int
-kcf_enqueue(kcf_areq_node_t *node)
-{
- kcf_areq_node_t *tnode;
-
- mutex_enter(&gswq->gs_lock);
-
- if (gswq->gs_njobs >= gswq->gs_maxjobs) {
- mutex_exit(&gswq->gs_lock);
- return (CRYPTO_BUSY);
- }
-
- if (gswq->gs_last == NULL) {
- gswq->gs_first = gswq->gs_last = node;
- } else {
- ASSERT(gswq->gs_last->an_next == NULL);
- tnode = gswq->gs_last;
- tnode->an_next = node;
- gswq->gs_last = node;
- node->an_prev = tnode;
- }
-
- gswq->gs_njobs++;
-
- /* an_lock not needed here as we hold gs_lock */
- node->an_state = REQ_WAITING;
-
- mutex_exit(&gswq->gs_lock);
-
- return (0);
-}
-
-/*
- * kmem_cache_alloc constructor for sync request structure.
- */
-/* ARGSUSED */
-static int
-kcf_sreq_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
- kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf;
-
- sreq->sn_type = CRYPTO_SYNCH;
- cv_init(&sreq->sn_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&sreq->sn_lock, NULL, MUTEX_DEFAULT, NULL);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-kcf_sreq_cache_destructor(void *buf, void *cdrarg)
-{
- kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf;
-
- mutex_destroy(&sreq->sn_lock);
- cv_destroy(&sreq->sn_cv);
-}
-
-/*
- * kmem_cache_alloc constructor for async request structure.
- */
-/* ARGSUSED */
-static int
-kcf_areq_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
- kcf_areq_node_t *areq = (kcf_areq_node_t *)buf;
-
- areq->an_type = CRYPTO_ASYNCH;
- areq->an_refcnt = 0;
- mutex_init(&areq->an_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&areq->an_done, NULL, CV_DEFAULT, NULL);
- cv_init(&areq->an_turn_cv, NULL, CV_DEFAULT, NULL);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-kcf_areq_cache_destructor(void *buf, void *cdrarg)
-{
- kcf_areq_node_t *areq = (kcf_areq_node_t *)buf;
-
- ASSERT(areq->an_refcnt == 0);
- mutex_destroy(&areq->an_lock);
- cv_destroy(&areq->an_done);
- cv_destroy(&areq->an_turn_cv);
-}
-
-/*
* kmem_cache_alloc constructor for kcf_context structure.
*/
-/* ARGSUSED */
static int
kcf_context_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
+ (void) cdrarg, (void) kmflags;
kcf_context_t *kctx = (kcf_context_t *)buf;
kctx->kc_refcnt = 0;
- mutex_init(&kctx->kc_in_use_lock, NULL, MUTEX_DEFAULT, NULL);
return (0);
}
-/* ARGSUSED */
static void
kcf_context_cache_destructor(void *buf, void *cdrarg)
{
+ (void) cdrarg;
kcf_context_t *kctx = (kcf_context_t *)buf;
ASSERT(kctx->kc_refcnt == 0);
- mutex_destroy(&kctx->kc_in_use_lock);
}
void
kcf_sched_destroy(void)
{
- int i;
-
- if (kcf_misc_kstat)
- kstat_delete(kcf_misc_kstat);
-
- if (kcfpool) {
- mutex_destroy(&kcfpool->kp_thread_lock);
- cv_destroy(&kcfpool->kp_nothr_cv);
- mutex_destroy(&kcfpool->kp_user_lock);
- cv_destroy(&kcfpool->kp_user_cv);
-
- kmem_free(kcfpool, sizeof (kcf_pool_t));
- }
-
- for (i = 0; i < REQID_TABLES; i++) {
- if (kcf_reqid_table[i]) {
- mutex_destroy(&(kcf_reqid_table[i]->rt_lock));
- kmem_free(kcf_reqid_table[i],
- sizeof (kcf_reqid_table_t));
- }
- }
-
- if (gswq) {
- mutex_destroy(&gswq->gs_lock);
- cv_destroy(&gswq->gs_cv);
- kmem_free(gswq, sizeof (kcf_global_swq_t));
- }
-
if (kcf_context_cache)
kmem_cache_destroy(kcf_context_cache);
- if (kcf_areq_cache)
- kmem_cache_destroy(kcf_areq_cache);
- if (kcf_sreq_cache)
- kmem_cache_destroy(kcf_sreq_cache);
-
- mutex_destroy(&ntfy_list_lock);
- cv_destroy(&ntfy_list_cv);
}
/*
@@ -1096,9 +139,6 @@ kcf_sched_destroy(void)
void
kcf_sched_init(void)
{
- int i;
- kcf_reqid_table_t *rt;
-
/*
* Create all the kmem caches needed by the framework. We set the
* align argument to 64, to get a slab aligned to 64-byte as well as
@@ -1106,675 +146,7 @@ kcf_sched_init(void)
* This helps to avoid false sharing as this is the size of the
* CPU cache line.
*/
- kcf_sreq_cache = kmem_cache_create("kcf_sreq_cache",
- sizeof (struct kcf_sreq_node), 64, kcf_sreq_cache_constructor,
- kcf_sreq_cache_destructor, NULL, NULL, NULL, 0);
-
- kcf_areq_cache = kmem_cache_create("kcf_areq_cache",
- sizeof (struct kcf_areq_node), 64, kcf_areq_cache_constructor,
- kcf_areq_cache_destructor, NULL, NULL, NULL, 0);
-
kcf_context_cache = kmem_cache_create("kcf_context_cache",
sizeof (struct kcf_context), 64, kcf_context_cache_constructor,
kcf_context_cache_destructor, NULL, NULL, NULL, 0);
-
- gswq = kmem_alloc(sizeof (kcf_global_swq_t), KM_SLEEP);
-
- mutex_init(&gswq->gs_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&gswq->gs_cv, NULL, CV_DEFAULT, NULL);
- gswq->gs_njobs = 0;
- gswq->gs_maxjobs = kcf_maxthreads * crypto_taskq_maxalloc;
- gswq->gs_first = gswq->gs_last = NULL;
-
- /* Initialize the global reqid table */
- for (i = 0; i < REQID_TABLES; i++) {
- rt = kmem_zalloc(sizeof (kcf_reqid_table_t), KM_SLEEP);
- kcf_reqid_table[i] = rt;
- mutex_init(&rt->rt_lock, NULL, MUTEX_DEFAULT, NULL);
- rt->rt_curid = i;
- }
-
- /* Allocate and initialize the thread pool */
- kcfpool_alloc();
-
- /* Initialize the event notification list variables */
- mutex_init(&ntfy_list_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&ntfy_list_cv, NULL, CV_DEFAULT, NULL);
-
- /* Create the kcf kstat */
- kcf_misc_kstat = kstat_create("kcf", 0, "framework_stats", "crypto",
- KSTAT_TYPE_NAMED, sizeof (kcf_stats_t) / sizeof (kstat_named_t),
- KSTAT_FLAG_VIRTUAL);
-
- if (kcf_misc_kstat != NULL) {
- kcf_misc_kstat->ks_data = &kcf_ksdata;
- kcf_misc_kstat->ks_update = kcf_misc_kstat_update;
- kstat_install(kcf_misc_kstat);
- }
-}
-
-/*
- * Signal the waiting sync client.
- */
-void
-kcf_sop_done(kcf_sreq_node_t *sreq, int error)
-{
- mutex_enter(&sreq->sn_lock);
- sreq->sn_state = REQ_DONE;
- sreq->sn_rv = error;
- cv_signal(&sreq->sn_cv);
- mutex_exit(&sreq->sn_lock);
-}
-
-/*
- * Callback the async client with the operation status.
- * We free the async request node and possibly the context.
- * We also handle any chain of requests hanging off of
- * the context.
- */
-void
-kcf_aop_done(kcf_areq_node_t *areq, int error)
-{
- kcf_op_type_t optype;
- boolean_t skip_notify = B_FALSE;
- kcf_context_t *ictx;
- kcf_areq_node_t *nextreq;
-
- /*
- * Handle recoverable errors. This has to be done first
- * before doing anything else in this routine so that
- * we do not change the state of the request.
- */
- if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) {
- /*
- * We try another provider, if one is available. Else
- * we continue with the failure notification to the
- * client.
- */
- if (kcf_resubmit_request(areq) == CRYPTO_QUEUED)
- return;
- }
-
- mutex_enter(&areq->an_lock);
- areq->an_state = REQ_DONE;
- mutex_exit(&areq->an_lock);
-
- optype = (&areq->an_params)->rp_optype;
- if ((ictx = areq->an_context) != NULL) {
- /*
- * A request after it is removed from the request
- * queue, still stays on a chain of requests hanging
- * of its context structure. It needs to be removed
- * from this chain at this point.
- */
- mutex_enter(&ictx->kc_in_use_lock);
- nextreq = areq->an_ctxchain_next;
- if (nextreq != NULL) {
- mutex_enter(&nextreq->an_lock);
- nextreq->an_is_my_turn = B_TRUE;
- cv_signal(&nextreq->an_turn_cv);
- mutex_exit(&nextreq->an_lock);
- }
-
- ictx->kc_req_chain_first = nextreq;
- if (nextreq == NULL)
- ictx->kc_req_chain_last = NULL;
- mutex_exit(&ictx->kc_in_use_lock);
-
- if (IS_SINGLE_OP(optype) || IS_FINAL_OP(optype)) {
- ASSERT(nextreq == NULL);
- KCF_CONTEXT_REFRELE(ictx);
- } else if (error != CRYPTO_SUCCESS && IS_INIT_OP(optype)) {
- /*
- * NOTE - We do not release the context in case of update
- * operations. We require the consumer to free it explicitly,
- * in case it wants to abandon an update operation. This is done
- * as there may be mechanisms in ECB mode that can continue
- * even if an operation on a block fails.
- */
- KCF_CONTEXT_REFRELE(ictx);
- }
- }
-
- /* Deal with the internal continuation to this request first */
-
- if (areq->an_isdual) {
- kcf_dual_req_t *next_arg;
- next_arg = (kcf_dual_req_t *)areq->an_reqarg.cr_callback_arg;
- next_arg->kr_areq = areq;
- KCF_AREQ_REFHOLD(areq);
- areq->an_isdual = B_FALSE;
-
- NOTIFY_CLIENT(areq, error);
- return;
- }
-
- /*
- * If CRYPTO_NOTIFY_OPDONE flag is set, we should notify
- * always. If this flag is clear, we skip the notification
- * provided there are no errors. We check this flag for only
- * init or update operations. It is ignored for single, final or
- * atomic operations.
- */
- skip_notify = (IS_UPDATE_OP(optype) || IS_INIT_OP(optype)) &&
- (!(areq->an_reqarg.cr_flag & CRYPTO_NOTIFY_OPDONE)) &&
- (error == CRYPTO_SUCCESS);
-
- if (!skip_notify) {
- NOTIFY_CLIENT(areq, error);
- }
-
- if (!(areq->an_reqarg.cr_flag & CRYPTO_SKIP_REQID))
- kcf_reqid_delete(areq);
-
- KCF_AREQ_REFRELE(areq);
-}
-
-/*
- * Allocate the thread pool and initialize all the fields.
- */
-static void
-kcfpool_alloc()
-{
- kcfpool = kmem_alloc(sizeof (kcf_pool_t), KM_SLEEP);
-
- kcfpool->kp_threads = kcfpool->kp_idlethreads = 0;
- kcfpool->kp_blockedthreads = 0;
- kcfpool->kp_signal_create_thread = B_FALSE;
- kcfpool->kp_nthrs = 0;
- kcfpool->kp_user_waiting = B_FALSE;
-
- mutex_init(&kcfpool->kp_thread_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&kcfpool->kp_nothr_cv, NULL, CV_DEFAULT, NULL);
-
- mutex_init(&kcfpool->kp_user_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&kcfpool->kp_user_cv, NULL, CV_DEFAULT, NULL);
-
- kcf_idlethr_timeout = KCF_DEFAULT_THRTIMEOUT;
-}
-
-/*
- * Insert the async request in the hash table after assigning it
- * an ID. Returns the ID.
- *
- * The ID is used by the caller to pass as an argument to a
- * cancel_req() routine later.
- */
-static crypto_req_id_t
-kcf_reqid_insert(kcf_areq_node_t *areq)
-{
- int indx;
- crypto_req_id_t id;
- kcf_areq_node_t *headp;
- kcf_reqid_table_t *rt;
-
- rt = kcf_reqid_table[CPU_SEQID_UNSTABLE & REQID_TABLE_MASK];
-
- mutex_enter(&rt->rt_lock);
-
- rt->rt_curid = id =
- (rt->rt_curid - REQID_COUNTER_LOW) | REQID_COUNTER_HIGH;
- SET_REQID(areq, id);
- indx = REQID_HASH(id);
- headp = areq->an_idnext = rt->rt_idhash[indx];
- areq->an_idprev = NULL;
- if (headp != NULL)
- headp->an_idprev = areq;
-
- rt->rt_idhash[indx] = areq;
- mutex_exit(&rt->rt_lock);
-
- return (id);
-}
-
-/*
- * Delete the async request from the hash table.
- */
-static void
-kcf_reqid_delete(kcf_areq_node_t *areq)
-{
- int indx;
- kcf_areq_node_t *nextp, *prevp;
- crypto_req_id_t id = GET_REQID(areq);
- kcf_reqid_table_t *rt;
-
- rt = kcf_reqid_table[id & REQID_TABLE_MASK];
- indx = REQID_HASH(id);
-
- mutex_enter(&rt->rt_lock);
-
- nextp = areq->an_idnext;
- prevp = areq->an_idprev;
- if (nextp != NULL)
- nextp->an_idprev = prevp;
- if (prevp != NULL)
- prevp->an_idnext = nextp;
- else
- rt->rt_idhash[indx] = nextp;
-
- SET_REQID(areq, 0);
- cv_broadcast(&areq->an_done);
-
- mutex_exit(&rt->rt_lock);
-}
-
-/*
- * Cancel a single asynchronous request.
- *
- * We guarantee that no problems will result from calling
- * crypto_cancel_req() for a request which is either running, or
- * has already completed. We remove the request from any queues
- * if it is possible. We wait for request completion if the
- * request is dispatched to a provider.
- *
- * Calling context:
- * Can be called from user context only.
- *
- * NOTE: We acquire the following locks in this routine (in order):
- * - rt_lock (kcf_reqid_table_t)
- * - gswq->gs_lock
- * - areq->an_lock
- * - ictx->kc_in_use_lock (from kcf_removereq_in_ctxchain())
- *
- * This locking order MUST be maintained in code every where else.
- */
-void
-crypto_cancel_req(crypto_req_id_t id)
-{
- int indx;
- kcf_areq_node_t *areq;
- kcf_provider_desc_t *pd;
- kcf_context_t *ictx;
- kcf_reqid_table_t *rt;
-
- rt = kcf_reqid_table[id & REQID_TABLE_MASK];
- indx = REQID_HASH(id);
-
- mutex_enter(&rt->rt_lock);
- for (areq = rt->rt_idhash[indx]; areq; areq = areq->an_idnext) {
- if (GET_REQID(areq) == id) {
- /*
- * We found the request. It is either still waiting
- * in the framework queues or running at the provider.
- */
- pd = areq->an_provider;
- ASSERT(pd != NULL);
-
- switch (pd->pd_prov_type) {
- case CRYPTO_SW_PROVIDER:
- mutex_enter(&gswq->gs_lock);
- mutex_enter(&areq->an_lock);
-
- /* This request can be safely canceled. */
- if (areq->an_state <= REQ_WAITING) {
- /* Remove from gswq, global software queue. */
- kcf_remove_node(areq);
- if ((ictx = areq->an_context) != NULL)
- kcf_removereq_in_ctxchain(ictx, areq);
-
- mutex_exit(&areq->an_lock);
- mutex_exit(&gswq->gs_lock);
- mutex_exit(&rt->rt_lock);
-
- /* Remove areq from hash table and free it. */
- kcf_reqid_delete(areq);
- KCF_AREQ_REFRELE(areq);
- return;
- }
-
- mutex_exit(&areq->an_lock);
- mutex_exit(&gswq->gs_lock);
- break;
-
- case CRYPTO_HW_PROVIDER:
- /*
- * There is no interface to remove an entry
- * once it is on the taskq. So, we do not do
- * anything for a hardware provider.
- */
- break;
- default:
- break;
- }
-
- /*
- * The request is running. Wait for the request completion
- * to notify us.
- */
- KCF_AREQ_REFHOLD(areq);
- while (GET_REQID(areq) == id)
- cv_wait(&areq->an_done, &rt->rt_lock);
- KCF_AREQ_REFRELE(areq);
- break;
- }
- }
-
- mutex_exit(&rt->rt_lock);
-}
-
-/*
- * Cancel all asynchronous requests associated with the
- * passed in crypto context and free it.
- *
- * A client SHOULD NOT call this routine after calling a crypto_*_final
- * routine. This routine is called only during intermediate operations.
- * The client should not use the crypto context after this function returns
- * since we destroy it.
- *
- * Calling context:
- * Can be called from user context only.
- */
-void
-crypto_cancel_ctx(crypto_context_t ctx)
-{
- kcf_context_t *ictx;
- kcf_areq_node_t *areq;
-
- if (ctx == NULL)
- return;
-
- ictx = (kcf_context_t *)((crypto_ctx_t *)ctx)->cc_framework_private;
-
- mutex_enter(&ictx->kc_in_use_lock);
-
- /* Walk the chain and cancel each request */
- while ((areq = ictx->kc_req_chain_first) != NULL) {
- /*
- * We have to drop the lock here as we may have
- * to wait for request completion. We hold the
- * request before dropping the lock though, so that it
- * won't be freed underneath us.
- */
- KCF_AREQ_REFHOLD(areq);
- mutex_exit(&ictx->kc_in_use_lock);
-
- crypto_cancel_req(GET_REQID(areq));
- KCF_AREQ_REFRELE(areq);
-
- mutex_enter(&ictx->kc_in_use_lock);
- }
-
- mutex_exit(&ictx->kc_in_use_lock);
- KCF_CONTEXT_REFRELE(ictx);
-}
-
-/*
- * Update kstats.
- */
-static int
-kcf_misc_kstat_update(kstat_t *ksp, int rw)
-{
- uint_t tcnt;
- kcf_stats_t *ks_data;
-
- if (rw == KSTAT_WRITE)
- return (EACCES);
-
- ks_data = ksp->ks_data;
-
- ks_data->ks_thrs_in_pool.value.ui32 = kcfpool->kp_threads;
- /*
- * The failover thread is counted in kp_idlethreads in
- * some corner cases. This is done to avoid doing more checks
- * when submitting a request. We account for those cases below.
- */
- if ((tcnt = kcfpool->kp_idlethreads) == (kcfpool->kp_threads + 1))
- tcnt--;
- ks_data->ks_idle_thrs.value.ui32 = tcnt;
- ks_data->ks_minthrs.value.ui32 = kcf_minthreads;
- ks_data->ks_maxthrs.value.ui32 = kcf_maxthreads;
- ks_data->ks_swq_njobs.value.ui32 = gswq->gs_njobs;
- ks_data->ks_swq_maxjobs.value.ui32 = gswq->gs_maxjobs;
- ks_data->ks_taskq_threads.value.ui32 = crypto_taskq_threads;
- ks_data->ks_taskq_minalloc.value.ui32 = crypto_taskq_minalloc;
- ks_data->ks_taskq_maxalloc.value.ui32 = crypto_taskq_maxalloc;
-
- return (0);
-}
-
-/*
- * Allocate and initialize a kcf_dual_req, used for saving the arguments of
- * a dual operation or an atomic operation that has to be internally
- * simulated with multiple single steps.
- * crq determines the memory allocation flags.
- */
-
-kcf_dual_req_t *
-kcf_alloc_req(crypto_call_req_t *crq)
-{
- kcf_dual_req_t *kcr;
-
- kcr = kmem_alloc(sizeof (kcf_dual_req_t), KCF_KMFLAG(crq));
-
- if (kcr == NULL)
- return (NULL);
-
- /* Copy the whole crypto_call_req struct, as it isn't persistent */
- if (crq != NULL)
- kcr->kr_callreq = *crq;
- else
- bzero(&(kcr->kr_callreq), sizeof (crypto_call_req_t));
- kcr->kr_areq = NULL;
- kcr->kr_saveoffset = 0;
- kcr->kr_savelen = 0;
-
- return (kcr);
-}
-
-/*
- * Callback routine for the next part of a simulated dual part.
- * Schedules the next step.
- *
- * This routine can be called from interrupt context.
- */
-void
-kcf_next_req(void *next_req_arg, int status)
-{
- kcf_dual_req_t *next_req = (kcf_dual_req_t *)next_req_arg;
- kcf_req_params_t *params = &(next_req->kr_params);
- kcf_areq_node_t *areq = next_req->kr_areq;
- int error = status;
- kcf_provider_desc_t *pd = NULL;
- crypto_dual_data_t *ct = NULL;
-
- /* Stop the processing if an error occurred at this step */
- if (error != CRYPTO_SUCCESS) {
-out:
- areq->an_reqarg = next_req->kr_callreq;
- KCF_AREQ_REFRELE(areq);
- kmem_free(next_req, sizeof (kcf_dual_req_t));
- areq->an_isdual = B_FALSE;
- kcf_aop_done(areq, error);
- return;
- }
-
- switch (params->rp_opgrp) {
- case KCF_OG_MAC: {
-
- /*
- * The next req is submitted with the same reqid as the
- * first part. The consumer only got back that reqid, and
- * should still be able to cancel the operation during its
- * second step.
- */
- kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params);
- crypto_ctx_template_t mac_tmpl;
- kcf_mech_entry_t *me;
-
- ct = (crypto_dual_data_t *)mops->mo_data;
- mac_tmpl = (crypto_ctx_template_t)mops->mo_templ;
-
- /* No expected recoverable failures, so no retry list */
- pd = kcf_get_mech_provider(mops->mo_framework_mechtype,
- &me, &error, NULL, CRYPTO_FG_MAC_ATOMIC,
- (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len2);
-
- if (pd == NULL) {
- error = CRYPTO_MECH_NOT_SUPPORTED;
- goto out;
- }
- /* Validate the MAC context template here */
- if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
- (mac_tmpl != NULL)) {
- kcf_ctx_template_t *ctx_mac_tmpl;
-
- ctx_mac_tmpl = (kcf_ctx_template_t *)mac_tmpl;
-
- if (ctx_mac_tmpl->ct_generation != me->me_gen_swprov) {
- KCF_PROV_REFRELE(pd);
- error = CRYPTO_OLD_CTX_TEMPLATE;
- goto out;
- }
- mops->mo_templ = ctx_mac_tmpl->ct_prov_tmpl;
- }
-
- break;
- }
- case KCF_OG_DECRYPT: {
- kcf_decrypt_ops_params_t *dcrops =
- &(params->rp_u.decrypt_params);
-
- ct = (crypto_dual_data_t *)dcrops->dop_ciphertext;
- /* No expected recoverable failures, so no retry list */
- pd = kcf_get_mech_provider(dcrops->dop_framework_mechtype,
- NULL, &error, NULL, CRYPTO_FG_DECRYPT_ATOMIC,
- (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len1);
-
- if (pd == NULL) {
- error = CRYPTO_MECH_NOT_SUPPORTED;
- goto out;
- }
- break;
- }
- default:
- break;
- }
-
- /* The second step uses len2 and offset2 of the dual_data */
- next_req->kr_saveoffset = ct->dd_offset1;
- next_req->kr_savelen = ct->dd_len1;
- ct->dd_offset1 = ct->dd_offset2;
- ct->dd_len1 = ct->dd_len2;
-
- /* preserve if the caller is restricted */
- if (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED) {
- areq->an_reqarg.cr_flag = CRYPTO_RESTRICTED;
- } else {
- areq->an_reqarg.cr_flag = 0;
- }
-
- areq->an_reqarg.cr_callback_func = kcf_last_req;
- areq->an_reqarg.cr_callback_arg = next_req;
- areq->an_isdual = B_TRUE;
-
- /*
- * We would like to call kcf_submit_request() here. But,
- * that is not possible as that routine allocates a new
- * kcf_areq_node_t request structure, while we need to
- * reuse the existing request structure.
- */
- switch (pd->pd_prov_type) {
- case CRYPTO_SW_PROVIDER:
- error = common_submit_request(pd, NULL, params,
- KCF_RHNDL(KM_NOSLEEP));
- break;
-
- case CRYPTO_HW_PROVIDER: {
- kcf_provider_desc_t *old_pd;
- taskq_t *taskq = pd->pd_sched_info.ks_taskq;
-
- /*
- * Set the params for the second step in the
- * dual-ops.
- */
- areq->an_params = *params;
- old_pd = areq->an_provider;
- KCF_PROV_REFRELE(old_pd);
- KCF_PROV_REFHOLD(pd);
- areq->an_provider = pd;
-
- /*
- * Note that we have to do a taskq_dispatch()
- * here as we may be in interrupt context.
- */
- if (taskq_dispatch(taskq, process_req_hwp, areq,
- TQ_NOSLEEP) == (taskqid_t)0) {
- error = CRYPTO_HOST_MEMORY;
- } else {
- error = CRYPTO_QUEUED;
- }
- break;
- }
- default:
- break;
- }
-
- /*
- * We have to release the holds on the request and the provider
- * in all cases.
- */
- KCF_AREQ_REFRELE(areq);
- KCF_PROV_REFRELE(pd);
-
- if (error != CRYPTO_QUEUED) {
- /* restore, clean up, and invoke the client's callback */
-
- ct->dd_offset1 = next_req->kr_saveoffset;
- ct->dd_len1 = next_req->kr_savelen;
- areq->an_reqarg = next_req->kr_callreq;
- kmem_free(next_req, sizeof (kcf_dual_req_t));
- areq->an_isdual = B_FALSE;
- kcf_aop_done(areq, error);
- }
-}
-
-/*
- * Last part of an emulated dual operation.
- * Clean up and restore ...
- */
-void
-kcf_last_req(void *last_req_arg, int status)
-{
- kcf_dual_req_t *last_req = (kcf_dual_req_t *)last_req_arg;
-
- kcf_req_params_t *params = &(last_req->kr_params);
- kcf_areq_node_t *areq = last_req->kr_areq;
- crypto_dual_data_t *ct = NULL;
-
- switch (params->rp_opgrp) {
- case KCF_OG_MAC: {
- kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params);
-
- ct = (crypto_dual_data_t *)mops->mo_data;
- break;
- }
- case KCF_OG_DECRYPT: {
- kcf_decrypt_ops_params_t *dcrops =
- &(params->rp_u.decrypt_params);
-
- ct = (crypto_dual_data_t *)dcrops->dop_ciphertext;
- break;
- }
- default: {
- panic("invalid kcf_op_group_t %d", (int)params->rp_opgrp);
- return;
- }
- }
- ct->dd_offset1 = last_req->kr_saveoffset;
- ct->dd_len1 = last_req->kr_savelen;
-
- /* The submitter used kcf_last_req as its callback */
-
- if (areq == NULL) {
- crypto_call_req_t *cr = &last_req->kr_callreq;
-
- (*(cr->cr_callback_func))(cr->cr_callback_arg, status);
- kmem_free(last_req, sizeof (kcf_dual_req_t));
- return;
- }
- areq->an_reqarg = last_req->kr_callreq;
- KCF_AREQ_REFRELE(areq);
- kmem_free(last_req, sizeof (kcf_dual_req_t));
- areq->an_isdual = B_FALSE;
- kcf_aop_done(areq, status);
}
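
With the scheduler stripped down to the synchronous path, the surviving
context lifecycle is short enough to sketch. This is an illustrative
fragment, not part of the patch; it uses only kcf_new_ctx() as changed
above and the KCF_CONTEXT_REFRELE() macro referenced elsewhere in this
file.

	crypto_ctx_t *ctx;

	/* Takes a hold on pd and sets kc_refcnt = 1. */
	ctx = kcf_new_ctx(pd);
	if (ctx == NULL)
		return (CRYPTO_HOST_MEMORY);

	/* ... run the operation against the provider ... */

	/*
	 * Dropping the last reference ends up in kcf_free_context(),
	 * which releases the provider hold and frees both structures.
	 */
	KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);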
diff --git a/sys/contrib/openzfs/module/icp/illumos-crypto.c b/sys/contrib/openzfs/module/icp/illumos-crypto.c
index 3c5ef4393940..13f05c06ed5c 100644
--- a/sys/contrib/openzfs/module/icp/illumos-crypto.c
+++ b/sys/contrib/openzfs/module/icp/illumos-crypto.c
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -36,7 +36,6 @@
#include <sys/crypto/api.h>
#include <sys/crypto/impl.h>
#include <sys/crypto/sched_impl.h>
-#include <sys/modhash_impl.h>
#include <sys/crypto/icp.h>
/*
@@ -61,16 +60,15 @@
* the future it may make sense to have encryption algorithms that are
* loadable into the ICP at runtime via separate kernel modules.
* However, considering that this code will probably not see much use
- * outside of zfs and zfs encryption only requires aes and sha256
+ * outside of zfs, and zfs encryption only requires a select few
* algorithms, it seemed like more trouble than it was worth to port over
* Illumos's kernel module structure to a Linux kernel module. In
* addition, The Illumos code related to keeping track of kernel modules
- * is very much tied to the Illumos OS and proved difficult to port to
- * Linux. Therefore, the structure of the ICP was simplified to work
- * statically and several pieces of code responsible for keeping track
- * of Illumos kernel modules were removed and simplified. All module
- * initialization and destruction is now called in this file during
- * Linux kernel module loading and unloading.
+ * is very much tied to the Illumos OS and proved difficult to port.
+ * Therefore, the structure of the ICP was simplified to work statically,
+ * and the entire Illumos kernel module loading subsystem was removed.
+ * All module initialization and destruction is now called in this file
+ * during kernel module loading and unloading.
*
* 4) Adding destructors: The Illumos Crypto Layer is built into
* the Illumos kernel and is not meant to be unloaded. Some destructors
@@ -106,27 +104,21 @@
* ZFS Makefiles.
*/
-void __exit
+void
icp_fini(void)
{
skein_mod_fini();
sha2_mod_fini();
- sha1_mod_fini();
- edonr_mod_fini();
aes_mod_fini();
kcf_sched_destroy();
kcf_prov_tab_destroy();
kcf_destroy_mech_tabs();
- mod_hash_fini();
}
/* roughly equivalent to kcf.c: _init() */
int __init
icp_init(void)
{
- /* initialize the mod hash module */
- mod_hash_init();
-
/* initialize the mechanisms tables supported out-of-the-box */
kcf_init_mech_tabs();
@@ -141,18 +133,13 @@ icp_init(void)
/* initialize algorithms */
aes_mod_init();
- edonr_mod_init();
- sha1_mod_init();
sha2_mod_init();
skein_mod_init();
return (0);
}
-#if defined(_KERNEL)
+#if defined(_KERNEL) && defined(__FreeBSD__)
module_exit(icp_fini);
module_init(icp_init);
-MODULE_AUTHOR(ZFS_META_AUTHOR);
-MODULE_LICENSE(ZFS_META_LICENSE);
-MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
#endif
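
With the Illumos module machinery gone, the host module now drives ICP
setup and teardown explicitly. A minimal sketch of that embedding follows;
the caller is hypothetical, but it uses only the two entry points shown
above.

	int err;

	/* Builds the mechanism tables, provider table, and scheduler. */
	if ((err = icp_init()) != 0)
		return (err);

	/* ... the crypto framework is usable here ... */

	icp_fini();	/* tears everything down in reverse order */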
diff --git a/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h b/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h
index 41dccaa3848a..66eb4a6c8fb6 100644
--- a/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h
+++ b/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -36,6 +36,7 @@ extern "C" {
#include <sys/zfs_context.h>
#include <sys/crypto/common.h>
+#include <sys/asm_linkage.h>
/* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */
#define IS_P2ALIGNED2(v, w, a) \
@@ -83,14 +84,7 @@ extern "C" {
/* AES key size definitions */
#define AES_MINBITS 128
-#define AES_MINBYTES ((AES_MINBITS) >> 3)
#define AES_MAXBITS 256
-#define AES_MAXBYTES ((AES_MAXBITS) >> 3)
-
-#define AES_MIN_KEY_BYTES ((AES_MINBITS) >> 3)
-#define AES_MAX_KEY_BYTES ((AES_MAXBITS) >> 3)
-#define AES_192_KEY_BYTES 24
-#define AES_IV_LEN 16
/* AES key schedule may be implemented with 32- or 64-bit elements: */
#define AES_32BIT_KS 32
@@ -197,13 +191,13 @@ extern const aes_impl_ops_t aes_generic_impl;
extern const aes_impl_ops_t aes_x86_64_impl;
/* These functions are used to execute amd64 instructions for AMD or Intel: */
-extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
+extern ASMABI int rijndael_key_setup_enc_amd64(uint32_t rk[],
const uint32_t cipherKey[], int keyBits);
-extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
+extern ASMABI int rijndael_key_setup_dec_amd64(uint32_t rk[],
const uint32_t cipherKey[], int keyBits);
-extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
+extern ASMABI void aes_encrypt_amd64(const uint32_t rk[], int Nr,
const uint32_t pt[4], uint32_t ct[4]);
-extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
+extern ASMABI void aes_decrypt_amd64(const uint32_t rk[], int Nr,
const uint32_t ct[4], uint32_t pt[4]);
#endif
#if defined(__x86_64) && defined(HAVE_AES)
diff --git a/sys/contrib/openzfs/module/icp/include/generic_impl.c b/sys/contrib/openzfs/module/icp/include/generic_impl.c
new file mode 100644
index 000000000000..16f802cf7558
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/include/generic_impl.c
@@ -0,0 +1,233 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2003, 2010 Oracle and/or its affiliates.
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+/*
+ * This file gets included by C files to implement the full set
+ * of zfs_impl.h defines.
+ *
+ * It's meant to make maintaining multiple implementations of
+ * algorithms easier. Look into blake3_impl.c, sha256_impl.c or
+ * sha512_impl.c for reference.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_impl.h>
+
+/* Two default implementations */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX - 1)
+
+#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
+
+/* Implementation that contains the fastest method */
+static IMPL_OPS_T generic_fastest_impl = {
+ .name = "fastest"
+};
+
+/* Hold all supported implementations */
+static const IMPL_OPS_T *generic_supp_impls[ARRAY_SIZE(IMPL_ARRAY)];
+static uint32_t generic_supp_impls_cnt = 0;
+
+/* Currently selected implementation */
+static uint32_t generic_impl_chosen = IMPL_FASTEST;
+
+static struct generic_impl_selector {
+ const char *name;
+ uint32_t sel;
+} generic_impl_selectors[] = {
+ { "cycle", IMPL_CYCLE },
+ { "fastest", IMPL_FASTEST }
+};
+
+/* check the supported implementations */
+static void
+generic_impl_init(void)
+{
+ int i, c;
+
+ /* init only once */
+ if (likely(generic_supp_impls_cnt != 0))
+ return;
+
+ /* Move supported implementations into generic_supp_impls */
+ for (i = 0, c = 0; i < ARRAY_SIZE(IMPL_ARRAY); i++) {
+ const IMPL_OPS_T *impl = IMPL_ARRAY[i];
+
+ if (impl->is_supported && impl->is_supported())
+ generic_supp_impls[c++] = impl;
+ }
+ generic_supp_impls_cnt = c;
+
+	/* seed "fastest" with the first impl; set_fastest() may update it */
+ memcpy(&generic_fastest_impl, generic_supp_impls[0],
+ sizeof (generic_fastest_impl));
+}
+
+/* get number of supported implementations */
+static uint32_t
+generic_impl_getcnt(void)
+{
+ generic_impl_init();
+ return (generic_supp_impls_cnt);
+}
+
+/* get id of selected implementation */
+static uint32_t
+generic_impl_getid(void)
+{
+ generic_impl_init();
+ return (IMPL_READ(generic_impl_chosen));
+}
+
+/* get name of selected implementation */
+static const char *
+generic_impl_getname(void)
+{
+ uint32_t impl = IMPL_READ(generic_impl_chosen);
+
+ generic_impl_init();
+ switch (impl) {
+ case IMPL_FASTEST:
+ return ("fastest");
+ case IMPL_CYCLE:
+ return ("cycle");
+ default:
+ return (generic_supp_impls[impl]->name);
+ }
+}
+
+/* set implementation by id */
+static void
+generic_impl_setid(uint32_t id)
+{
+ generic_impl_init();
+ switch (id) {
+ case IMPL_FASTEST:
+ atomic_swap_32(&generic_impl_chosen, IMPL_FASTEST);
+ break;
+ case IMPL_CYCLE:
+ atomic_swap_32(&generic_impl_chosen, IMPL_CYCLE);
+ break;
+ default:
+ ASSERT3U(id, <, generic_supp_impls_cnt);
+ atomic_swap_32(&generic_impl_chosen, id);
+ break;
+ }
+}
+
+/* set implementation by name */
+static int
+generic_impl_setname(const char *val)
+{
+ uint32_t impl = IMPL_READ(generic_impl_chosen);
+ size_t val_len;
+ int i, err = -EINVAL;
+
+ generic_impl_init();
+ val_len = strlen(val);
+	while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim whitespace */
+ val_len--;
+
+	/* check the special selectors ("cycle", "fastest") first */
+ for (i = 0; i < ARRAY_SIZE(generic_impl_selectors); i++) {
+ const char *name = generic_impl_selectors[i].name;
+
+ if (val_len == strlen(name) &&
+ strncmp(val, name, val_len) == 0) {
+ impl = generic_impl_selectors[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported implementations */
+ if (err != 0) {
+ for (i = 0; i < generic_supp_impls_cnt; i++) {
+ const char *name = generic_supp_impls[i]->name;
+
+ if (val_len == strlen(name) &&
+ strncmp(val, name, val_len) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ if (err == 0) {
+ atomic_swap_32(&generic_impl_chosen, impl);
+ }
+
+ return (err);
+}
+
+/* record the implementation with the given id as the fastest */
+static void
+generic_impl_set_fastest(uint32_t id)
+{
+ generic_impl_init();
+ memcpy(&generic_fastest_impl, generic_supp_impls[id],
+ sizeof (generic_fastest_impl));
+}
+
+/* function table exported to the rest of ZFS via zfs_impl.h */
+const zfs_impl_t ZFS_IMPL_OPS = {
+ .name = IMPL_NAME,
+ .getcnt = generic_impl_getcnt,
+ .getid = generic_impl_getid,
+ .getname = generic_impl_getname,
+ .set_fastest = generic_impl_set_fastest,
+ .setid = generic_impl_setid,
+ .setname = generic_impl_setname
+};
+
+/* get impl ops_t of selected implementation */
+const IMPL_OPS_T *
+IMPL_GET_OPS(void)
+{
+ const IMPL_OPS_T *ops = NULL;
+ uint32_t idx, impl = IMPL_READ(generic_impl_chosen);
+ static uint32_t cycle_count = 0;
+
+ generic_impl_init();
+ switch (impl) {
+ case IMPL_FASTEST:
+ ops = &generic_fastest_impl;
+ break;
+ case IMPL_CYCLE:
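+		/* rotate through all supported impls on each call */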
+ idx = (++cycle_count) % generic_supp_impls_cnt;
+ ops = generic_supp_impls[idx];
+ break;
+ default:
+ ASSERT3U(impl, <, generic_supp_impls_cnt);
+ ops = generic_supp_impls[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+ return (ops);
+}
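As a usage illustration: a consumer defines the IMPL_* macros and the implementation array before including this file. A minimal sketch modeled on the sha256 case (the macro names come from the file above and sha2_impl.h below; zfs_sha256_ops, sha256_impls, and sha256_generic_impl are assumptions for illustration):

	/* sha256_impl.c -- hedged sketch of a generic_impl.c consumer */
	#define	IMPL_NAME	"sha256"
	#define	IMPL_OPS_T	sha256_ops_t
	#define	IMPL_ARRAY	sha256_impls
	#define	IMPL_GET_OPS	sha256_get_ops
	#define	ZFS_IMPL_OPS	zfs_sha256_ops

	extern const sha256_ops_t sha256_generic_impl;

	/* All known implementations; generic_impl_init() keeps only
	 * those whose is_supported() returns true. */
	static const sha256_ops_t *const sha256_impls[] = {
		&sha256_generic_impl,
		/* CPU-specific variants would be listed here */
	};

	#include <generic_impl.c>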
diff --git a/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h b/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h
index 28c8f63a7d46..3afc9e2c6317 100644
--- a/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h
+++ b/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/include/modes/modes.h b/sys/contrib/openzfs/module/icp/include/modes/modes.h
index ab71197542eb..23bf46ab51a0 100644
--- a/sys/contrib/openzfs/module/icp/include/modes/modes.h
+++ b/sys/contrib/openzfs/module/icp/include/modes/modes.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -207,10 +207,6 @@ typedef struct ccm_ctx {
*
* gcm_len_a_len_c: 64-bit representations of the bit lengths of
* AAD and ciphertext.
- *
- * gcm_kmflag: Current value of kmflag. Used for allocating
- * the plaintext buffer during decryption and a
- * gcm_avx_chunk_size'd buffer for avx enabled encryption.
*/
typedef struct gcm_ctx {
struct common_ctx gcm_common;
@@ -231,7 +227,6 @@ typedef struct gcm_ctx {
uint64_t gcm_J0[2];
uint64_t gcm_len_a_len_c[2];
uint8_t *gcm_pt_buf;
- int gcm_kmflag;
#ifdef CAN_USE_GCM_ASM
boolean_t gcm_use_avx;
#endif
@@ -249,6 +244,8 @@ typedef struct gcm_ctx {
#define AES_GMAC_IV_LEN 12
#define AES_GMAC_TAG_BITS 128
+void gcm_clear_ctx(gcm_ctx_t *ctx);
+
typedef struct aes_ctx {
union {
ecb_ctx_t acu_ecb;
@@ -402,7 +399,6 @@ extern void *ccm_alloc_ctx(int);
extern void *gcm_alloc_ctx(int);
extern void *gmac_alloc_ctx(int);
extern void crypto_free_mode_ctx(void *);
-extern void gcm_set_kmflag(gcm_ctx_t *, int);
#ifdef __cplusplus
}
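Alongside the removal of gcm_kmflag, the header now exports gcm_clear_ctx(). A plausible sketch of what such a helper does, scrubbing sensitive state before the context is freed (the actual body lives in gcm.c; the field list and gcm_pt_buf_len are assumptions):

	void
	gcm_clear_ctx(gcm_ctx_t *ctx)
	{
		/* Scrub key- and message-dependent state before free. */
		memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
		memset(ctx->gcm_len_a_len_c, 0, sizeof (ctx->gcm_len_a_len_c));
		if (ctx->gcm_pt_buf != NULL) {
			memset(ctx->gcm_pt_buf, 0, ctx->gcm_pt_buf_len);
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		}
	}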
diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1.h
deleted file mode 100644
index 251b64fcaeee..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha1/sha1.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SHA1_H
-#define _SYS_SHA1_H
-
-#include <sys/types.h> /* for uint_* */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * NOTE: n2rng (Niagara2 RNG driver) accesses the state field of
- * SHA1_CTX directly. NEVER change this structure without verifying
- * compatibility with n2rng. The important thing is that the state
- * must be in a field declared as uint32_t state[5].
- */
-/* SHA-1 context. */
-typedef struct {
- uint32_t state[5]; /* state (ABCDE) */
- uint32_t count[2]; /* number of bits, modulo 2^64 (msb first) */
- union {
- uint8_t buf8[64]; /* undigested input */
- uint32_t buf32[16]; /* realigned input */
- } buf_un;
-} SHA1_CTX;
-
-#define SHA1_DIGEST_LENGTH 20
-
-void SHA1Init(SHA1_CTX *);
-void SHA1Update(SHA1_CTX *, const void *, size_t);
-void SHA1Final(void *, SHA1_CTX *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA1_H */
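For the record, the interface removed here followed the classic three-call digest pattern; a minimal caller sketch (buf and buflen stand in for caller-supplied data):

	SHA1_CTX ctx;
	uint8_t digest[SHA1_DIGEST_LENGTH];

	SHA1Init(&ctx);
	SHA1Update(&ctx, buf, buflen);	/* may be called repeatedly */
	SHA1Final(digest, &ctx);	/* writes 20 bytes */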
diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h
deleted file mode 100644
index 848d25ef050f..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1998, by Sun Microsystems, Inc.
- * All rights reserved.
- */
-
-#ifndef _SYS_SHA1_CONSTS_H
-#define _SYS_SHA1_CONSTS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * as explained in sha1.c, loading 32-bit constants on a sparc is expensive
- * since it involves both a `sethi' and an `or'. thus, we instead use `ld'
- * to load the constants from an array called `sha1_consts'. however, on
- * intel (and perhaps other processors), it is cheaper to load the constant
- * directly. thus, the c code in SHA1Transform() uses the macro SHA1_CONST()
- * which either expands to a constant or an array reference, depending on
- * the architecture the code is being compiled for.
- */
-
-#include <sys/types.h> /* uint32_t */
-
-extern const uint32_t sha1_consts[];
-
-#if defined(__sparc)
-#define SHA1_CONST(x) (sha1_consts[x])
-#else
-#define SHA1_CONST(x) (SHA1_CONST_ ## x)
-#endif
-
-/* constants, as provided in FIPS 180-1 */
-
-#define SHA1_CONST_0 0x5a827999U
-#define SHA1_CONST_1 0x6ed9eba1U
-#define SHA1_CONST_2 0x8f1bbcdcU
-#define SHA1_CONST_3 0xca62c1d6U
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA1_CONSTS_H */
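To make the removed indirection concrete: the same token compiles to a memory load on sparc and to an immediate elsewhere, for example:

	e += SHA1_CONST(2);
	/* sparc: expands to sha1_consts[2], a single `ld` */
	/* other: expands to SHA1_CONST_2, i.e. 0x8f1bbcdcU */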
diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h
deleted file mode 100644
index 1c1f8728f9b5..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SHA1_IMPL_H
-#define _SHA1_IMPL_H
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SHA1_HASH_SIZE 20 /* SHA_1 digest length in bytes */
-#define SHA1_DIGEST_LENGTH 20 /* SHA1 digest length in bytes */
-#define SHA1_HMAC_BLOCK_SIZE 64 /* SHA1-HMAC block size */
-#define SHA1_HMAC_MIN_KEY_LEN 1 /* SHA1-HMAC min key length in bytes */
-#define SHA1_HMAC_MAX_KEY_LEN INT_MAX /* SHA1-HMAC max key length in bytes */
-#define SHA1_HMAC_INTS_PER_BLOCK (SHA1_HMAC_BLOCK_SIZE/sizeof (uint32_t))
-
-/*
- * CSPI information (entry points, provider info, etc.)
- */
-typedef enum sha1_mech_type {
- SHA1_MECH_INFO_TYPE, /* SUN_CKM_SHA1 */
- SHA1_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA1_HMAC */
- SHA1_HMAC_GEN_MECH_INFO_TYPE /* SUN_CKM_SHA1_HMAC_GENERAL */
-} sha1_mech_type_t;
-
-/*
- * Context for SHA1 mechanism.
- */
-typedef struct sha1_ctx {
- sha1_mech_type_t sc_mech_type; /* type of context */
- SHA1_CTX sc_sha1_ctx; /* SHA1 context */
-} sha1_ctx_t;
-
-/*
- * Context for SHA1-HMAC and SHA1-HMAC-GENERAL mechanisms.
- */
-typedef struct sha1_hmac_ctx {
- sha1_mech_type_t hc_mech_type; /* type of context */
- uint32_t hc_digest_len; /* digest len in bytes */
- SHA1_CTX hc_icontext; /* inner SHA1 context */
- SHA1_CTX hc_ocontext; /* outer SHA1 context */
-} sha1_hmac_ctx_t;
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SHA1_IMPL_H */
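The inner/outer pair of SHA1 contexts in the removed HMAC structure mirrors the standard construction

	HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m))

hc_icontext caches the state after absorbing K ^ ipad and hc_ocontext the state after K ^ opad, so each message pays only for the two finishing passes instead of rehashing the key.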
diff --git a/sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h b/sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h
deleted file mode 100644
index 3a6645508fe9..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SHA2_CONSTS_H
-#define _SYS_SHA2_CONSTS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Loading 32-bit constants on a sparc is expensive since it involves both
- * a `sethi' and an `or'. thus, we instead use `ld' to load the constants
- * from an array called `sha2_consts'. however, on intel (and perhaps other
- * processors), it is cheaper to load the constant directly. thus, the c
- * code in SHA transform functions uses the macro SHA2_CONST() which either
- * expands to a constant or an array reference, depending on
- * the architecture the code is being compiled for.
- *
- * SHA512 constants are used for SHA384
- */
-
-#include <sys/types.h> /* uint32_t */
-
-extern const uint32_t sha256_consts[];
-extern const uint64_t sha512_consts[];
-
-#if defined(__sparc)
-#define SHA256_CONST(x) (sha256_consts[x])
-#define SHA512_CONST(x) (sha512_consts[x])
-#else
-#define SHA256_CONST(x) (SHA256_CONST_ ## x)
-#define SHA512_CONST(x) (SHA512_CONST_ ## x)
-#endif
-
-/* constants, as provided in FIPS 180-2 */
-
-#define SHA256_CONST_0 0x428a2f98U
-#define SHA256_CONST_1 0x71374491U
-#define SHA256_CONST_2 0xb5c0fbcfU
-#define SHA256_CONST_3 0xe9b5dba5U
-#define SHA256_CONST_4 0x3956c25bU
-#define SHA256_CONST_5 0x59f111f1U
-#define SHA256_CONST_6 0x923f82a4U
-#define SHA256_CONST_7 0xab1c5ed5U
-
-#define SHA256_CONST_8 0xd807aa98U
-#define SHA256_CONST_9 0x12835b01U
-#define SHA256_CONST_10 0x243185beU
-#define SHA256_CONST_11 0x550c7dc3U
-#define SHA256_CONST_12 0x72be5d74U
-#define SHA256_CONST_13 0x80deb1feU
-#define SHA256_CONST_14 0x9bdc06a7U
-#define SHA256_CONST_15 0xc19bf174U
-
-#define SHA256_CONST_16 0xe49b69c1U
-#define SHA256_CONST_17 0xefbe4786U
-#define SHA256_CONST_18 0x0fc19dc6U
-#define SHA256_CONST_19 0x240ca1ccU
-#define SHA256_CONST_20 0x2de92c6fU
-#define SHA256_CONST_21 0x4a7484aaU
-#define SHA256_CONST_22 0x5cb0a9dcU
-#define SHA256_CONST_23 0x76f988daU
-
-#define SHA256_CONST_24 0x983e5152U
-#define SHA256_CONST_25 0xa831c66dU
-#define SHA256_CONST_26 0xb00327c8U
-#define SHA256_CONST_27 0xbf597fc7U
-#define SHA256_CONST_28 0xc6e00bf3U
-#define SHA256_CONST_29 0xd5a79147U
-#define SHA256_CONST_30 0x06ca6351U
-#define SHA256_CONST_31 0x14292967U
-
-#define SHA256_CONST_32 0x27b70a85U
-#define SHA256_CONST_33 0x2e1b2138U
-#define SHA256_CONST_34 0x4d2c6dfcU
-#define SHA256_CONST_35 0x53380d13U
-#define SHA256_CONST_36 0x650a7354U
-#define SHA256_CONST_37 0x766a0abbU
-#define SHA256_CONST_38 0x81c2c92eU
-#define SHA256_CONST_39 0x92722c85U
-
-#define SHA256_CONST_40 0xa2bfe8a1U
-#define SHA256_CONST_41 0xa81a664bU
-#define SHA256_CONST_42 0xc24b8b70U
-#define SHA256_CONST_43 0xc76c51a3U
-#define SHA256_CONST_44 0xd192e819U
-#define SHA256_CONST_45 0xd6990624U
-#define SHA256_CONST_46 0xf40e3585U
-#define SHA256_CONST_47 0x106aa070U
-
-#define SHA256_CONST_48 0x19a4c116U
-#define SHA256_CONST_49 0x1e376c08U
-#define SHA256_CONST_50 0x2748774cU
-#define SHA256_CONST_51 0x34b0bcb5U
-#define SHA256_CONST_52 0x391c0cb3U
-#define SHA256_CONST_53 0x4ed8aa4aU
-#define SHA256_CONST_54 0x5b9cca4fU
-#define SHA256_CONST_55 0x682e6ff3U
-
-#define SHA256_CONST_56 0x748f82eeU
-#define SHA256_CONST_57 0x78a5636fU
-#define SHA256_CONST_58 0x84c87814U
-#define SHA256_CONST_59 0x8cc70208U
-#define SHA256_CONST_60 0x90befffaU
-#define SHA256_CONST_61 0xa4506cebU
-#define SHA256_CONST_62 0xbef9a3f7U
-#define SHA256_CONST_63 0xc67178f2U
-
-#define SHA512_CONST_0 0x428a2f98d728ae22ULL
-#define SHA512_CONST_1 0x7137449123ef65cdULL
-#define SHA512_CONST_2 0xb5c0fbcfec4d3b2fULL
-#define SHA512_CONST_3 0xe9b5dba58189dbbcULL
-#define SHA512_CONST_4 0x3956c25bf348b538ULL
-#define SHA512_CONST_5 0x59f111f1b605d019ULL
-#define SHA512_CONST_6 0x923f82a4af194f9bULL
-#define SHA512_CONST_7 0xab1c5ed5da6d8118ULL
-#define SHA512_CONST_8 0xd807aa98a3030242ULL
-#define SHA512_CONST_9 0x12835b0145706fbeULL
-#define SHA512_CONST_10 0x243185be4ee4b28cULL
-#define SHA512_CONST_11 0x550c7dc3d5ffb4e2ULL
-#define SHA512_CONST_12 0x72be5d74f27b896fULL
-#define SHA512_CONST_13 0x80deb1fe3b1696b1ULL
-#define SHA512_CONST_14 0x9bdc06a725c71235ULL
-#define SHA512_CONST_15 0xc19bf174cf692694ULL
-#define SHA512_CONST_16 0xe49b69c19ef14ad2ULL
-#define SHA512_CONST_17 0xefbe4786384f25e3ULL
-#define SHA512_CONST_18 0x0fc19dc68b8cd5b5ULL
-#define SHA512_CONST_19 0x240ca1cc77ac9c65ULL
-#define SHA512_CONST_20 0x2de92c6f592b0275ULL
-#define SHA512_CONST_21 0x4a7484aa6ea6e483ULL
-#define SHA512_CONST_22 0x5cb0a9dcbd41fbd4ULL
-#define SHA512_CONST_23 0x76f988da831153b5ULL
-#define SHA512_CONST_24 0x983e5152ee66dfabULL
-#define SHA512_CONST_25 0xa831c66d2db43210ULL
-#define SHA512_CONST_26 0xb00327c898fb213fULL
-#define SHA512_CONST_27 0xbf597fc7beef0ee4ULL
-#define SHA512_CONST_28 0xc6e00bf33da88fc2ULL
-#define SHA512_CONST_29 0xd5a79147930aa725ULL
-#define SHA512_CONST_30 0x06ca6351e003826fULL
-#define SHA512_CONST_31 0x142929670a0e6e70ULL
-#define SHA512_CONST_32 0x27b70a8546d22ffcULL
-#define SHA512_CONST_33 0x2e1b21385c26c926ULL
-#define SHA512_CONST_34 0x4d2c6dfc5ac42aedULL
-#define SHA512_CONST_35 0x53380d139d95b3dfULL
-#define SHA512_CONST_36 0x650a73548baf63deULL
-#define SHA512_CONST_37 0x766a0abb3c77b2a8ULL
-#define SHA512_CONST_38 0x81c2c92e47edaee6ULL
-#define SHA512_CONST_39 0x92722c851482353bULL
-#define SHA512_CONST_40 0xa2bfe8a14cf10364ULL
-#define SHA512_CONST_41 0xa81a664bbc423001ULL
-#define SHA512_CONST_42 0xc24b8b70d0f89791ULL
-#define SHA512_CONST_43 0xc76c51a30654be30ULL
-#define SHA512_CONST_44 0xd192e819d6ef5218ULL
-#define SHA512_CONST_45 0xd69906245565a910ULL
-#define SHA512_CONST_46 0xf40e35855771202aULL
-#define SHA512_CONST_47 0x106aa07032bbd1b8ULL
-#define SHA512_CONST_48 0x19a4c116b8d2d0c8ULL
-#define SHA512_CONST_49 0x1e376c085141ab53ULL
-#define SHA512_CONST_50 0x2748774cdf8eeb99ULL
-#define SHA512_CONST_51 0x34b0bcb5e19b48a8ULL
-#define SHA512_CONST_52 0x391c0cb3c5c95a63ULL
-#define SHA512_CONST_53 0x4ed8aa4ae3418acbULL
-#define SHA512_CONST_54 0x5b9cca4f7763e373ULL
-#define SHA512_CONST_55 0x682e6ff3d6b2b8a3ULL
-#define SHA512_CONST_56 0x748f82ee5defb2fcULL
-#define SHA512_CONST_57 0x78a5636f43172f60ULL
-#define SHA512_CONST_58 0x84c87814a1f0ab72ULL
-#define SHA512_CONST_59 0x8cc702081a6439ecULL
-#define SHA512_CONST_60 0x90befffa23631e28ULL
-#define SHA512_CONST_61 0xa4506cebde82bde9ULL
-#define SHA512_CONST_62 0xbef9a3f7b2c67915ULL
-#define SHA512_CONST_63 0xc67178f2e372532bULL
-#define SHA512_CONST_64 0xca273eceea26619cULL
-#define SHA512_CONST_65 0xd186b8c721c0c207ULL
-#define SHA512_CONST_66 0xeada7dd6cde0eb1eULL
-#define SHA512_CONST_67 0xf57d4f7fee6ed178ULL
-#define SHA512_CONST_68 0x06f067aa72176fbaULL
-#define SHA512_CONST_69 0x0a637dc5a2c898a6ULL
-#define SHA512_CONST_70 0x113f9804bef90daeULL
-#define SHA512_CONST_71 0x1b710b35131c471bULL
-#define SHA512_CONST_72 0x28db77f523047d84ULL
-#define SHA512_CONST_73 0x32caab7b40c72493ULL
-#define SHA512_CONST_74 0x3c9ebe0a15c9bebcULL
-#define SHA512_CONST_75 0x431d67c49c100d4cULL
-#define SHA512_CONST_76 0x4cc5d4becb3e42b6ULL
-#define SHA512_CONST_77 0x597f299cfc657e2aULL
-#define SHA512_CONST_78 0x5fcb6fab3ad6faecULL
-#define SHA512_CONST_79 0x6c44198c4a475817ULL
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_CONSTS_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h b/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h
index b9768d344e95..9a1bd38f1a77 100644
--- a/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h
+++ b/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -18,9 +18,10 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef _SHA2_IMPL_H
@@ -32,6 +33,28 @@
extern "C" {
#endif
+/* transform function definition */
+typedef void (*sha256_f)(uint32_t state[8], const void *data, size_t blks);
+typedef void (*sha512_f)(uint64_t state[8], const void *data, size_t blks);
+
+/* needed for checking valid implementations */
+typedef boolean_t (*sha2_is_supported_f)(void);
+
+typedef struct {
+ const char *name;
+ sha256_f transform;
+ sha2_is_supported_f is_supported;
+} sha256_ops_t;
+
+typedef struct {
+ const char *name;
+ sha512_f transform;
+ sha2_is_supported_f is_supported;
+} sha512_ops_t;
+
+extern const sha256_ops_t *sha256_get_ops(void);
+extern const sha512_ops_t *sha512_get_ops(void);
+
typedef enum {
SHA1_TYPE,
SHA256_TYPE,
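The new ops tables tie each transform to a runtime capability probe. A hedged sketch of how the always-available generic implementation might populate one (all symbol names below are assumptions for illustration):

	static boolean_t
	sha256_is_supported_generic(void)
	{
		return (B_TRUE);	/* pure C, works everywhere */
	}

	extern void sha256_transform_generic(uint32_t state[8],
	    const void *data, size_t blks);

	const sha256_ops_t sha256_generic_impl = {
		.name = "generic",
		.transform = sha256_transform_generic,
		.is_supported = sha256_is_supported_generic,
	};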
diff --git a/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h b/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h
deleted file mode 100644
index 49a494b46e0b..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ASM_LINKAGE_H
-#define _SYS_ASM_LINKAGE_H
-
-#if defined(__i386) || defined(__amd64)
-
-#include <sys/ia32/asm_linkage.h> /* XX64 x86/sys/asm_linkage.h */
-
-#endif
-
-#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL)
-
-#include <asm/frame.h>
-
-#else /* userspace */
-#define FRAME_BEGIN
-#define FRAME_END
-#endif
-
-
-#endif /* _SYS_ASM_LINKAGE_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/bitmap.h b/sys/contrib/openzfs/module/icp/include/sys/bitmap.h
deleted file mode 100644
index 4e86ee70ed9e..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/bitmap.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-#ifndef _SYS_BITMAP_H
-#define _SYS_BITMAP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__GNUC__) && defined(_ASM_INLINES) && \
- (defined(__i386) || defined(__amd64))
-#include <asm/bitmap.h>
-#endif
-
-/*
- * Operations on bitmaps of arbitrary size
- * A bitmap is a vector of 1 or more ulong_t's.
- * The user of the package is responsible for range checks and keeping
- * track of sizes.
- */
-
-#ifdef _LP64
-#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */
-#define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL, to extract word index */
-#else
-#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */
-#endif
-
-#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */
-#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */
-
-#ifdef _LP64
-#define BT_NBIPUL32 (1 << BT_ULSHIFT32) /* n bits per ulong_t */
-#define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */
-#define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */
-#else
-#define BT_ULMAXMASK 0xffffffff
-#endif
-
-/*
- * bitmap is a ulong_t *, bitindex an index_t
- *
- * The macros BT_WIM and BT_BIW internal; there is no need
- * for users of this package to use them.
- */
-
-/*
- * word in map
- */
-#define BT_WIM(bitmap, bitindex) \
- ((bitmap)[(bitindex) >> BT_ULSHIFT])
-/*
- * bit in word
- */
-#define BT_BIW(bitindex) \
- (1UL << ((bitindex) & BT_ULMASK))
-
-#ifdef _LP64
-#define BT_WIM32(bitmap, bitindex) \
- ((bitmap)[(bitindex) >> BT_ULSHIFT32])
-
-#define BT_BIW32(bitindex) \
- (1UL << ((bitindex) & BT_ULMASK32))
-#endif
-
-/*
- * These are public macros
- *
- * BT_BITOUL == n bits to n ulong_t's
- */
-#define BT_BITOUL(nbits) \
- (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL)
-#define BT_SIZEOFMAP(nbits) \
- (BT_BITOUL(nbits) * sizeof (ulong_t))
-#define BT_TEST(bitmap, bitindex) \
- ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0)
-#define BT_SET(bitmap, bitindex) \
- { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); }
-#define BT_CLEAR(bitmap, bitindex) \
- { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); }
-
-#ifdef _LP64
-#define BT_BITOUL32(nbits) \
- (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32)
-#define BT_SIZEOFMAP32(nbits) \
- (BT_BITOUL32(nbits) * sizeof (uint_t))
-#define BT_TEST32(bitmap, bitindex) \
- ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0)
-#define BT_SET32(bitmap, bitindex) \
- { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); }
-#define BT_CLEAR32(bitmap, bitindex) \
- { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); }
-#endif /* _LP64 */
-
-
-/*
- * BIT_ONLYONESET is a private macro not designed for bitmaps of
- * arbitrary size. u must be an unsigned integer/long. It returns
- * true if one and only one bit is set in u.
- */
-#define BIT_ONLYONESET(u) \
- ((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0))
-
-#ifndef _ASM
-
-/*
- * return next available bit index from map with specified number of bits
- */
-extern index_t bt_availbit(ulong_t *bitmap, size_t nbits);
-/*
- * find the highest order bit that is on, and is within or below
- * the word specified by wx
- */
-extern int bt_gethighbit(ulong_t *mapp, int wx);
-extern int bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2,
- size_t end_pos);
-extern int bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop);
-extern void bt_copy(ulong_t *, ulong_t *, ulong_t);
-
-/*
- * find the parity
- */
-extern int odd_parity(ulong_t);
-
-/*
- * Atomically set/clear bits
- * Atomic exclusive operations will set "result" to "-1"
- * if the bit is already set/cleared. "result" will be set
- * to 0 otherwise.
- */
-#define BT_ATOMIC_SET(bitmap, bitindex) \
- { atomic_or_ulong(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); }
-#define BT_ATOMIC_CLEAR(bitmap, bitindex) \
- { atomic_and_ulong(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); }
-
-#define BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \
- { result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)), \
- (bitindex) % BT_NBIPUL); }
-#define BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \
- { result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)), \
- (bitindex) % BT_NBIPUL); }
-
-/*
- * Extracts bits between index h (high, inclusive) and l (low, exclusive) from
- * u, which must be an unsigned integer.
- */
-#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
-
-#endif /* _ASM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_BITMAP_H */
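For reference, the core macros of the removed header compose like this (a minimal sketch):

	/* A 128-bit map occupies BT_BITOUL(128) ulong_t words. */
	ulong_t map[BT_BITOUL(128)] = { 0 };

	BT_SET(map, 42);	/* map[42 >> BT_ULSHIFT] |= 1UL << (42 & BT_ULMASK) */
	ASSERT(BT_TEST(map, 42) == 1);
	BT_CLEAR(map, 42);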
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h
deleted file mode 100644
index 5432f0c8d607..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CRYPTO_ELFSIGN_H
-#define _SYS_CRYPTO_ELFSIGN_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Consolidation Private Interface for elfsign/libpkcs11/kcfd
- */
-
-#include <sys/zfs_context.h>
-
-/*
- * Project Private structures and types used for communication between kcfd
- * and KCF over the door.
- */
-
-typedef enum ELFsign_status_e {
- ELFSIGN_UNKNOWN,
- ELFSIGN_SUCCESS,
- ELFSIGN_FAILED,
- ELFSIGN_NOTSIGNED,
- ELFSIGN_INVALID_CERTPATH,
- ELFSIGN_INVALID_ELFOBJ,
- ELFSIGN_RESTRICTED
-} ELFsign_status_t;
-
-#define KCF_KCFD_VERSION1 1
-#define SIG_MAX_LENGTH 1024
-
-#define ELF_SIGNATURE_SECTION ".SUNW_signature"
-
-typedef struct kcf_door_arg_s {
- short da_version;
- boolean_t da_iskernel;
-
- union {
- char filename[MAXPATHLEN]; /* For request */
-
- struct kcf_door_result_s { /* For response */
- ELFsign_status_t status;
- uint32_t siglen;
- uchar_t signature[1];
- } result;
- } da_u;
-} kcf_door_arg_t;
-
-typedef uint32_t filesig_vers_t;
-
-/*
- * File Signature Structure
- * Applicable to ELF and other file formats
- */
-struct filesignatures {
- uint32_t filesig_cnt; /* count of signatures */
- uint32_t filesig_pad; /* unused */
- union {
- char filesig_data[1];
- struct filesig { /* one of these for each signature */
- uint32_t filesig_size;
- filesig_vers_t filesig_version;
- union {
- struct filesig_version1 {
- uint32_t filesig_v1_dnsize;
- uint32_t filesig_v1_sigsize;
- uint32_t filesig_v1_oidsize;
- char filesig_v1_data[1];
- } filesig_v1;
- struct filesig_version3 {
- uint64_t filesig_v3_time;
- uint32_t filesig_v3_dnsize;
- uint32_t filesig_v3_sigsize;
- uint32_t filesig_v3_oidsize;
- char filesig_v3_data[1];
- } filesig_v3;
- } _u2;
- } filesig_sig;
- uint64_t filesig_align;
- } _u1;
-};
-#define filesig_sig _u1.filesig_sig
-
-#define filesig_v1_dnsize _u2.filesig_v1.filesig_v1_dnsize
-#define filesig_v1_sigsize _u2.filesig_v1.filesig_v1_sigsize
-#define filesig_v1_oidsize _u2.filesig_v1.filesig_v1_oidsize
-#define filesig_v1_data _u2.filesig_v1.filesig_v1_data
-
-#define filesig_v3_time _u2.filesig_v3.filesig_v3_time
-#define filesig_v3_dnsize _u2.filesig_v3.filesig_v3_dnsize
-#define filesig_v3_sigsize _u2.filesig_v3.filesig_v3_sigsize
-#define filesig_v3_oidsize _u2.filesig_v3.filesig_v3_oidsize
-#define filesig_v3_data _u2.filesig_v3.filesig_v3_data
-
-#define filesig_ALIGN(s) (((s) + sizeof (uint64_t) - 1) & \
- (-sizeof (uint64_t)))
-#define filesig_next(ptr) (struct filesig *)((void *)((char *)(ptr) + \
- filesig_ALIGN((ptr)->filesig_size)))
-
-#define FILESIG_UNKNOWN 0 /* unrecognized version */
-#define FILESIG_VERSION1 1 /* version1, all but sig section */
-#define FILESIG_VERSION2 2 /* version1 format, SHF_ALLOC only */
-#define FILESIG_VERSION3 3 /* version3, all but sig section */
-#define FILESIG_VERSION4 4 /* version3 format, SHF_ALLOC only */
-
-#define _PATH_KCFD_DOOR "/etc/svc/volatile/kcfd_door"
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CRYPTO_ELFSIGN_H */
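The removed filesig_ALIGN()/filesig_next() pair existed to walk the variable-length signature entries; a sketch of the intended iteration (the function and variable names are assumptions):

	static void
	walk_signatures(struct filesignatures *fsp)
	{
		struct filesig *sig = &fsp->filesig_sig;	/* first entry */
		uint32_t i;

		/* filesig_next() advances by filesig_size rounded up to
		 * 8 bytes, per filesig_ALIGN(). */
		for (i = 0; i < fsp->filesig_cnt; i++, sig = filesig_next(sig)) {
			/* inspect sig->filesig_version and the v1/v3 data */
		}
	}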
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h
index 0f37f3f63532..4d17221ea9a3 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -34,80 +34,17 @@
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <sys/crypto/spi.h>
-#include <sys/crypto/ioctl.h>
+#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
-#define KCF_MODULE "kcf"
-
/*
* Prefixes convention: structures internal to the kernel cryptographic
* framework start with 'kcf_'. Exposed structure start with 'crypto_'.
*/
-/* Provider stats. Not protected. */
-typedef struct kcf_prov_stats {
- kstat_named_t ps_ops_total;
- kstat_named_t ps_ops_passed;
- kstat_named_t ps_ops_failed;
- kstat_named_t ps_ops_busy_rval;
-} kcf_prov_stats_t;
-
-/* Various kcf stats. Not protected. */
-typedef struct kcf_stats {
- kstat_named_t ks_thrs_in_pool;
- kstat_named_t ks_idle_thrs;
- kstat_named_t ks_minthrs;
- kstat_named_t ks_maxthrs;
- kstat_named_t ks_swq_njobs;
- kstat_named_t ks_swq_maxjobs;
- kstat_named_t ks_taskq_threads;
- kstat_named_t ks_taskq_minalloc;
- kstat_named_t ks_taskq_maxalloc;
-} kcf_stats_t;
-
-/*
- * Keep all the information needed by the scheduler from
- * this provider.
- */
-typedef struct kcf_sched_info {
- /* The number of operations dispatched. */
- uint64_t ks_ndispatches;
-
- /* The number of operations that failed. */
- uint64_t ks_nfails;
-
- /* The number of operations that returned CRYPTO_BUSY. */
- uint64_t ks_nbusy_rval;
-
- /* taskq used to dispatch crypto requests */
- taskq_t *ks_taskq;
-} kcf_sched_info_t;
-
-/*
- * pd_irefcnt approximates the number of inflight requests to the
- * provider. Though we increment this counter during registration for
- * other purposes, that base value is mostly same across all providers.
- * So, it is a good measure of the load on a provider when it is not
- * in a busy state. Once a provider notifies it is busy, requests
- * backup in the taskq. So, we use tq_nalloc in that case which gives
- * the number of task entries in the task queue. Note that we do not
- * acquire any locks here as it is not critical to get the exact number
- * and the lock contention may be too costly for this code path.
- */
-#define KCF_PROV_LOAD(pd) ((pd)->pd_state != KCF_PROV_BUSY ? \
- (pd)->pd_irefcnt : (pd)->pd_sched_info.ks_taskq->tq_nalloc)
-
-#define KCF_PROV_INCRSTATS(pd, error) { \
- (pd)->pd_sched_info.ks_ndispatches++; \
- if (error == CRYPTO_BUSY) \
- (pd)->pd_sched_info.ks_nbusy_rval++; \
- else if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED) \
- (pd)->pd_sched_info.ks_nfails++; \
-}
-
/*
* The following two macros should be
@@ -118,7 +55,7 @@ typedef struct kcf_sched_info {
* When impl.h is broken up (bug# 4703218), this will be done. For now,
* we hardcode these values.
*/
-#define KCF_OPS_CLASSSIZE 8
+#define KCF_OPS_CLASSSIZE 4
#define KCF_MAXMECHTAB 32
/*
@@ -126,21 +63,17 @@ typedef struct kcf_sched_info {
* the elements is important.
*
* Routines which get a provider or the list of providers
- * should pick only those that are either in KCF_PROV_READY state
- * or in KCF_PROV_BUSY state.
+ * should pick only those that are in KCF_PROV_READY state.
*/
typedef enum {
KCF_PROV_ALLOCATED = 1,
- KCF_PROV_UNVERIFIED,
- KCF_PROV_VERIFICATION_FAILED,
/*
* state < KCF_PROV_READY means the provider can not
* be used at all.
*/
KCF_PROV_READY,
- KCF_PROV_BUSY,
/*
- * state > KCF_PROV_BUSY means the provider can not
+ * state > KCF_PROV_READY means the provider can not
* be used for new requests.
*/
KCF_PROV_FAILED,
@@ -153,112 +86,66 @@ typedef enum {
KCF_PROV_FREED
} kcf_prov_state_t;
-#define KCF_IS_PROV_UNVERIFIED(pd) ((pd)->pd_state == KCF_PROV_UNVERIFIED)
-#define KCF_IS_PROV_USABLE(pd) ((pd)->pd_state == KCF_PROV_READY || \
- (pd)->pd_state == KCF_PROV_BUSY)
+#define KCF_IS_PROV_USABLE(pd) ((pd)->pd_state == KCF_PROV_READY)
#define KCF_IS_PROV_REMOVED(pd) ((pd)->pd_state >= KCF_PROV_REMOVED)
-/* Internal flags valid for pd_flags field */
-#define KCF_PROV_RESTRICTED 0x40000000
-#define KCF_LPROV_MEMBER 0x80000000 /* is member of a logical provider */
-
/*
* A provider descriptor structure. There is one such structure per
* provider. It is allocated and initialized at registration time and
* freed when the provider unregisters.
*
- * pd_prov_type: Provider type, hardware or software
- * pd_sid: Session ID of the provider used by kernel clients.
- * This is valid only for session-oriented providers.
* pd_refcnt: Reference counter to this provider descriptor
* pd_irefcnt: References held by the framework internal structs
- * pd_lock: lock protects pd_state and pd_provider_list
+ * pd_lock: lock protects pd_state
* pd_state: State value of the provider
- * pd_provider_list: Used to cross-reference logical providers and their
- * members. Not used for software providers.
- * pd_resume_cv: cv to wait for state to change from KCF_PROV_BUSY
- * pd_prov_handle: Provider handle specified by provider
* pd_ops_vector: The ops vector specified by Provider
* pd_mech_indx: Lookup table which maps a core framework mechanism
* number to an index in pd_mechanisms array
* pd_mechanisms: Array of mechanisms supported by the provider, specified
* by the provider during registration
- * pd_sched_info: Scheduling information associated with the provider
* pd_mech_list_count: The number of entries in pi_mechanisms, specified
* by the provider during registration
- * pd_name: Device name or module name
- * pd_instance: Device instance
- * pd_module_id: Module ID returned by modload
- * pd_mctlp: Pointer to modctl structure for this provider
* pd_remove_cv: cv to wait on while the provider queue drains
* pd_description: Provider description string
- * pd_flags bitwise OR of pi_flags from crypto_provider_info_t
- * and other internal flags defined above.
- * pd_hash_limit Maximum data size that hash mechanisms of this provider
- * can support.
* pd_kcf_prov_handle: KCF-private handle assigned by KCF
* pd_prov_id: Identification # assigned by KCF to provider
- * pd_kstat: kstat associated with the provider
- * pd_ks_data: kstat data
*/
typedef struct kcf_provider_desc {
- crypto_provider_type_t pd_prov_type;
- crypto_session_id_t pd_sid;
uint_t pd_refcnt;
uint_t pd_irefcnt;
kmutex_t pd_lock;
kcf_prov_state_t pd_state;
- struct kcf_provider_list *pd_provider_list;
- kcondvar_t pd_resume_cv;
- crypto_provider_handle_t pd_prov_handle;
- crypto_ops_t *pd_ops_vector;
+ const crypto_ops_t *pd_ops_vector;
ushort_t pd_mech_indx[KCF_OPS_CLASSSIZE]\
[KCF_MAXMECHTAB];
- crypto_mech_info_t *pd_mechanisms;
- kcf_sched_info_t pd_sched_info;
+ const crypto_mech_info_t *pd_mechanisms;
uint_t pd_mech_list_count;
- // char *pd_name;
- // uint_t pd_instance;
- // int pd_module_id;
- // struct modctl *pd_mctlp;
kcondvar_t pd_remove_cv;
- char *pd_description;
- uint_t pd_flags;
- uint_t pd_hash_limit;
+ const char *pd_description;
crypto_kcf_provider_handle_t pd_kcf_prov_handle;
crypto_provider_id_t pd_prov_id;
- kstat_t *pd_kstat;
- kcf_prov_stats_t pd_ks_data;
} kcf_provider_desc_t;
-/* useful for making a list of providers */
-typedef struct kcf_provider_list {
- struct kcf_provider_list *pl_next;
- struct kcf_provider_desc *pl_provider;
-} kcf_provider_list_t;
-
-/* atomic operations in linux implicitly form a memory barrier */
-#define membar_exit()
-
/*
* If a component has a reference to a kcf_provider_desc_t,
* it REFHOLD()s. A new provider descriptor which is referenced only
* by the providers table has a reference counter of one.
*/
-#define KCF_PROV_REFHOLD(desc) { \
- atomic_add_32(&(desc)->pd_refcnt, 1); \
- ASSERT((desc)->pd_refcnt != 0); \
+#define KCF_PROV_REFHOLD(desc) { \
+ int newval = atomic_add_32_nv(&(desc)->pd_refcnt, 1); \
+ ASSERT(newval != 0); \
}
-#define KCF_PROV_IREFHOLD(desc) { \
- atomic_add_32(&(desc)->pd_irefcnt, 1); \
- ASSERT((desc)->pd_irefcnt != 0); \
+#define KCF_PROV_IREFHOLD(desc) { \
+ int newval = atomic_add_32_nv(&(desc)->pd_irefcnt, 1); \
+ ASSERT(newval != 0); \
}
#define KCF_PROV_IREFRELE(desc) { \
- ASSERT((desc)->pd_irefcnt != 0); \
- membar_exit(); \
- if (atomic_add_32_nv(&(desc)->pd_irefcnt, -1) == 0) { \
+ membar_producer(); \
+ int newval = atomic_add_32_nv(&(desc)->pd_irefcnt, -1); \
+ ASSERT(newval != -1); \
+ if (newval == 0) { \
cv_broadcast(&(desc)->pd_remove_cv); \
} \
}
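The reworked hold/release macros read the counter exactly once, via the value-returning atomic, instead of asserting on a separate non-atomic re-read as the old versions did. Stripped of macro plumbing, the release path is:

	int newval = atomic_add_32_nv(&desc->pd_irefcnt, -1);
	ASSERT(newval != -1);		/* would indicate underflow */
	if (newval == 0)
		cv_broadcast(&desc->pd_remove_cv);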
@@ -266,22 +153,15 @@ typedef struct kcf_provider_list {
#define KCF_PROV_REFHELD(desc) ((desc)->pd_refcnt >= 1)
#define KCF_PROV_REFRELE(desc) { \
- ASSERT((desc)->pd_refcnt != 0); \
- membar_exit(); \
- if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) { \
+ membar_producer(); \
+ int newval = atomic_add_32_nv(&(desc)->pd_refcnt, -1); \
+ ASSERT(newval != -1); \
+ if (newval == 0) { \
kcf_provider_zero_refcnt((desc)); \
} \
}
-/* list of crypto_mech_info_t valid as the second mech in a dual operation */
-
-typedef struct crypto_mech_info_list {
- struct crypto_mech_info_list *ml_next;
- crypto_mech_type_t ml_kcf_mechid; /* KCF's id */
- crypto_mech_info_t ml_mech_info;
-} crypto_mech_info_list_t;
-
/*
* An element in a mechanism provider descriptors chain.
* The kcf_prov_mech_desc_t is duplicated in every chain the provider belongs
@@ -293,15 +173,9 @@ typedef struct kcf_prov_mech_desc {
struct kcf_mech_entry *pm_me; /* Back to the head */
struct kcf_prov_mech_desc *pm_next; /* Next in the chain */
crypto_mech_info_t pm_mech_info; /* Provider mech info */
- crypto_mech_info_list_t *pm_mi_list; /* list for duals */
kcf_provider_desc_t *pm_prov_desc; /* Common desc. */
} kcf_prov_mech_desc_t;
-/* and the notation shortcuts ... */
-#define pm_provider_type pm_prov_desc.pd_provider_type
-#define pm_provider_handle pm_prov_desc.pd_provider_handle
-#define pm_ops_vector pm_prov_desc.pd_ops_vector
-
/*
* A mechanism entry in an xxx_mech_tab[]. me_pad was deemed
* to be unnecessary and removed.
@@ -309,55 +183,18 @@ typedef struct kcf_prov_mech_desc {
typedef struct kcf_mech_entry {
crypto_mech_name_t me_name; /* mechanism name */
crypto_mech_type_t me_mechid; /* Internal id for mechanism */
- kmutex_t me_mutex; /* access protection */
- kcf_prov_mech_desc_t *me_hw_prov_chain; /* list of HW providers */
- kcf_prov_mech_desc_t *me_sw_prov; /* SW provider */
- /*
- * Number of HW providers in the chain. There is only one
- * SW provider. So, we need only a count of HW providers.
- */
- int me_num_hwprov;
- /*
- * When a SW provider is present, this is the generation number that
- * ensures no objects from old SW providers are used in the new one
- */
- uint32_t me_gen_swprov;
- /*
- * threshold for using hardware providers for this mech
- */
- size_t me_threshold;
+ kcf_prov_mech_desc_t *me_sw_prov; /* provider */
+ avl_node_t me_node;
} kcf_mech_entry_t;
/*
- * A policy descriptor structure. It is allocated and initialized
- * when administrative ioctls load disabled mechanisms.
- *
- * pd_prov_type: Provider type, hardware or software
- * pd_name: Device name or module name.
- * pd_instance: Device instance.
- * pd_refcnt: Reference counter for this policy descriptor
- * pd_mutex: Protects array and count of disabled mechanisms.
- * pd_disabled_count: Count of disabled mechanisms.
- * pd_disabled_mechs: Array of disabled mechanisms.
- */
-typedef struct kcf_policy_desc {
- crypto_provider_type_t pd_prov_type;
- char *pd_name;
- uint_t pd_instance;
- uint_t pd_refcnt;
- kmutex_t pd_mutex;
- uint_t pd_disabled_count;
- crypto_mech_name_t *pd_disabled_mechs;
-} kcf_policy_desc_t;
-
-/*
* If a component has a reference to a kcf_policy_desc_t,
* it REFHOLD()s. A new policy descriptor which is referenced only
* by the policy table has a reference count of one.
*/
-#define KCF_POLICY_REFHOLD(desc) { \
- atomic_add_32(&(desc)->pd_refcnt, 1); \
- ASSERT((desc)->pd_refcnt != 0); \
+#define KCF_POLICY_REFHOLD(desc) { \
+ int newval = atomic_add_32_nv(&(desc)->pd_refcnt, 1); \
+ ASSERT(newval != 0); \
}
/*
@@ -365,63 +202,36 @@ typedef struct kcf_policy_desc {
* reference is released, the descriptor is freed.
*/
#define KCF_POLICY_REFRELE(desc) { \
- ASSERT((desc)->pd_refcnt != 0); \
- membar_exit(); \
- if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) \
+ membar_producer(); \
+ int newval = atomic_add_32_nv(&(desc)->pd_refcnt, -1); \
+ ASSERT(newval != -1); \
+ if (newval == 0) \
kcf_policy_free_desc(desc); \
}
/*
- * This entry stores the name of a software module and its
- * mechanisms. The mechanisms are 'hints' that are used to
- * trigger loading of the module.
- */
-typedef struct kcf_soft_conf_entry {
- struct kcf_soft_conf_entry *ce_next;
- char *ce_name;
- crypto_mech_name_t *ce_mechs;
- uint_t ce_count;
-} kcf_soft_conf_entry_t;
-
-extern kmutex_t soft_config_mutex;
-extern kcf_soft_conf_entry_t *soft_config_list;
-
-/*
* Global tables. The sizes are from the predefined PKCS#11 v2.20 mechanisms,
* with a margin of few extra empty entry points
*/
#define KCF_MAXDIGEST 16 /* Digests */
-#define KCF_MAXCIPHER 64 /* Ciphers */
+#define KCF_MAXCIPHER 32 /* Ciphers */
#define KCF_MAXMAC 40 /* Message authentication codes */
-#define KCF_MAXSIGN 24 /* Sign/Verify */
-#define KCF_MAXKEYOPS 116 /* Key generation and derivation */
-#define KCF_MAXMISC 16 /* Others ... */
-
-#define KCF_MAXMECHS KCF_MAXDIGEST + KCF_MAXCIPHER + KCF_MAXMAC + \
- KCF_MAXSIGN + KCF_MAXKEYOPS + \
- KCF_MAXMISC
-extern kcf_mech_entry_t kcf_digest_mechs_tab[];
-extern kcf_mech_entry_t kcf_cipher_mechs_tab[];
-extern kcf_mech_entry_t kcf_mac_mechs_tab[];
-extern kcf_mech_entry_t kcf_sign_mechs_tab[];
-extern kcf_mech_entry_t kcf_keyops_mechs_tab[];
-extern kcf_mech_entry_t kcf_misc_mechs_tab[];
-
-extern kmutex_t kcf_mech_tabs_lock;
+_Static_assert(KCF_MAXCIPHER == KCF_MAXMECHTAB,
+ "KCF_MAXCIPHER != KCF_MAXMECHTAB"); /* See KCF_MAXMECHTAB comment */
typedef enum {
KCF_DIGEST_CLASS = 1,
KCF_CIPHER_CLASS,
KCF_MAC_CLASS,
- KCF_SIGN_CLASS,
- KCF_KEYOPS_CLASS,
- KCF_MISC_CLASS
} kcf_ops_class_t;
#define KCF_FIRST_OPSCLASS KCF_DIGEST_CLASS
-#define KCF_LAST_OPSCLASS KCF_MISC_CLASS
+#define KCF_LAST_OPSCLASS KCF_MAC_CLASS
+_Static_assert(
+ KCF_OPS_CLASSSIZE == (KCF_LAST_OPSCLASS - KCF_FIRST_OPSCLASS + 2),
+ "KCF_OPS_CLASSSIZE doesn't match kcf_ops_class_t!");
/* The table of all the kcf_xxx_mech_tab[]s, indexed by kcf_ops_class */
@@ -430,14 +240,14 @@ typedef struct kcf_mech_entry_tab {
kcf_mech_entry_t *met_tab; /* the table */
} kcf_mech_entry_tab_t;
-extern kcf_mech_entry_tab_t kcf_mech_tabs_tab[];
+extern const kcf_mech_entry_tab_t kcf_mech_tabs_tab[];
#define KCF_MECHID(class, index) \
(((crypto_mech_type_t)(class) << 32) | (crypto_mech_type_t)(index))
#define KCF_MECH2CLASS(mech_type) ((kcf_ops_class_t)((mech_type) >> 32))
-#define KCF_MECH2INDEX(mech_type) ((int)(mech_type))
+#define KCF_MECH2INDEX(mech_type) ((int)((mech_type) & 0xFFFFFFFF))
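+/*
+ * Example: KCF_MECHID(KCF_MAC_CLASS, 7) packs the class into the high
+ * 32 bits and the index into the low 32 bits; KCF_MECH2CLASS() and
+ * KCF_MECH2INDEX() then recover KCF_MAC_CLASS and 7 respectively.
+ */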
#define KCF_TO_PROV_MECH_INDX(pd, mech_type) \
((pd)->pd_mech_indx[KCF_MECH2CLASS(mech_type)] \
@@ -449,58 +259,6 @@ extern kcf_mech_entry_tab_t kcf_mech_tabs_tab[];
#define KCF_TO_PROV_MECHNUM(pd, mech_type) \
(KCF_TO_PROV_MECHINFO(pd, mech_type).cm_mech_number)
-#define KCF_CAN_SHARE_OPSTATE(pd, mech_type) \
- ((KCF_TO_PROV_MECHINFO(pd, mech_type).cm_mech_flags) & \
- CRYPTO_CAN_SHARE_OPSTATE)
-
-/* ps_refcnt is protected by cm_lock in the crypto_minor structure */
-typedef struct crypto_provider_session {
- struct crypto_provider_session *ps_next;
- crypto_session_id_t ps_session;
- kcf_provider_desc_t *ps_provider;
- kcf_provider_desc_t *ps_real_provider;
- uint_t ps_refcnt;
-} crypto_provider_session_t;
-
-typedef struct crypto_session_data {
- kmutex_t sd_lock;
- kcondvar_t sd_cv;
- uint32_t sd_flags;
- int sd_pre_approved_amount;
- crypto_ctx_t *sd_digest_ctx;
- crypto_ctx_t *sd_encr_ctx;
- crypto_ctx_t *sd_decr_ctx;
- crypto_ctx_t *sd_sign_ctx;
- crypto_ctx_t *sd_verify_ctx;
- crypto_ctx_t *sd_sign_recover_ctx;
- crypto_ctx_t *sd_verify_recover_ctx;
- kcf_provider_desc_t *sd_provider;
- void *sd_find_init_cookie;
- crypto_provider_session_t *sd_provider_session;
-} crypto_session_data_t;
-
-#define CRYPTO_SESSION_IN_USE 0x00000001
-#define CRYPTO_SESSION_IS_BUSY 0x00000002
-#define CRYPTO_SESSION_IS_CLOSED 0x00000004
-
-#define KCF_MAX_PIN_LEN 1024
-
-/*
- * Per-minor info.
- *
- * cm_lock protects everything in this structure except for cm_refcnt.
- */
-typedef struct crypto_minor {
- uint_t cm_refcnt;
- kmutex_t cm_lock;
- kcondvar_t cm_cv;
- crypto_session_data_t **cm_session_table;
- uint_t cm_session_table_count;
- kcf_provider_desc_t **cm_provider_array;
- uint_t cm_provider_count;
- crypto_provider_session_t *cm_provider_session;
-} crypto_minor_t;
-
/*
* Return codes for internal functions
*/
@@ -512,849 +270,118 @@ typedef struct crypto_minor {
#define KCF_INVALID_INDX ((ushort_t)-1)
/*
- * kCF internal mechanism and function group for tracking RNG providers.
- */
-#define SUN_RANDOM "random"
-#define CRYPTO_FG_RANDOM 0x80000000 /* generate_random() */
-
-/*
* Wrappers for ops vectors. In the wrapper definitions below, the pd
* argument always corresponds to a pointer to a provider descriptor
* of type kcf_prov_desc_t.
*/
-#define KCF_PROV_CONTROL_OPS(pd) ((pd)->pd_ops_vector->co_control_ops)
-#define KCF_PROV_CTX_OPS(pd) ((pd)->pd_ops_vector->co_ctx_ops)
#define KCF_PROV_DIGEST_OPS(pd) ((pd)->pd_ops_vector->co_digest_ops)
#define KCF_PROV_CIPHER_OPS(pd) ((pd)->pd_ops_vector->co_cipher_ops)
#define KCF_PROV_MAC_OPS(pd) ((pd)->pd_ops_vector->co_mac_ops)
-#define KCF_PROV_SIGN_OPS(pd) ((pd)->pd_ops_vector->co_sign_ops)
-#define KCF_PROV_VERIFY_OPS(pd) ((pd)->pd_ops_vector->co_verify_ops)
-#define KCF_PROV_DUAL_OPS(pd) ((pd)->pd_ops_vector->co_dual_ops)
-#define KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) \
- ((pd)->pd_ops_vector->co_dual_cipher_mac_ops)
-#define KCF_PROV_RANDOM_OPS(pd) ((pd)->pd_ops_vector->co_random_ops)
-#define KCF_PROV_SESSION_OPS(pd) ((pd)->pd_ops_vector->co_session_ops)
-#define KCF_PROV_OBJECT_OPS(pd) ((pd)->pd_ops_vector->co_object_ops)
-#define KCF_PROV_KEY_OPS(pd) ((pd)->pd_ops_vector->co_key_ops)
-#define KCF_PROV_PROVIDER_OPS(pd) ((pd)->pd_ops_vector->co_provider_ops)
-#define KCF_PROV_MECH_OPS(pd) ((pd)->pd_ops_vector->co_mech_ops)
-#define KCF_PROV_NOSTORE_KEY_OPS(pd) \
- ((pd)->pd_ops_vector->co_nostore_key_ops)
-
-/*
- * Wrappers for crypto_control_ops(9S) entry points.
- */
-
-#define KCF_PROV_STATUS(pd, status) ( \
- (KCF_PROV_CONTROL_OPS(pd) && \
- KCF_PROV_CONTROL_OPS(pd)->provider_status) ? \
- KCF_PROV_CONTROL_OPS(pd)->provider_status( \
- (pd)->pd_prov_handle, status) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_ctx_ops(9S) entry points.
- */
-
-#define KCF_PROV_CREATE_CTX_TEMPLATE(pd, mech, key, template, size, req) ( \
- (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->create_ctx_template) ? \
- KCF_PROV_CTX_OPS(pd)->create_ctx_template( \
- (pd)->pd_prov_handle, mech, key, template, size, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_FREE_CONTEXT(pd, ctx) ( \
- (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->free_context) ? \
- KCF_PROV_CTX_OPS(pd)->free_context(ctx) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_COPYIN_MECH(pd, umech, kmech, errorp, mode) ( \
- (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->copyin_mechanism) ? \
- KCF_PROV_MECH_OPS(pd)->copyin_mechanism( \
- (pd)->pd_prov_handle, umech, kmech, errorp, mode) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_COPYOUT_MECH(pd, kmech, umech, errorp, mode) ( \
- (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->copyout_mechanism) ? \
- KCF_PROV_MECH_OPS(pd)->copyout_mechanism( \
- (pd)->pd_prov_handle, kmech, umech, errorp, mode) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_FREE_MECH(pd, prov_mech) ( \
- (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->free_mechanism) ? \
- KCF_PROV_MECH_OPS(pd)->free_mechanism( \
- (pd)->pd_prov_handle, prov_mech) : CRYPTO_NOT_SUPPORTED)
+#define KCF_PROV_CTX_OPS(pd) ((pd)->pd_ops_vector->co_ctx_ops)
/*
* Wrappers for crypto_digest_ops(9S) entry points.
*/
-#define KCF_PROV_DIGEST_INIT(pd, ctx, mech, req) ( \
+#define KCF_PROV_DIGEST_INIT(pd, ctx, mech) ( \
(KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_init) ? \
- KCF_PROV_DIGEST_OPS(pd)->digest_init(ctx, mech, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * The _ (underscore) in _digest is needed to avoid replacing the
- * function digest().
- */
-#define KCF_PROV_DIGEST(pd, ctx, data, _digest, req) ( \
- (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest) ? \
- KCF_PROV_DIGEST_OPS(pd)->digest(ctx, data, _digest, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DIGEST_UPDATE(pd, ctx, data, req) ( \
- (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_update) ? \
- KCF_PROV_DIGEST_OPS(pd)->digest_update(ctx, data, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DIGEST_KEY(pd, ctx, key, req) ( \
- (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_key) ? \
- KCF_PROV_DIGEST_OPS(pd)->digest_key(ctx, key, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DIGEST_FINAL(pd, ctx, digest, req) ( \
- (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_final) ? \
- KCF_PROV_DIGEST_OPS(pd)->digest_final(ctx, digest, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DIGEST_ATOMIC(pd, session, mech, data, digest, req) ( \
- (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_atomic) ? \
- KCF_PROV_DIGEST_OPS(pd)->digest_atomic( \
- (pd)->pd_prov_handle, session, mech, data, digest, req) : \
+ KCF_PROV_DIGEST_OPS(pd)->digest_init(ctx, mech) : \
CRYPTO_NOT_SUPPORTED)
/*
* Wrappers for crypto_cipher_ops(9S) entry points.
*/
-#define KCF_PROV_ENCRYPT_INIT(pd, ctx, mech, key, template, req) ( \
+#define KCF_PROV_ENCRYPT_INIT(pd, ctx, mech, key, template) ( \
(KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_init) ? \
- KCF_PROV_CIPHER_OPS(pd)->encrypt_init(ctx, mech, key, template, \
- req) : \
+ KCF_PROV_CIPHER_OPS(pd)->encrypt_init(ctx, mech, key, template) : \
CRYPTO_NOT_SUPPORTED)
-#define KCF_PROV_ENCRYPT(pd, ctx, plaintext, ciphertext, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt) ? \
- KCF_PROV_CIPHER_OPS(pd)->encrypt(ctx, plaintext, ciphertext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext, ciphertext, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_update) ? \
- KCF_PROV_CIPHER_OPS(pd)->encrypt_update(ctx, plaintext, \
- ciphertext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_final) ? \
- KCF_PROV_CIPHER_OPS(pd)->encrypt_final(ctx, ciphertext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_ATOMIC(pd, session, mech, key, plaintext, ciphertext, \
- template, req) ( \
+#define KCF_PROV_ENCRYPT_ATOMIC(pd, mech, key, plaintext, ciphertext, \
+ template) ( \
(KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_atomic) ? \
KCF_PROV_CIPHER_OPS(pd)->encrypt_atomic( \
- (pd)->pd_prov_handle, session, mech, key, plaintext, ciphertext, \
- template, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DECRYPT_INIT(pd, ctx, mech, key, template, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_init) ? \
- KCF_PROV_CIPHER_OPS(pd)->decrypt_init(ctx, mech, key, template, \
- req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DECRYPT(pd, ctx, ciphertext, plaintext, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt) ? \
- KCF_PROV_CIPHER_OPS(pd)->decrypt(ctx, ciphertext, plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext, plaintext, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_update) ? \
- KCF_PROV_CIPHER_OPS(pd)->decrypt_update(ctx, ciphertext, \
- plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext, req) ( \
- (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_final) ? \
- KCF_PROV_CIPHER_OPS(pd)->decrypt_final(ctx, plaintext, req) : \
+ mech, key, plaintext, ciphertext, template) : \
CRYPTO_NOT_SUPPORTED)
-#define KCF_PROV_DECRYPT_ATOMIC(pd, session, mech, key, ciphertext, plaintext, \
- template, req) ( \
+#define KCF_PROV_DECRYPT_ATOMIC(pd, mech, key, ciphertext, plaintext, \
+ template) ( \
(KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_atomic) ? \
KCF_PROV_CIPHER_OPS(pd)->decrypt_atomic( \
- (pd)->pd_prov_handle, session, mech, key, ciphertext, plaintext, \
- template, req) : \
+ mech, key, ciphertext, plaintext, template) : \
CRYPTO_NOT_SUPPORTED)
/*
* Wrappers for crypto_mac_ops(9S) entry points.
*/
-#define KCF_PROV_MAC_INIT(pd, ctx, mech, key, template, req) ( \
+#define KCF_PROV_MAC_INIT(pd, ctx, mech, key, template) ( \
(KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_init) ? \
- KCF_PROV_MAC_OPS(pd)->mac_init(ctx, mech, key, template, req) \
+ KCF_PROV_MAC_OPS(pd)->mac_init(ctx, mech, key, template) \
: CRYPTO_NOT_SUPPORTED)
/*
* The _ (underscore) in _mac is needed to avoid replacing the
* function mac().
*/
-#define KCF_PROV_MAC(pd, ctx, data, _mac, req) ( \
- (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac) ? \
- KCF_PROV_MAC_OPS(pd)->mac(ctx, data, _mac, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_UPDATE(pd, ctx, data, req) ( \
+#define KCF_PROV_MAC_UPDATE(pd, ctx, data) ( \
(KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_update) ? \
- KCF_PROV_MAC_OPS(pd)->mac_update(ctx, data, req) : \
+ KCF_PROV_MAC_OPS(pd)->mac_update(ctx, data) : \
CRYPTO_NOT_SUPPORTED)
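
The underscore comment above is about C preprocessor parameter capture, which is easy to demonstrate in isolation: if the parameter were spelled mac, the structure-member token inside the expansion would be substituted away. Illustrative macros only:

    /* If the parameter were named "mac", every occurrence of that token
     * in the body is replaced -- including the member being called: */
    #define BAD(ops, data, mac)   ((ops)->mac(data, mac))
    /* BAD(o, d, m)  expands to  (o)->m(d, m)   -- member name lost */

    #define GOOD(ops, data, _mac) ((ops)->mac(data, _mac))
    /* GOOD(o, d, m) expands to  (o)->mac(d, m) -- as intended */
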
-#define KCF_PROV_MAC_FINAL(pd, ctx, mac, req) ( \
+#define KCF_PROV_MAC_FINAL(pd, ctx, mac) ( \
(KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_final) ? \
- KCF_PROV_MAC_OPS(pd)->mac_final(ctx, mac, req) : \
+ KCF_PROV_MAC_OPS(pd)->mac_final(ctx, mac) : \
CRYPTO_NOT_SUPPORTED)
-#define KCF_PROV_MAC_ATOMIC(pd, session, mech, key, data, mac, template, \
- req) ( \
+#define KCF_PROV_MAC_ATOMIC(pd, mech, key, data, mac, template) ( \
(KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_atomic) ? \
KCF_PROV_MAC_OPS(pd)->mac_atomic( \
- (pd)->pd_prov_handle, session, mech, key, data, mac, template, \
- req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_VERIFY_ATOMIC(pd, session, mech, key, data, mac, \
- template, req) ( \
- (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_verify_atomic) ? \
- KCF_PROV_MAC_OPS(pd)->mac_verify_atomic( \
- (pd)->pd_prov_handle, session, mech, key, data, mac, template, \
- req) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_sign_ops(9S) entry points.
- */
-
-#define KCF_PROV_SIGN_INIT(pd, ctx, mech, key, template, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_init) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_init( \
- ctx, mech, key, template, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN(pd, ctx, data, sig, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign) ? \
- KCF_PROV_SIGN_OPS(pd)->sign(ctx, data, sig, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN_UPDATE(pd, ctx, data, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_update) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_update(ctx, data, req) : \
+ mech, key, data, mac, template) : \
CRYPTO_NOT_SUPPORTED)
-#define KCF_PROV_SIGN_FINAL(pd, ctx, sig, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_final) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_final(ctx, sig, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN_ATOMIC(pd, session, mech, key, data, template, \
- sig, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_atomic) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_atomic( \
- (pd)->pd_prov_handle, session, mech, key, data, sig, template, \
- req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN_RECOVER_INIT(pd, ctx, mech, key, template, \
- req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_recover_init) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_recover_init(ctx, mech, key, template, \
- req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN_RECOVER(pd, ctx, data, sig, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_recover) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_recover(ctx, data, sig, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN_RECOVER_ATOMIC(pd, session, mech, key, data, template, \
- sig, req) ( \
- (KCF_PROV_SIGN_OPS(pd) && \
- KCF_PROV_SIGN_OPS(pd)->sign_recover_atomic) ? \
- KCF_PROV_SIGN_OPS(pd)->sign_recover_atomic( \
- (pd)->pd_prov_handle, session, mech, key, data, sig, template, \
- req) : CRYPTO_NOT_SUPPORTED)
-
/*
- * Wrappers for crypto_verify_ops(9S) entry points.
- */
-
-#define KCF_PROV_VERIFY_INIT(pd, ctx, mech, key, template, req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_init) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_init(ctx, mech, key, template, \
- req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_VERIFY(pd, ctx, data, sig, req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->do_verify) ? \
- KCF_PROV_VERIFY_OPS(pd)->do_verify(ctx, data, sig, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_VERIFY_UPDATE(pd, ctx, data, req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_update) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_update(ctx, data, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_VERIFY_FINAL(pd, ctx, sig, req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_final) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_final(ctx, sig, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_VERIFY_ATOMIC(pd, session, mech, key, data, template, sig, \
- req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_atomic) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_atomic( \
- (pd)->pd_prov_handle, session, mech, key, data, sig, template, \
- req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_VERIFY_RECOVER_INIT(pd, ctx, mech, key, template, \
- req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && \
- KCF_PROV_VERIFY_OPS(pd)->verify_recover_init) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_recover_init(ctx, mech, key, \
- template, req) : CRYPTO_NOT_SUPPORTED)
-
-/* verify_recover() CSPI routine has different argument order than verify() */
-#define KCF_PROV_VERIFY_RECOVER(pd, ctx, sig, data, req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_recover) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_recover(ctx, sig, data, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * verify_recover_atomic() CSPI routine has different argument order
- * than verify_atomic().
- */
-#define KCF_PROV_VERIFY_RECOVER_ATOMIC(pd, session, mech, key, sig, \
- template, data, req) ( \
- (KCF_PROV_VERIFY_OPS(pd) && \
- KCF_PROV_VERIFY_OPS(pd)->verify_recover_atomic) ? \
- KCF_PROV_VERIFY_OPS(pd)->verify_recover_atomic( \
- (pd)->pd_prov_handle, session, mech, key, sig, data, template, \
- req) : CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_dual_ops(9S) entry points.
- */
-
-#define KCF_PROV_DIGEST_ENCRYPT_UPDATE(digest_ctx, encrypt_ctx, plaintext, \
- ciphertext, req) ( \
- (KCF_PROV_DUAL_OPS(pd) && \
- KCF_PROV_DUAL_OPS(pd)->digest_encrypt_update) ? \
- KCF_PROV_DUAL_OPS(pd)->digest_encrypt_update( \
- digest_ctx, encrypt_ctx, plaintext, ciphertext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DECRYPT_DIGEST_UPDATE(decrypt_ctx, digest_ctx, ciphertext, \
- plaintext, req) ( \
- (KCF_PROV_DUAL_OPS(pd) && \
- KCF_PROV_DUAL_OPS(pd)->decrypt_digest_update) ? \
- KCF_PROV_DUAL_OPS(pd)->decrypt_digest_update( \
- decrypt_ctx, digest_ctx, ciphertext, plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SIGN_ENCRYPT_UPDATE(sign_ctx, encrypt_ctx, plaintext, \
- ciphertext, req) ( \
- (KCF_PROV_DUAL_OPS(pd) && \
- KCF_PROV_DUAL_OPS(pd)->sign_encrypt_update) ? \
- KCF_PROV_DUAL_OPS(pd)->sign_encrypt_update( \
- sign_ctx, encrypt_ctx, plaintext, ciphertext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_DECRYPT_VERIFY_UPDATE(decrypt_ctx, verify_ctx, ciphertext, \
- plaintext, req) ( \
- (KCF_PROV_DUAL_OPS(pd) && \
- KCF_PROV_DUAL_OPS(pd)->decrypt_verify_update) ? \
- KCF_PROV_DUAL_OPS(pd)->decrypt_verify_update( \
- decrypt_ctx, verify_ctx, ciphertext, plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_dual_cipher_mac_ops(9S) entry points.
- */
-
-#define KCF_PROV_ENCRYPT_MAC_INIT(pd, ctx, encr_mech, encr_key, mac_mech, \
- mac_key, encr_ctx_template, mac_ctx_template, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_init) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_init( \
- ctx, encr_mech, encr_key, mac_mech, mac_key, encr_ctx_template, \
- mac_ctx_template, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_MAC(pd, ctx, plaintext, ciphertext, mac, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac( \
- ctx, plaintext, ciphertext, mac, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_MAC_UPDATE(pd, ctx, plaintext, ciphertext, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_update) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_update( \
- ctx, plaintext, ciphertext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_MAC_FINAL(pd, ctx, ciphertext, mac, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_final) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_final( \
- ctx, ciphertext, mac, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_ENCRYPT_MAC_ATOMIC(pd, session, encr_mech, encr_key, \
- mac_mech, mac_key, plaintext, ciphertext, mac, \
- encr_ctx_template, mac_ctx_template, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_atomic) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_atomic( \
- (pd)->pd_prov_handle, session, encr_mech, encr_key, \
- mac_mech, mac_key, plaintext, ciphertext, mac, \
- encr_ctx_template, mac_ctx_template, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_DECRYPT_INIT(pd, ctx, mac_mech, mac_key, decr_mech, \
- decr_key, mac_ctx_template, decr_ctx_template, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_init) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_init( \
- ctx, mac_mech, mac_key, decr_mech, decr_key, mac_ctx_template, \
- decr_ctx_template, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_DECRYPT(pd, ctx, ciphertext, mac, plaintext, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt( \
- ctx, ciphertext, mac, plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_DECRYPT_UPDATE(pd, ctx, ciphertext, plaintext, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_update) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_update( \
- ctx, ciphertext, plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_DECRYPT_FINAL(pd, ctx, mac, plaintext, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_final) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_final( \
- ctx, mac, plaintext, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_DECRYPT_ATOMIC(pd, session, mac_mech, mac_key, \
- decr_mech, decr_key, ciphertext, mac, plaintext, \
- mac_ctx_template, decr_ctx_template, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_atomic) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_atomic( \
- (pd)->pd_prov_handle, session, mac_mech, mac_key, \
- decr_mech, decr_key, ciphertext, mac, plaintext, \
- mac_ctx_template, decr_ctx_template, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_MAC_VERIFY_DECRYPT_ATOMIC(pd, session, mac_mech, mac_key, \
- decr_mech, decr_key, ciphertext, mac, plaintext, \
- mac_ctx_template, decr_ctx_template, req) ( \
- (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_verify_decrypt_atomic \
- != NULL) ? \
- KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_verify_decrypt_atomic( \
- (pd)->pd_prov_handle, session, mac_mech, mac_key, \
- decr_mech, decr_key, ciphertext, mac, plaintext, \
- mac_ctx_template, decr_ctx_template, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_random_number_ops(9S) entry points.
- */
-
-#define KCF_PROV_SEED_RANDOM(pd, session, buf, len, est, flags, req) ( \
- (KCF_PROV_RANDOM_OPS(pd) && KCF_PROV_RANDOM_OPS(pd)->seed_random) ? \
- KCF_PROV_RANDOM_OPS(pd)->seed_random((pd)->pd_prov_handle, \
- session, buf, len, est, flags, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_GENERATE_RANDOM(pd, session, buf, len, req) ( \
- (KCF_PROV_RANDOM_OPS(pd) && \
- KCF_PROV_RANDOM_OPS(pd)->generate_random) ? \
- KCF_PROV_RANDOM_OPS(pd)->generate_random((pd)->pd_prov_handle, \
- session, buf, len, req) : CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_session_ops(9S) entry points.
- *
- * ops_pd is the provider descriptor that supplies the ops_vector.
- * pd is the descriptor that supplies the provider handle.
- * Only session open/close needs two handles.
- */
-
-#define KCF_PROV_SESSION_OPEN(ops_pd, session, req, pd) ( \
- (KCF_PROV_SESSION_OPS(ops_pd) && \
- KCF_PROV_SESSION_OPS(ops_pd)->session_open) ? \
- KCF_PROV_SESSION_OPS(ops_pd)->session_open((pd)->pd_prov_handle, \
- session, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SESSION_CLOSE(ops_pd, session, req, pd) ( \
- (KCF_PROV_SESSION_OPS(ops_pd) && \
- KCF_PROV_SESSION_OPS(ops_pd)->session_close) ? \
- KCF_PROV_SESSION_OPS(ops_pd)->session_close((pd)->pd_prov_handle, \
- session, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SESSION_LOGIN(pd, session, user_type, pin, len, req) ( \
- (KCF_PROV_SESSION_OPS(pd) && \
- KCF_PROV_SESSION_OPS(pd)->session_login) ? \
- KCF_PROV_SESSION_OPS(pd)->session_login((pd)->pd_prov_handle, \
- session, user_type, pin, len, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SESSION_LOGOUT(pd, session, req) ( \
- (KCF_PROV_SESSION_OPS(pd) && \
- KCF_PROV_SESSION_OPS(pd)->session_logout) ? \
- KCF_PROV_SESSION_OPS(pd)->session_logout((pd)->pd_prov_handle, \
- session, req) : CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_object_ops(9S) entry points.
- */
-
-#define KCF_PROV_OBJECT_CREATE(pd, session, template, count, object, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_create) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_create((pd)->pd_prov_handle, \
- session, template, count, object, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_COPY(pd, session, object, template, count, \
- new_object, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_copy) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_copy((pd)->pd_prov_handle, \
- session, object, template, count, new_object, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_DESTROY(pd, session, object, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_destroy) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_destroy((pd)->pd_prov_handle, \
- session, object, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_GET_SIZE(pd, session, object, size, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && \
- KCF_PROV_OBJECT_OPS(pd)->object_get_size) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_get_size((pd)->pd_prov_handle, \
- session, object, size, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_GET_ATTRIBUTE_VALUE(pd, session, object, template, \
- count, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && \
- KCF_PROV_OBJECT_OPS(pd)->object_get_attribute_value) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_get_attribute_value( \
- (pd)->pd_prov_handle, session, object, template, count, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_SET_ATTRIBUTE_VALUE(pd, session, object, template, \
- count, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && \
- KCF_PROV_OBJECT_OPS(pd)->object_set_attribute_value) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_set_attribute_value( \
- (pd)->pd_prov_handle, session, object, template, count, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_FIND_INIT(pd, session, template, count, ppriv, \
- req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && \
- KCF_PROV_OBJECT_OPS(pd)->object_find_init) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_find_init((pd)->pd_prov_handle, \
- session, template, count, ppriv, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_FIND(pd, ppriv, objects, max_objects, object_count, \
- req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_find) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_find( \
- (pd)->pd_prov_handle, ppriv, objects, max_objects, object_count, \
- req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_OBJECT_FIND_FINAL(pd, ppriv, req) ( \
- (KCF_PROV_OBJECT_OPS(pd) && \
- KCF_PROV_OBJECT_OPS(pd)->object_find_final) ? \
- KCF_PROV_OBJECT_OPS(pd)->object_find_final( \
- (pd)->pd_prov_handle, ppriv, req) : CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_key_ops(9S) entry points.
+ * Wrappers for crypto_ctx_ops(9S) entry points.
*/
-#define KCF_PROV_KEY_GENERATE(pd, session, mech, template, count, object, \
- req) ( \
- (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_generate) ? \
- KCF_PROV_KEY_OPS(pd)->key_generate((pd)->pd_prov_handle, \
- session, mech, template, count, object, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_KEY_GENERATE_PAIR(pd, session, mech, pub_template, \
- pub_count, priv_template, priv_count, pub_key, priv_key, req) ( \
- (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_generate_pair) ? \
- KCF_PROV_KEY_OPS(pd)->key_generate_pair((pd)->pd_prov_handle, \
- session, mech, pub_template, pub_count, priv_template, \
- priv_count, pub_key, priv_key, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_KEY_WRAP(pd, session, mech, wrapping_key, key, wrapped_key, \
- wrapped_key_len, req) ( \
- (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_wrap) ? \
- KCF_PROV_KEY_OPS(pd)->key_wrap((pd)->pd_prov_handle, \
- session, mech, wrapping_key, key, wrapped_key, wrapped_key_len, \
- req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_KEY_UNWRAP(pd, session, mech, unwrapping_key, wrapped_key, \
- wrapped_key_len, template, count, key, req) ( \
- (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_unwrap) ? \
- KCF_PROV_KEY_OPS(pd)->key_unwrap((pd)->pd_prov_handle, \
- session, mech, unwrapping_key, wrapped_key, wrapped_key_len, \
- template, count, key, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_KEY_DERIVE(pd, session, mech, base_key, template, count, \
- key, req) ( \
- (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_derive) ? \
- KCF_PROV_KEY_OPS(pd)->key_derive((pd)->pd_prov_handle, \
- session, mech, base_key, template, count, key, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_KEY_CHECK(pd, mech, key) ( \
- (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_check) ? \
- KCF_PROV_KEY_OPS(pd)->key_check((pd)->pd_prov_handle, mech, key) : \
+#define KCF_PROV_CREATE_CTX_TEMPLATE(pd, mech, key, template, size) ( \
+ (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->create_ctx_template) ? \
+ KCF_PROV_CTX_OPS(pd)->create_ctx_template( \
+ mech, key, template, size) : \
CRYPTO_NOT_SUPPORTED)
-/*
- * Wrappers for crypto_provider_management_ops(9S) entry points.
- *
- * ops_pd is the provider descriptor that supplies the ops_vector.
- * pd is the descriptor that supplies the provider handle.
- * Only ext_info needs two handles.
- */
-
-#define KCF_PROV_EXT_INFO(ops_pd, provext_info, req, pd) ( \
- (KCF_PROV_PROVIDER_OPS(ops_pd) && \
- KCF_PROV_PROVIDER_OPS(ops_pd)->ext_info) ? \
- KCF_PROV_PROVIDER_OPS(ops_pd)->ext_info((pd)->pd_prov_handle, \
- provext_info, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_INIT_TOKEN(pd, pin, pin_len, label, req) ( \
- (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->init_token) ? \
- KCF_PROV_PROVIDER_OPS(pd)->init_token((pd)->pd_prov_handle, \
- pin, pin_len, label, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_INIT_PIN(pd, session, pin, pin_len, req) ( \
- (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->init_pin) ? \
- KCF_PROV_PROVIDER_OPS(pd)->init_pin((pd)->pd_prov_handle, \
- session, pin, pin_len, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_SET_PIN(pd, session, old_pin, old_len, new_pin, new_len, \
- req) ( \
- (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->set_pin) ? \
- KCF_PROV_PROVIDER_OPS(pd)->set_pin((pd)->pd_prov_handle, \
- session, old_pin, old_len, new_pin, new_len, req) : \
- CRYPTO_NOT_SUPPORTED)
-
-/*
- * Wrappers for crypto_nostore_key_ops(9S) entry points.
- */
-
-#define KCF_PROV_NOSTORE_KEY_GENERATE(pd, session, mech, template, count, \
- out_template, out_count, req) ( \
- (KCF_PROV_NOSTORE_KEY_OPS(pd) && \
- KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate) ? \
- KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate( \
- (pd)->pd_prov_handle, session, mech, template, count, \
- out_template, out_count, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_NOSTORE_KEY_GENERATE_PAIR(pd, session, mech, pub_template, \
- pub_count, priv_template, priv_count, out_pub_template, \
- out_pub_count, out_priv_template, out_priv_count, req) ( \
- (KCF_PROV_NOSTORE_KEY_OPS(pd) && \
- KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate_pair) ? \
- KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate_pair( \
- (pd)->pd_prov_handle, session, mech, pub_template, pub_count, \
- priv_template, priv_count, out_pub_template, out_pub_count, \
- out_priv_template, out_priv_count, req) : CRYPTO_NOT_SUPPORTED)
-
-#define KCF_PROV_NOSTORE_KEY_DERIVE(pd, session, mech, base_key, template, \
- count, out_template, out_count, req) ( \
- (KCF_PROV_NOSTORE_KEY_OPS(pd) && \
- KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_derive) ? \
- KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_derive( \
- (pd)->pd_prov_handle, session, mech, base_key, template, count, \
- out_template, out_count, req) : CRYPTO_NOT_SUPPORTED)
-
-/*
- * The following routines are exported by the kcf module (/kernel/misc/kcf)
- * to the crypto and cryptoadmin modules.
- */
-
-/* Digest/mac/cipher entry points that take a provider descriptor and session */
-extern int crypto_digest_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-extern int crypto_mac_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-extern int crypto_encrypt_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-extern int crypto_decrypt_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-
-/* Other private digest/mac/cipher entry points not exported through k-API */
-extern int crypto_digest_key_prov(crypto_context_t, crypto_key_t *,
- crypto_call_req_t *);
-
-/* Private sign entry points exported by KCF */
-extern int crypto_sign_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-extern int crypto_sign_recover_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-/* Private verify entry points exported by KCF */
-extern int crypto_verify_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-extern int crypto_verify_recover_single(crypto_context_t, crypto_data_t *,
- crypto_data_t *, crypto_call_req_t *);
-
-/* Private dual operations entry points exported by KCF */
-extern int crypto_digest_encrypt_update(crypto_context_t, crypto_context_t,
- crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
-extern int crypto_decrypt_digest_update(crypto_context_t, crypto_context_t,
- crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
-extern int crypto_sign_encrypt_update(crypto_context_t, crypto_context_t,
- crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
-extern int crypto_decrypt_verify_update(crypto_context_t, crypto_context_t,
- crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
-
-/* Random Number Generation */
-int crypto_seed_random(crypto_provider_handle_t provider, uchar_t *buf,
- size_t len, crypto_call_req_t *req);
-int crypto_generate_random(crypto_provider_handle_t provider, uchar_t *buf,
- size_t len, crypto_call_req_t *req);
-
-/* Provider Management */
-int crypto_get_provider_info(crypto_provider_id_t id,
- crypto_provider_info_t **info, crypto_call_req_t *req);
-int crypto_get_provider_mechanisms(crypto_minor_t *, crypto_provider_id_t id,
- uint_t *count, crypto_mech_name_t **list);
-int crypto_init_token(crypto_provider_handle_t provider, char *pin,
- size_t pin_len, char *label, crypto_call_req_t *);
-int crypto_init_pin(crypto_provider_handle_t provider, char *pin,
- size_t pin_len, crypto_call_req_t *req);
-int crypto_set_pin(crypto_provider_handle_t provider, char *old_pin,
- size_t old_len, char *new_pin, size_t new_len, crypto_call_req_t *req);
-void crypto_free_provider_list(crypto_provider_entry_t *list, uint_t count);
-void crypto_free_provider_info(crypto_provider_info_t *info);
+#define KCF_PROV_FREE_CONTEXT(pd, ctx) ( \
+ (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->free_context) ? \
+ KCF_PROV_CTX_OPS(pd)->free_context(ctx) : CRYPTO_NOT_SUPPORTED)
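
Only two ctx-ops hooks survive the trim, and together they bound a context's lifetime: create_ctx_template() builds a reusable per-(mechanism, key) template up front, and free_context() retires each per-operation context. A hedged sketch of the calling pattern, with stand-in types and elided operation guts rather than the real ICP machinery:

    /* Editor's sketch; ctx_ops_t and friends are illustrative stand-ins. */
    #include <stdlib.h>

    typedef struct ctx_ops {
        int (*create_ctx_template)(int mech, const void *key,
            void **tmpl, size_t *size);
        int (*free_context)(void *ctx);
    } ctx_ops_t;

    static int
    run_ops(ctx_ops_t *ops, int mech, const void *key, int nops)
    {
        void *tmpl;
        size_t size;

        /* Template: built once, shared by every operation below. */
        if (ops->create_ctx_template(mech, key, &tmpl, &size) != 0)
            return (-1);

        for (int i = 0; i < nops; i++) {
            void *ctx = NULL;
            /* ... init a context from tmpl, update, final ... */
            (void) ops->free_context(ctx);  /* retire per-op context */
        }
        free(tmpl);    /* assumption: template is heap-allocated */
        return (0);
    }
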
-/* Administrative */
-int crypto_get_dev_list(uint_t *count, crypto_dev_list_entry_t **list);
-int crypto_get_soft_list(uint_t *count, char **list, size_t *len);
-int crypto_get_dev_info(char *name, uint_t instance, uint_t *count,
- crypto_mech_name_t **list);
-int crypto_get_soft_info(caddr_t name, uint_t *count,
- crypto_mech_name_t **list);
-int crypto_load_dev_disabled(char *name, uint_t instance, uint_t count,
- crypto_mech_name_t *list);
-int crypto_load_soft_disabled(caddr_t name, uint_t count,
- crypto_mech_name_t *list);
-int crypto_unload_soft_module(caddr_t path);
-int crypto_load_soft_config(caddr_t name, uint_t count,
- crypto_mech_name_t *list);
-int crypto_load_door(uint_t did);
-void crypto_free_mech_list(crypto_mech_name_t *list, uint_t count);
-void crypto_free_dev_list(crypto_dev_list_entry_t *list, uint_t count);
/* Miscellaneous */
-int crypto_get_mechanism_number(caddr_t name, crypto_mech_type_t *number);
-int crypto_get_function_list(crypto_provider_id_t id,
- crypto_function_list_t **list, int kmflag);
-void crypto_free_function_list(crypto_function_list_t *list);
-int crypto_build_permitted_mech_names(kcf_provider_desc_t *,
- crypto_mech_name_t **, uint_t *, int);
extern void kcf_destroy_mech_tabs(void);
extern void kcf_init_mech_tabs(void);
extern int kcf_add_mech_provider(short, kcf_provider_desc_t *,
kcf_prov_mech_desc_t **);
-extern void kcf_remove_mech_provider(char *, kcf_provider_desc_t *);
+extern void kcf_remove_mech_provider(const char *, kcf_provider_desc_t *);
extern int kcf_get_mech_entry(crypto_mech_type_t, kcf_mech_entry_t **);
-extern kcf_provider_desc_t *kcf_alloc_provider_desc(crypto_provider_info_t *);
+extern kcf_provider_desc_t *kcf_alloc_provider_desc(void);
extern void kcf_provider_zero_refcnt(kcf_provider_desc_t *);
extern void kcf_free_provider_desc(kcf_provider_desc_t *);
-extern void kcf_soft_config_init(void);
-extern int get_sw_provider_for_mech(crypto_mech_name_t, char **);
-extern crypto_mech_type_t crypto_mech2id_common(char *, boolean_t);
extern void undo_register_provider(kcf_provider_desc_t *, boolean_t);
-extern void redo_register_provider(kcf_provider_desc_t *);
-extern void kcf_rnd_init(void);
-extern boolean_t kcf_rngprov_check(void);
-extern int kcf_rnd_get_pseudo_bytes(uint8_t *, size_t);
-extern int kcf_rnd_get_bytes(uint8_t *, size_t, boolean_t, boolean_t);
-extern int random_add_pseudo_entropy(uint8_t *, size_t, uint_t);
-extern void kcf_rnd_schedule_timeout(boolean_t);
-extern int crypto_uio_data(crypto_data_t *, uchar_t *, int, cmd_type_t,
- void *, void (*update)(void));
-extern int crypto_mblk_data(crypto_data_t *, uchar_t *, int, cmd_type_t,
- void *, void (*update)(void));
extern int crypto_put_output_data(uchar_t *, crypto_data_t *, int);
-extern int crypto_get_input_data(crypto_data_t *, uchar_t **, uchar_t *);
-extern int crypto_copy_key_to_ctx(crypto_key_t *, crypto_key_t **, size_t *,
- int kmflag);
-extern int crypto_digest_data(crypto_data_t *, void *, uchar_t *,
- void (*update)(void), void (*final)(void), uchar_t);
extern int crypto_update_iov(void *, crypto_data_t *, crypto_data_t *,
- int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
- void (*copy_block)(uint8_t *, uint64_t *));
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *));
extern int crypto_update_uio(void *, crypto_data_t *, crypto_data_t *,
- int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
- void (*copy_block)(uint8_t *, uint64_t *));
-extern int crypto_update_mp(void *, crypto_data_t *, crypto_data_t *,
- int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
- void (*copy_block)(uint8_t *, uint64_t *));
-extern int crypto_get_key_attr(crypto_key_t *, crypto_attr_type_t, uchar_t **,
- ssize_t *);
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *));
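
With the copy_block callback gone, crypto_update_iov() and crypto_update_uio() take only the cipher callback and drive it across each contiguous span of the scatter/gather input. A hedged sketch of a callback matching that remaining signature; my_ctx_t is invented, and the snippet presumes the ICP headers for caddr_t, crypto_data_t, and CRYPTO_SUCCESS:

    /* Illustrative callback with the shape crypto_update_iov()/_uio()
     * now expect; the context type is a stand-in. */
    typedef struct my_ctx {
        unsigned char mc_key[32];
    } my_ctx_t;

    static int
    my_cipher(void *ctx, caddr_t data, size_t len, crypto_data_t *out)
    {
        my_ctx_t *c = ctx;

        /* ... transform len bytes at data, emit into out ... */
        (void) c; (void) data; (void) len; (void) out;
        return (CRYPTO_SUCCESS);
    }
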
/* Access to the provider's table */
extern void kcf_prov_tab_destroy(void);
extern void kcf_prov_tab_init(void);
extern int kcf_prov_tab_add_provider(kcf_provider_desc_t *);
extern int kcf_prov_tab_rem_provider(crypto_provider_id_t);
-extern kcf_provider_desc_t *kcf_prov_tab_lookup_by_name(char *);
-extern kcf_provider_desc_t *kcf_prov_tab_lookup_by_dev(char *, uint_t);
-extern int kcf_get_hw_prov_tab(uint_t *, kcf_provider_desc_t ***, int,
- char *, uint_t, boolean_t);
-extern int kcf_get_slot_list(uint_t *, kcf_provider_desc_t ***, boolean_t);
-extern void kcf_free_provider_tab(uint_t, kcf_provider_desc_t **);
extern kcf_provider_desc_t *kcf_prov_tab_lookup(crypto_provider_id_t);
extern int kcf_get_sw_prov(crypto_mech_type_t, kcf_provider_desc_t **,
kcf_mech_entry_t **, boolean_t);
-/* Access to the policy table */
-extern boolean_t is_mech_disabled(kcf_provider_desc_t *, crypto_mech_name_t);
-extern boolean_t is_mech_disabled_byname(crypto_provider_type_t, char *,
- uint_t, crypto_mech_name_t);
-extern void kcf_policy_tab_init(void);
-extern void kcf_policy_free_desc(kcf_policy_desc_t *);
-extern void kcf_policy_remove_by_name(char *, uint_t *, crypto_mech_name_t **);
-extern void kcf_policy_remove_by_dev(char *, uint_t, uint_t *,
- crypto_mech_name_t **);
-extern kcf_policy_desc_t *kcf_policy_lookup_by_name(char *);
-extern kcf_policy_desc_t *kcf_policy_lookup_by_dev(char *, uint_t);
-extern int kcf_policy_load_soft_disabled(char *, uint_t, crypto_mech_name_t *,
- uint_t *, crypto_mech_name_t **);
-extern int kcf_policy_load_dev_disabled(char *, uint_t, uint_t,
- crypto_mech_name_t *, uint_t *, crypto_mech_name_t **);
-extern boolean_t in_soft_config_list(char *);
-
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h
deleted file mode 100644
index 6e371e343945..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h
+++ /dev/null
@@ -1,1480 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CRYPTO_IOCTL_H
-#define _SYS_CRYPTO_IOCTL_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/zfs_context.h>
-#include <sys/crypto/api.h>
-#include <sys/crypto/spi.h>
-#include <sys/crypto/common.h>
-
-#define CRYPTO_MAX_ATTRIBUTE_COUNT 128
-
-#define CRYPTO_IOFLAGS_RW_SESSION 0x00000001
-
-#define CRYPTO(x) (('y' << 8) | (x))
-
-#define MAX_NUM_THRESHOLD 7
-
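
For reference, the CRYPTO(x) macro above packs the ASCII code of 'y' (0x79) into the high byte, so every command in this now-deleted header sits in the 0x79xx range. A worked expansion:

    /* CRYPTO(20) == ('y' << 8) | 20
     *            == (0x79 << 8) | 0x14
     *            == 0x7914        (the CRYPTO_GET_FUNCTION_LIST value) */
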
-/* the PKCS11 Mechanisms */
-#define CKM_RC4 0x00000111
-#define CKM_DES3_ECB 0x00000132
-#define CKM_DES3_CBC 0x00000133
-#define CKM_MD5 0x00000210
-#define CKM_SHA_1 0x00000220
-#define CKM_AES_ECB 0x00001081
-#define CKM_AES_CBC 0x00001082
-
-/*
- * General Purpose Ioctls
- */
-
-typedef struct fl_mechs_threshold {
- int mech_type;
- uint32_t mech_threshold;
-} fl_mechs_threshold_t;
-
-typedef struct crypto_function_list {
- boolean_t fl_digest_init;
- boolean_t fl_digest;
- boolean_t fl_digest_update;
- boolean_t fl_digest_key;
- boolean_t fl_digest_final;
-
- boolean_t fl_encrypt_init;
- boolean_t fl_encrypt;
- boolean_t fl_encrypt_update;
- boolean_t fl_encrypt_final;
-
- boolean_t fl_decrypt_init;
- boolean_t fl_decrypt;
- boolean_t fl_decrypt_update;
- boolean_t fl_decrypt_final;
-
- boolean_t fl_mac_init;
- boolean_t fl_mac;
- boolean_t fl_mac_update;
- boolean_t fl_mac_final;
-
- boolean_t fl_sign_init;
- boolean_t fl_sign;
- boolean_t fl_sign_update;
- boolean_t fl_sign_final;
- boolean_t fl_sign_recover_init;
- boolean_t fl_sign_recover;
-
- boolean_t fl_verify_init;
- boolean_t fl_verify;
- boolean_t fl_verify_update;
- boolean_t fl_verify_final;
- boolean_t fl_verify_recover_init;
- boolean_t fl_verify_recover;
-
- boolean_t fl_digest_encrypt_update;
- boolean_t fl_decrypt_digest_update;
- boolean_t fl_sign_encrypt_update;
- boolean_t fl_decrypt_verify_update;
-
- boolean_t fl_seed_random;
- boolean_t fl_generate_random;
-
- boolean_t fl_session_open;
- boolean_t fl_session_close;
- boolean_t fl_session_login;
- boolean_t fl_session_logout;
-
- boolean_t fl_object_create;
- boolean_t fl_object_copy;
- boolean_t fl_object_destroy;
- boolean_t fl_object_get_size;
- boolean_t fl_object_get_attribute_value;
- boolean_t fl_object_set_attribute_value;
- boolean_t fl_object_find_init;
- boolean_t fl_object_find;
- boolean_t fl_object_find_final;
-
- boolean_t fl_key_generate;
- boolean_t fl_key_generate_pair;
- boolean_t fl_key_wrap;
- boolean_t fl_key_unwrap;
- boolean_t fl_key_derive;
-
- boolean_t fl_init_token;
- boolean_t fl_init_pin;
- boolean_t fl_set_pin;
-
- boolean_t prov_is_limited;
- uint32_t prov_hash_threshold;
- uint32_t prov_hash_limit;
-
- int total_threshold_count;
- fl_mechs_threshold_t fl_threshold[MAX_NUM_THRESHOLD];
-} crypto_function_list_t;
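
Each boolean in crypto_function_list_t advertises one provider entry point, so userland could fetch the list once (via CRYPTO_GET_FUNCTION_LIST, defined further down) and gate its calls on it. A hedged fragment of that test; fl is assumed to be a crypto_get_function_list_t already filled in by the ioctl:

    /* fl: a crypto_get_function_list_t returned by the ioctl (assumed). */
    if (fl.fl_list.fl_mac_init && fl.fl_list.fl_mac_update &&
        fl.fl_list.fl_mac_final) {
        /* streaming MAC available: use init/update/final */
    } else if (fl.fl_list.fl_mac) {
        /* fall back to the single-shot entry point */
    }
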
-
-typedef struct crypto_get_function_list {
- uint_t fl_return_value;
- crypto_provider_id_t fl_provider_id;
- crypto_function_list_t fl_list;
-} crypto_get_function_list_t;
-
-typedef struct crypto_get_mechanism_number {
- uint_t pn_return_value;
- caddr_t pn_mechanism_string;
- size_t pn_mechanism_len;
- crypto_mech_type_t pn_internal_number;
-} crypto_get_mechanism_number_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_get_mechanism_number32 {
- uint32_t pn_return_value;
- caddr32_t pn_mechanism_string;
- size32_t pn_mechanism_len;
- crypto_mech_type_t pn_internal_number;
-} crypto_get_mechanism_number32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
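
The *32 mirror of the structure exists so an LP64 kernel can decode the ioctl payload of an ILP32 process: pointers become caddr32_t, sizes become size32_t, and the pack(4) bracket forces 4-byte alignment on members the 64-bit ABI would pad to 8. The alignment point in isolation, with illustrative types rather than the crypto structures:

    #include <stdint.h>

    struct natural { uint32_t a; uint64_t b; };  /* LP64: sizeof == 16 */

    #pragma pack(4)
    struct packed  { uint32_t a; uint64_t b; };  /* ILP32 layout: 12 */
    #pragma pack()
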
-
-#define CRYPTO_GET_FUNCTION_LIST CRYPTO(20)
-#define CRYPTO_GET_MECHANISM_NUMBER CRYPTO(21)
-
-/*
- * Session Ioctls
- */
-
-typedef uint32_t crypto_flags_t;
-
-typedef struct crypto_open_session {
- uint_t os_return_value;
- crypto_session_id_t os_session;
- crypto_flags_t os_flags;
- crypto_provider_id_t os_provider_id;
-} crypto_open_session_t;
-
-typedef struct crypto_close_session {
- uint_t cs_return_value;
- crypto_session_id_t cs_session;
-} crypto_close_session_t;
-
-typedef struct crypto_close_all_sessions {
- uint_t as_return_value;
- crypto_provider_id_t as_provider_id;
-} crypto_close_all_sessions_t;
-
-#define CRYPTO_OPEN_SESSION CRYPTO(30)
-#define CRYPTO_CLOSE_SESSION CRYPTO(31)
-#define CRYPTO_CLOSE_ALL_SESSIONS CRYPTO(32)
-
-/*
- * Login Ioctls
- */
-typedef struct crypto_login {
- uint_t co_return_value;
- crypto_session_id_t co_session;
- uint_t co_user_type;
- uint_t co_pin_len;
- caddr_t co_pin;
-} crypto_login_t;
-
-typedef struct crypto_logout {
- uint_t cl_return_value;
- crypto_session_id_t cl_session;
-} crypto_logout_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_login32 {
- uint32_t co_return_value;
- crypto_session_id_t co_session;
- uint32_t co_user_type;
- uint32_t co_pin_len;
- caddr32_t co_pin;
-} crypto_login32_t;
-
-typedef struct crypto_logout32 {
- uint32_t cl_return_value;
- crypto_session_id_t cl_session;
-} crypto_logout32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_LOGIN CRYPTO(40)
-#define CRYPTO_LOGOUT CRYPTO(41)
-
-/*
- * Cryptographic Ioctls
- */
-typedef struct crypto_encrypt {
- uint_t ce_return_value;
- crypto_session_id_t ce_session;
- size_t ce_datalen;
- caddr_t ce_databuf;
- size_t ce_encrlen;
- caddr_t ce_encrbuf;
- uint_t ce_flags;
-} crypto_encrypt_t;
-
-typedef struct crypto_encrypt_init {
- uint_t ei_return_value;
- crypto_session_id_t ei_session;
- crypto_mechanism_t ei_mech;
- crypto_key_t ei_key;
-} crypto_encrypt_init_t;
-
-typedef struct crypto_encrypt_update {
- uint_t eu_return_value;
- crypto_session_id_t eu_session;
- size_t eu_datalen;
- caddr_t eu_databuf;
- size_t eu_encrlen;
- caddr_t eu_encrbuf;
-} crypto_encrypt_update_t;
-
-typedef struct crypto_encrypt_final {
- uint_t ef_return_value;
- crypto_session_id_t ef_session;
- size_t ef_encrlen;
- caddr_t ef_encrbuf;
-} crypto_encrypt_final_t;
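
crypto_encrypt_init_t, crypto_encrypt_update_t, and crypto_encrypt_final_t map one-to-one onto a streaming encryption across the ioctl boundary: one init carrying mechanism and key, repeated updates moving databuf into encrbuf, then a final flush. A hedged outline of the userland side; fd, sid, and the buffer setup are assumptions, and the command macros are defined near the end of the header:

    /* Editor's outline; fd is an open crypto device and sid an open
     * session (both assumed).  Error handling elided. */
    crypto_encrypt_init_t   ei = { .ei_session = sid };
    crypto_encrypt_update_t eu = { .eu_session = sid };
    crypto_encrypt_final_t  ef = { .ef_session = sid };

    /* ei.ei_mech / ei.ei_key filled in by the caller ... */
    (void) ioctl(fd, CRYPTO_ENCRYPT_INIT, &ei);

    /* per chunk: eu.eu_databuf/eu_datalen in, eu.eu_encrbuf/eu_encrlen out */
    (void) ioctl(fd, CRYPTO_ENCRYPT_UPDATE, &eu);

    (void) ioctl(fd, CRYPTO_ENCRYPT_FINAL, &ef);  /* flush trailing block */
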
-
-typedef struct crypto_decrypt {
- uint_t cd_return_value;
- crypto_session_id_t cd_session;
- size_t cd_encrlen;
- caddr_t cd_encrbuf;
- size_t cd_datalen;
- caddr_t cd_databuf;
- uint_t cd_flags;
-} crypto_decrypt_t;
-
-typedef struct crypto_decrypt_init {
- uint_t di_return_value;
- crypto_session_id_t di_session;
- crypto_mechanism_t di_mech;
- crypto_key_t di_key;
-} crypto_decrypt_init_t;
-
-typedef struct crypto_decrypt_update {
- uint_t du_return_value;
- crypto_session_id_t du_session;
- size_t du_encrlen;
- caddr_t du_encrbuf;
- size_t du_datalen;
- caddr_t du_databuf;
-} crypto_decrypt_update_t;
-
-typedef struct crypto_decrypt_final {
- uint_t df_return_value;
- crypto_session_id_t df_session;
- size_t df_datalen;
- caddr_t df_databuf;
-} crypto_decrypt_final_t;
-
-typedef struct crypto_digest {
- uint_t cd_return_value;
- crypto_session_id_t cd_session;
- size_t cd_datalen;
- caddr_t cd_databuf;
- size_t cd_digestlen;
- caddr_t cd_digestbuf;
-} crypto_digest_t;
-
-typedef struct crypto_digest_init {
- uint_t di_return_value;
- crypto_session_id_t di_session;
- crypto_mechanism_t di_mech;
-} crypto_digest_init_t;
-
-typedef struct crypto_digest_update {
- uint_t du_return_value;
- crypto_session_id_t du_session;
- size_t du_datalen;
- caddr_t du_databuf;
-} crypto_digest_update_t;
-
-typedef struct crypto_digest_key {
- uint_t dk_return_value;
- crypto_session_id_t dk_session;
- crypto_key_t dk_key;
-} crypto_digest_key_t;
-
-typedef struct crypto_digest_final {
- uint_t df_return_value;
- crypto_session_id_t df_session;
- size_t df_digestlen;
- caddr_t df_digestbuf;
-} crypto_digest_final_t;
-
-typedef struct crypto_mac {
- uint_t cm_return_value;
- crypto_session_id_t cm_session;
- size_t cm_datalen;
- caddr_t cm_databuf;
- size_t cm_maclen;
- caddr_t cm_macbuf;
-} crypto_mac_t;
-
-typedef struct crypto_mac_init {
- uint_t mi_return_value;
- crypto_session_id_t mi_session;
- crypto_mechanism_t mi_mech;
- crypto_key_t mi_key;
-} crypto_mac_init_t;
-
-typedef struct crypto_mac_update {
- uint_t mu_return_value;
- crypto_session_id_t mu_session;
- size_t mu_datalen;
- caddr_t mu_databuf;
-} crypto_mac_update_t;
-
-typedef struct crypto_mac_final {
- uint_t mf_return_value;
- crypto_session_id_t mf_session;
- size_t mf_maclen;
- caddr_t mf_macbuf;
-} crypto_mac_final_t;
-
-typedef struct crypto_sign {
- uint_t cs_return_value;
- crypto_session_id_t cs_session;
- size_t cs_datalen;
- caddr_t cs_databuf;
- size_t cs_signlen;
- caddr_t cs_signbuf;
-} crypto_sign_t;
-
-typedef struct crypto_sign_init {
- uint_t si_return_value;
- crypto_session_id_t si_session;
- crypto_mechanism_t si_mech;
- crypto_key_t si_key;
-} crypto_sign_init_t;
-
-typedef struct crypto_sign_update {
- uint_t su_return_value;
- crypto_session_id_t su_session;
- size_t su_datalen;
- caddr_t su_databuf;
-} crypto_sign_update_t;
-
-typedef struct crypto_sign_final {
- uint_t sf_return_value;
- crypto_session_id_t sf_session;
- size_t sf_signlen;
- caddr_t sf_signbuf;
-} crypto_sign_final_t;
-
-typedef struct crypto_sign_recover_init {
- uint_t ri_return_value;
- crypto_session_id_t ri_session;
- crypto_mechanism_t ri_mech;
- crypto_key_t ri_key;
-} crypto_sign_recover_init_t;
-
-typedef struct crypto_sign_recover {
- uint_t sr_return_value;
- crypto_session_id_t sr_session;
- size_t sr_datalen;
- caddr_t sr_databuf;
- size_t sr_signlen;
- caddr_t sr_signbuf;
-} crypto_sign_recover_t;
-
-typedef struct crypto_verify {
- uint_t cv_return_value;
- crypto_session_id_t cv_session;
- size_t cv_datalen;
- caddr_t cv_databuf;
- size_t cv_signlen;
- caddr_t cv_signbuf;
-} crypto_verify_t;
-
-typedef struct crypto_verify_init {
- uint_t vi_return_value;
- crypto_session_id_t vi_session;
- crypto_mechanism_t vi_mech;
- crypto_key_t vi_key;
-} crypto_verify_init_t;
-
-typedef struct crypto_verify_update {
- uint_t vu_return_value;
- crypto_session_id_t vu_session;
- size_t vu_datalen;
- caddr_t vu_databuf;
-} crypto_verify_update_t;
-
-typedef struct crypto_verify_final {
- uint_t vf_return_value;
- crypto_session_id_t vf_session;
- size_t vf_signlen;
- caddr_t vf_signbuf;
-} crypto_verify_final_t;
-
-typedef struct crypto_verify_recover_init {
- uint_t ri_return_value;
- crypto_session_id_t ri_session;
- crypto_mechanism_t ri_mech;
- crypto_key_t ri_key;
-} crypto_verify_recover_init_t;
-
-typedef struct crypto_verify_recover {
- uint_t vr_return_value;
- crypto_session_id_t vr_session;
- size_t vr_signlen;
- caddr_t vr_signbuf;
- size_t vr_datalen;
- caddr_t vr_databuf;
-} crypto_verify_recover_t;
-
-typedef struct crypto_digest_encrypt_update {
- uint_t eu_return_value;
- crypto_session_id_t eu_session;
- size_t eu_datalen;
- caddr_t eu_databuf;
- size_t eu_encrlen;
- caddr_t eu_encrbuf;
-} crypto_digest_encrypt_update_t;
-
-typedef struct crypto_decrypt_digest_update {
- uint_t du_return_value;
- crypto_session_id_t du_session;
- size_t du_encrlen;
- caddr_t du_encrbuf;
- size_t du_datalen;
- caddr_t du_databuf;
-} crypto_decrypt_digest_update_t;
-
-typedef struct crypto_sign_encrypt_update {
- uint_t eu_return_value;
- crypto_session_id_t eu_session;
- size_t eu_datalen;
- caddr_t eu_databuf;
- size_t eu_encrlen;
- caddr_t eu_encrbuf;
-} crypto_sign_encrypt_update_t;
-
-typedef struct crypto_decrypt_verify_update {
- uint_t vu_return_value;
- crypto_session_id_t vu_session;
- size_t vu_encrlen;
- caddr_t vu_encrbuf;
- size_t vu_datalen;
- caddr_t vu_databuf;
-} crypto_decrypt_verify_update_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_encrypt32 {
- uint32_t ce_return_value;
- crypto_session_id_t ce_session;
- size32_t ce_datalen;
- caddr32_t ce_databuf;
- size32_t ce_encrlen;
- caddr32_t ce_encrbuf;
- uint32_t ce_flags;
-} crypto_encrypt32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_encrypt_init32 {
- uint32_t ei_return_value;
- crypto_session_id_t ei_session;
- crypto_mechanism32_t ei_mech;
- crypto_key32_t ei_key;
-} crypto_encrypt_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_encrypt_update32 {
- uint32_t eu_return_value;
- crypto_session_id_t eu_session;
- size32_t eu_datalen;
- caddr32_t eu_databuf;
- size32_t eu_encrlen;
- caddr32_t eu_encrbuf;
-} crypto_encrypt_update32_t;
-
-typedef struct crypto_encrypt_final32 {
- uint32_t ef_return_value;
- crypto_session_id_t ef_session;
- size32_t ef_encrlen;
- caddr32_t ef_encrbuf;
-} crypto_encrypt_final32_t;
-
-typedef struct crypto_decrypt32 {
- uint32_t cd_return_value;
- crypto_session_id_t cd_session;
- size32_t cd_encrlen;
- caddr32_t cd_encrbuf;
- size32_t cd_datalen;
- caddr32_t cd_databuf;
- uint32_t cd_flags;
-} crypto_decrypt32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_decrypt_init32 {
- uint32_t di_return_value;
- crypto_session_id_t di_session;
- crypto_mechanism32_t di_mech;
- crypto_key32_t di_key;
-} crypto_decrypt_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_decrypt_update32 {
- uint32_t du_return_value;
- crypto_session_id_t du_session;
- size32_t du_encrlen;
- caddr32_t du_encrbuf;
- size32_t du_datalen;
- caddr32_t du_databuf;
-} crypto_decrypt_update32_t;
-
-typedef struct crypto_decrypt_final32 {
- uint32_t df_return_value;
- crypto_session_id_t df_session;
- size32_t df_datalen;
- caddr32_t df_databuf;
-} crypto_decrypt_final32_t;
-
-typedef struct crypto_digest32 {
- uint32_t cd_return_value;
- crypto_session_id_t cd_session;
- size32_t cd_datalen;
- caddr32_t cd_databuf;
- size32_t cd_digestlen;
- caddr32_t cd_digestbuf;
-} crypto_digest32_t;
-
-typedef struct crypto_digest_init32 {
- uint32_t di_return_value;
- crypto_session_id_t di_session;
- crypto_mechanism32_t di_mech;
-} crypto_digest_init32_t;
-
-typedef struct crypto_digest_update32 {
- uint32_t du_return_value;
- crypto_session_id_t du_session;
- size32_t du_datalen;
- caddr32_t du_databuf;
-} crypto_digest_update32_t;
-
-typedef struct crypto_digest_key32 {
- uint32_t dk_return_value;
- crypto_session_id_t dk_session;
- crypto_key32_t dk_key;
-} crypto_digest_key32_t;
-
-typedef struct crypto_digest_final32 {
- uint32_t df_return_value;
- crypto_session_id_t df_session;
- size32_t df_digestlen;
- caddr32_t df_digestbuf;
-} crypto_digest_final32_t;
-
-typedef struct crypto_mac32 {
- uint32_t cm_return_value;
- crypto_session_id_t cm_session;
- size32_t cm_datalen;
- caddr32_t cm_databuf;
- size32_t cm_maclen;
- caddr32_t cm_macbuf;
-} crypto_mac32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_mac_init32 {
- uint32_t mi_return_value;
- crypto_session_id_t mi_session;
- crypto_mechanism32_t mi_mech;
- crypto_key32_t mi_key;
-} crypto_mac_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_mac_update32 {
- uint32_t mu_return_value;
- crypto_session_id_t mu_session;
- size32_t mu_datalen;
- caddr32_t mu_databuf;
-} crypto_mac_update32_t;
-
-typedef struct crypto_mac_final32 {
- uint32_t mf_return_value;
- crypto_session_id_t mf_session;
- size32_t mf_maclen;
- caddr32_t mf_macbuf;
-} crypto_mac_final32_t;
-
-typedef struct crypto_sign32 {
- uint32_t cs_return_value;
- crypto_session_id_t cs_session;
- size32_t cs_datalen;
- caddr32_t cs_databuf;
- size32_t cs_signlen;
- caddr32_t cs_signbuf;
-} crypto_sign32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_sign_init32 {
- uint32_t si_return_value;
- crypto_session_id_t si_session;
- crypto_mechanism32_t si_mech;
- crypto_key32_t si_key;
-} crypto_sign_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_sign_update32 {
- uint32_t su_return_value;
- crypto_session_id_t su_session;
- size32_t su_datalen;
- caddr32_t su_databuf;
-} crypto_sign_update32_t;
-
-typedef struct crypto_sign_final32 {
- uint32_t sf_return_value;
- crypto_session_id_t sf_session;
- size32_t sf_signlen;
- caddr32_t sf_signbuf;
-} crypto_sign_final32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_sign_recover_init32 {
- uint32_t ri_return_value;
- crypto_session_id_t ri_session;
- crypto_mechanism32_t ri_mech;
- crypto_key32_t ri_key;
-} crypto_sign_recover_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_sign_recover32 {
- uint32_t sr_return_value;
- crypto_session_id_t sr_session;
- size32_t sr_datalen;
- caddr32_t sr_databuf;
- size32_t sr_signlen;
- caddr32_t sr_signbuf;
-} crypto_sign_recover32_t;
-
-typedef struct crypto_verify32 {
- uint32_t cv_return_value;
- crypto_session_id_t cv_session;
- size32_t cv_datalen;
- caddr32_t cv_databuf;
- size32_t cv_signlen;
- caddr32_t cv_signbuf;
-} crypto_verify32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_verify_init32 {
- uint32_t vi_return_value;
- crypto_session_id_t vi_session;
- crypto_mechanism32_t vi_mech;
- crypto_key32_t vi_key;
-} crypto_verify_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_verify_update32 {
- uint32_t vu_return_value;
- crypto_session_id_t vu_session;
- size32_t vu_datalen;
- caddr32_t vu_databuf;
-} crypto_verify_update32_t;
-
-typedef struct crypto_verify_final32 {
- uint32_t vf_return_value;
- crypto_session_id_t vf_session;
- size32_t vf_signlen;
- caddr32_t vf_signbuf;
-} crypto_verify_final32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_verify_recover_init32 {
- uint32_t ri_return_value;
- crypto_session_id_t ri_session;
- crypto_mechanism32_t ri_mech;
- crypto_key32_t ri_key;
-} crypto_verify_recover_init32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_verify_recover32 {
- uint32_t vr_return_value;
- crypto_session_id_t vr_session;
- size32_t vr_signlen;
- caddr32_t vr_signbuf;
- size32_t vr_datalen;
- caddr32_t vr_databuf;
-} crypto_verify_recover32_t;
-
-typedef struct crypto_digest_encrypt_update32 {
- uint32_t eu_return_value;
- crypto_session_id_t eu_session;
- size32_t eu_datalen;
- caddr32_t eu_databuf;
- size32_t eu_encrlen;
- caddr32_t eu_encrbuf;
-} crypto_digest_encrypt_update32_t;
-
-typedef struct crypto_decrypt_digest_update32 {
- uint32_t du_return_value;
- crypto_session_id_t du_session;
- size32_t du_encrlen;
- caddr32_t du_encrbuf;
- size32_t du_datalen;
- caddr32_t du_databuf;
-} crypto_decrypt_digest_update32_t;
-
-typedef struct crypto_sign_encrypt_update32 {
- uint32_t eu_return_value;
- crypto_session_id_t eu_session;
- size32_t eu_datalen;
- caddr32_t eu_databuf;
- size32_t eu_encrlen;
- caddr32_t eu_encrbuf;
-} crypto_sign_encrypt_update32_t;
-
-typedef struct crypto_decrypt_verify_update32 {
- uint32_t vu_return_value;
- crypto_session_id_t vu_session;
- size32_t vu_encrlen;
- caddr32_t vu_encrbuf;
- size32_t vu_datalen;
- caddr32_t vu_databuf;
-} crypto_decrypt_verify_update32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_ENCRYPT CRYPTO(50)
-#define CRYPTO_ENCRYPT_INIT CRYPTO(51)
-#define CRYPTO_ENCRYPT_UPDATE CRYPTO(52)
-#define CRYPTO_ENCRYPT_FINAL CRYPTO(53)
-#define CRYPTO_DECRYPT CRYPTO(54)
-#define CRYPTO_DECRYPT_INIT CRYPTO(55)
-#define CRYPTO_DECRYPT_UPDATE CRYPTO(56)
-#define CRYPTO_DECRYPT_FINAL CRYPTO(57)
-
-#define CRYPTO_DIGEST CRYPTO(58)
-#define CRYPTO_DIGEST_INIT CRYPTO(59)
-#define CRYPTO_DIGEST_UPDATE CRYPTO(60)
-#define CRYPTO_DIGEST_KEY CRYPTO(61)
-#define CRYPTO_DIGEST_FINAL CRYPTO(62)
-#define CRYPTO_MAC CRYPTO(63)
-#define CRYPTO_MAC_INIT CRYPTO(64)
-#define CRYPTO_MAC_UPDATE CRYPTO(65)
-#define CRYPTO_MAC_FINAL CRYPTO(66)
-
-#define CRYPTO_SIGN CRYPTO(67)
-#define CRYPTO_SIGN_INIT CRYPTO(68)
-#define CRYPTO_SIGN_UPDATE CRYPTO(69)
-#define CRYPTO_SIGN_FINAL CRYPTO(70)
-#define CRYPTO_SIGN_RECOVER_INIT CRYPTO(71)
-#define CRYPTO_SIGN_RECOVER CRYPTO(72)
-#define CRYPTO_VERIFY CRYPTO(73)
-#define CRYPTO_VERIFY_INIT CRYPTO(74)
-#define CRYPTO_VERIFY_UPDATE CRYPTO(75)
-#define CRYPTO_VERIFY_FINAL CRYPTO(76)
-#define CRYPTO_VERIFY_RECOVER_INIT CRYPTO(77)
-#define CRYPTO_VERIFY_RECOVER CRYPTO(78)
-
-#define CRYPTO_DIGEST_ENCRYPT_UPDATE CRYPTO(79)
-#define CRYPTO_DECRYPT_DIGEST_UPDATE CRYPTO(80)
-#define CRYPTO_SIGN_ENCRYPT_UPDATE CRYPTO(81)
-#define CRYPTO_DECRYPT_VERIFY_UPDATE CRYPTO(82)
-
-/*
- * Random Number Ioctls
- */
-typedef struct crypto_seed_random {
- uint_t sr_return_value;
- crypto_session_id_t sr_session;
- size_t sr_seedlen;
- caddr_t sr_seedbuf;
-} crypto_seed_random_t;
-
-typedef struct crypto_generate_random {
- uint_t gr_return_value;
- crypto_session_id_t gr_session;
- caddr_t gr_buf;
- size_t gr_buflen;
-} crypto_generate_random_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_seed_random32 {
- uint32_t sr_return_value;
- crypto_session_id_t sr_session;
- size32_t sr_seedlen;
- caddr32_t sr_seedbuf;
-} crypto_seed_random32_t;
-
-typedef struct crypto_generate_random32 {
- uint32_t gr_return_value;
- crypto_session_id_t gr_session;
- caddr32_t gr_buf;
- size32_t gr_buflen;
-} crypto_generate_random32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_SEED_RANDOM CRYPTO(90)
-#define CRYPTO_GENERATE_RANDOM CRYPTO(91)
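
Unlike the streaming cipher sequence sketched earlier, the RNG interface is one-shot: a single crypto_generate_random_t round-trips through one ioctl, with the per-call status coming back in gr_return_value. A hedged example under the same assumptions (fd and sid as before):

    unsigned char buf[32];
    crypto_generate_random_t gr = {
        .gr_session = sid,          /* assumed open session */
        .gr_buf = (caddr_t)buf,
        .gr_buflen = sizeof (buf),
    };

    if (ioctl(fd, CRYPTO_GENERATE_RANDOM, &gr) == 0 &&
        gr.gr_return_value == CRYPTO_SUCCESS) {
        /* buf now holds 32 provider-generated bytes */
    }
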
-
-/*
- * Object Management Ioctls
- */
-typedef struct crypto_object_create {
- uint_t oc_return_value;
- crypto_session_id_t oc_session;
- crypto_object_id_t oc_handle;
- uint_t oc_count;
- caddr_t oc_attributes;
-} crypto_object_create_t;
-
-typedef struct crypto_object_copy {
- uint_t oc_return_value;
- crypto_session_id_t oc_session;
- crypto_object_id_t oc_handle;
- crypto_object_id_t oc_new_handle;
- uint_t oc_count;
- caddr_t oc_new_attributes;
-} crypto_object_copy_t;
-
-typedef struct crypto_object_destroy {
- uint_t od_return_value;
- crypto_session_id_t od_session;
- crypto_object_id_t od_handle;
-} crypto_object_destroy_t;
-
-typedef struct crypto_object_get_attribute_value {
- uint_t og_return_value;
- crypto_session_id_t og_session;
- crypto_object_id_t og_handle;
- uint_t og_count;
- caddr_t og_attributes;
-} crypto_object_get_attribute_value_t;
-
-typedef struct crypto_object_get_size {
- uint_t gs_return_value;
- crypto_session_id_t gs_session;
- crypto_object_id_t gs_handle;
- size_t gs_size;
-} crypto_object_get_size_t;
-
-typedef struct crypto_object_set_attribute_value {
- uint_t sa_return_value;
- crypto_session_id_t sa_session;
- crypto_object_id_t sa_handle;
- uint_t sa_count;
- caddr_t sa_attributes;
-} crypto_object_set_attribute_value_t;
-
-typedef struct crypto_object_find_init {
- uint_t fi_return_value;
- crypto_session_id_t fi_session;
- uint_t fi_count;
- caddr_t fi_attributes;
-} crypto_object_find_init_t;
-
-typedef struct crypto_object_find_update {
- uint_t fu_return_value;
- crypto_session_id_t fu_session;
- uint_t fu_max_count;
- uint_t fu_count;
- caddr_t fu_handles;
-} crypto_object_find_update_t;
-
-typedef struct crypto_object_find_final {
- uint_t ff_return_value;
- crypto_session_id_t ff_session;
-} crypto_object_find_final_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_object_create32 {
- uint32_t oc_return_value;
- crypto_session_id_t oc_session;
- crypto_object_id_t oc_handle;
- uint32_t oc_count;
- caddr32_t oc_attributes;
-} crypto_object_create32_t;
-
-typedef struct crypto_object_copy32 {
- uint32_t oc_return_value;
- crypto_session_id_t oc_session;
- crypto_object_id_t oc_handle;
- crypto_object_id_t oc_new_handle;
- uint32_t oc_count;
- caddr32_t oc_new_attributes;
-} crypto_object_copy32_t;
-
-typedef struct crypto_object_destroy32 {
- uint32_t od_return_value;
- crypto_session_id_t od_session;
- crypto_object_id_t od_handle;
-} crypto_object_destroy32_t;
-
-typedef struct crypto_object_get_attribute_value32 {
- uint32_t og_return_value;
- crypto_session_id_t og_session;
- crypto_object_id_t og_handle;
- uint32_t og_count;
- caddr32_t og_attributes;
-} crypto_object_get_attribute_value32_t;
-
-typedef struct crypto_object_get_size32 {
- uint32_t gs_return_value;
- crypto_session_id_t gs_session;
- crypto_object_id_t gs_handle;
- size32_t gs_size;
-} crypto_object_get_size32_t;
-
-typedef struct crypto_object_set_attribute_value32 {
- uint32_t sa_return_value;
- crypto_session_id_t sa_session;
- crypto_object_id_t sa_handle;
- uint32_t sa_count;
- caddr32_t sa_attributes;
-} crypto_object_set_attribute_value32_t;
-
-typedef struct crypto_object_find_init32 {
- uint32_t fi_return_value;
- crypto_session_id_t fi_session;
- uint32_t fi_count;
- caddr32_t fi_attributes;
-} crypto_object_find_init32_t;
-
-typedef struct crypto_object_find_update32 {
- uint32_t fu_return_value;
- crypto_session_id_t fu_session;
- uint32_t fu_max_count;
- uint32_t fu_count;
- caddr32_t fu_handles;
-} crypto_object_find_update32_t;
-
-typedef struct crypto_object_find_final32 {
- uint32_t ff_return_value;
- crypto_session_id_t ff_session;
-} crypto_object_find_final32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_OBJECT_CREATE CRYPTO(100)
-#define CRYPTO_OBJECT_COPY CRYPTO(101)
-#define CRYPTO_OBJECT_DESTROY CRYPTO(102)
-#define CRYPTO_OBJECT_GET_ATTRIBUTE_VALUE CRYPTO(103)
-#define CRYPTO_OBJECT_GET_SIZE CRYPTO(104)
-#define CRYPTO_OBJECT_SET_ATTRIBUTE_VALUE CRYPTO(105)
-#define CRYPTO_OBJECT_FIND_INIT CRYPTO(106)
-#define CRYPTO_OBJECT_FIND_UPDATE CRYPTO(107)
-#define CRYPTO_OBJECT_FIND_FINAL CRYPTO(108)
-
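The find ioctls mirror PKCS#11's C_FindObjects* three-step protocol: FIND_INIT installs the search template, FIND_UPDATE is called (possibly repeatedly) to drain matching handles, and FIND_FINAL tears the search down. A compressed userland sketch, with session setup, attribute construction, and error checks elided; sid, attrs, nattrs, handles, and NHANDLES are assumed caller state:

crypto_object_find_init_t fi = { .fi_session = sid,
    .fi_count = nattrs, .fi_attributes = (caddr_t)attrs };
crypto_object_find_update_t fu = { .fu_session = sid,
    .fu_max_count = NHANDLES, .fu_handles = (caddr_t)handles };
crypto_object_find_final_t ff = { .ff_session = sid };

(void) ioctl(fd, CRYPTO_OBJECT_FIND_INIT, &fi);
(void) ioctl(fd, CRYPTO_OBJECT_FIND_UPDATE, &fu);	/* fu_count: matches returned */
(void) ioctl(fd, CRYPTO_OBJECT_FIND_FINAL, &ff);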
-/*
- * Key Generation Ioctls
- */
-typedef struct crypto_object_generate_key {
- uint_t gk_return_value;
- crypto_session_id_t gk_session;
- crypto_object_id_t gk_handle;
- crypto_mechanism_t gk_mechanism;
- uint_t gk_count;
- caddr_t gk_attributes;
-} crypto_object_generate_key_t;
-
-typedef struct crypto_object_generate_key_pair {
- uint_t kp_return_value;
- crypto_session_id_t kp_session;
- crypto_object_id_t kp_public_handle;
- crypto_object_id_t kp_private_handle;
- uint_t kp_public_count;
- uint_t kp_private_count;
- caddr_t kp_public_attributes;
- caddr_t kp_private_attributes;
- crypto_mechanism_t kp_mechanism;
-} crypto_object_generate_key_pair_t;
-
-typedef struct crypto_object_wrap_key {
- uint_t wk_return_value;
- crypto_session_id_t wk_session;
- crypto_mechanism_t wk_mechanism;
- crypto_key_t wk_wrapping_key;
- crypto_object_id_t wk_object_handle;
- size_t wk_wrapped_key_len;
- caddr_t wk_wrapped_key;
-} crypto_object_wrap_key_t;
-
-typedef struct crypto_object_unwrap_key {
- uint_t uk_return_value;
- crypto_session_id_t uk_session;
- crypto_mechanism_t uk_mechanism;
- crypto_key_t uk_unwrapping_key;
- crypto_object_id_t uk_object_handle;
- size_t uk_wrapped_key_len;
- caddr_t uk_wrapped_key;
- uint_t uk_count;
- caddr_t uk_attributes;
-} crypto_object_unwrap_key_t;
-
-typedef struct crypto_derive_key {
- uint_t dk_return_value;
- crypto_session_id_t dk_session;
- crypto_mechanism_t dk_mechanism;
- crypto_key_t dk_base_key;
- crypto_object_id_t dk_object_handle;
- uint_t dk_count;
- caddr_t dk_attributes;
-} crypto_derive_key_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_object_generate_key32 {
- uint32_t gk_return_value;
- crypto_session_id_t gk_session;
- crypto_object_id_t gk_handle;
- crypto_mechanism32_t gk_mechanism;
- uint32_t gk_count;
- caddr32_t gk_attributes;
-} crypto_object_generate_key32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-typedef struct crypto_object_generate_key_pair32 {
- uint32_t kp_return_value;
- crypto_session_id_t kp_session;
- crypto_object_id_t kp_public_handle;
- crypto_object_id_t kp_private_handle;
- uint32_t kp_public_count;
- uint32_t kp_private_count;
- caddr32_t kp_public_attributes;
- caddr32_t kp_private_attributes;
- crypto_mechanism32_t kp_mechanism;
-} crypto_object_generate_key_pair32_t;
-
-typedef struct crypto_object_wrap_key32 {
- uint32_t wk_return_value;
- crypto_session_id_t wk_session;
- crypto_mechanism32_t wk_mechanism;
- crypto_key32_t wk_wrapping_key;
- crypto_object_id_t wk_object_handle;
- size32_t wk_wrapped_key_len;
- caddr32_t wk_wrapped_key;
-} crypto_object_wrap_key32_t;
-
-typedef struct crypto_object_unwrap_key32 {
- uint32_t uk_return_value;
- crypto_session_id_t uk_session;
- crypto_mechanism32_t uk_mechanism;
- crypto_key32_t uk_unwrapping_key;
- crypto_object_id_t uk_object_handle;
- size32_t uk_wrapped_key_len;
- caddr32_t uk_wrapped_key;
- uint32_t uk_count;
- caddr32_t uk_attributes;
-} crypto_object_unwrap_key32_t;
-
-typedef struct crypto_derive_key32 {
- uint32_t dk_return_value;
- crypto_session_id_t dk_session;
- crypto_mechanism32_t dk_mechanism;
- crypto_key32_t dk_base_key;
- crypto_object_id_t dk_object_handle;
- uint32_t dk_count;
- caddr32_t dk_attributes;
-} crypto_derive_key32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_GENERATE_KEY CRYPTO(110)
-#define CRYPTO_GENERATE_KEY_PAIR CRYPTO(111)
-#define CRYPTO_WRAP_KEY CRYPTO(112)
-#define CRYPTO_UNWRAP_KEY CRYPTO(113)
-#define CRYPTO_DERIVE_KEY CRYPTO(114)
-
-/*
- * Provider Management Ioctls
- */
-
-typedef struct crypto_get_provider_list {
- uint_t pl_return_value;
- uint_t pl_count;
- crypto_provider_entry_t pl_list[1];
-} crypto_get_provider_list_t;
-
-typedef struct crypto_provider_data {
- uchar_t pd_prov_desc[CRYPTO_PROVIDER_DESCR_MAX_LEN];
- uchar_t pd_label[CRYPTO_EXT_SIZE_LABEL];
- uchar_t pd_manufacturerID[CRYPTO_EXT_SIZE_MANUF];
- uchar_t pd_model[CRYPTO_EXT_SIZE_MODEL];
- uchar_t pd_serial_number[CRYPTO_EXT_SIZE_SERIAL];
- ulong_t pd_flags;
- ulong_t pd_max_session_count;
- ulong_t pd_session_count;
- ulong_t pd_max_rw_session_count;
- ulong_t pd_rw_session_count;
- ulong_t pd_max_pin_len;
- ulong_t pd_min_pin_len;
- ulong_t pd_total_public_memory;
- ulong_t pd_free_public_memory;
- ulong_t pd_total_private_memory;
- ulong_t pd_free_private_memory;
- crypto_version_t pd_hardware_version;
- crypto_version_t pd_firmware_version;
- uchar_t pd_time[CRYPTO_EXT_SIZE_TIME];
-} crypto_provider_data_t;
-
-typedef struct crypto_get_provider_info {
- uint_t gi_return_value;
- crypto_provider_id_t gi_provider_id;
- crypto_provider_data_t gi_provider_data;
-} crypto_get_provider_info_t;
-
-typedef struct crypto_get_provider_mechanisms {
- uint_t pm_return_value;
- crypto_provider_id_t pm_provider_id;
- uint_t pm_count;
- crypto_mech_name_t pm_list[1];
-} crypto_get_provider_mechanisms_t;
-
-typedef struct crypto_get_provider_mechanism_info {
- uint_t mi_return_value;
- crypto_provider_id_t mi_provider_id;
- crypto_mech_name_t mi_mechanism_name;
- uint32_t mi_min_key_size;
- uint32_t mi_max_key_size;
- uint32_t mi_flags;
-} crypto_get_provider_mechanism_info_t;
-
-typedef struct crypto_init_token {
- uint_t it_return_value;
- crypto_provider_id_t it_provider_id;
- caddr_t it_pin;
- size_t it_pin_len;
- caddr_t it_label;
-} crypto_init_token_t;
-
-typedef struct crypto_init_pin {
- uint_t ip_return_value;
- crypto_session_id_t ip_session;
- caddr_t ip_pin;
- size_t ip_pin_len;
-} crypto_init_pin_t;
-
-typedef struct crypto_set_pin {
- uint_t sp_return_value;
- crypto_session_id_t sp_session;
- caddr_t sp_old_pin;
- size_t sp_old_len;
- caddr_t sp_new_pin;
- size_t sp_new_len;
-} crypto_set_pin_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_get_provider_list32 {
- uint32_t pl_return_value;
- uint32_t pl_count;
- crypto_provider_entry_t pl_list[1];
-} crypto_get_provider_list32_t;
-
-typedef struct crypto_version32 {
- uchar_t cv_major;
- uchar_t cv_minor;
-} crypto_version32_t;
-
-typedef struct crypto_provider_data32 {
- uchar_t pd_prov_desc[CRYPTO_PROVIDER_DESCR_MAX_LEN];
- uchar_t pd_label[CRYPTO_EXT_SIZE_LABEL];
- uchar_t pd_manufacturerID[CRYPTO_EXT_SIZE_MANUF];
- uchar_t pd_model[CRYPTO_EXT_SIZE_MODEL];
- uchar_t pd_serial_number[CRYPTO_EXT_SIZE_SERIAL];
- uint32_t pd_flags;
- uint32_t pd_max_session_count;
- uint32_t pd_session_count;
- uint32_t pd_max_rw_session_count;
- uint32_t pd_rw_session_count;
- uint32_t pd_max_pin_len;
- uint32_t pd_min_pin_len;
- uint32_t pd_total_public_memory;
- uint32_t pd_free_public_memory;
- uint32_t pd_total_private_memory;
- uint32_t pd_free_private_memory;
- crypto_version32_t pd_hardware_version;
- crypto_version32_t pd_firmware_version;
- uchar_t pd_time[CRYPTO_EXT_SIZE_TIME];
-} crypto_provider_data32_t;
-
-typedef struct crypto_get_provider_info32 {
- uint32_t gi_return_value;
- crypto_provider_id_t gi_provider_id;
- crypto_provider_data32_t gi_provider_data;
-} crypto_get_provider_info32_t;
-
-typedef struct crypto_get_provider_mechanisms32 {
- uint32_t pm_return_value;
- crypto_provider_id_t pm_provider_id;
- uint32_t pm_count;
- crypto_mech_name_t pm_list[1];
-} crypto_get_provider_mechanisms32_t;
-
-typedef struct crypto_init_token32 {
- uint32_t it_return_value;
- crypto_provider_id_t it_provider_id;
- caddr32_t it_pin;
- size32_t it_pin_len;
- caddr32_t it_label;
-} crypto_init_token32_t;
-
-typedef struct crypto_init_pin32 {
- uint32_t ip_return_value;
- crypto_session_id_t ip_session;
- caddr32_t ip_pin;
- size32_t ip_pin_len;
-} crypto_init_pin32_t;
-
-typedef struct crypto_set_pin32 {
- uint32_t sp_return_value;
- crypto_session_id_t sp_session;
- caddr32_t sp_old_pin;
- size32_t sp_old_len;
- caddr32_t sp_new_pin;
- size32_t sp_new_len;
-} crypto_set_pin32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_GET_PROVIDER_LIST CRYPTO(120)
-#define CRYPTO_GET_PROVIDER_INFO CRYPTO(121)
-#define CRYPTO_GET_PROVIDER_MECHANISMS CRYPTO(122)
-#define CRYPTO_GET_PROVIDER_MECHANISM_INFO CRYPTO(123)
-#define CRYPTO_INIT_TOKEN CRYPTO(124)
-#define CRYPTO_INIT_PIN CRYPTO(125)
-#define CRYPTO_SET_PIN CRYPTO(126)
-
-/*
- * No (Key) Store Key Generation Ioctls
- */
-typedef struct crypto_nostore_generate_key {
- uint_t ngk_return_value;
- crypto_session_id_t ngk_session;
- crypto_mechanism_t ngk_mechanism;
- uint_t ngk_in_count;
- uint_t ngk_out_count;
- caddr_t ngk_in_attributes;
- caddr_t ngk_out_attributes;
-} crypto_nostore_generate_key_t;
-
-typedef struct crypto_nostore_generate_key_pair {
- uint_t nkp_return_value;
- crypto_session_id_t nkp_session;
- uint_t nkp_in_public_count;
- uint_t nkp_in_private_count;
- uint_t nkp_out_public_count;
- uint_t nkp_out_private_count;
- caddr_t nkp_in_public_attributes;
- caddr_t nkp_in_private_attributes;
- caddr_t nkp_out_public_attributes;
- caddr_t nkp_out_private_attributes;
- crypto_mechanism_t nkp_mechanism;
-} crypto_nostore_generate_key_pair_t;
-
-typedef struct crypto_nostore_derive_key {
- uint_t ndk_return_value;
- crypto_session_id_t ndk_session;
- crypto_mechanism_t ndk_mechanism;
- crypto_key_t ndk_base_key;
- uint_t ndk_in_count;
- uint_t ndk_out_count;
- caddr_t ndk_in_attributes;
- caddr_t ndk_out_attributes;
-} crypto_nostore_derive_key_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_nostore_generate_key32 {
- uint32_t ngk_return_value;
- crypto_session_id_t ngk_session;
- crypto_mechanism32_t ngk_mechanism;
- uint32_t ngk_in_count;
- uint32_t ngk_out_count;
- caddr32_t ngk_in_attributes;
- caddr32_t ngk_out_attributes;
-} crypto_nostore_generate_key32_t;
-
-typedef struct crypto_nostore_generate_key_pair32 {
- uint32_t nkp_return_value;
- crypto_session_id_t nkp_session;
- uint32_t nkp_in_public_count;
- uint32_t nkp_in_private_count;
- uint32_t nkp_out_public_count;
- uint32_t nkp_out_private_count;
- caddr32_t nkp_in_public_attributes;
- caddr32_t nkp_in_private_attributes;
- caddr32_t nkp_out_public_attributes;
- caddr32_t nkp_out_private_attributes;
- crypto_mechanism32_t nkp_mechanism;
-} crypto_nostore_generate_key_pair32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-typedef struct crypto_nostore_derive_key32 {
- uint32_t ndk_return_value;
- crypto_session_id_t ndk_session;
- crypto_mechanism32_t ndk_mechanism;
- crypto_key32_t ndk_base_key;
- uint32_t ndk_in_count;
- uint32_t ndk_out_count;
- caddr32_t ndk_in_attributes;
- caddr32_t ndk_out_attributes;
-} crypto_nostore_derive_key32_t;
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_NOSTORE_GENERATE_KEY CRYPTO(127)
-#define CRYPTO_NOSTORE_GENERATE_KEY_PAIR CRYPTO(128)
-#define CRYPTO_NOSTORE_DERIVE_KEY CRYPTO(129)
-
-/*
- * Mechanism Ioctls
- */
-
-typedef struct crypto_get_mechanism_list {
- uint_t ml_return_value;
- uint_t ml_count;
- crypto_mech_name_t ml_list[1];
-} crypto_get_mechanism_list_t;
-
-typedef struct crypto_get_all_mechanism_info {
- uint_t mi_return_value;
- crypto_mech_name_t mi_mechanism_name;
- uint_t mi_count;
- crypto_mechanism_info_t mi_list[1];
-} crypto_get_all_mechanism_info_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_get_mechanism_list32 {
- uint32_t ml_return_value;
- uint32_t ml_count;
- crypto_mech_name_t ml_list[1];
-} crypto_get_mechanism_list32_t;
-
-typedef struct crypto_get_all_mechanism_info32 {
- uint32_t mi_return_value;
- crypto_mech_name_t mi_mechanism_name;
- uint32_t mi_count;
- crypto_mechanism_info32_t mi_list[1];
-} crypto_get_all_mechanism_info32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_GET_MECHANISM_LIST CRYPTO(140)
-#define CRYPTO_GET_ALL_MECHANISM_INFO CRYPTO(141)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CRYPTO_IOCTL_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h
deleted file mode 100644
index 24babd7755cc..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CRYPTO_IOCTLADMIN_H
-#define _SYS_CRYPTO_IOCTLADMIN_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/zfs_context.h>
-#include <sys/crypto/common.h>
-
-#define ADMIN_IOCTL_DEVICE "/dev/cryptoadm"
-
-#define CRYPTOADMIN(x) (('y' << 8) | (x))
-
-/*
- * Administrative IOCTLs
- */
-
-typedef struct crypto_get_dev_list {
- uint_t dl_return_value;
- uint_t dl_dev_count;
- crypto_dev_list_entry_t dl_devs[1];
-} crypto_get_dev_list_t;
-
-typedef struct crypto_get_soft_list {
- uint_t sl_return_value;
- uint_t sl_soft_count;
- size_t sl_soft_len;
- caddr_t sl_soft_names;
-} crypto_get_soft_list_t;
-
-typedef struct crypto_get_dev_info {
- uint_t di_return_value;
- char di_dev_name[MAXNAMELEN];
- uint_t di_dev_instance;
- uint_t di_count;
- crypto_mech_name_t di_list[1];
-} crypto_get_dev_info_t;
-
-typedef struct crypto_get_soft_info {
- uint_t si_return_value;
- char si_name[MAXNAMELEN];
- uint_t si_count;
- crypto_mech_name_t si_list[1];
-} crypto_get_soft_info_t;
-
-typedef struct crypto_load_dev_disabled {
- uint_t dd_return_value;
- char dd_dev_name[MAXNAMELEN];
- uint_t dd_dev_instance;
- uint_t dd_count;
- crypto_mech_name_t dd_list[1];
-} crypto_load_dev_disabled_t;
-
-typedef struct crypto_load_soft_disabled {
- uint_t sd_return_value;
- char sd_name[MAXNAMELEN];
- uint_t sd_count;
- crypto_mech_name_t sd_list[1];
-} crypto_load_soft_disabled_t;
-
-typedef struct crypto_unload_soft_module {
- uint_t sm_return_value;
- char sm_name[MAXNAMELEN];
-} crypto_unload_soft_module_t;
-
-typedef struct crypto_load_soft_config {
- uint_t sc_return_value;
- char sc_name[MAXNAMELEN];
- uint_t sc_count;
- crypto_mech_name_t sc_list[1];
-} crypto_load_soft_config_t;
-
-typedef struct crypto_load_door {
- uint_t ld_return_value;
- uint_t ld_did;
-} crypto_load_door_t;
-
-#ifdef _KERNEL
-#ifdef _SYSCALL32
-
-typedef struct crypto_get_soft_list32 {
- uint32_t sl_return_value;
- uint32_t sl_soft_count;
- size32_t sl_soft_len;
- caddr32_t sl_soft_names;
-} crypto_get_soft_list32_t;
-
-#endif /* _SYSCALL32 */
-#endif /* _KERNEL */
-
-#define CRYPTO_GET_VERSION CRYPTOADMIN(1)
-#define CRYPTO_GET_DEV_LIST CRYPTOADMIN(2)
-#define CRYPTO_GET_SOFT_LIST CRYPTOADMIN(3)
-#define CRYPTO_GET_DEV_INFO CRYPTOADMIN(4)
-#define CRYPTO_GET_SOFT_INFO CRYPTOADMIN(5)
-#define CRYPTO_LOAD_DEV_DISABLED CRYPTOADMIN(8)
-#define CRYPTO_LOAD_SOFT_DISABLED CRYPTOADMIN(9)
-#define CRYPTO_UNLOAD_SOFT_MODULE CRYPTOADMIN(10)
-#define CRYPTO_LOAD_SOFT_CONFIG CRYPTOADMIN(11)
-#define CRYPTO_POOL_CREATE CRYPTOADMIN(12)
-#define CRYPTO_POOL_WAIT CRYPTOADMIN(13)
-#define CRYPTO_POOL_RUN CRYPTOADMIN(14)
-#define CRYPTO_LOAD_DOOR CRYPTOADMIN(15)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CRYPTO_IOCTLADMIN_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h
deleted file mode 100644
index 230d74b063fc..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CRYPTO_OPS_IMPL_H
-#define _SYS_CRYPTO_OPS_IMPL_H
-
-/*
- * Scheduler internal structures.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/zfs_context.h>
-#include <sys/crypto/api.h>
-#include <sys/crypto/spi.h>
-#include <sys/crypto/impl.h>
-#include <sys/crypto/common.h>
-
-/*
- * The parameters needed for each function group are batched
- * in one structure. This is much simpler than having a
- * separate structure for each function.
- *
- * In some cases, a field is generically named to keep the
- * structure small. The comments indicate these cases.
- */
-typedef struct kcf_digest_ops_params {
- crypto_session_id_t do_sid;
- crypto_mech_type_t do_framework_mechtype;
- crypto_mechanism_t do_mech;
- crypto_data_t *do_data;
- crypto_data_t *do_digest;
- crypto_key_t *do_digest_key; /* Argument for digest_key() */
-} kcf_digest_ops_params_t;
-
-typedef struct kcf_mac_ops_params {
- crypto_session_id_t mo_sid;
- crypto_mech_type_t mo_framework_mechtype;
- crypto_mechanism_t mo_mech;
- crypto_key_t *mo_key;
- crypto_data_t *mo_data;
- crypto_data_t *mo_mac;
- crypto_spi_ctx_template_t mo_templ;
-} kcf_mac_ops_params_t;
-
-typedef struct kcf_encrypt_ops_params {
- crypto_session_id_t eo_sid;
- crypto_mech_type_t eo_framework_mechtype;
- crypto_mechanism_t eo_mech;
- crypto_key_t *eo_key;
- crypto_data_t *eo_plaintext;
- crypto_data_t *eo_ciphertext;
- crypto_spi_ctx_template_t eo_templ;
-} kcf_encrypt_ops_params_t;
-
-typedef struct kcf_decrypt_ops_params {
- crypto_session_id_t dop_sid;
- crypto_mech_type_t dop_framework_mechtype;
- crypto_mechanism_t dop_mech;
- crypto_key_t *dop_key;
- crypto_data_t *dop_ciphertext;
- crypto_data_t *dop_plaintext;
- crypto_spi_ctx_template_t dop_templ;
-} kcf_decrypt_ops_params_t;
-
-typedef struct kcf_sign_ops_params {
- crypto_session_id_t so_sid;
- crypto_mech_type_t so_framework_mechtype;
- crypto_mechanism_t so_mech;
- crypto_key_t *so_key;
- crypto_data_t *so_data;
- crypto_data_t *so_signature;
- crypto_spi_ctx_template_t so_templ;
-} kcf_sign_ops_params_t;
-
-typedef struct kcf_verify_ops_params {
- crypto_session_id_t vo_sid;
- crypto_mech_type_t vo_framework_mechtype;
- crypto_mechanism_t vo_mech;
- crypto_key_t *vo_key;
- crypto_data_t *vo_data;
- crypto_data_t *vo_signature;
- crypto_spi_ctx_template_t vo_templ;
-} kcf_verify_ops_params_t;
-
-typedef struct kcf_encrypt_mac_ops_params {
- crypto_session_id_t em_sid;
- crypto_mech_type_t em_framework_encr_mechtype;
- crypto_mechanism_t em_encr_mech;
- crypto_key_t *em_encr_key;
- crypto_mech_type_t em_framework_mac_mechtype;
- crypto_mechanism_t em_mac_mech;
- crypto_key_t *em_mac_key;
- crypto_data_t *em_plaintext;
- crypto_dual_data_t *em_ciphertext;
- crypto_data_t *em_mac;
- crypto_spi_ctx_template_t em_encr_templ;
- crypto_spi_ctx_template_t em_mac_templ;
-} kcf_encrypt_mac_ops_params_t;
-
-typedef struct kcf_mac_decrypt_ops_params {
- crypto_session_id_t md_sid;
- crypto_mech_type_t md_framework_mac_mechtype;
- crypto_mechanism_t md_mac_mech;
- crypto_key_t *md_mac_key;
- crypto_mech_type_t md_framework_decr_mechtype;
- crypto_mechanism_t md_decr_mech;
- crypto_key_t *md_decr_key;
- crypto_dual_data_t *md_ciphertext;
- crypto_data_t *md_mac;
- crypto_data_t *md_plaintext;
- crypto_spi_ctx_template_t md_mac_templ;
- crypto_spi_ctx_template_t md_decr_templ;
-} kcf_mac_decrypt_ops_params_t;
-
-typedef struct kcf_random_number_ops_params {
- crypto_session_id_t rn_sid;
- uchar_t *rn_buf;
- size_t rn_buflen;
- uint_t rn_entropy_est;
- uint32_t rn_flags;
-} kcf_random_number_ops_params_t;
-
-/*
- * so_pd is useful when the provider descriptor (pd) supplying the
- * provider handle is different from the pd supplying the ops vector.
- * This is the case for session open/close where so_pd can be the pd
- * of a logical provider. The pd supplying the ops vector is passed
- * as an argument to kcf_submit_request().
- */
-typedef struct kcf_session_ops_params {
- crypto_session_id_t *so_sid_ptr;
- crypto_session_id_t so_sid;
- crypto_user_type_t so_user_type;
- char *so_pin;
- size_t so_pin_len;
- kcf_provider_desc_t *so_pd;
-} kcf_session_ops_params_t;
-
-typedef struct kcf_object_ops_params {
- crypto_session_id_t oo_sid;
- crypto_object_id_t oo_object_id;
- crypto_object_attribute_t *oo_template;
- uint_t oo_attribute_count;
- crypto_object_id_t *oo_object_id_ptr;
- size_t *oo_object_size;
- void **oo_find_init_pp_ptr;
- void *oo_find_pp;
- uint_t oo_max_object_count;
- uint_t *oo_object_count_ptr;
-} kcf_object_ops_params_t;
-
-/*
- * ko_key is used to encode the wrapping key in key_wrap() and
- * the unwrapping key in key_unwrap(). ko_key_template and
- * ko_key_attribute_count are used to encode the public template
- * and its attribute count in key_generate_pair().
- * kops->ko_key_object_id_ptr is used to encode the public key
- * in key_generate_pair().
- */
-typedef struct kcf_key_ops_params {
- crypto_session_id_t ko_sid;
- crypto_mech_type_t ko_framework_mechtype;
- crypto_mechanism_t ko_mech;
- crypto_object_attribute_t *ko_key_template;
- uint_t ko_key_attribute_count;
- crypto_object_id_t *ko_key_object_id_ptr;
- crypto_object_attribute_t *ko_private_key_template;
- uint_t ko_private_key_attribute_count;
- crypto_object_id_t *ko_private_key_object_id_ptr;
- crypto_key_t *ko_key;
- uchar_t *ko_wrapped_key;
- size_t *ko_wrapped_key_len_ptr;
- crypto_object_attribute_t *ko_out_template1;
- crypto_object_attribute_t *ko_out_template2;
- uint_t ko_out_attribute_count1;
- uint_t ko_out_attribute_count2;
-} kcf_key_ops_params_t;
-
-/*
- * po_pin and po_pin_len are used to encode new_pin and new_pin_len
- * when wrapping set_pin() function parameters.
- *
- * po_pd is useful when the provider descriptor (pd) supplying the
- * provider handle is different from the pd supplying the ops vector.
- * This is true for the ext_info provider entry point where po_pd
- * can be the pd of a logical provider. The pd supplying the ops vector
- * is passed as an argument to kcf_submit_request().
- */
-typedef struct kcf_provmgmt_ops_params {
- crypto_session_id_t po_sid;
- char *po_pin;
- size_t po_pin_len;
- char *po_old_pin;
- size_t po_old_pin_len;
- char *po_label;
- crypto_provider_ext_info_t *po_ext_info;
- kcf_provider_desc_t *po_pd;
-} kcf_provmgmt_ops_params_t;
-
-/*
- * The operation type within a function group.
- */
-typedef enum kcf_op_type {
- /* common ops for all mechanisms */
- KCF_OP_INIT = 1,
- KCF_OP_SINGLE, /* PKCS#11 sense: INIT is already done */
- KCF_OP_UPDATE,
- KCF_OP_FINAL,
- KCF_OP_ATOMIC,
-
- /* digest_key op */
- KCF_OP_DIGEST_KEY,
-
- /* mac specific op */
- KCF_OP_MAC_VERIFY_ATOMIC,
-
- /* mac/cipher specific op */
- KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC,
-
- /* sign_recover ops */
- KCF_OP_SIGN_RECOVER_INIT,
- KCF_OP_SIGN_RECOVER,
- KCF_OP_SIGN_RECOVER_ATOMIC,
-
- /* verify_recover ops */
- KCF_OP_VERIFY_RECOVER_INIT,
- KCF_OP_VERIFY_RECOVER,
- KCF_OP_VERIFY_RECOVER_ATOMIC,
-
- /* random number ops */
- KCF_OP_RANDOM_SEED,
- KCF_OP_RANDOM_GENERATE,
-
- /* session management ops */
- KCF_OP_SESSION_OPEN,
- KCF_OP_SESSION_CLOSE,
- KCF_OP_SESSION_LOGIN,
- KCF_OP_SESSION_LOGOUT,
-
- /* object management ops */
- KCF_OP_OBJECT_CREATE,
- KCF_OP_OBJECT_COPY,
- KCF_OP_OBJECT_DESTROY,
- KCF_OP_OBJECT_GET_SIZE,
- KCF_OP_OBJECT_GET_ATTRIBUTE_VALUE,
- KCF_OP_OBJECT_SET_ATTRIBUTE_VALUE,
- KCF_OP_OBJECT_FIND_INIT,
- KCF_OP_OBJECT_FIND,
- KCF_OP_OBJECT_FIND_FINAL,
-
- /* key management ops */
- KCF_OP_KEY_GENERATE,
- KCF_OP_KEY_GENERATE_PAIR,
- KCF_OP_KEY_WRAP,
- KCF_OP_KEY_UNWRAP,
- KCF_OP_KEY_DERIVE,
- KCF_OP_KEY_CHECK,
-
- /* provider management ops */
- KCF_OP_MGMT_EXTINFO,
- KCF_OP_MGMT_INITTOKEN,
- KCF_OP_MGMT_INITPIN,
- KCF_OP_MGMT_SETPIN
-} kcf_op_type_t;
-
-/*
- * The operation groups that need wrapping of parameters. This is somewhat
- * similar to the function group type in spi.h except that this also includes
- * all the functions that don't have a mechanism.
- *
- * The wrapper macros should never take these enum values as an argument.
- * Rather, they are assigned in the macro itself since they are known
- * from the macro name.
- */
-typedef enum kcf_op_group {
- KCF_OG_DIGEST = 1,
- KCF_OG_MAC,
- KCF_OG_ENCRYPT,
- KCF_OG_DECRYPT,
- KCF_OG_SIGN,
- KCF_OG_VERIFY,
- KCF_OG_ENCRYPT_MAC,
- KCF_OG_MAC_DECRYPT,
- KCF_OG_RANDOM,
- KCF_OG_SESSION,
- KCF_OG_OBJECT,
- KCF_OG_KEY,
- KCF_OG_PROVMGMT,
- KCF_OG_NOSTORE_KEY
-} kcf_op_group_t;
-
-/*
- * The kcf_op_type_t enum values used here should be used only for
- * operations that have a k-api routine in sys/crypto/api.h.
- */
-#define IS_INIT_OP(ftype) ((ftype) == KCF_OP_INIT)
-#define IS_SINGLE_OP(ftype) ((ftype) == KCF_OP_SINGLE)
-#define IS_UPDATE_OP(ftype) ((ftype) == KCF_OP_UPDATE)
-#define IS_FINAL_OP(ftype) ((ftype) == KCF_OP_FINAL)
-#define IS_ATOMIC_OP(ftype) ( \
- (ftype) == KCF_OP_ATOMIC || (ftype) == KCF_OP_MAC_VERIFY_ATOMIC || \
- (ftype) == KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC || \
- (ftype) == KCF_OP_SIGN_RECOVER_ATOMIC || \
- (ftype) == KCF_OP_VERIFY_RECOVER_ATOMIC)
-
-/*
- * Keep the parameters associated with a request around.
- * We need to pass them to the SPI.
- */
-typedef struct kcf_req_params {
- kcf_op_group_t rp_opgrp;
- kcf_op_type_t rp_optype;
-
- union {
- kcf_digest_ops_params_t digest_params;
- kcf_mac_ops_params_t mac_params;
- kcf_encrypt_ops_params_t encrypt_params;
- kcf_decrypt_ops_params_t decrypt_params;
- kcf_sign_ops_params_t sign_params;
- kcf_verify_ops_params_t verify_params;
- kcf_encrypt_mac_ops_params_t encrypt_mac_params;
- kcf_mac_decrypt_ops_params_t mac_decrypt_params;
- kcf_random_number_ops_params_t random_number_params;
- kcf_session_ops_params_t session_params;
- kcf_object_ops_params_t object_params;
- kcf_key_ops_params_t key_params;
- kcf_provmgmt_ops_params_t provmgmt_params;
- } rp_u;
-} kcf_req_params_t;
-
-
-/*
- * The ioctl/k-api code should bundle the parameters into a kcf_req_params_t
- * structure before calling a scheduler routine. The following macros are
- * available for that purpose.
- *
- * For the most part, the macro arguments closely correspond to the
- * function parameters. In some cases, we use generic names. The comments
- * for the structure should indicate these cases.
- */
-#define KCF_WRAP_DIGEST_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
- _data, _digest) { \
- kcf_digest_ops_params_t *dops = &(req)->rp_u.digest_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_DIGEST; \
- (req)->rp_optype = ftype; \
- dops->do_sid = _sid; \
- if (mechp != NULL) { \
- dops->do_mech = *mechp; \
- dops->do_framework_mechtype = mechp->cm_type; \
- } \
- dops->do_digest_key = _key; \
- dops->do_data = _data; \
- dops->do_digest = _digest; \
-}
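A sketch of how a k-api routine would have used this wrapper before handing the request to the scheduler, modeled on the single-part (atomic) digest path; pd, sid, mech, data, digest, and crq are assumed caller state:

kcf_req_params_t params;
int error;

/* Bundle the digest arguments, then submit the request to the scheduler. */
KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, NULL,
    data, digest);
error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);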
-
-#define KCF_WRAP_MAC_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
- _data, _mac, _templ) { \
- kcf_mac_ops_params_t *mops = &(req)->rp_u.mac_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_MAC; \
- (req)->rp_optype = ftype; \
- mops->mo_sid = _sid; \
- if (mechp != NULL) { \
- mops->mo_mech = *mechp; \
- mops->mo_framework_mechtype = mechp->cm_type; \
- } \
- mops->mo_key = _key; \
- mops->mo_data = _data; \
- mops->mo_mac = _mac; \
- mops->mo_templ = _templ; \
-}
-
-#define KCF_WRAP_ENCRYPT_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
- _plaintext, _ciphertext, _templ) { \
- kcf_encrypt_ops_params_t *cops = &(req)->rp_u.encrypt_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_ENCRYPT; \
- (req)->rp_optype = ftype; \
- cops->eo_sid = _sid; \
- if (mechp != NULL) { \
- cops->eo_mech = *mechp; \
- cops->eo_framework_mechtype = mechp->cm_type; \
- } \
- cops->eo_key = _key; \
- cops->eo_plaintext = _plaintext; \
- cops->eo_ciphertext = _ciphertext; \
- cops->eo_templ = _templ; \
-}
-
-#define KCF_WRAP_DECRYPT_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
- _ciphertext, _plaintext, _templ) { \
- kcf_decrypt_ops_params_t *cops = &(req)->rp_u.decrypt_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_DECRYPT; \
- (req)->rp_optype = ftype; \
- cops->dop_sid = _sid; \
- if (mechp != NULL) { \
- cops->dop_mech = *mechp; \
- cops->dop_framework_mechtype = mechp->cm_type; \
- } \
- cops->dop_key = _key; \
- cops->dop_ciphertext = _ciphertext; \
- cops->dop_plaintext = _plaintext; \
- cops->dop_templ = _templ; \
-}
-
-#define KCF_WRAP_SIGN_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
- _data, _signature, _templ) { \
- kcf_sign_ops_params_t *sops = &(req)->rp_u.sign_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_SIGN; \
- (req)->rp_optype = ftype; \
- sops->so_sid = _sid; \
- if (mechp != NULL) { \
- sops->so_mech = *mechp; \
- sops->so_framework_mechtype = mechp->cm_type; \
- } \
- sops->so_key = _key; \
- sops->so_data = _data; \
- sops->so_signature = _signature; \
- sops->so_templ = _templ; \
-}
-
-#define KCF_WRAP_VERIFY_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
- _data, _signature, _templ) { \
- kcf_verify_ops_params_t *vops = &(req)->rp_u.verify_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_VERIFY; \
- (req)->rp_optype = ftype; \
- vops->vo_sid = _sid; \
- if (mechp != NULL) { \
- vops->vo_mech = *mechp; \
- vops->vo_framework_mechtype = mechp->cm_type; \
- } \
- vops->vo_key = _key; \
- vops->vo_data = _data; \
- vops->vo_signature = _signature; \
- vops->vo_templ = _templ; \
-}
-
-#define KCF_WRAP_ENCRYPT_MAC_OPS_PARAMS(req, ftype, _sid, _encr_key, \
- _mac_key, _plaintext, _ciphertext, _mac, _encr_templ, _mac_templ) { \
- kcf_encrypt_mac_ops_params_t *cmops = &(req)->rp_u.encrypt_mac_params; \
- \
- (req)->rp_opgrp = KCF_OG_ENCRYPT_MAC; \
- (req)->rp_optype = ftype; \
- cmops->em_sid = _sid; \
- cmops->em_encr_key = _encr_key; \
- cmops->em_mac_key = _mac_key; \
- cmops->em_plaintext = _plaintext; \
- cmops->em_ciphertext = _ciphertext; \
- cmops->em_mac = _mac; \
- cmops->em_encr_templ = _encr_templ; \
- cmops->em_mac_templ = _mac_templ; \
-}
-
-#define KCF_WRAP_MAC_DECRYPT_OPS_PARAMS(req, ftype, _sid, _mac_key, \
- _decr_key, _ciphertext, _mac, _plaintext, _mac_templ, _decr_templ) { \
- kcf_mac_decrypt_ops_params_t *cmops = &(req)->rp_u.mac_decrypt_params; \
- \
- (req)->rp_opgrp = KCF_OG_MAC_DECRYPT; \
- (req)->rp_optype = ftype; \
- cmops->md_sid = _sid; \
- cmops->md_mac_key = _mac_key; \
- cmops->md_decr_key = _decr_key; \
- cmops->md_ciphertext = _ciphertext; \
- cmops->md_mac = _mac; \
- cmops->md_plaintext = _plaintext; \
- cmops->md_mac_templ = _mac_templ; \
- cmops->md_decr_templ = _decr_templ; \
-}
-
-#define KCF_WRAP_RANDOM_OPS_PARAMS(req, ftype, _sid, _buf, _buflen, \
- _est, _flags) { \
- kcf_random_number_ops_params_t *rops = \
- &(req)->rp_u.random_number_params; \
- \
- (req)->rp_opgrp = KCF_OG_RANDOM; \
- (req)->rp_optype = ftype; \
- rops->rn_sid = _sid; \
- rops->rn_buf = _buf; \
- rops->rn_buflen = _buflen; \
- rops->rn_entropy_est = _est; \
- rops->rn_flags = _flags; \
-}
-
-#define KCF_WRAP_SESSION_OPS_PARAMS(req, ftype, _sid_ptr, _sid, \
- _user_type, _pin, _pin_len, _pd) { \
- kcf_session_ops_params_t *sops = &(req)->rp_u.session_params; \
- \
- (req)->rp_opgrp = KCF_OG_SESSION; \
- (req)->rp_optype = ftype; \
- sops->so_sid_ptr = _sid_ptr; \
- sops->so_sid = _sid; \
- sops->so_user_type = _user_type; \
- sops->so_pin = _pin; \
- sops->so_pin_len = _pin_len; \
- sops->so_pd = _pd; \
-}
-
-#define KCF_WRAP_OBJECT_OPS_PARAMS(req, ftype, _sid, _object_id, \
- _template, _attribute_count, _object_id_ptr, _object_size, \
- _find_init_pp_ptr, _find_pp, _max_object_count, _object_count_ptr) { \
- kcf_object_ops_params_t *jops = &(req)->rp_u.object_params; \
- \
- (req)->rp_opgrp = KCF_OG_OBJECT; \
- (req)->rp_optype = ftype; \
- jops->oo_sid = _sid; \
- jops->oo_object_id = _object_id; \
- jops->oo_template = _template; \
- jops->oo_attribute_count = _attribute_count; \
- jops->oo_object_id_ptr = _object_id_ptr; \
- jops->oo_object_size = _object_size; \
- jops->oo_find_init_pp_ptr = _find_init_pp_ptr; \
- jops->oo_find_pp = _find_pp; \
- jops->oo_max_object_count = _max_object_count; \
- jops->oo_object_count_ptr = _object_count_ptr; \
-}
-
-#define KCF_WRAP_KEY_OPS_PARAMS(req, ftype, _sid, _mech, _key_template, \
- _key_attribute_count, _key_object_id_ptr, _private_key_template, \
- _private_key_attribute_count, _private_key_object_id_ptr, \
- _key, _wrapped_key, _wrapped_key_len_ptr) { \
- kcf_key_ops_params_t *kops = &(req)->rp_u.key_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_KEY; \
- (req)->rp_optype = ftype; \
- kops->ko_sid = _sid; \
- if (mechp != NULL) { \
- kops->ko_mech = *mechp; \
- kops->ko_framework_mechtype = mechp->cm_type; \
- } \
- kops->ko_key_template = _key_template; \
- kops->ko_key_attribute_count = _key_attribute_count; \
- kops->ko_key_object_id_ptr = _key_object_id_ptr; \
- kops->ko_private_key_template = _private_key_template; \
- kops->ko_private_key_attribute_count = _private_key_attribute_count; \
- kops->ko_private_key_object_id_ptr = _private_key_object_id_ptr; \
- kops->ko_key = _key; \
- kops->ko_wrapped_key = _wrapped_key; \
- kops->ko_wrapped_key_len_ptr = _wrapped_key_len_ptr; \
-}
-
-#define KCF_WRAP_PROVMGMT_OPS_PARAMS(req, ftype, _sid, _old_pin, \
- _old_pin_len, _pin, _pin_len, _label, _ext_info, _pd) { \
- kcf_provmgmt_ops_params_t *pops = &(req)->rp_u.provmgmt_params; \
- \
- (req)->rp_opgrp = KCF_OG_PROVMGMT; \
- (req)->rp_optype = ftype; \
- pops->po_sid = _sid; \
- pops->po_pin = _pin; \
- pops->po_pin_len = _pin_len; \
- pops->po_old_pin = _old_pin; \
- pops->po_old_pin_len = _old_pin_len; \
- pops->po_label = _label; \
- pops->po_ext_info = _ext_info; \
- pops->po_pd = _pd; \
-}
-
-#define KCF_WRAP_NOSTORE_KEY_OPS_PARAMS(req, ftype, _sid, _mech, \
- _key_template, _key_attribute_count, _private_key_template, \
- _private_key_attribute_count, _key, _out_template1, \
- _out_attribute_count1, _out_template2, _out_attribute_count2) { \
- kcf_key_ops_params_t *kops = &(req)->rp_u.key_params; \
- crypto_mechanism_t *mechp = _mech; \
- \
- (req)->rp_opgrp = KCF_OG_NOSTORE_KEY; \
- (req)->rp_optype = ftype; \
- kops->ko_sid = _sid; \
- if (mechp != NULL) { \
- kops->ko_mech = *mechp; \
- kops->ko_framework_mechtype = mechp->cm_type; \
- } \
- kops->ko_key_template = _key_template; \
- kops->ko_key_attribute_count = _key_attribute_count; \
- kops->ko_key_object_id_ptr = NULL; \
- kops->ko_private_key_template = _private_key_template; \
- kops->ko_private_key_attribute_count = _private_key_attribute_count; \
- kops->ko_private_key_object_id_ptr = NULL; \
- kops->ko_key = _key; \
- kops->ko_wrapped_key = NULL; \
- kops->ko_wrapped_key_len_ptr = 0; \
- kops->ko_out_template1 = _out_template1; \
- kops->ko_out_template2 = _out_template2; \
- kops->ko_out_attribute_count1 = _out_attribute_count1; \
- kops->ko_out_attribute_count2 = _out_attribute_count2; \
-}
-
-#define KCF_SET_PROVIDER_MECHNUM(fmtype, pd, mechp) \
- (mechp)->cm_type = \
- KCF_TO_PROV_MECHNUM(pd, fmtype);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CRYPTO_OPS_IMPL_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h
index 85ea0ba1d092..355c1a87faa4 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -39,60 +39,6 @@ extern "C" {
#include <sys/crypto/spi.h>
#include <sys/crypto/impl.h>
#include <sys/crypto/common.h>
-#include <sys/crypto/ops_impl.h>
-
-typedef void (kcf_func_t)(void *, int);
-
-typedef enum kcf_req_status {
- REQ_ALLOCATED = 1,
- REQ_WAITING, /* At the framework level */
- REQ_INPROGRESS, /* At the provider level */
- REQ_DONE,
- REQ_CANCELED
-} kcf_req_status_t;
-
-typedef enum kcf_call_type {
- CRYPTO_SYNCH = 1,
- CRYPTO_ASYNCH
-} kcf_call_type_t;
-
-#define CHECK_RESTRICT(crq) (crq != NULL && \
- ((crq)->cr_flag & CRYPTO_RESTRICTED))
-
-#define CHECK_RESTRICT_FALSE B_FALSE
-
-#define CHECK_FASTPATH(crq, pd) ((crq) == NULL || \
- !((crq)->cr_flag & CRYPTO_ALWAYS_QUEUE)) && \
- (pd)->pd_prov_type == CRYPTO_SW_PROVIDER
-
-#define KCF_KMFLAG(crq) (((crq) == NULL) ? KM_SLEEP : KM_NOSLEEP)
-
-/*
- * The framework keeps an internal handle to use in the adaptive
- * asynchronous case. This is the case when a client has the
- * CRYPTO_ALWAYS_QUEUE bit clear and a software provider is used for
- * the request. The request is completed in the context of the calling
- * thread and kernel memory must be allocated with KM_NOSLEEP.
- *
- * The framework passes a pointer to the handle in crypto_req_handle_t
- * argument when it calls the SPI of the software provider. The macros
- * KCF_RHNDL() and KCF_SWFP_RHNDL() are used to do this.
- *
- * When a provider asks the framework for kmflag value via
- * crypto_kmflag(9S) we use REQHNDL2_KMFLAG() macro.
- */
-extern ulong_t kcf_swprov_hndl;
-#define KCF_RHNDL(kmflag) (((kmflag) == KM_SLEEP) ? NULL : &kcf_swprov_hndl)
-#define KCF_SWFP_RHNDL(crq) (((crq) == NULL) ? NULL : &kcf_swprov_hndl)
-#define REQHNDL2_KMFLAG(rhndl) \
- ((rhndl == &kcf_swprov_hndl) ? KM_NOSLEEP : KM_SLEEP)
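Putting these macros together, a minimal sketch of the round trip from client flags to allocation flag; crq and len are assumed, and the kmem_alloc() call stands in for any provider-side allocation:

/* Framework side: a NULL call req means synchronous, so sleeping is safe. */
crypto_req_handle_t rhndl = KCF_SWFP_RHNDL(crq);

/* Provider side: recover the matching kmem flag from the handle. */
int kmflag = REQHNDL2_KMFLAG(rhndl);
void *buf = kmem_alloc(len, kmflag);	/* KM_NOSLEEP on the adaptive path */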
-
-/* Internal call_req flags. They start after the public ones in api.h */
-
-#define CRYPTO_SETDUAL 0x00001000 /* Set the 'cont' boolean before */
- /* submitting the request */
-#define KCF_ISDUALREQ(crq) \
- (((crq) == NULL) ? B_FALSE : (crq->cr_flag & CRYPTO_SETDUAL))
typedef struct kcf_prov_tried {
kcf_provider_desc_t *pt_pd;
@@ -106,178 +52,8 @@ typedef struct kcf_prov_tried {
(tlist != NULL && is_in_triedlist(pd, tlist))
#define IS_RECOVERABLE(error) \
- (error == CRYPTO_BUFFER_TOO_BIG || \
- error == CRYPTO_BUSY || \
- error == CRYPTO_DEVICE_ERROR || \
- error == CRYPTO_DEVICE_MEMORY || \
- error == CRYPTO_KEY_SIZE_RANGE || \
- error == CRYPTO_NO_PERMISSION)
-
-#define KCF_ATOMIC_INCR(x) atomic_add_32(&(x), 1)
-#define KCF_ATOMIC_DECR(x) atomic_add_32(&(x), -1)
-
-/*
- * Node structure for synchronous requests.
- */
-typedef struct kcf_sreq_node {
- /* Should always be the first field in this structure */
- kcf_call_type_t sn_type;
- /*
- * sn_cv and sn_lock are used to wait for the
- * operation to complete. sn_lock also protects
- * the sn_state field.
- */
- kcondvar_t sn_cv;
- kmutex_t sn_lock;
- kcf_req_status_t sn_state;
-
- /*
- * Return value from the operation. This will be
- * one of the CRYPTO_* errors defined in common.h.
- */
- int sn_rv;
-
- /*
- * Parameters to call the SPI with. This can be
- * a pointer, since the caller's context/stack stays valid.
- */
- struct kcf_req_params *sn_params;
-
- /* Internal context for this request */
- struct kcf_context *sn_context;
-
- /* Provider handling this request */
- kcf_provider_desc_t *sn_provider;
-} kcf_sreq_node_t;
-
-/*
- * Node structure for asynchronous requests. A node can be on
- * on a chain of requests hanging of the internal context
- * structure and can be in the global software provider queue.
- */
-typedef struct kcf_areq_node {
- /* Should always be the first field in this structure */
- kcf_call_type_t an_type;
-
- /* an_lock protects the field an_state */
- kmutex_t an_lock;
- kcf_req_status_t an_state;
- crypto_call_req_t an_reqarg;
-
- /*
- * parameters to call the SPI with. We need to
- * save the params since the caller stack can go away.
- */
- struct kcf_req_params an_params;
-
- /*
- * The next two fields should be NULL for operations that
- * don't need a context.
- */
- /* Internal context for this request */
- struct kcf_context *an_context;
-
- /* next in chain of requests for context */
- struct kcf_areq_node *an_ctxchain_next;
-
- kcondvar_t an_turn_cv;
- boolean_t an_is_my_turn;
- boolean_t an_isdual; /* for internal reuse */
-
- /*
- * Next and previous nodes in the global software
- * queue. These fields are NULL for a hardware
- * provider since we use a taskq there.
- */
- struct kcf_areq_node *an_next;
- struct kcf_areq_node *an_prev;
-
- /* Provider handling this request */
- kcf_provider_desc_t *an_provider;
- kcf_prov_tried_t *an_tried_plist;
-
- struct kcf_areq_node *an_idnext; /* Next in ID hash */
- struct kcf_areq_node *an_idprev; /* Prev in ID hash */
- kcondvar_t an_done; /* Signal request completion */
- uint_t an_refcnt;
-} kcf_areq_node_t;
-
-#define KCF_AREQ_REFHOLD(areq) { \
- atomic_add_32(&(areq)->an_refcnt, 1); \
- ASSERT((areq)->an_refcnt != 0); \
-}
-
-#define KCF_AREQ_REFRELE(areq) { \
- ASSERT((areq)->an_refcnt != 0); \
- membar_exit(); \
- if (atomic_add_32_nv(&(areq)->an_refcnt, -1) == 0) \
- kcf_free_req(areq); \
-}
-
-#define GET_REQ_TYPE(arg) *((kcf_call_type_t *)(arg))
-
-#define NOTIFY_CLIENT(areq, err) (*(areq)->an_reqarg.cr_callback_func)(\
- (areq)->an_reqarg.cr_callback_arg, err);
-
-/* For internally generated call requests for dual operations */
-typedef struct kcf_call_req {
- crypto_call_req_t kr_callreq; /* external client call req */
- kcf_req_params_t kr_params; /* Params saved for next call */
- kcf_areq_node_t *kr_areq; /* Use this areq */
- off_t kr_saveoffset;
- size_t kr_savelen;
-} kcf_dual_req_t;
-
-/*
- * The following are somewhat similar to macros in callo.h, which implement
- * callout tables.
- *
- * The lower four bits of the ID are used to encode the table ID to
- * index into. The REQID_COUNTER_HIGH bit is used to avoid any check for
- * wraparound when generating an ID. We assume that there won't be a request
- * which takes more time than 2^(8 * sizeof (long) - 5) other requests submitted
- * after it. This ensures there won't be any ID collision.
- */
-#define REQID_COUNTER_HIGH (1UL << (8 * sizeof (long) - 1))
-#define REQID_COUNTER_SHIFT 4
-#define REQID_COUNTER_LOW (1 << REQID_COUNTER_SHIFT)
-#define REQID_TABLES 16
-#define REQID_TABLE_MASK (REQID_TABLES - 1)
-
-#define REQID_BUCKETS 512
-#define REQID_BUCKET_MASK (REQID_BUCKETS - 1)
-#define REQID_HASH(id) (((id) >> REQID_COUNTER_SHIFT) & REQID_BUCKET_MASK)
-
-#define GET_REQID(areq) (areq)->an_reqarg.cr_reqid
-#define SET_REQID(areq, val) GET_REQID(areq) = val
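Decoding an ID follows directly from this layout; a sketch against the kcf_reqid_table_t defined just below, where id is an assumed request ID and kcf_reqid_table is assumed to be the scheduler's table array:

int table_idx = (int)(id & REQID_TABLE_MASK);	/* low four bits: table */
int bucket = REQID_HASH(id);			/* counter bits: hash bucket */
/* Look up the request, then walk its an_idnext chain on collisions. */
kcf_areq_node_t *areq = kcf_reqid_table[table_idx]->rt_idhash[bucket];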
-
-/*
- * Hash table for async requests.
- */
-typedef struct kcf_reqid_table {
- kmutex_t rt_lock;
- crypto_req_id_t rt_curid;
- kcf_areq_node_t *rt_idhash[REQID_BUCKETS];
-} kcf_reqid_table_t;
-
-/*
- * Global software provider queue structure. Requests to be
- * handled by a SW provider and have the ALWAYS_QUEUE flag set
- * get queued here.
- */
-typedef struct kcf_global_swq {
- /*
- * gs_cv and gs_lock are used to wait for new requests.
- * gs_lock protects the changes to the queue.
- */
- kcondvar_t gs_cv;
- kmutex_t gs_lock;
- uint_t gs_njobs;
- uint_t gs_maxjobs;
- kcf_areq_node_t *gs_first;
- kcf_areq_node_t *gs_last;
-} kcf_global_swq_t;
-
+ (error == CRYPTO_BUSY || \
+ error == CRYPTO_KEY_SIZE_RANGE)
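This macro supports a retry-against-other-providers loop; a sketch built on the declarations kept below in this header, where provider_digest_atomic() is a hypothetical stand-in for the actual SPI invocation and mech_type, data, and digest are assumed caller state:

kcf_prov_tried_t *list = NULL;
kcf_provider_desc_t *pd;
kcf_mech_entry_t *me;
int error = CRYPTO_MECH_NOT_SUPPORTED;

while ((pd = kcf_get_mech_provider(mech_type, &me, &error, list,
    CRYPTO_FG_DIGEST_ATOMIC)) != NULL) {
	error = provider_digest_atomic(pd, data, digest);
	if (!IS_RECOVERABLE(error))
		break;
	/* Remember this provider so the next lookup skips it. */
	if (kcf_insert_triedlist(&list, pd, KM_SLEEP) == NULL)
		break;
}
if (list != NULL)
	kcf_free_triedlist(list);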
/*
* Internal representation of a canonical context. We contain crypto_ctx_t
@@ -287,47 +63,27 @@ typedef struct kcf_global_swq {
typedef struct kcf_context {
crypto_ctx_t kc_glbl_ctx;
uint_t kc_refcnt;
- kmutex_t kc_in_use_lock;
- /*
- * kc_req_chain_first and kc_req_chain_last are used to chain
- * multiple async requests using the same context. They should be
- * NULL for sync requests.
- */
- kcf_areq_node_t *kc_req_chain_first;
- kcf_areq_node_t *kc_req_chain_last;
kcf_provider_desc_t *kc_prov_desc; /* Prov. descriptor */
kcf_provider_desc_t *kc_sw_prov_desc; /* Prov. descriptor */
- kcf_mech_entry_t *kc_mech;
- struct kcf_context *kc_secondctx; /* for dual contexts */
} kcf_context_t;
/*
- * Bump up the reference count on the framework private context. A
- * global context or a request that references this structure should
- * do a hold.
- */
-#define KCF_CONTEXT_REFHOLD(ictx) { \
- atomic_add_32(&(ictx)->kc_refcnt, 1); \
- ASSERT((ictx)->kc_refcnt != 0); \
-}
-
-/*
* Decrement the reference count on the framework private context.
* When the last reference is released, the framework private
* context structure is freed along with the global context.
*/
#define KCF_CONTEXT_REFRELE(ictx) { \
- ASSERT((ictx)->kc_refcnt != 0); \
- membar_exit(); \
- if (atomic_add_32_nv(&(ictx)->kc_refcnt, -1) == 0) \
+ membar_producer(); \
+ int newval = atomic_add_32_nv(&(ictx)->kc_refcnt, -1); \
+ ASSERT(newval != -1); \
+ if (newval == 0) \
kcf_free_context(ictx); \
}
/*
- * Check if we can release the context now. In case of CRYPTO_QUEUED
- * we do not release it as we can do it only after the provider notified
- * us. In case of CRYPTO_BUSY, the client can retry the request using
- * the context, so we do not release the context.
+ * Check if we can release the context now. In case of CRYPTO_BUSY,
+ * the client can retry the request using the context,
+ * so we do not release the context.
*
* This macro should be called only from the final routine in
* an init/update/final sequence. We do not release the context in case
@@ -345,185 +101,33 @@ typedef struct kcf_context {
* This macro determines whether we're done with a context.
*/
#define KCF_CONTEXT_DONE(rv) \
- ((rv) != CRYPTO_QUEUED && (rv) != CRYPTO_BUSY && \
- (rv) != CRYPTO_BUFFER_TOO_SMALL)
+ ((rv) != CRYPTO_BUSY && (rv) != CRYPTO_BUFFER_TOO_SMALL)
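A sketch of the final-routine pattern described above; rv is the provider's return code and kcf_ctx the framework-private context, both assumed caller state:

if (KCF_CONTEXT_DONE(rv))
	KCF_CONTEXT_REFRELE(kcf_ctx);	/* last release frees via kcf_free_context() */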
+
+
+#define KCF_SET_PROVIDER_MECHNUM(fmtype, pd, mechp) \
+ (mechp)->cm_type = \
+ KCF_TO_PROV_MECHNUM(pd, fmtype);
/*
* A crypto_ctx_template_t is internally a pointer to this struct
*/
typedef struct kcf_ctx_template {
- crypto_kcf_provider_handle_t ct_prov_handle; /* provider handle */
- uint_t ct_generation; /* generation # */
size_t ct_size; /* for freeing */
crypto_spi_ctx_template_t ct_prov_tmpl; /* context template */
- /* from the SW prov */
+ /* from the provider */
} kcf_ctx_template_t;
-/*
- * Structure for pool of threads working on global software queue.
- */
-typedef struct kcf_pool {
- uint32_t kp_threads; /* Number of threads in pool */
- uint32_t kp_idlethreads; /* Idle threads in pool */
- uint32_t kp_blockedthreads; /* Blocked threads in pool */
-
- /*
- * cv & lock to monitor the condition when no threads
- * are around. In this case the failover thread kicks in.
- */
- kcondvar_t kp_nothr_cv;
- kmutex_t kp_thread_lock;
-
- /* Userspace thread creator variables. */
- boolean_t kp_signal_create_thread; /* Create requested flag */
- int kp_nthrs; /* # of threads to create */
- boolean_t kp_user_waiting; /* Thread waiting for work */
-
- /*
- * cv & lock for the condition where more threads need to be
- * created. kp_user_lock also protects the three fields above.
- */
- kcondvar_t kp_user_cv; /* Creator cond. variable */
- kmutex_t kp_user_lock; /* Creator lock */
-} kcf_pool_t;
-
-
-/*
- * State of a crypto bufcall element.
- */
-typedef enum cbuf_state {
- CBUF_FREE = 1,
- CBUF_WAITING,
- CBUF_RUNNING
-} cbuf_state_t;
-
-/*
- * Structure of a crypto bufcall element.
- */
-typedef struct kcf_cbuf_elem {
- /*
- * lock and cv to wait for CBUF_RUNNING to be done
- * kc_lock also protects kc_state.
- */
- kmutex_t kc_lock;
- kcondvar_t kc_cv;
- cbuf_state_t kc_state;
-
- struct kcf_cbuf_elem *kc_next;
- struct kcf_cbuf_elem *kc_prev;
- void (*kc_func)(void *arg);
- void *kc_arg;
-} kcf_cbuf_elem_t;
-
-/*
- * State of a notify element.
- */
-typedef enum ntfy_elem_state {
- NTFY_WAITING = 1,
- NTFY_RUNNING
-} ntfy_elem_state_t;
-
-/*
- * Structure of a notify list element.
- */
-typedef struct kcf_ntfy_elem {
- /*
- * lock and cv to wait for NTFY_RUNNING to be done.
- * kn_lock also protects kn_state.
- */
- kmutex_t kn_lock;
- kcondvar_t kn_cv;
- ntfy_elem_state_t kn_state;
-
- struct kcf_ntfy_elem *kn_next;
- struct kcf_ntfy_elem *kn_prev;
-
- crypto_notify_callback_t kn_func;
- uint32_t kn_event_mask;
-} kcf_ntfy_elem_t;
-
-
-/*
- * The following values are based on the assumption that it would
- * take around eight CPUs to load a hardware provider (this is true for
- * at least one product) and a kernel client may come from different
- * low-priority interrupt levels. We will have CRYPTO_TASKQ_MIN number
- * of cached taskq entries. The CRYPTO_TASKQ_MAX number is based on
- * a throughput of 1GB/s using 512-byte buffers. These are just
- * reasonable estimates and might need to change in the future.
- */
-#define CRYPTO_TASKQ_THREADS 8
-#define CRYPTO_TASKQ_MIN 64
-#define CRYPTO_TASKQ_MAX 2 * 1024 * 1024
-
-extern int crypto_taskq_threads;
-extern int crypto_taskq_minalloc;
-extern int crypto_taskq_maxalloc;
-extern kcf_global_swq_t *gswq;
-extern int kcf_maxthreads;
-extern int kcf_minthreads;
-
-/*
- * All pending crypto bufcalls are put on a list. cbuf_list_lock
- * protects changes to this list.
- */
-extern kmutex_t cbuf_list_lock;
-extern kcondvar_t cbuf_list_cv;
-
-/*
- * All event subscribers are put on a list. kcf_notify_list_lock
- * protects changes to this list.
- */
-extern kmutex_t ntfy_list_lock;
-extern kcondvar_t ntfy_list_cv;
-
-boolean_t kcf_get_next_logical_provider_member(kcf_provider_desc_t *,
- kcf_provider_desc_t *, kcf_provider_desc_t **);
-extern int kcf_get_hardware_provider(crypto_mech_type_t, crypto_mech_type_t,
- boolean_t, kcf_provider_desc_t *, kcf_provider_desc_t **,
- crypto_func_group_t);
-extern int kcf_get_hardware_provider_nomech(offset_t, offset_t,
- boolean_t, kcf_provider_desc_t *, kcf_provider_desc_t **);
extern void kcf_free_triedlist(kcf_prov_tried_t *);
extern kcf_prov_tried_t *kcf_insert_triedlist(kcf_prov_tried_t **,
kcf_provider_desc_t *, int);
extern kcf_provider_desc_t *kcf_get_mech_provider(crypto_mech_type_t,
- kcf_mech_entry_t **, int *, kcf_prov_tried_t *, crypto_func_group_t,
- boolean_t, size_t);
-extern kcf_provider_desc_t *kcf_get_dual_provider(crypto_mechanism_t *,
- crypto_mechanism_t *, kcf_mech_entry_t **, crypto_mech_type_t *,
- crypto_mech_type_t *, int *, kcf_prov_tried_t *,
- crypto_func_group_t, crypto_func_group_t, boolean_t, size_t);
-extern crypto_ctx_t *kcf_new_ctx(crypto_call_req_t *, kcf_provider_desc_t *,
- crypto_session_id_t);
-extern int kcf_submit_request(kcf_provider_desc_t *, crypto_ctx_t *,
- crypto_call_req_t *, kcf_req_params_t *, boolean_t);
+ kcf_mech_entry_t **, int *, kcf_prov_tried_t *, crypto_func_group_t);
+extern crypto_ctx_t *kcf_new_ctx(kcf_provider_desc_t *);
extern void kcf_sched_destroy(void);
extern void kcf_sched_init(void);
-extern void kcf_sched_start(void);
-extern void kcf_sop_done(kcf_sreq_node_t *, int);
-extern void kcf_aop_done(kcf_areq_node_t *, int);
-extern int common_submit_request(kcf_provider_desc_t *,
- crypto_ctx_t *, kcf_req_params_t *, crypto_req_handle_t);
extern void kcf_free_context(kcf_context_t *);
-extern int kcf_svc_wait(int *);
-extern int kcf_svc_do_run(void);
-extern int kcf_need_signature_verification(kcf_provider_desc_t *);
-extern void kcf_verify_signature(void *);
-extern struct modctl *kcf_get_modctl(crypto_provider_info_t *);
-extern void verify_unverified_providers(void);
-extern void kcf_free_req(kcf_areq_node_t *areq);
-extern void crypto_bufcall_service(void);
-
-extern void kcf_walk_ntfylist(uint32_t, void *);
-extern void kcf_do_notify(kcf_provider_desc_t *, boolean_t);
-
-extern kcf_dual_req_t *kcf_alloc_req(crypto_call_req_t *);
-extern void kcf_next_req(void *, int);
-extern void kcf_last_req(void *, int);
-
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h
index 2c62b5706651..63dfce7957a8 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -43,39 +43,15 @@ extern "C" {
#define __no_const
#endif /* CONSTIFY_PLUGIN */
-#define CRYPTO_SPI_VERSION_1 1
-#define CRYPTO_SPI_VERSION_2 2
-#define CRYPTO_SPI_VERSION_3 3
-
-/*
- * Provider-private handle. This handle is specified by a provider
- * when it registers by means of the pi_provider_handle field of
- * the crypto_provider_info structure, and passed to the provider
- * when its entry points are invoked.
- */
-typedef void *crypto_provider_handle_t;
-
/*
- * Context templates can be used to by software providers to pre-process
+ * Context templates can be used by providers to pre-process
* keying material, such as key schedules. They are allocated by
- * a software provider create_ctx_template(9E) entry point, and passed
+ * a provider's create_ctx_template(9E) entry point, and passed
* as argument to initialization and atomic provider entry points.
*/
typedef void *crypto_spi_ctx_template_t;
/*
- * Request handles are used by the kernel to identify an asynchronous
- * request being processed by a provider. It is passed by the kernel
- * to a hardware provider when submitting a request, and must be
- * specified by a provider when calling crypto_op_notification(9F)
- */
-typedef void *crypto_req_handle_t;
-
-/* Values for cc_flags field */
-#define CRYPTO_INIT_OPSTATE 0x00000001 /* allocate and init cc_opstate */
-#define CRYPTO_USE_OPSTATE 0x00000002 /* .. start using it as context */
-
-/*
* The context structure is passed from the kernel to a provider.
* It contains the information needed to process a multi-part or
 * single-part operation. The context structure is not used
@@ -86,81 +62,24 @@ typedef void *crypto_req_handle_t;
* as separate arguments to Provider routines.
*/
typedef struct crypto_ctx {
- crypto_provider_handle_t cc_provider;
- crypto_session_id_t cc_session;
void *cc_provider_private; /* owned by provider */
void *cc_framework_private; /* owned by framework */
- uint32_t cc_flags; /* flags */
- void *cc_opstate; /* state */
} crypto_ctx_t;
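With the provider handle, session, flags, and opstate gone, cc_provider_private is the one field a provider owns across the init/update/final calls of a multi-part operation. A hedged sketch of an init entry point; the my_* names are hypothetical, and CRYPTO_SUCCESS is assumed from sys/crypto/common.h:

	static int
	my_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism)
	{
		my_sha256_state_t *st;

		(void) mechanism;	/* single-mechanism provider */
		st = kmem_alloc(sizeof (*st), KM_SLEEP);
		my_sha256_begin(st);
		ctx->cc_provider_private = st;	/* update/final find it here */
		return (CRYPTO_SUCCESS);
	}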
/*
- * Extended provider information.
- */
-
-/*
- * valid values for ei_flags field of extended info structure
- * They match the RSA Security, Inc PKCS#11 tokenInfo flags.
- */
-#define CRYPTO_EXTF_RNG 0x00000001
-#define CRYPTO_EXTF_WRITE_PROTECTED 0x00000002
-#define CRYPTO_EXTF_LOGIN_REQUIRED 0x00000004
-#define CRYPTO_EXTF_USER_PIN_INITIALIZED 0x00000008
-#define CRYPTO_EXTF_CLOCK_ON_TOKEN 0x00000040
-#define CRYPTO_EXTF_PROTECTED_AUTHENTICATION_PATH 0x00000100
-#define CRYPTO_EXTF_DUAL_CRYPTO_OPERATIONS 0x00000200
-#define CRYPTO_EXTF_TOKEN_INITIALIZED 0x00000400
-#define CRYPTO_EXTF_USER_PIN_COUNT_LOW 0x00010000
-#define CRYPTO_EXTF_USER_PIN_FINAL_TRY 0x00020000
-#define CRYPTO_EXTF_USER_PIN_LOCKED 0x00040000
-#define CRYPTO_EXTF_USER_PIN_TO_BE_CHANGED 0x00080000
-#define CRYPTO_EXTF_SO_PIN_COUNT_LOW 0x00100000
-#define CRYPTO_EXTF_SO_PIN_FINAL_TRY 0x00200000
-#define CRYPTO_EXTF_SO_PIN_LOCKED 0x00400000
-#define CRYPTO_EXTF_SO_PIN_TO_BE_CHANGED 0x00800000
-
-/*
- * The crypto_control_ops structure contains pointers to control
- * operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_control_ops {
- void (*provider_status)(crypto_provider_handle_t, uint_t *);
-} __no_const crypto_control_ops_t;
-
-/*
- * The crypto_ctx_ops structure contains points to context and context
- * templates management operations for cryptographic providers. It is
- * passed through the crypto_ops(9S) structure when providers register
- * with the kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_ctx_ops {
- int (*create_ctx_template)(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t);
- int (*free_context)(crypto_ctx_t *);
-} __no_const crypto_ctx_ops_t;
-
-/*
* The crypto_digest_ops structure contains pointers to digest
* operations for cryptographic providers. It is passed through
* the crypto_ops(9S) structure when providers register with the
* kernel using crypto_register_provider(9F).
*/
typedef struct crypto_digest_ops {
- int (*digest_init)(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_req_handle_t);
- int (*digest)(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
- int (*digest_update)(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
- int (*digest_key)(crypto_ctx_t *, crypto_key_t *, crypto_req_handle_t);
- int (*digest_final)(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
- int (*digest_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
+ int (*digest_init)(crypto_ctx_t *, crypto_mechanism_t *);
+ int (*digest)(crypto_ctx_t *, crypto_data_t *, crypto_data_t *);
+ int (*digest_update)(crypto_ctx_t *, crypto_data_t *);
+ int (*digest_key)(crypto_ctx_t *, crypto_key_t *);
+ int (*digest_final)(crypto_ctx_t *, crypto_data_t *);
+ int (*digest_atomic)(crypto_mechanism_t *, crypto_data_t *,
+ crypto_data_t *);
} __no_const crypto_digest_ops_t;
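Wired together, a digest provider's vector under the trimmed prototypes might look like this sketch (every my_* entry point is hypothetical and would match the corresponding member above):

	static const crypto_digest_ops_t my_digest_ops = {
		.digest_init	= my_digest_init,
		.digest		= my_digest,
		.digest_update	= my_digest_update,
		.digest_key	= my_digest_key,
		.digest_final	= my_digest_final,
		.digest_atomic	= my_digest_atomic,
	};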
/*
@@ -172,29 +91,27 @@ typedef struct crypto_digest_ops {
typedef struct crypto_cipher_ops {
int (*encrypt_init)(crypto_ctx_t *,
crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_spi_ctx_template_t);
int (*encrypt)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ crypto_data_t *, crypto_data_t *);
int (*encrypt_update)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ crypto_data_t *, crypto_data_t *);
int (*encrypt_final)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*encrypt_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_data_t *);
+ int (*encrypt_atomic)(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
int (*decrypt_init)(crypto_ctx_t *,
crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_spi_ctx_template_t);
int (*decrypt)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ crypto_data_t *, crypto_data_t *);
int (*decrypt_update)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ crypto_data_t *, crypto_data_t *);
int (*decrypt_final)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*decrypt_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_data_t *);
+ int (*decrypt_atomic)(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
} __no_const crypto_cipher_ops_t;
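The same trimming applies to ciphers: encrypt_init now receives only the context, mechanism, key, and optional template. A sketch of how a provider might honor that template, with hypothetical my_* types and helpers:

	static int
	my_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
	    crypto_key_t *key, crypto_spi_ctx_template_t template)
	{
		my_cipher_state_t *cs = kmem_alloc(sizeof (*cs), KM_SLEEP);

		(void) mechanism;
		/* Reuse a pre-expanded key schedule when the framework has one. */
		if (template != NULL)
			cs->cs_sched = *(my_key_sched_t *)template;
		else
			my_expand_key(key, &cs->cs_sched);
		ctx->cc_provider_private = cs;
		return (CRYPTO_SUCCESS);
	}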
/*
@@ -206,289 +123,30 @@ typedef struct crypto_cipher_ops {
typedef struct crypto_mac_ops {
int (*mac_init)(crypto_ctx_t *,
crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_spi_ctx_template_t);
int (*mac)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ crypto_data_t *, crypto_data_t *);
int (*mac_update)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
+ crypto_data_t *);
int (*mac_final)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*mac_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*mac_verify_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
+ crypto_data_t *);
+ int (*mac_atomic)(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
+ int (*mac_verify_atomic)(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
} __no_const crypto_mac_ops_t;
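The atomic entry points collapse init/update/final into a single call and no longer take a provider handle, session, or request handle. A hypothetical mac_atomic built from equally hypothetical my_hmac_* helpers:

	static int
	my_mac_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key,
	    crypto_data_t *data, crypto_data_t *mac,
	    crypto_spi_ctx_template_t template)
	{
		my_hmac_state_t st;

		(void) mechanism;
		my_hmac_init(&st, key, template);
		my_hmac_update(&st, data);
		return (my_hmac_final(&st, mac));
	}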
/*
- * The crypto_sign_ops structure contains pointers to signing
- * operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_sign_ops {
- int (*sign_init)(crypto_ctx_t *,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*sign)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*sign_update)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*sign_final)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*sign_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*sign_recover_init)(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*sign_recover)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*sign_recover_atomic)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
- crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
-} __no_const crypto_sign_ops_t;
-
-/*
- * The crypto_verify_ops structure contains pointers to verify
- * operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_verify_ops {
- int (*verify_init)(crypto_ctx_t *,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*do_verify)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*verify_update)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*verify_final)(crypto_ctx_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*verify_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*verify_recover_init)(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
- int (*verify_recover)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*verify_recover_atomic)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
- crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_req_handle_t);
-} __no_const crypto_verify_ops_t;
-
-/*
- * The crypto_dual_ops structure contains pointers to dual
- * cipher and sign/verify operations for cryptographic providers.
- * It is passed through the crypto_ops(9S) structure when
- * providers register with the kernel using
- * crypto_register_provider(9F).
- */
-typedef struct crypto_dual_ops {
- int (*digest_encrypt_update)(
- crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*decrypt_digest_update)(
- crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*sign_encrypt_update)(
- crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
- int (*decrypt_verify_update)(
- crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
-} __no_const crypto_dual_ops_t;
-
-/*
- * The crypto_dual_cipher_mac_ops structure contains pointers to dual
- * cipher and MAC operations for cryptographic providers.
- * It is passed through the crypto_ops(9S) structure when
- * providers register with the kernel using
- * crypto_register_provider(9F).
- */
-typedef struct crypto_dual_cipher_mac_ops {
- int (*encrypt_mac_init)(crypto_ctx_t *,
- crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
- int (*encrypt_mac)(crypto_ctx_t *,
- crypto_data_t *, crypto_dual_data_t *, crypto_data_t *,
- crypto_req_handle_t);
- int (*encrypt_mac_update)(crypto_ctx_t *,
- crypto_data_t *, crypto_dual_data_t *, crypto_req_handle_t);
- int (*encrypt_mac_final)(crypto_ctx_t *,
- crypto_dual_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*encrypt_mac_atomic)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_data_t *, crypto_dual_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-
- int (*mac_decrypt_init)(crypto_ctx_t *,
- crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
- int (*mac_decrypt)(crypto_ctx_t *,
- crypto_dual_data_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
- int (*mac_decrypt_update)(crypto_ctx_t *,
- crypto_dual_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*mac_decrypt_final)(crypto_ctx_t *,
- crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
- int (*mac_decrypt_atomic)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
- crypto_mechanism_t *, crypto_key_t *, crypto_dual_data_t *,
- crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
- int (*mac_verify_decrypt_atomic)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
- crypto_mechanism_t *, crypto_key_t *, crypto_dual_data_t *,
- crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-} __no_const crypto_dual_cipher_mac_ops_t;
-
-/*
- * The crypto_random_number_ops structure contains pointers to random
- * number operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_random_number_ops {
- int (*seed_random)(crypto_provider_handle_t, crypto_session_id_t,
- uchar_t *, size_t, uint_t, uint32_t, crypto_req_handle_t);
- int (*generate_random)(crypto_provider_handle_t, crypto_session_id_t,
- uchar_t *, size_t, crypto_req_handle_t);
-} __no_const crypto_random_number_ops_t;
-
-/*
- * Flag values for seed_random.
- */
-#define CRYPTO_SEED_NOW 0x00000001
-
-/*
- * The crypto_session_ops structure contains pointers to session
- * operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_session_ops {
- int (*session_open)(crypto_provider_handle_t, crypto_session_id_t *,
- crypto_req_handle_t);
- int (*session_close)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_req_handle_t);
- int (*session_login)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_user_type_t, char *, size_t, crypto_req_handle_t);
- int (*session_logout)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_req_handle_t);
-} __no_const crypto_session_ops_t;
-
-/*
- * The crypto_object_ops structure contains pointers to object
- * operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_object_ops {
- int (*object_create)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
- crypto_req_handle_t);
- int (*object_copy)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_object_id_t, crypto_object_attribute_t *, uint_t,
- crypto_object_id_t *, crypto_req_handle_t);
- int (*object_destroy)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_object_id_t, crypto_req_handle_t);
- int (*object_get_size)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_object_id_t, size_t *, crypto_req_handle_t);
- int (*object_get_attribute_value)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_object_id_t,
- crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
- int (*object_set_attribute_value)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_object_id_t,
- crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
- int (*object_find_init)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_object_attribute_t *, uint_t, void **,
- crypto_req_handle_t);
- int (*object_find)(crypto_provider_handle_t, void *,
- crypto_object_id_t *, uint_t, uint_t *, crypto_req_handle_t);
- int (*object_find_final)(crypto_provider_handle_t, void *,
- crypto_req_handle_t);
-} __no_const crypto_object_ops_t;
-
-/*
- * The crypto_key_ops structure contains pointers to key
- * operations for cryptographic providers. It is passed through
- * the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
- */
-typedef struct crypto_key_ops {
- int (*key_generate)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_object_attribute_t *, uint_t,
- crypto_object_id_t *, crypto_req_handle_t);
- int (*key_generate_pair)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_object_attribute_t *, uint_t,
- crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
- crypto_object_id_t *, crypto_req_handle_t);
- int (*key_wrap)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_object_id_t *,
- uchar_t *, size_t *, crypto_req_handle_t);
- int (*key_unwrap)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, uchar_t *, size_t *,
- crypto_object_attribute_t *, uint_t,
- crypto_object_id_t *, crypto_req_handle_t);
- int (*key_derive)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *,
- uint_t, crypto_object_id_t *, crypto_req_handle_t);
- int (*key_check)(crypto_provider_handle_t, crypto_mechanism_t *,
- crypto_key_t *);
-} __no_const crypto_key_ops_t;
-
-/*
- * The crypto_provider_management_ops structure contains pointers
- * to management operations for cryptographic providers. It is passed
- * through the crypto_ops(9S) structure when providers register with the
- * kernel using crypto_register_provider(9F).
+ * The crypto_ctx_ops structure contains pointers to context and context
+ * template management operations for cryptographic providers. It is
+ * passed through the crypto_ops(9S) structure when providers register
+ * with the kernel using crypto_register_provider(9F).
*/
-typedef struct crypto_provider_management_ops {
- int (*ext_info)(crypto_provider_handle_t,
- crypto_provider_ext_info_t *, crypto_req_handle_t);
- int (*init_token)(crypto_provider_handle_t, char *, size_t,
- char *, crypto_req_handle_t);
- int (*init_pin)(crypto_provider_handle_t, crypto_session_id_t,
- char *, size_t, crypto_req_handle_t);
- int (*set_pin)(crypto_provider_handle_t, crypto_session_id_t,
- char *, size_t, char *, size_t, crypto_req_handle_t);
-} __no_const crypto_provider_management_ops_t;
-
-typedef struct crypto_mech_ops {
- int (*copyin_mechanism)(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_mechanism_t *, int *, int);
- int (*copyout_mechanism)(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_mechanism_t *, int *, int);
- int (*free_mechanism)(crypto_provider_handle_t, crypto_mechanism_t *);
-} __no_const crypto_mech_ops_t;
-
-typedef struct crypto_nostore_key_ops {
- int (*nostore_key_generate)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_mechanism_t *,
- crypto_object_attribute_t *, uint_t, crypto_object_attribute_t *,
- uint_t, crypto_req_handle_t);
- int (*nostore_key_generate_pair)(crypto_provider_handle_t,
- crypto_session_id_t, crypto_mechanism_t *,
- crypto_object_attribute_t *, uint_t, crypto_object_attribute_t *,
- uint_t, crypto_object_attribute_t *, uint_t,
- crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
- int (*nostore_key_derive)(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *,
- uint_t, crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
-} __no_const crypto_nostore_key_ops_t;
+typedef struct crypto_ctx_ops {
+ int (*create_ctx_template)(crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t *, size_t *);
+ int (*free_context)(crypto_ctx_t *);
+} __no_const crypto_ctx_ops_t;
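create_ctx_template(9E) is where the pre-processing described earlier happens. A sketch, assuming a hypothetical my_key_sched_t and my_expand_key():

	static int
	my_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key,
	    crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size)
	{
		my_key_sched_t *ks = kmem_alloc(sizeof (*ks), KM_SLEEP);

		(void) mechanism;
		my_expand_key(key, ks);		/* expensive work, done once */
		*tmpl = ks;
		*tmpl_size = sizeof (*ks);
		return (CRYPTO_SUCCESS);
	}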
/*
* The crypto_ops(9S) structure contains the structures containing
@@ -497,58 +155,13 @@ typedef struct crypto_nostore_key_ops {
* supplied by a provider when it registers with the kernel
* by calling crypto_register_provider(9F).
*/
-typedef struct crypto_ops_v1 {
- crypto_control_ops_t *co_control_ops;
- crypto_digest_ops_t *co_digest_ops;
- crypto_cipher_ops_t *co_cipher_ops;
- crypto_mac_ops_t *co_mac_ops;
- crypto_sign_ops_t *co_sign_ops;
- crypto_verify_ops_t *co_verify_ops;
- crypto_dual_ops_t *co_dual_ops;
- crypto_dual_cipher_mac_ops_t *co_dual_cipher_mac_ops;
- crypto_random_number_ops_t *co_random_ops;
- crypto_session_ops_t *co_session_ops;
- crypto_object_ops_t *co_object_ops;
- crypto_key_ops_t *co_key_ops;
- crypto_provider_management_ops_t *co_provider_ops;
- crypto_ctx_ops_t *co_ctx_ops;
-} crypto_ops_v1_t;
-
-typedef struct crypto_ops_v2 {
- crypto_ops_v1_t v1_ops;
- crypto_mech_ops_t *co_mech_ops;
-} crypto_ops_v2_t;
-
-typedef struct crypto_ops_v3 {
- crypto_ops_v2_t v2_ops;
- crypto_nostore_key_ops_t *co_nostore_key_ops;
-} crypto_ops_v3_t;
-
typedef struct crypto_ops {
- union {
- crypto_ops_v3_t cou_v3;
- crypto_ops_v2_t cou_v2;
- crypto_ops_v1_t cou_v1;
- } cou;
+ const crypto_digest_ops_t *co_digest_ops;
+ const crypto_cipher_ops_t *co_cipher_ops;
+ const crypto_mac_ops_t *co_mac_ops;
+ const crypto_ctx_ops_t *co_ctx_ops;
} crypto_ops_t;
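A provider now fills in at most these four vectors; presumably, as before, groups it does not implement stay NULL. Continuing the same hypothetical sketch:

	static const crypto_ops_t my_crypto_ops = {
		.co_digest_ops	= &my_digest_ops,
		.co_cipher_ops	= NULL,		/* digest/MAC-only provider */
		.co_mac_ops	= &my_mac_ops,
		.co_ctx_ops	= &my_ctx_ops,
	};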
-#define co_control_ops cou.cou_v1.co_control_ops
-#define co_digest_ops cou.cou_v1.co_digest_ops
-#define co_cipher_ops cou.cou_v1.co_cipher_ops
-#define co_mac_ops cou.cou_v1.co_mac_ops
-#define co_sign_ops cou.cou_v1.co_sign_ops
-#define co_verify_ops cou.cou_v1.co_verify_ops
-#define co_dual_ops cou.cou_v1.co_dual_ops
-#define co_dual_cipher_mac_ops cou.cou_v1.co_dual_cipher_mac_ops
-#define co_random_ops cou.cou_v1.co_random_ops
-#define co_session_ops cou.cou_v1.co_session_ops
-#define co_object_ops cou.cou_v1.co_object_ops
-#define co_key_ops cou.cou_v1.co_key_ops
-#define co_provider_ops cou.cou_v1.co_provider_ops
-#define co_ctx_ops cou.cou_v1.co_ctx_ops
-#define co_mech_ops cou.cou_v2.co_mech_ops
-#define co_nostore_key_ops cou.cou_v3.co_nostore_key_ops
-
/*
* The mechanism info structure crypto_mech_info_t contains a function group
* bit mask cm_func_group_mask. This field, of type crypto_func_group_t,
@@ -562,29 +175,11 @@ typedef uint32_t crypto_func_group_t;
#define CRYPTO_FG_ENCRYPT 0x00000001 /* encrypt_init() */
#define CRYPTO_FG_DECRYPT 0x00000002 /* decrypt_init() */
#define CRYPTO_FG_DIGEST 0x00000004 /* digest_init() */
-#define CRYPTO_FG_SIGN 0x00000008 /* sign_init() */
-#define CRYPTO_FG_SIGN_RECOVER 0x00000010 /* sign_recover_init() */
-#define CRYPTO_FG_VERIFY 0x00000020 /* verify_init() */
-#define CRYPTO_FG_VERIFY_RECOVER 0x00000040 /* verify_recover_init() */
-#define CRYPTO_FG_GENERATE 0x00000080 /* key_generate() */
-#define CRYPTO_FG_GENERATE_KEY_PAIR 0x00000100 /* key_generate_pair() */
-#define CRYPTO_FG_WRAP 0x00000200 /* key_wrap() */
-#define CRYPTO_FG_UNWRAP 0x00000400 /* key_unwrap() */
-#define CRYPTO_FG_DERIVE 0x00000800 /* key_derive() */
#define CRYPTO_FG_MAC 0x00001000 /* mac_init() */
-#define CRYPTO_FG_ENCRYPT_MAC 0x00002000 /* encrypt_mac_init() */
-#define CRYPTO_FG_MAC_DECRYPT 0x00004000 /* decrypt_mac_init() */
#define CRYPTO_FG_ENCRYPT_ATOMIC 0x00008000 /* encrypt_atomic() */
#define CRYPTO_FG_DECRYPT_ATOMIC 0x00010000 /* decrypt_atomic() */
#define CRYPTO_FG_MAC_ATOMIC 0x00020000 /* mac_atomic() */
#define CRYPTO_FG_DIGEST_ATOMIC 0x00040000 /* digest_atomic() */
-#define CRYPTO_FG_SIGN_ATOMIC 0x00080000 /* sign_atomic() */
-#define CRYPTO_FG_SIGN_RECOVER_ATOMIC 0x00100000 /* sign_recover_atomic() */
-#define CRYPTO_FG_VERIFY_ATOMIC 0x00200000 /* verify_atomic() */
-#define CRYPTO_FG_VERIFY_RECOVER_ATOMIC 0x00400000 /* verify_recover_atomic() */
-#define CRYPTO_FG_ENCRYPT_MAC_ATOMIC 0x00800000 /* encrypt_mac_atomic() */
-#define CRYPTO_FG_MAC_DECRYPT_ATOMIC 0x01000000 /* mac_decrypt_atomic() */
-#define CRYPTO_FG_RESERVED 0x80000000
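The surviving values still form a plain bit mask, so a mechanism advertises several function groups by OR-ing them together, for example:

	/* A MAC mechanism supporting both multi-part and one-shot use. */
	#define	MY_HMAC_FG_MASK	(CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC)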
/*
* Maximum length of the pi_provider_description field of the
@@ -593,21 +188,6 @@ typedef uint32_t crypto_func_group_t;
#define CRYPTO_PROVIDER_DESCR_MAX_LEN 64
-/* Bit mask for all the simple operations */
-#define CRYPTO_FG_SIMPLEOP_MASK (CRYPTO_FG_ENCRYPT | CRYPTO_FG_DECRYPT | \
- CRYPTO_FG_DIGEST | CRYPTO_FG_SIGN | CRYPTO_FG_VERIFY | CRYPTO_FG_MAC | \
- CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT_ATOMIC | \
- CRYPTO_FG_MAC_ATOMIC | CRYPTO_FG_DIGEST_ATOMIC | CRYPTO_FG_SIGN_ATOMIC | \
- CRYPTO_FG_VERIFY_ATOMIC)
-
-/* Bit mask for all the dual operations */
-#define CRYPTO_FG_MAC_CIPHER_MASK (CRYPTO_FG_ENCRYPT_MAC | \
- CRYPTO_FG_MAC_DECRYPT | CRYPTO_FG_ENCRYPT_MAC_ATOMIC | \
- CRYPTO_FG_MAC_DECRYPT_ATOMIC)
-
-/* Add other combos to CRYPTO_FG_DUAL_MASK */
-#define CRYPTO_FG_DUAL_MASK CRYPTO_FG_MAC_CIPHER_MASK
-
/*
* The crypto_mech_info structure specifies one of the mechanisms
* supported by a cryptographic provider. The pi_mechanisms field of
@@ -618,21 +198,8 @@ typedef struct crypto_mech_info {
crypto_mech_name_t cm_mech_name;
crypto_mech_type_t cm_mech_number;
crypto_func_group_t cm_func_group_mask;
- ssize_t cm_min_key_length;
- ssize_t cm_max_key_length;
- uint32_t cm_mech_flags;
} crypto_mech_info_t;
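With the key-length and flags fields removed, a mechanism table reduces to name, number, and function-group mask. A sketch (the mechanism name string and the MY_SHA256_MECH constant are illustrative, not defined by this header):

	static const crypto_mech_info_t my_mech_info_tab[] = {
		{"CKM_SHA256", MY_SHA256_MECH,
		    CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
	};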
-/* Alias the old name to the new name for compatibility. */
-#define cm_keysize_unit cm_mech_flags
-
-/*
- * The following is used by a provider that sets
- * CRYPTO_HASH_NO_UPDATE. It needs to specify the maximum
- * input data size it can digest in this field.
- */
-#define cm_max_input_length cm_max_key_length
-
/*
* crypto_kcf_provider_handle_t is a handle allocated by the kernel.
* It is returned after the provider registers with
@@ -644,79 +211,24 @@ typedef uint_t crypto_kcf_provider_handle_t;
/*
* Provider information. Passed as argument to crypto_register_provider(9F).
- * Describes the provider and its capabilities. Multiple providers can
- * register for the same device instance. In this case, the same
- * pi_provider_dev must be specified with a different pi_provider_handle.
+ * Describes the provider and its capabilities.
*/
-typedef struct crypto_provider_info_v1 {
- uint_t pi_interface_version;
- char *pi_provider_description;
- crypto_provider_type_t pi_provider_type;
- crypto_provider_handle_t pi_provider_handle;
- crypto_ops_t *pi_ops_vector;
- uint_t pi_mech_list_count;
- crypto_mech_info_t *pi_mechanisms;
- uint_t pi_logical_provider_count;
- crypto_kcf_provider_handle_t *pi_logical_providers;
-} crypto_provider_info_v1_t;
-
-typedef struct crypto_provider_info_v2 {
- crypto_provider_info_v1_t v1_info;
- uint_t pi_flags;
-} crypto_provider_info_v2_t;
-
typedef struct crypto_provider_info {
- union {
- crypto_provider_info_v2_t piu_v2;
- crypto_provider_info_v1_t piu_v1;
- } piu;
+ const char *pi_provider_description;
+ const crypto_ops_t *pi_ops_vector;
+ uint_t pi_mech_list_count;
+ const crypto_mech_info_t *pi_mechanisms;
} crypto_provider_info_t;
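The slimmed-down registration payload, continuing the same sketch:

	static const crypto_provider_info_t my_provider_info = {
		.pi_provider_description = "my hypothetical provider",
		.pi_ops_vector		= &my_crypto_ops,
		.pi_mech_list_count	= 1,
		.pi_mechanisms		= my_mech_info_tab,
	};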
-#define pi_interface_version piu.piu_v1.pi_interface_version
-#define pi_provider_description piu.piu_v1.pi_provider_description
-#define pi_provider_type piu.piu_v1.pi_provider_type
-#define pi_provider_handle piu.piu_v1.pi_provider_handle
-#define pi_ops_vector piu.piu_v1.pi_ops_vector
-#define pi_mech_list_count piu.piu_v1.pi_mech_list_count
-#define pi_mechanisms piu.piu_v1.pi_mechanisms
-#define pi_logical_provider_count piu.piu_v1.pi_logical_provider_count
-#define pi_logical_providers piu.piu_v1.pi_logical_providers
-#define pi_flags piu.piu_v2.pi_flags
-
-/* hidden providers can only be accessed via a logical provider */
-#define CRYPTO_HIDE_PROVIDER 0x00000001
-/*
- * provider can not do multi-part digest (updates) and has a limit
- * on maximum input data that it can digest.
- */
-#define CRYPTO_HASH_NO_UPDATE 0x00000002
-
-/* provider can handle the request without returning a CRYPTO_QUEUED */
-#define CRYPTO_SYNCHRONOUS 0x00000004
-
-#define CRYPTO_PIFLAGS_RESERVED2 0x40000000
-#define CRYPTO_PIFLAGS_RESERVED1 0x80000000
-
-/*
- * Provider status passed by a provider to crypto_provider_notification(9F)
- * and returned by the provider_status(9E) entry point.
- */
-#define CRYPTO_PROVIDER_READY 0
-#define CRYPTO_PROVIDER_BUSY 1
-#define CRYPTO_PROVIDER_FAILED 2
-
/*
* Functions exported by Solaris to cryptographic providers. Providers
* call these functions to register and unregister, notify the kernel
 * of state changes, and notify the kernel when an asynchronous request
 * has completed.
*/
-extern int crypto_register_provider(crypto_provider_info_t *,
+extern int crypto_register_provider(const crypto_provider_info_t *,
crypto_kcf_provider_handle_t *);
extern int crypto_unregister_provider(crypto_kcf_provider_handle_t);
-extern void crypto_provider_notification(crypto_kcf_provider_handle_t, uint_t);
-extern void crypto_op_notification(crypto_req_handle_t, int);
-extern int crypto_kmflag(crypto_req_handle_t);
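Registration itself is unchanged apart from the now const-qualified argument. A minimal sketch of a module-load path using the structures above (error handling trimmed; CRYPTO_SUCCESS again assumed from sys/crypto/common.h):

	static crypto_kcf_provider_handle_t my_prov_handle;

	static int
	my_provider_init(void)
	{
		int ret = crypto_register_provider(&my_provider_info,
		    &my_prov_handle);

		if (ret != CRYPTO_SUCCESS)
			return (ret);
		/* On unload: (void) crypto_unregister_provider(my_prov_handle); */
		return (0);
	}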
#ifdef __cplusplus
diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
deleted file mode 100644
index f2dae7093b94..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _IA32_SYS_ASM_LINKAGE_H
-#define _IA32_SYS_ASM_LINKAGE_H
-
-#include <sys/stack.h>
-#include <sys/trap.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _ASM /* The remainder of this file is only for assembly files */
-
-/*
- * make annoying differences in assembler syntax go away
- */
-
-/*
- * D16 and A16 are used to insert instructions prefixes; the
- * macros help the assembler code be slightly more portable.
- */
-#if !defined(__GNUC_AS__)
-/*
- * /usr/ccs/bin/as prefixes are parsed as separate instructions
- */
-#define D16 data16;
-#define A16 addr16;
-
-/*
- * (There are some weird constructs in constant expressions)
- */
-#define _CONST(const) [const]
-#define _BITNOT(const) -1!_CONST(const)
-#define _MUL(a, b) _CONST(a \* b)
-
-#else
-/*
- * Why not use the 'data16' and 'addr16' prefixes .. well, the
- * assembler doesn't quite believe in real mode, and thus argues with
- * us about what we're trying to do.
- */
-#define D16 .byte 0x66;
-#define A16 .byte 0x67;
-
-#define _CONST(const) (const)
-#define _BITNOT(const) ~_CONST(const)
-#define _MUL(a, b) _CONST(a * b)
-
-#endif
-
-/*
- * C pointers are different sizes between i386 and amd64.
- * These constants can be used to compute offsets into pointer arrays.
- */
-#if defined(__amd64)
-#define CLONGSHIFT 3
-#define CLONGSIZE 8
-#define CLONGMASK 7
-#elif defined(__i386)
-#define CLONGSHIFT 2
-#define CLONGSIZE 4
-#define CLONGMASK 3
-#endif
-
-/*
- * Since we know we're either ILP32 or LP64 ..
- */
-#define CPTRSHIFT CLONGSHIFT
-#define CPTRSIZE CLONGSIZE
-#define CPTRMASK CLONGMASK
-
-#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
-#error "inconsistent shift constants"
-#endif
-
-#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
-#error "inconsistent mask constants"
-#endif
-
-#define ASM_ENTRY_ALIGN 16
-
-/*
- * SSE register alignment and save areas
- */
-
-#define XMM_SIZE 16
-#define XMM_ALIGN 16
-
-#if defined(__amd64)
-
-#define SAVE_XMM_PROLOG(sreg, nreg) \
- subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \
- movq %rsp, sreg
-
-#define RSTOR_XMM_EPILOG(sreg, nreg) \
- addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp
-
-#elif defined(__i386)
-
-#define SAVE_XMM_PROLOG(sreg, nreg) \
- subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \
- movl %esp, sreg; \
- addl $XMM_ALIGN, sreg; \
- andl $_BITNOT(XMM_ALIGN-1), sreg
-
-#define RSTOR_XMM_EPILOG(sreg, nreg) \
- addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp;
-
-#endif /* __i386 */
-
-/*
- * profiling causes definitions of the MCOUNT and RTMCOUNT
- * particular to the type
- */
-#ifdef GPROF
-
-#define MCOUNT(x) \
- pushl %ebp; \
- movl %esp, %ebp; \
- call _mcount; \
- popl %ebp
-
-#endif /* GPROF */
-
-#ifdef PROF
-
-#define MCOUNT(x) \
-/* CSTYLED */ \
- .lcomm .L_/**/x/**/1, 4, 4; \
- pushl %ebp; \
- movl %esp, %ebp; \
-/* CSTYLED */ \
- movl $.L_/**/x/**/1, %edx; \
- call _mcount; \
- popl %ebp
-
-#endif /* PROF */
-
-/*
- * if we are not profiling, MCOUNT should be defined to nothing
- */
-#if !defined(PROF) && !defined(GPROF)
-#define MCOUNT(x)
-#endif /* !defined(PROF) && !defined(GPROF) */
-
-#define RTMCOUNT(x) MCOUNT(x)
-
-/*
- * Macro to define weak symbol aliases. These are similar to the ANSI-C
- * #pragma weak _name = name
- * except a compiler can determine type. The assembler must be told. Hence,
- * the second parameter must be the type of the symbol (i.e.: function,...)
- */
-#define ANSI_PRAGMA_WEAK(sym, stype) \
-/* CSTYLED */ \
- .weak _/**/sym; \
-/* CSTYLED */ \
- .type _/**/sym, @stype; \
-/* CSTYLED */ \
-_/**/sym = sym
-
-/*
- * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in:
- * #pragma weak sym1 = sym2
- */
-#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \
- .weak sym1; \
- .type sym1, @stype; \
-sym1 = sym2
-
-/*
- * ENTRY provides the standard procedure entry code and an easy way to
- * insert the calls to mcount for profiling. ENTRY_NP is identical, but
- * never calls mcount.
- */
-#define ENTRY(x) \
- .text; \
- .align ASM_ENTRY_ALIGN; \
- .globl x; \
- .type x, @function; \
-x: MCOUNT(x)
-
-#define ENTRY_NP(x) \
- .text; \
- .align ASM_ENTRY_ALIGN; \
- .globl x; \
- .type x, @function; \
-x:
-
-#define RTENTRY(x) \
- .text; \
- .align ASM_ENTRY_ALIGN; \
- .globl x; \
- .type x, @function; \
-x: RTMCOUNT(x)
-
-/*
- * ENTRY2 is identical to ENTRY but provides two labels for the entry point.
- */
-#define ENTRY2(x, y) \
- .text; \
- .align ASM_ENTRY_ALIGN; \
- .globl x, y; \
- .type x, @function; \
- .type y, @function; \
-/* CSTYLED */ \
-x: ; \
-y: MCOUNT(x)
-
-#define ENTRY_NP2(x, y) \
- .text; \
- .align ASM_ENTRY_ALIGN; \
- .globl x, y; \
- .type x, @function; \
- .type y, @function; \
-/* CSTYLED */ \
-x: ; \
-y:
-
-
-/*
- * ALTENTRY provides for additional entry points.
- */
-#define ALTENTRY(x) \
- .globl x; \
- .type x, @function; \
-x:
-
-/*
- * DGDEF and DGDEF2 provide global data declarations.
- *
- * DGDEF provides a word aligned word of storage.
- *
- * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This
- * implies this macro is best used for byte arrays.
- *
- * DGDEF3 allocates "sz" bytes of storage with "algn" alignment.
- */
-#define DGDEF2(name, sz) \
- .data; \
- .globl name; \
- .type name, @object; \
- .size name, sz; \
-name:
-
-#define DGDEF3(name, sz, algn) \
- .data; \
- .align algn; \
- .globl name; \
- .type name, @object; \
- .size name, sz; \
-name:
-
-#define DGDEF(name) DGDEF3(name, 4, 4)
-
-/*
- * SET_SIZE trails a function and set the size for the ELF symbol table.
- */
-#define SET_SIZE(x) \
- .size x, [.-x]
-
-/*
- * NWORD provides native word value.
- */
-#if defined(__amd64)
-
-/*CSTYLED*/
-#define NWORD quad
-
-#elif defined(__i386)
-
-#define NWORD long
-
-#endif /* __i386 */
-
-#endif /* _ASM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _IA32_SYS_ASM_LINKAGE_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h
deleted file mode 100644
index 9e7c089e1182..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _IA32_SYS_STACK_H
-#define _IA32_SYS_STACK_H
-
-#if !defined(_ASM)
-
-#include <sys/types.h>
-
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * In the x86 world, a stack frame looks like this:
- *
- * |--------------------------|
- * 4n+8(%ebp) ->| argument word n |
- * | ... | (Previous frame)
- * 8(%ebp) ->| argument word 0 |
- * |--------------------------|--------------------
- * 4(%ebp) ->| return address |
- * |--------------------------|
- * 0(%ebp) ->| previous %ebp (optional) |
- * |--------------------------|
- * -4(%ebp) ->| unspecified | (Current frame)
- * | ... |
- * 0(%esp) ->| variable size |
- * |--------------------------|
- */
-
-/*
- * Stack alignment macros.
- */
-
-#define STACK_ALIGN32 4
-#define STACK_ENTRY_ALIGN32 4
-#define STACK_BIAS32 0
-#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1))
-#define STACK_RESERVE32 0
-#define MINFRAME32 0
-
-#if defined(__amd64)
-
-/*
- * In the amd64 world, a stack frame looks like this:
- *
- * |--------------------------|
- * 8n+16(%rbp)->| argument word n |
- * | ... | (Previous frame)
- * 16(%rbp) ->| argument word 0 |
- * |--------------------------|--------------------
- * 8(%rbp) ->| return address |
- * |--------------------------|
- * 0(%rbp) ->| previous %rbp |
- * |--------------------------|
- * -8(%rbp) ->| unspecified | (Current frame)
- * | ... |
- * 0(%rsp) ->| variable size |
- * |--------------------------|
- * -128(%rsp) ->| reserved for function |
- * |--------------------------|
- *
- * The end of the input argument area must be aligned on a 16-byte
- * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry.
- *
- * The 128-byte location beyond %rsp is considered to be reserved for
- * functions and is NOT modified by signal handlers. It can be used
- * to store temporary data that is not needed across function calls.
- */
-
-/*
- * Stack alignment macros.
- */
-
-#define STACK_ALIGN64 16
-#define STACK_ENTRY_ALIGN64 8
-#define STACK_BIAS64 0
-#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1))
-#define STACK_RESERVE64 128
-#define MINFRAME64 0
-
-#define STACK_ALIGN STACK_ALIGN64
-#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64
-#define STACK_BIAS STACK_BIAS64
-#define SA(x) SA64(x)
-#define STACK_RESERVE STACK_RESERVE64
-#define MINFRAME MINFRAME64
-
-#elif defined(__i386)
-
-#define STACK_ALIGN STACK_ALIGN32
-#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32
-#define STACK_BIAS STACK_BIAS32
-#define SA(x) SA32(x)
-#define STACK_RESERVE STACK_RESERVE32
-#define MINFRAME MINFRAME32
-
-#endif /* __i386 */
-
-#if defined(_KERNEL) && !defined(_ASM)
-
-#if defined(ZFS_DEBUG)
-#if STACK_ALIGN == 4
-#define ASSERT_STACK_ALIGNED() \
- { \
- uint32_t __tmp; \
- ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \
- }
-#elif (STACK_ALIGN == 16) && (_LONG_DOUBLE_ALIGNMENT == 16)
-#define ASSERT_STACK_ALIGNED() \
- { \
- long double __tmp; \
- ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \
- }
-#endif
-#else /* DEBUG */
-#define ASSERT_STACK_ALIGNED()
-#endif /* DEBUG */
-
-struct regs;
-
-void traceregs(struct regs *);
-void traceback(caddr_t);
-
-#endif /* defined(_KERNEL) && !defined(_ASM) */
-
-#define STACK_GROWTH_DOWN /* stacks grow from high to low addresses */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _IA32_SYS_STACK_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h
deleted file mode 100644
index 55b94969b80b..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
-/* All Rights Reserved */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _IA32_SYS_TRAP_H
-#define _IA32_SYS_TRAP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Trap type values
- */
-
-#define T_ZERODIV 0x0 /* #de divide by 0 error */
-#define T_SGLSTP 0x1 /* #db single step */
-#define T_NMIFLT 0x2 /* NMI */
-#define T_BPTFLT 0x3 /* #bp breakpoint fault, INT3 insn */
-#define T_OVFLW 0x4 /* #of INTO overflow fault */
-#define T_BOUNDFLT 0x5 /* #br BOUND insn fault */
-#define T_ILLINST 0x6 /* #ud invalid opcode fault */
-#define T_NOEXTFLT 0x7 /* #nm device not available: x87 */
-#define T_DBLFLT 0x8 /* #df double fault */
-#define T_EXTOVRFLT 0x9 /* [not generated: 386 only] */
-#define T_TSSFLT 0xa /* #ts invalid TSS fault */
-#define T_SEGFLT 0xb /* #np segment not present fault */
-#define T_STKFLT 0xc /* #ss stack fault */
-#define T_GPFLT 0xd /* #gp general protection fault */
-#define T_PGFLT 0xe /* #pf page fault */
-#define T_EXTERRFLT 0x10 /* #mf x87 FPU error fault */
-#define T_ALIGNMENT 0x11 /* #ac alignment check error */
-#define T_MCE 0x12 /* #mc machine check exception */
-#define T_SIMDFPE 0x13 /* #xm SSE/SSE exception */
-#define T_DBGENTR 0x14 /* debugger entry */
-#define T_ENDPERR 0x21 /* emulated extension error flt */
-#define T_ENOEXTFLT 0x20 /* emulated ext not present */
-#define T_FASTTRAP 0xd2 /* fast system call */
-#define T_SYSCALLINT 0x91 /* general system call */
-#define T_DTRACE_RET 0x7f /* DTrace pid return */
-#define T_INT80 0x80 /* int80 handler for linux emulation */
-#define T_SOFTINT 0x50fd /* pseudo softint trap type */
-
-/*
- * Pseudo traps.
- */
-#define T_INTERRUPT 0x100
-#define T_FAULT 0x200
-#define T_AST 0x400
-#define T_SYSCALL 0x180
-
-
-/*
- * Values of error code on stack in case of page fault
- */
-
-#define PF_ERR_MASK 0x01 /* Mask for error bit */
-#define PF_ERR_PAGE 0x00 /* page not present */
-#define PF_ERR_PROT 0x01 /* protection error */
-#define PF_ERR_WRITE 0x02 /* fault caused by write (else read) */
-#define PF_ERR_USER 0x04 /* processor was in user mode */
- /* (else supervisor) */
-#define PF_ERR_EXEC 0x10 /* attempt to execute a No eXec page (AMD) */
-
-/*
- * Definitions for fast system call subfunctions
- */
-#define T_FNULL 0 /* Null trap for testing */
-#define T_FGETFP 1 /* Get emulated FP context */
-#define T_FSETFP 2 /* Set emulated FP context */
-#define T_GETHRTIME 3 /* Get high resolution time */
-#define T_GETHRVTIME 4 /* Get high resolution virtual time */
-#define T_GETHRESTIME 5 /* Get high resolution time */
-#define T_GETLGRP 6 /* Get home lgrpid */
-
-#define T_LASTFAST 6 /* Last valid subfunction */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _IA32_SYS_TRAP_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/modctl.h b/sys/contrib/openzfs/module/icp/include/sys/modctl.h
deleted file mode 100644
index 6c26ad618c93..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/modctl.h
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_MODCTL_H
-#define _SYS_MODCTL_H
-
-/*
- * loadable module support.
- */
-
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct modlmisc;
-struct modlinkage;
-
-/*
- * The following structure defines the operations used by modctl
- * to load and unload modules. Each supported loadable module type
- * requires a set of mod_ops.
- */
-struct mod_ops {
- int (*modm_install)(struct modlmisc *, struct modlinkage *);
- int (*modm_remove)(struct modlmisc *, struct modlinkage *);
- int (*modm_info)(void *, struct modlinkage *, int *);
-};
-
-/*
- * The defined set of mod_ops structures for each loadable module type
- * Defined in modctl.c
- */
-extern struct mod_ops mod_brandops;
-#if defined(__i386) || defined(__amd64)
-extern struct mod_ops mod_cpuops;
-#endif
-extern struct mod_ops mod_cryptoops;
-extern struct mod_ops mod_driverops;
-extern struct mod_ops mod_execops;
-extern struct mod_ops mod_fsops;
-extern struct mod_ops mod_miscops;
-extern struct mod_ops mod_schedops;
-extern struct mod_ops mod_strmodops;
-extern struct mod_ops mod_syscallops;
-extern struct mod_ops mod_sockmodops;
-#ifdef _SYSCALL32_IMPL
-extern struct mod_ops mod_syscallops32;
-#endif
-extern struct mod_ops mod_dacfops;
-extern struct mod_ops mod_ippops;
-extern struct mod_ops mod_pcbeops;
-extern struct mod_ops mod_devfsops;
-extern struct mod_ops mod_kiconvops;
-
-/*
- * Definitions for the module specific linkage structures.
- * The first two fields are the same in all of the structures.
- * The linkinfo is for informational purposes only and is returned by
- * modctl with the MODINFO cmd.
- */
-
-/* For cryptographic providers */
-struct modlcrypto {
- struct mod_ops *crypto_modops;
- char *crypto_linkinfo;
-};
-
-/* For misc */
-struct modlmisc {
- struct mod_ops *misc_modops;
- char *misc_linkinfo;
-};
-
-/*
- * Revision number of loadable modules support. This is the value
- * that must be used in the modlinkage structure.
- */
-#define MODREV_1 1
-
-/*
- * The modlinkage structure is the structure that the module writer
- * provides to the routines to install, remove, and stat a module.
- * The ml_linkage element is an array of pointers to linkage structures.
- * For most modules there is only one linkage structure. We allocate
- * enough space for 3 linkage structures which happens to be the most
- * we have in any sun supplied module. For those modules with more
- * than 3 linkage structures (which is very unlikely), a modlinkage
- * structure must be kmem_alloc'd in the module wrapper to be big enough
- * for all of the linkage structures.
- */
-struct modlinkage {
- int ml_rev; /* rev of loadable modules system */
-#ifdef _LP64
- void *ml_linkage[7]; /* more space in 64-bit OS */
-#else
- void *ml_linkage[4]; /* NULL terminated list of */
- /* linkage structures */
-#endif
-};
-
-/*
- * commands. These are the commands supported by the modctl system call.
- */
-#define MODLOAD 0
-#define MODUNLOAD 1
-#define MODINFO 2
-#define MODRESERVED 3
-#define MODSETMINIROOT 4
-#define MODADDMAJBIND 5
-#define MODGETPATH 6
-#define MODREADSYSBIND 7
-#define MODGETMAJBIND 8
-#define MODGETNAME 9
-#define MODSIZEOF_DEVID 10
-#define MODGETDEVID 11
-#define MODSIZEOF_MINORNAME 12
-#define MODGETMINORNAME 13
-#define MODGETPATHLEN 14
-#define MODEVENTS 15
-#define MODGETFBNAME 16
-#define MODREREADDACF 17
-#define MODLOADDRVCONF 18
-#define MODUNLOADDRVCONF 19
-#define MODREMMAJBIND 20
-#define MODDEVT2INSTANCE 21
-#define MODGETDEVFSPATH_LEN 22
-#define MODGETDEVFSPATH 23
-#define MODDEVID2PATHS 24
-#define MODSETDEVPOLICY 26
-#define MODGETDEVPOLICY 27
-#define MODALLOCPRIV 28
-#define MODGETDEVPOLICYBYNAME 29
-#define MODLOADMINORPERM 31
-#define MODADDMINORPERM 32
-#define MODREMMINORPERM 33
-#define MODREMDRVCLEANUP 34
-#define MODDEVEXISTS 35
-#define MODDEVREADDIR 36
-#define MODDEVNAME 37
-#define MODGETDEVFSPATH_MI_LEN 38
-#define MODGETDEVFSPATH_MI 39
-#define MODRETIRE 40
-#define MODUNRETIRE 41
-#define MODISRETIRED 42
-#define MODDEVEMPTYDIR 43
-#define MODREMDRVALIAS 44
-
-/*
- * sub cmds for MODEVENTS
- */
-#define MODEVENTS_FLUSH 0
-#define MODEVENTS_FLUSH_DUMP 1
-#define MODEVENTS_SET_DOOR_UPCALL_FILENAME 2
-#define MODEVENTS_GETDATA 3
-#define MODEVENTS_FREEDATA 4
-#define MODEVENTS_POST_EVENT 5
-#define MODEVENTS_REGISTER_EVENT 6
-
-/*
- * devname subcmds for MODDEVNAME
- */
-#define MODDEVNAME_LOOKUPDOOR 0
-#define MODDEVNAME_DEVFSADMNODE 1
-#define MODDEVNAME_NSMAPS 2
-#define MODDEVNAME_PROFILE 3
-#define MODDEVNAME_RECONFIG 4
-#define MODDEVNAME_SYSAVAIL 5
-
-
-/*
- * Data structure passed to modconfig command in kernel to build devfs tree
- */
-
-struct aliases {
- struct aliases *a_next;
- char *a_name;
- int a_len;
-};
-
-#define MAXMODCONFNAME 256
-
-struct modconfig {
- char drvname[MAXMODCONFNAME];
- char drvclass[MAXMODCONFNAME];
- int major;
- int flags;
- int num_aliases;
- struct aliases *ap;
-};
-
-#if defined(_SYSCALL32)
-
-struct aliases32 {
- caddr32_t a_next;
- caddr32_t a_name;
- int32_t a_len;
-};
-
-struct modconfig32 {
- char drvname[MAXMODCONFNAME];
- char drvclass[MAXMODCONFNAME];
- int32_t major;
- int32_t flags;
- int32_t num_aliases;
- caddr32_t ap;
-};
-
-#endif /* _SYSCALL32 */
-
-/* flags for modconfig */
-#define MOD_UNBIND_OVERRIDE 0x01 /* fail unbind if in use */
-
-/*
- * Max module path length
- */
-#define MOD_MAXPATH 256
-
-/*
- * Default search path for modules ADDITIONAL to the directory
- * where the kernel components we booted from are.
- *
- * Most often, this will be "/platform/{platform}/kernel /kernel /usr/kernel",
- * but we don't wire it down here.
- */
-#define MOD_DEFPATH "/kernel /usr/kernel"
-
-/*
- * Default file name extension for autoloading modules.
- */
-#define MOD_DEFEXT ""
-
-/*
- * Parameters for modinfo
- */
-#define MODMAXNAMELEN 32 /* max module name length */
-#define MODMAXLINKINFOLEN 32 /* max link info length */
-
-/*
- * Module specific information.
- */
-struct modspecific_info {
- char msi_linkinfo[MODMAXLINKINFOLEN]; /* name in linkage struct */
- int msi_p0; /* module specific information */
-};
-
-/*
- * Structure returned by modctl with MODINFO command.
- */
-#define MODMAXLINK 10 /* max linkages modinfo can handle */
-
-struct modinfo {
- int mi_info; /* Flags for info wanted */
- int mi_state; /* Flags for module state */
- int mi_id; /* id of this loaded module */
- int mi_nextid; /* id of next module or -1 */
- caddr_t mi_base; /* virtual addr of text */
- size_t mi_size; /* size of module in bytes */
- int mi_rev; /* loadable modules rev */
- int mi_loadcnt; /* # of times loaded */
- char mi_name[MODMAXNAMELEN]; /* name of module */
- struct modspecific_info mi_msinfo[MODMAXLINK];
- /* mod specific info */
-};
-
-
-#if defined(_SYSCALL32)
-
-#define MODMAXNAMELEN32 32 /* max module name length */
-#define MODMAXLINKINFOLEN32 32 /* max link info length */
-#define MODMAXLINK32 10 /* max linkages modinfo can handle */
-
-struct modspecific_info32 {
- char msi_linkinfo[MODMAXLINKINFOLEN32]; /* name in linkage struct */
- int32_t msi_p0; /* module specific information */
-};
-
-struct modinfo32 {
- int32_t mi_info; /* Flags for info wanted */
- int32_t mi_state; /* Flags for module state */
- int32_t mi_id; /* id of this loaded module */
- int32_t mi_nextid; /* id of next module or -1 */
- caddr32_t mi_base; /* virtual addr of text */
- uint32_t mi_size; /* size of module in bytes */
- int32_t mi_rev; /* loadable modules rev */
- int32_t mi_loadcnt; /* # of times loaded */
- char mi_name[MODMAXNAMELEN32]; /* name of module */
- struct modspecific_info32 mi_msinfo[MODMAXLINK32];
- /* mod specific info */
-};
-
-#endif /* _SYSCALL32 */
-
-/* Values for mi_info flags */
-#define MI_INFO_ONE 1
-#define MI_INFO_ALL 2
-#define MI_INFO_CNT 4
-#define MI_INFO_LINKAGE 8 /* used internally to extract modlinkage */
-/*
- * MI_INFO_NOBASE indicates caller does not need mi_base. Failure to use this
- * flag may lead 32-bit apps to receive an EOVERFLOW error from modctl(MODINFO)
- * when used with a 64-bit kernel.
- */
-#define MI_INFO_NOBASE 16
-
-/* Values for mi_state */
-#define MI_LOADED 1
-#define MI_INSTALLED 2
-
-/*
- * Macros to vector to the appropriate module specific routine.
- */
-#define MODL_INSTALL(MODL, MODLP) \
- (*(MODL)->misc_modops->modm_install)(MODL, MODLP)
-#define MODL_REMOVE(MODL, MODLP) \
- (*(MODL)->misc_modops->modm_remove)(MODL, MODLP)
-#define MODL_INFO(MODL, MODLP, P0) \
- (*(MODL)->misc_modops->modm_info)(MODL, MODLP, P0)
-
-/*
- * Definitions for stubs
- */
-struct mod_stub_info {
- uintptr_t mods_func_adr;
- struct mod_modinfo *mods_modinfo;
- uintptr_t mods_stub_adr;
- int (*mods_errfcn)(void);
- int mods_flag; /* flags defined below */
-};
-
-/*
- * Definitions for mods_flag.
- */
-#define MODS_WEAK 0x01 /* weak stub (not loaded if called) */
-#define MODS_NOUNLOAD 0x02 /* module not unloadable (no _fini()) */
-#define MODS_INSTALLED 0x10 /* module installed */
-
-struct mod_modinfo {
- char *modm_module_name;
- struct modctl *mp;
- struct mod_stub_info modm_stubs[1];
-};
-
-struct modctl_list {
- struct modctl_list *modl_next;
- struct modctl *modl_modp;
-};
-
-/*
- * Structure to manage a loadable module.
- * Note: the module (mod_mp) structure's "text" and "text_size" information
- * are replicated in the modctl structure so that mod_containing_pc()
- * doesn't have to grab any locks (modctls are persistent; modules are not.)
- */
-typedef struct modctl {
- struct modctl *mod_next; /* &modules based list */
- struct modctl *mod_prev;
- int mod_id;
- void *mod_mp;
- kthread_t *mod_inprogress_thread;
- struct mod_modinfo *mod_modinfo;
- struct modlinkage *mod_linkage;
- char *mod_filename;
- char *mod_modname;
-
- char mod_busy; /* inprogress_thread has locked */
- char mod_want; /* someone waiting for unlock */
- char mod_prim; /* primary module */
-
- int mod_ref; /* ref count - from dependent or stub */
-
- char mod_loaded; /* module in memory */
- char mod_installed; /* post _init pre _fini */
- char mod_loadflags;
- char mod_delay_unload; /* deferred unload */
-
- struct modctl_list *mod_requisites; /* mods this one depends on. */
- void *____unused; /* NOTE: reuse (same size) is OK, */
- /* deletion causes mdb.vs.core issues */
- int mod_loadcnt; /* number of times mod was loaded */
- int mod_nenabled; /* # of enabled DTrace probes in mod */
- char *mod_text;
- size_t mod_text_size;
-
- int mod_gencount; /* # times loaded/unloaded */
- struct modctl *mod_requisite_loading; /* mod circular dependency */
-} modctl_t;
-
-/*
- * mod_loadflags
- */
-
-#define MOD_NOAUTOUNLOAD 0x1 /* Auto mod-unloader skips this mod */
-#define MOD_NONOTIFY 0x2 /* No krtld notifications on (un)load */
-#define MOD_NOUNLOAD 0x4 /* Assume EBUSY for all _fini's */
-
-#define MOD_BIND_HASHSIZE 64
-#define MOD_BIND_HASHMASK (MOD_BIND_HASHSIZE-1)
-
-typedef int modid_t;
-
-/*
- * global function and data declarations
- */
-extern kmutex_t mod_lock;
-
-extern char *systemfile;
-extern char **syscallnames;
-extern int moddebug;
-
-/*
- * this is the head of a doubly linked list. Only the next and prev
- * pointers are used
- */
-extern modctl_t modules;
-
-/*
- * Only the following are part of the DDI/DKI
- */
-extern int mod_install(struct modlinkage *);
-extern int mod_remove(struct modlinkage *);
-extern int mod_info(struct modlinkage *, struct modinfo *);
-
-/*
- * bit definitions for moddebug.
- */
-#define MODDEBUG_LOADMSG 0x80000000 /* print "[un]loading..." msg */
-#define MODDEBUG_ERRMSG 0x40000000 /* print detailed error msgs */
-#define MODDEBUG_LOADMSG2 0x20000000 /* print 2nd level msgs */
-#define MODDEBUG_RETIRE 0x10000000 /* print retire msgs */
-#define MODDEBUG_BINDING 0x00040000 /* driver/alias binding */
-#define MODDEBUG_FINI_EBUSY 0x00020000 /* pretend fini returns EBUSY */
-#define MODDEBUG_NOAUL_IPP 0x00010000 /* no Autounloading ipp mods */
-#define MODDEBUG_NOAUL_DACF 0x00008000 /* no Autounloading dacf mods */
-#define MODDEBUG_KEEPTEXT 0x00004000 /* keep text after unloading */
-#define MODDEBUG_NOAUL_DRV 0x00001000 /* no Autounloading Drivers */
-#define MODDEBUG_NOAUL_EXEC 0x00000800 /* no Autounloading Execs */
-#define MODDEBUG_NOAUL_FS 0x00000400 /* no Autounloading File sys */
-#define MODDEBUG_NOAUL_MISC 0x00000200 /* no Autounloading misc */
-#define MODDEBUG_NOAUL_SCHED 0x00000100 /* no Autounloading scheds */
-#define MODDEBUG_NOAUL_STR 0x00000080 /* no Autounloading streams */
-#define MODDEBUG_NOAUL_SYS 0x00000040 /* no Autounloading syscalls */
-#define MODDEBUG_NOCTF 0x00000020 /* do not load CTF debug data */
-#define MODDEBUG_NOAUTOUNLOAD 0x00000010 /* no autounloading at all */
-#define MODDEBUG_DDI_MOD 0x00000008 /* ddi_mod{open,sym,close} */
-#define MODDEBUG_MP_MATCH 0x00000004 /* dev_minorperm */
-#define MODDEBUG_MINORPERM 0x00000002 /* minor perm modctls */
-#define MODDEBUG_USERDEBUG 0x00000001 /* bpt after init_module() */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_MODCTL_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/modhash.h b/sys/contrib/openzfs/module/icp/include/sys/modhash.h
deleted file mode 100644
index 06b52ff02604..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/modhash.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_MODHASH_H
-#define _SYS_MODHASH_H
-
-/*
- * Generic hash implementation for the kernel.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/zfs_context.h>
-
-/*
- * Opaque data types for storing keys and values
- */
-typedef void *mod_hash_val_t;
-typedef void *mod_hash_key_t;
-
-/*
- * Opaque data type for reservation
- */
-typedef void *mod_hash_hndl_t;
-
-/*
- * Opaque type for hash itself.
- */
-struct mod_hash;
-typedef struct mod_hash mod_hash_t;
-
-/*
- * String hash table
- */
-mod_hash_t *mod_hash_create_strhash_nodtr(char *, size_t,
- void (*)(mod_hash_val_t));
-mod_hash_t *mod_hash_create_strhash(char *, size_t, void (*)(mod_hash_val_t));
-void mod_hash_destroy_strhash(mod_hash_t *);
-int mod_hash_strkey_cmp(mod_hash_key_t, mod_hash_key_t);
-void mod_hash_strkey_dtor(mod_hash_key_t);
-void mod_hash_strval_dtor(mod_hash_val_t);
-uint_t mod_hash_bystr(void *, mod_hash_key_t);
-
-/*
- * Pointer hash table
- */
-mod_hash_t *mod_hash_create_ptrhash(char *, size_t, void (*)(mod_hash_val_t),
- size_t);
-void mod_hash_destroy_ptrhash(mod_hash_t *);
-int mod_hash_ptrkey_cmp(mod_hash_key_t, mod_hash_key_t);
-uint_t mod_hash_byptr(void *, mod_hash_key_t);
-
-/*
- * ID hash table
- */
-mod_hash_t *mod_hash_create_idhash(char *, size_t, void (*)(mod_hash_val_t));
-void mod_hash_destroy_idhash(mod_hash_t *);
-int mod_hash_idkey_cmp(mod_hash_key_t, mod_hash_key_t);
-uint_t mod_hash_byid(void *, mod_hash_key_t);
-uint_t mod_hash_iddata_gen(size_t);
-
-/*
- * Hash management functions
- */
-mod_hash_t *mod_hash_create_extended(char *, size_t, void (*)(mod_hash_key_t),
- void (*)(mod_hash_val_t), uint_t (*)(void *, mod_hash_key_t), void *,
- int (*)(mod_hash_key_t, mod_hash_key_t), int);
-
-void mod_hash_destroy_hash(mod_hash_t *);
-void mod_hash_clear(mod_hash_t *);
-
-/*
- * Null key and value destructors
- */
-void mod_hash_null_keydtor(mod_hash_key_t);
-void mod_hash_null_valdtor(mod_hash_val_t);
-
-/*
- * Basic hash operations
- */
-
-/*
- * Error codes for insert, remove, find, destroy.
- */
-#define MH_ERR_NOMEM -1
-#define MH_ERR_DUPLICATE -2
-#define MH_ERR_NOTFOUND -3
-
-/*
- * Return codes for hash walkers
- */
-#define MH_WALK_CONTINUE 0
-#define MH_WALK_TERMINATE 1
-
-/*
- * Basic hash operations
- */
-int mod_hash_insert(mod_hash_t *, mod_hash_key_t, mod_hash_val_t);
-int mod_hash_replace(mod_hash_t *, mod_hash_key_t, mod_hash_val_t);
-int mod_hash_remove(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
-int mod_hash_destroy(mod_hash_t *, mod_hash_key_t);
-int mod_hash_find(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
-int mod_hash_find_cb(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *,
- void (*)(mod_hash_key_t, mod_hash_val_t));
-int mod_hash_find_cb_rval(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *,
- int (*)(mod_hash_key_t, mod_hash_val_t), int *);
-void mod_hash_walk(mod_hash_t *,
- uint_t (*)(mod_hash_key_t, mod_hash_val_t *, void *), void *);
-
-/*
- * Reserving hash operations
- */
-int mod_hash_reserve(mod_hash_t *, mod_hash_hndl_t *);
-int mod_hash_reserve_nosleep(mod_hash_t *, mod_hash_hndl_t *);
-void mod_hash_cancel(mod_hash_t *, mod_hash_hndl_t *);
-int mod_hash_insert_reserve(mod_hash_t *, mod_hash_key_t, mod_hash_val_t,
- mod_hash_hndl_t);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_MODHASH_H */
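
A minimal usage sketch of the string-keyed interface above. The hash name and
size are arbitrary; the _nodtr constructor is used on the assumption that it
installs no key destructor (so string literals are safe as keys), and success
is assumed to be 0, with failures returning the negative MH_ERR_* codes:

	#include <sys/zfs_context.h>
	#include <sys/modhash.h>

	static void
	modhash_example(void)
	{
		mod_hash_t *h;
		mod_hash_val_t v;

		h = mod_hash_create_strhash_nodtr("example hash", 64,
		    mod_hash_null_valdtor);
		(void) mod_hash_insert(h, (mod_hash_key_t)"key",
		    (mod_hash_val_t)"val");
		if (mod_hash_find(h, (mod_hash_key_t)"key", &v) == 0) {
			/* v now holds (mod_hash_val_t)"val" */
		}
		mod_hash_destroy_strhash(h);
	}
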
diff --git a/sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h b/sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h
deleted file mode 100644
index 3130773aa196..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_MODHASH_IMPL_H
-#define _SYS_MODHASH_IMPL_H
-
-/*
- * Internal details for the kernel's generic hash implementation.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/zfs_context.h>
-#include <sys/modhash.h>
-
-struct mod_hash_entry {
- mod_hash_key_t mhe_key; /* stored hash key */
- mod_hash_val_t mhe_val; /* stored hash value */
- struct mod_hash_entry *mhe_next; /* next item in chain */
-};
-
-struct mod_hash_stat {
- ulong_t mhs_hit; /* tried a 'find' and it succeeded */
- ulong_t mhs_miss; /* tried a 'find' but it failed */
-	ulong_t mhs_coll;	/* bumped when an insert fails on a duplicate */
- ulong_t mhs_nelems; /* total number of stored key/value pairs */
- ulong_t mhs_nomem; /* number of times kmem_alloc failed */
-};
-
-struct mod_hash {
- krwlock_t mh_contents; /* lock protecting contents */
- char *mh_name; /* hash name */
- int mh_sleep; /* kmem_alloc flag */
- size_t mh_nchains; /* # of elements in mh_entries */
-
- /* key and val destructor */
- void (*mh_kdtor)(mod_hash_key_t);
- void (*mh_vdtor)(mod_hash_val_t);
-
- /* key comparator */
- int (*mh_keycmp)(mod_hash_key_t, mod_hash_key_t);
-
- /* hash algorithm, and algorithm-private data */
- uint_t (*mh_hashalg)(void *, mod_hash_key_t);
- void *mh_hashalg_data;
-
- struct mod_hash *mh_next; /* next hash in list */
-
- struct mod_hash_stat mh_stat;
-
- struct mod_hash_entry *mh_entries[1];
-};
-
-/*
- * MH_SIZE()
- * Compute the size of a mod_hash_t, in bytes, given the number of
- * hash chains (mh_entries slots) it contains.
- */
-#define MH_SIZE(n) \
- (sizeof (mod_hash_t) + ((n) - 1) * (sizeof (struct mod_hash_entry *)))
-
-/*
- * Module initialization and teardown; each called once.
- */
-void mod_hash_fini(void);
-void mod_hash_init(void);
-
-/*
- * Internal routines. Use directly with care.
- */
-uint_t i_mod_hash(mod_hash_t *, mod_hash_key_t);
-int i_mod_hash_insert_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t,
- mod_hash_hndl_t);
-int i_mod_hash_remove_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
-int i_mod_hash_find_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
-void i_mod_hash_walk_nosync(mod_hash_t *, uint_t (*)(mod_hash_key_t,
- mod_hash_val_t *, void *), void *);
-void i_mod_hash_clear_nosync(mod_hash_t *hash);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_MODHASH_IMPL_H */
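
MH_SIZE() exists because mh_entries is declared with a single slot but used as
a variable-length tail: the header and all of the bucket pointers come from one
allocation. A sketch, assuming a sleeping allocation (the real constructors
also fill in the destructors, comparator, and hash function):

	/* Header plus all nchains bucket pointers in one kmem block. */
	static mod_hash_t *
	alloc_hash_table(size_t nchains)
	{
		mod_hash_t *h = kmem_zalloc(MH_SIZE(nchains), KM_SLEEP);

		h->mh_nchains = nchains;
		return (h);
	}
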
diff --git a/sys/contrib/openzfs/module/icp/include/sys/stack.h b/sys/contrib/openzfs/module/icp/include/sys/stack.h
index 64fecf409b5c..0bace018b5ab 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/stack.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/stack.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/include/sys/trap.h b/sys/contrib/openzfs/module/icp/include/sys/trap.h
index 7f9fd375805f..2f47d43939c1 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/trap.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/trap.h
@@ -7,7 +7,7 @@
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
diff --git a/sys/contrib/openzfs/module/icp/io/aes.c b/sys/contrib/openzfs/module/icp/io/aes.c
index c47c7567b900..d6f01304f56b 100644
--- a/sys/contrib/openzfs/module/icp/io/aes.c
+++ b/sys/contrib/openzfs/module/icp/io/aes.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -32,102 +32,65 @@
#include <sys/crypto/spi.h>
#include <sys/crypto/icp.h>
#include <modes/modes.h>
-#include <sys/modctl.h>
#define _AES_IMPL
#include <aes/aes_impl.h>
#include <modes/gcm_impl.h>
-#define CRYPTO_PROVIDER_NAME "aes"
-
-extern struct mod_ops mod_cryptoops;
-
-/*
- * Module linkage information for the kernel.
- */
-static struct modlcrypto modlcrypto = {
- &mod_cryptoops,
- "AES Kernel SW Provider"
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, { (void *)&modlcrypto, NULL }
-};
-
/*
* Mechanism info structure passed to KCF during registration.
*/
-static crypto_mech_info_t aes_mech_info_tab[] = {
+static const crypto_mech_info_t aes_mech_info_tab[] = {
/* AES_ECB */
{SUN_CKM_AES_ECB, AES_ECB_MECH_INFO_TYPE,
CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
- CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
- AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC},
/* AES_CBC */
{SUN_CKM_AES_CBC, AES_CBC_MECH_INFO_TYPE,
CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
- CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
- AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC},
/* AES_CTR */
{SUN_CKM_AES_CTR, AES_CTR_MECH_INFO_TYPE,
CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
- CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
- AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC},
/* AES_CCM */
{SUN_CKM_AES_CCM, AES_CCM_MECH_INFO_TYPE,
CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
- CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
- AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC},
/* AES_GCM */
{SUN_CKM_AES_GCM, AES_GCM_MECH_INFO_TYPE,
CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
- CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
- AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC},
/* AES_GMAC */
{SUN_CKM_AES_GMAC, AES_GMAC_MECH_INFO_TYPE,
CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC |
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC |
- CRYPTO_FG_SIGN | CRYPTO_FG_SIGN_ATOMIC |
- CRYPTO_FG_VERIFY | CRYPTO_FG_VERIFY_ATOMIC,
- AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}
-};
-
-static void aes_provider_status(crypto_provider_handle_t, uint_t *);
-
-static crypto_control_ops_t aes_control_ops = {
- aes_provider_status
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
};
static int aes_encrypt_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_key_t *, crypto_spi_ctx_template_t);
static int aes_decrypt_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_key_t *, crypto_spi_ctx_template_t);
static int aes_common_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t, boolean_t);
+ crypto_key_t *, crypto_spi_ctx_template_t, boolean_t);
static int aes_common_init_ctx(aes_ctx_t *, crypto_spi_ctx_template_t *,
crypto_mechanism_t *, crypto_key_t *, int, boolean_t);
-static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
+static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *);
+static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *);
-static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
+static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *);
static int aes_encrypt_update(crypto_ctx_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
-static int aes_encrypt_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_data_t *);
+static int aes_encrypt_atomic(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
-static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
+static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *);
static int aes_decrypt_update(crypto_ctx_t *, crypto_data_t *,
- crypto_data_t *, crypto_req_handle_t);
-static int aes_decrypt_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
- crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_data_t *);
+static int aes_decrypt_atomic(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
-static crypto_cipher_ops_t aes_cipher_ops = {
+static const crypto_cipher_ops_t aes_cipher_ops = {
.encrypt_init = aes_encrypt_init,
.encrypt = aes_encrypt,
.encrypt_update = aes_encrypt_update,
@@ -140,14 +103,12 @@ static crypto_cipher_ops_t aes_cipher_ops = {
.decrypt_atomic = aes_decrypt_atomic
};
-static int aes_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int aes_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int aes_mac_atomic(crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t);
+static int aes_mac_verify_atomic(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
-static crypto_mac_ops_t aes_mac_ops = {
+static const crypto_mac_ops_t aes_mac_ops = {
.mac_init = NULL,
.mac = NULL,
.mac_update = NULL,
@@ -156,42 +117,28 @@ static crypto_mac_ops_t aes_mac_ops = {
.mac_verify_atomic = aes_mac_verify_atomic
};
-static int aes_create_ctx_template(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
- size_t *, crypto_req_handle_t);
+static int aes_create_ctx_template(crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t *, size_t *);
static int aes_free_context(crypto_ctx_t *);
-static crypto_ctx_ops_t aes_ctx_ops = {
+static const crypto_ctx_ops_t aes_ctx_ops = {
.create_ctx_template = aes_create_ctx_template,
.free_context = aes_free_context
};
-static crypto_ops_t aes_crypto_ops = {{{{{
- &aes_control_ops,
+static const crypto_ops_t aes_crypto_ops = {
NULL,
&aes_cipher_ops,
&aes_mac_ops,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- &aes_ctx_ops
-}}}}};
+ &aes_ctx_ops,
+};
-static crypto_provider_info_t aes_prov_info = {{{{
- CRYPTO_SPI_VERSION_1,
+static const crypto_provider_info_t aes_prov_info = {
"AES Software Provider",
- CRYPTO_SW_PROVIDER,
- NULL,
&aes_crypto_ops,
- sizeof (aes_mech_info_tab)/sizeof (crypto_mech_info_t),
+ sizeof (aes_mech_info_tab) / sizeof (crypto_mech_info_t),
aes_mech_info_tab
-}}}};
+};
static crypto_kcf_provider_handle_t aes_prov_handle = 0;
static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW };
@@ -199,20 +146,13 @@ static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW };
int
aes_mod_init(void)
{
- int ret;
-
/* Determine the fastest available implementation. */
aes_impl_init();
gcm_impl_init();
- if ((ret = mod_install(&modlinkage)) != 0)
- return (ret);
-
/* Register with KCF. If the registration fails, remove the module. */
- if (crypto_register_provider(&aes_prov_info, &aes_prov_handle)) {
- (void) mod_remove(&modlinkage);
+ if (crypto_register_provider(&aes_prov_info, &aes_prov_handle))
return (EACCES);
- }
return (0);
}
@@ -228,11 +168,11 @@ aes_mod_fini(void)
aes_prov_handle = 0;
}
- return (mod_remove(&modlinkage));
+ return (0);
}
static int
-aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag)
+aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx)
{
void *p = NULL;
boolean_t param_required = B_TRUE;
@@ -274,7 +214,7 @@ aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag)
rv = CRYPTO_MECHANISM_PARAM_INVALID;
}
if (ctx != NULL) {
- p = (alloc_fun)(kmflag);
+ p = (alloc_fun)(KM_SLEEP);
*ctx = p;
}
return (rv);
@@ -286,52 +226,31 @@ aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag)
static int
init_keysched(crypto_key_t *key, void *newbie)
{
- /*
- * Only keys by value are supported by this module.
- */
- switch (key->ck_format) {
- case CRYPTO_KEY_RAW:
- if (key->ck_length < AES_MINBITS ||
- key->ck_length > AES_MAXBITS) {
- return (CRYPTO_KEY_SIZE_RANGE);
- }
-
- /* key length must be either 128, 192, or 256 */
- if ((key->ck_length & 63) != 0)
- return (CRYPTO_KEY_SIZE_RANGE);
- break;
- default:
- return (CRYPTO_KEY_TYPE_INCONSISTENT);
+ if (key->ck_length < AES_MINBITS ||
+ key->ck_length > AES_MAXBITS) {
+ return (CRYPTO_KEY_SIZE_RANGE);
}
+ /* key length must be either 128, 192, or 256 */
+ if ((key->ck_length & 63) != 0)
+ return (CRYPTO_KEY_SIZE_RANGE);
+
aes_init_keysched(key->ck_data, key->ck_length, newbie);
return (CRYPTO_SUCCESS);
}
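
A note on the simplified check above: assuming AES_MINBITS is 128 and
AES_MAXBITS is 256, the range test plus the multiple-of-64 test admit exactly
the three AES key sizes:

	/*
	 * 128 -> ok          160 -> (160 & 63) != 0, rejected
	 * 192 -> ok          320 -> above AES_MAXBITS, rejected
	 * 256 -> ok
	 */
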
-/*
- * KCF software provider control entry points.
- */
-/* ARGSUSED */
-static void
-aes_provider_status(crypto_provider_handle_t provider, uint_t *status)
-{
- *status = CRYPTO_PROVIDER_READY;
-}
-
static int
aes_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_spi_ctx_template_t template,
- crypto_req_handle_t req)
+ crypto_key_t *key, crypto_spi_ctx_template_t template)
{
- return (aes_common_init(ctx, mechanism, key, template, req, B_TRUE));
+ return (aes_common_init(ctx, mechanism, key, template, B_TRUE));
}
static int
aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_spi_ctx_template_t template,
- crypto_req_handle_t req)
+ crypto_key_t *key, crypto_spi_ctx_template_t template)
{
- return (aes_common_init(ctx, mechanism, key, template, req, B_FALSE));
+ return (aes_common_init(ctx, mechanism, key, template, B_FALSE));
}
@@ -342,25 +261,16 @@ aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
static int
aes_common_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_spi_ctx_template_t template,
- crypto_req_handle_t req, boolean_t is_encrypt_init)
+ boolean_t is_encrypt_init)
{
aes_ctx_t *aes_ctx;
int rv;
- int kmflag;
-
- /*
- * Only keys by value are supported by this module.
- */
- if (key->ck_format != CRYPTO_KEY_RAW) {
- return (CRYPTO_KEY_TYPE_INCONSISTENT);
- }
- kmflag = crypto_kmflag(req);
- if ((rv = aes_check_mech_param(mechanism, &aes_ctx, kmflag))
+ if ((rv = aes_check_mech_param(mechanism, &aes_ctx))
!= CRYPTO_SUCCESS)
return (rv);
- rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, kmflag,
+ rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, KM_SLEEP,
is_encrypt_init);
if (rv != CRYPTO_SUCCESS) {
crypto_free_mode_ctx(aes_ctx);
@@ -390,7 +300,7 @@ aes_copy_block64(uint8_t *in, uint64_t *out)
static int
aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext,
- crypto_data_t *ciphertext, crypto_req_handle_t req)
+ crypto_data_t *ciphertext)
{
int ret = CRYPTO_FAILED;
@@ -442,7 +352,7 @@ aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext,
/*
* Do an update on the specified input data.
*/
- ret = aes_encrypt_update(ctx, plaintext, ciphertext, req);
+ ret = aes_encrypt_update(ctx, plaintext, ciphertext);
if (ret != CRYPTO_SUCCESS) {
return (ret);
}
@@ -505,7 +415,7 @@ aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext,
static int
aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
- crypto_data_t *plaintext, crypto_req_handle_t req)
+ crypto_data_t *plaintext)
{
int ret = CRYPTO_FAILED;
@@ -563,7 +473,7 @@ aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
/*
* Do an update on the specified input data.
*/
- ret = aes_decrypt_update(ctx, ciphertext, plaintext, req);
+ ret = aes_decrypt_update(ctx, ciphertext, plaintext);
if (ret != CRYPTO_SUCCESS) {
goto cleanup;
}
@@ -617,10 +527,9 @@ cleanup:
}
-/* ARGSUSED */
static int
aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext,
- crypto_data_t *ciphertext, crypto_req_handle_t req)
+ crypto_data_t *ciphertext)
{
off_t saved_offset;
size_t saved_length, out_len;
@@ -652,13 +561,11 @@ aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext,
switch (plaintext->cd_format) {
case CRYPTO_DATA_RAW:
ret = crypto_update_iov(ctx->cc_provider_private,
- plaintext, ciphertext, aes_encrypt_contiguous_blocks,
- aes_copy_block64);
+ plaintext, ciphertext, aes_encrypt_contiguous_blocks);
break;
case CRYPTO_DATA_UIO:
ret = crypto_update_uio(ctx->cc_provider_private,
- plaintext, ciphertext, aes_encrypt_contiguous_blocks,
- aes_copy_block64);
+ plaintext, ciphertext, aes_encrypt_contiguous_blocks);
break;
default:
ret = CRYPTO_ARGUMENTS_BAD;
@@ -690,7 +597,7 @@ aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext,
static int
aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
- crypto_data_t *plaintext, crypto_req_handle_t req)
+ crypto_data_t *plaintext)
{
off_t saved_offset;
size_t saved_length, out_len;
@@ -722,22 +629,17 @@ aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
saved_offset = plaintext->cd_offset;
saved_length = plaintext->cd_length;
- if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE))
- gcm_set_kmflag((gcm_ctx_t *)aes_ctx, crypto_kmflag(req));
-
/*
* Do the AES update on the specified input data.
*/
switch (ciphertext->cd_format) {
case CRYPTO_DATA_RAW:
ret = crypto_update_iov(ctx->cc_provider_private,
- ciphertext, plaintext, aes_decrypt_contiguous_blocks,
- aes_copy_block64);
+ ciphertext, plaintext, aes_decrypt_contiguous_blocks);
break;
case CRYPTO_DATA_UIO:
ret = crypto_update_uio(ctx->cc_provider_private,
- ciphertext, plaintext, aes_decrypt_contiguous_blocks,
- aes_copy_block64);
+ ciphertext, plaintext, aes_decrypt_contiguous_blocks);
break;
default:
ret = CRYPTO_ARGUMENTS_BAD;
@@ -769,10 +671,8 @@ aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
return (ret);
}
-/* ARGSUSED */
static int
-aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data,
- crypto_req_handle_t req)
+aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data)
{
aes_ctx_t *aes_ctx;
int ret;
@@ -826,10 +726,8 @@ aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data,
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
static int
-aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data,
- crypto_req_handle_t req)
+aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data)
{
aes_ctx_t *aes_ctx;
int ret;
@@ -929,14 +827,12 @@ aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data,
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
static int
-aes_encrypt_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+aes_encrypt_atomic(crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_data_t *plaintext, crypto_data_t *ciphertext,
- crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+ crypto_spi_ctx_template_t template)
{
- aes_ctx_t aes_ctx; /* on the stack */
+ aes_ctx_t aes_ctx = {{{{0}}}};
off_t saved_offset;
size_t saved_length;
size_t length_needed;
@@ -959,13 +855,11 @@ aes_encrypt_atomic(crypto_provider_handle_t provider,
return (CRYPTO_DATA_LEN_RANGE);
}
- if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS)
+ if ((ret = aes_check_mech_param(mechanism, NULL)) != CRYPTO_SUCCESS)
return (ret);
- bzero(&aes_ctx, sizeof (aes_ctx_t));
-
ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key,
- crypto_kmflag(req), B_TRUE);
+ KM_SLEEP, B_TRUE);
if (ret != CRYPTO_SUCCESS)
return (ret);
@@ -976,7 +870,7 @@ aes_encrypt_atomic(crypto_provider_handle_t provider,
case AES_GMAC_MECH_INFO_TYPE:
if (plaintext->cd_length != 0)
return (CRYPTO_ARGUMENTS_BAD);
- fallthrough;
+ zfs_fallthrough;
case AES_GCM_MECH_INFO_TYPE:
length_needed = plaintext->cd_length + aes_ctx.ac_tag_len;
break;
@@ -1000,11 +894,11 @@ aes_encrypt_atomic(crypto_provider_handle_t provider,
switch (plaintext->cd_format) {
case CRYPTO_DATA_RAW:
ret = crypto_update_iov(&aes_ctx, plaintext, ciphertext,
- aes_encrypt_contiguous_blocks, aes_copy_block64);
+ aes_encrypt_contiguous_blocks);
break;
case CRYPTO_DATA_UIO:
ret = crypto_update_uio(&aes_ctx, plaintext, ciphertext,
- aes_encrypt_contiguous_blocks, aes_copy_block64);
+ aes_encrypt_contiguous_blocks);
break;
default:
ret = CRYPTO_ARGUMENTS_BAD;
@@ -1048,31 +942,21 @@ aes_encrypt_atomic(crypto_provider_handle_t provider,
out:
if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
- bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
+ memset(aes_ctx.ac_keysched, 0, aes_ctx.ac_keysched_len);
kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
}
-#ifdef CAN_USE_GCM_ASM
- if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) &&
- ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) {
-
- gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx;
-
- bzero(ctx->gcm_Htable, ctx->gcm_htab_len);
- kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
+ if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) {
+ gcm_clear_ctx((gcm_ctx_t *)&aes_ctx);
}
-#endif
-
return (ret);
}
-/* ARGSUSED */
static int
-aes_decrypt_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+aes_decrypt_atomic(crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_data_t *ciphertext, crypto_data_t *plaintext,
- crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+ crypto_spi_ctx_template_t template)
{
- aes_ctx_t aes_ctx; /* on the stack */
+ aes_ctx_t aes_ctx = {{{{0}}}};
off_t saved_offset;
size_t saved_length;
size_t length_needed;
@@ -1095,13 +979,11 @@ aes_decrypt_atomic(crypto_provider_handle_t provider,
return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
}
- if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS)
+ if ((ret = aes_check_mech_param(mechanism, NULL)) != CRYPTO_SUCCESS)
return (ret);
- bzero(&aes_ctx, sizeof (aes_ctx_t));
-
ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key,
- crypto_kmflag(req), B_FALSE);
+ KM_SLEEP, B_FALSE);
if (ret != CRYPTO_SUCCESS)
return (ret);
@@ -1131,21 +1013,17 @@ aes_decrypt_atomic(crypto_provider_handle_t provider,
saved_offset = plaintext->cd_offset;
saved_length = plaintext->cd_length;
- if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE ||
- mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE)
- gcm_set_kmflag((gcm_ctx_t *)&aes_ctx, crypto_kmflag(req));
-
/*
* Do an update on the specified input data.
*/
switch (ciphertext->cd_format) {
case CRYPTO_DATA_RAW:
ret = crypto_update_iov(&aes_ctx, ciphertext, plaintext,
- aes_decrypt_contiguous_blocks, aes_copy_block64);
+ aes_decrypt_contiguous_blocks);
break;
case CRYPTO_DATA_UIO:
ret = crypto_update_uio(&aes_ctx, ciphertext, plaintext,
- aes_decrypt_contiguous_blocks, aes_copy_block64);
+ aes_decrypt_contiguous_blocks);
break;
default:
ret = CRYPTO_ARGUMENTS_BAD;
@@ -1206,7 +1084,7 @@ aes_decrypt_atomic(crypto_provider_handle_t provider,
out:
if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
- bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
+ memset(aes_ctx.ac_keysched, 0, aes_ctx.ac_keysched_len);
kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
}
@@ -1215,18 +1093,7 @@ out:
vmem_free(aes_ctx.ac_pt_buf, aes_ctx.ac_data_len);
}
} else if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) {
- if (((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf != NULL) {
- vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf,
- ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len);
- }
-#ifdef CAN_USE_GCM_ASM
- if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) {
- gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx;
-
- bzero(ctx->gcm_Htable, ctx->gcm_htab_len);
- kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
- }
-#endif
+ gcm_clear_ctx((gcm_ctx_t *)&aes_ctx);
}
return (ret);
@@ -1235,11 +1102,9 @@ out:
/*
* KCF software provider context template entry points.
*/
-/* ARGSUSED */
static int
-aes_create_ctx_template(crypto_provider_handle_t provider,
- crypto_mechanism_t *mechanism, crypto_key_t *key,
- crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size, crypto_req_handle_t req)
+aes_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size)
{
void *keysched;
size_t size;
@@ -1253,8 +1118,7 @@ aes_create_ctx_template(crypto_provider_handle_t provider,
mechanism->cm_type != AES_GMAC_MECH_INFO_TYPE)
return (CRYPTO_MECHANISM_INVALID);
- if ((keysched = aes_alloc_keysched(&size,
- crypto_kmflag(req))) == NULL) {
+ if ((keysched = aes_alloc_keysched(&size, KM_SLEEP)) == NULL) {
return (CRYPTO_HOST_MEMORY);
}
@@ -1263,7 +1127,7 @@ aes_create_ctx_template(crypto_provider_handle_t provider,
* in the key.
*/
if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) {
- bzero(keysched, size);
+ memset(keysched, 0, size);
kmem_free(keysched, size);
return (rv);
}
@@ -1283,7 +1147,8 @@ aes_free_context(crypto_ctx_t *ctx)
if (aes_ctx != NULL) {
if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
ASSERT(aes_ctx->ac_keysched_len != 0);
- bzero(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len);
+ memset(aes_ctx->ac_keysched, 0,
+ aes_ctx->ac_keysched_len);
kmem_free(aes_ctx->ac_keysched,
aes_ctx->ac_keysched_len);
}
@@ -1373,7 +1238,7 @@ aes_common_init_ctx(aes_ctx_t *aes_ctx, crypto_spi_ctx_template_t *template,
if (rv != CRYPTO_SUCCESS) {
if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
- bzero(keysched, size);
+ memset(keysched, 0, size);
kmem_free(keysched, size);
}
}
@@ -1413,10 +1278,9 @@ process_gmac_mech(crypto_mechanism_t *mech, crypto_data_t *data,
}
static int
-aes_mac_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+aes_mac_atomic(crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+ crypto_spi_ctx_template_t template)
{
CK_AES_GCM_PARAMS gcm_params;
crypto_mechanism_t gcm_mech;
@@ -1430,15 +1294,13 @@ aes_mac_atomic(crypto_provider_handle_t provider,
gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
gcm_mech.cm_param = (char *)&gcm_params;
- return (aes_encrypt_atomic(provider, session_id, &gcm_mech,
- key, &null_crypto_data, mac, template, req));
+ return (aes_encrypt_atomic(&gcm_mech,
+ key, &null_crypto_data, mac, template));
}
static int
-aes_mac_verify_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+aes_mac_verify_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t template)
{
CK_AES_GCM_PARAMS gcm_params;
crypto_mechanism_t gcm_mech;
@@ -1452,6 +1314,6 @@ aes_mac_verify_atomic(crypto_provider_handle_t provider,
gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
gcm_mech.cm_param = (char *)&gcm_params;
- return (aes_decrypt_atomic(provider, session_id, &gcm_mech,
- key, mac, &null_crypto_data, template, req));
+ return (aes_decrypt_atomic(&gcm_mech,
+ key, mac, &null_crypto_data, template));
}
diff --git a/sys/contrib/openzfs/module/icp/io/edonr_mod.c b/sys/contrib/openzfs/module/icp/io/edonr_mod.c
deleted file mode 100644
index a806af610629..000000000000
--- a/sys/contrib/openzfs/module/icp/io/edonr_mod.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2013 Saso Kiselkov. All rights reserved.
- */
-
-#include <sys/modctl.h>
-#include <sys/crypto/common.h>
-#include <sys/crypto/icp.h>
-#include <sys/crypto/spi.h>
-#include <sys/sysmacros.h>
-#include <sys/edonr.h>
-
-/*
- * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic
- * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose
- * cryptographic use. Users of Edon-R must interface directly to this module.
- */
-
-static struct modlmisc modlmisc = {
- &mod_cryptoops,
- "Edon-R Message-Digest Algorithm"
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, {&modlmisc, NULL}
-};
-
-int
-edonr_mod_init(void)
-{
- int error;
-
- if ((error = mod_install(&modlinkage)) != 0)
- return (error);
-
- return (0);
-}
-
-int
-edonr_mod_fini(void)
-{
- return (mod_remove(&modlinkage));
-}
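
With the module-linkage boilerplate removed, consumers of Edon-R keep calling
the algorithm entry points directly, as the deleted comment above notes. A
minimal sketch written against the historical illumos prototypes (EdonRInit
taking the digest width in bits and EdonRUpdate a bit count); the refactored
sys/edonr.h may differ:

	#include <sys/edonr.h>

	static void
	edonr_digest_512(const uint8_t *buf, size_t len, uint8_t hash[512 / 8])
	{
		EdonRState ctx;

		EdonRInit(&ctx, 512);
		EdonRUpdate(&ctx, buf, len * 8);	/* bit count */
		EdonRFinal(&ctx, hash);
	}
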
diff --git a/sys/contrib/openzfs/module/icp/io/sha1_mod.c b/sys/contrib/openzfs/module/icp/io/sha1_mod.c
deleted file mode 100644
index 6dcee6b2ecf2..000000000000
--- a/sys/contrib/openzfs/module/icp/io/sha1_mod.c
+++ /dev/null
@@ -1,1230 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/modctl.h>
-#include <sys/crypto/common.h>
-#include <sys/crypto/icp.h>
-#include <sys/crypto/spi.h>
-
-#include <sha1/sha1.h>
-#include <sha1/sha1_impl.h>
-
-/*
- * The sha1 module is created with two modlinkages:
- * - a modlmisc that allows consumers to directly call the entry points
- * SHA1Init, SHA1Update, and SHA1Final.
- * - a modlcrypto that allows the module to register with the Kernel
- * Cryptographic Framework (KCF) as a software provider for the SHA1
- * mechanisms.
- */
-
-static struct modlcrypto modlcrypto = {
- &mod_cryptoops,
- "SHA1 Kernel SW Provider 1.1"
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, { &modlcrypto, NULL }
-};
-
-
-/*
- * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed
- * by KCF to one of the entry points.
- */
-
-#define PROV_SHA1_CTX(ctx) ((sha1_ctx_t *)(ctx)->cc_provider_private)
-#define PROV_SHA1_HMAC_CTX(ctx) ((sha1_hmac_ctx_t *)(ctx)->cc_provider_private)
-
-/* to extract the digest length passed as mechanism parameter */
-#define PROV_SHA1_GET_DIGEST_LEN(m, len) { \
- if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \
-		(len) = (uint32_t)*((ulong_t *)(void *)(m)->cm_param); \
- else { \
- ulong_t tmp_ulong; \
- bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \
- (len) = (uint32_t)tmp_ulong; \
- } \
-}
-
-#define PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) { \
- SHA1Init(ctx); \
- SHA1Update(ctx, key, len); \
- SHA1Final(digest, ctx); \
-}
-
-/*
- * Mechanism info structure passed to KCF during registration.
- */
-static crypto_mech_info_t sha1_mech_info_tab[] = {
- /* SHA1 */
- {SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
- /* SHA1-HMAC */
- {SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
- /* SHA1-HMAC GENERAL */
- {SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES}
-};
-
-static void sha1_provider_status(crypto_provider_handle_t, uint_t *);
-
-static crypto_control_ops_t sha1_control_ops = {
- sha1_provider_status
-};
-
-static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_req_handle_t);
-static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-
-static crypto_digest_ops_t sha1_digest_ops = {
- .digest_init = sha1_digest_init,
- .digest = sha1_digest,
- .digest_update = sha1_digest_update,
- .digest_key = NULL,
- .digest_final = sha1_digest_final,
- .digest_atomic = sha1_digest_atomic
-};
-
-static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
-static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-
-static crypto_mac_ops_t sha1_mac_ops = {
- .mac_init = sha1_mac_init,
- .mac = NULL,
- .mac_update = sha1_mac_update,
- .mac_final = sha1_mac_final,
- .mac_atomic = sha1_mac_atomic,
- .mac_verify_atomic = sha1_mac_verify_atomic
-};
-
-static int sha1_create_ctx_template(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
- size_t *, crypto_req_handle_t);
-static int sha1_free_context(crypto_ctx_t *);
-
-static crypto_ctx_ops_t sha1_ctx_ops = {
- .create_ctx_template = sha1_create_ctx_template,
- .free_context = sha1_free_context
-};
-
-static crypto_ops_t sha1_crypto_ops = {{{{{
- &sha1_control_ops,
- &sha1_digest_ops,
- NULL,
- &sha1_mac_ops,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- &sha1_ctx_ops,
-}}}}};
-
-static crypto_provider_info_t sha1_prov_info = {{{{
- CRYPTO_SPI_VERSION_1,
- "SHA1 Software Provider",
- CRYPTO_SW_PROVIDER,
- NULL,
- &sha1_crypto_ops,
- sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t),
- sha1_mech_info_tab
-}}}};
-
-static crypto_kcf_provider_handle_t sha1_prov_handle = 0;
-
-int
-sha1_mod_init(void)
-{
- int ret;
-
- if ((ret = mod_install(&modlinkage)) != 0)
- return (ret);
-
- /*
- * Register with KCF. If the registration fails, log an
- * error but do not uninstall the module, since the functionality
- * provided by misc/sha1 should still be available.
- */
- if ((ret = crypto_register_provider(&sha1_prov_info,
- &sha1_prov_handle)) != CRYPTO_SUCCESS)
- cmn_err(CE_WARN, "sha1 _init: "
- "crypto_register_provider() failed (0x%x)", ret);
-
- return (0);
-}
-
-int
-sha1_mod_fini(void)
-{
- int ret;
-
- if (sha1_prov_handle != 0) {
- if ((ret = crypto_unregister_provider(sha1_prov_handle)) !=
- CRYPTO_SUCCESS) {
- cmn_err(CE_WARN,
- "sha1 _fini: crypto_unregister_provider() "
- "failed (0x%x)", ret);
- return (EBUSY);
- }
- sha1_prov_handle = 0;
- }
-
- return (mod_remove(&modlinkage));
-}
-
-/*
- * KCF software provider control entry points.
- */
-/* ARGSUSED */
-static void
-sha1_provider_status(crypto_provider_handle_t provider, uint_t *status)
-{
- *status = CRYPTO_PROVIDER_READY;
-}
-
-/*
- * KCF software provider digest entry points.
- */
-
-static int
-sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_req_handle_t req)
-{
- if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /*
- * Allocate and initialize SHA1 context.
- */
- ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t),
- crypto_kmflag(req));
- if (ctx->cc_provider_private == NULL)
- return (CRYPTO_HOST_MEMORY);
-
- PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE;
- SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
-
- return (CRYPTO_SUCCESS);
-}
-
-/*
- * Helper SHA1 digest update function for uio data.
- */
-static int
-sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
-{
- off_t offset = data->cd_offset;
- size_t length = data->cd_length;
- uint_t vec_idx = 0;
- size_t cur_len;
-
-	/* we support only kernel buffers */
- if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /*
- * Jump to the first iovec containing data to be
- * digested.
- */
- offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx);
- if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) {
- /*
- * The caller specified an offset that is larger than the
- * total size of the buffers it provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
-
- /*
- * Now do the digesting on the iovecs.
- */
- while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) {
- cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) -
- offset, length);
-
- SHA1Update(sha1_ctx,
- (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset,
- cur_len);
-
- length -= cur_len;
- vec_idx++;
- offset = 0;
- }
-
- if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) {
- /*
-		 * The end of the specified iovecs was reached but
-		 * the requested length could not be processed, i.e.,
-		 * the caller asked to digest more data than it provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
-
- return (CRYPTO_SUCCESS);
-}
-
-/*
- * Helper SHA1 digest final function for uio data.
- * digest_len is the length of the desired digest. If digest_len
- * is smaller than the default SHA1 digest length, the caller
- * must pass a scratch buffer, digest_scratch, which must
- * be at least SHA1_DIGEST_LENGTH bytes.
- */
-static int
-sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
- ulong_t digest_len, uchar_t *digest_scratch)
-{
- off_t offset = digest->cd_offset;
- uint_t vec_idx = 0;
-
-	/* we support only kernel buffers */
- if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /*
- * Jump to the first iovec containing ptr to the digest to
- * be returned.
- */
- offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
- if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) {
- /*
- * The caller specified an offset that is
- * larger than the total size of the buffers
- * it provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
-
- if (offset + digest_len <=
- zfs_uio_iovlen(digest->cd_uio, vec_idx)) {
- /*
- * The computed SHA1 digest will fit in the current
- * iovec.
- */
- if (digest_len != SHA1_DIGEST_LENGTH) {
- /*
- * The caller requested a short digest. Digest
- * into a scratch buffer and return to
- * the user only what was requested.
- */
- SHA1Final(digest_scratch, sha1_ctx);
- bcopy(digest_scratch, (uchar_t *)
- zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
- digest_len);
- } else {
- SHA1Final((uchar_t *)zfs_uio_iovbase(digest->
- cd_uio, vec_idx) + offset,
- sha1_ctx);
- }
- } else {
- /*
-		 * The computed digest will cross one or more iovecs.
-		 * This is bad performance-wise but we need to support it.
-		 * Allocate a small scratch buffer on the stack and
-		 * copy it piecemeal to the specified digest iovecs.
- */
- uchar_t digest_tmp[SHA1_DIGEST_LENGTH];
- off_t scratch_offset = 0;
- size_t length = digest_len;
- size_t cur_len;
-
- SHA1Final(digest_tmp, sha1_ctx);
-
- while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
- cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) -
- offset, length);
- bcopy(digest_tmp + scratch_offset,
- zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
- cur_len);
-
- length -= cur_len;
- vec_idx++;
- scratch_offset += cur_len;
- offset = 0;
- }
-
- if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
- /*
-			 * The end of the specified iovecs was reached but
-			 * the requested length could not be processed, i.e.,
-			 * the caller asked to digest more data than it
-			 * provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
- }
-
- return (CRYPTO_SUCCESS);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * We need to just return the length needed to store the output.
- * We should not destroy the context for the following cases.
- */
- if ((digest->cd_length == 0) ||
- (digest->cd_length < SHA1_DIGEST_LENGTH)) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- return (CRYPTO_BUFFER_TOO_SMALL);
- }
-
- /*
- * Do the SHA1 update on the specified input data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret != CRYPTO_SUCCESS) {
- /* the update failed, free context and bail */
- kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
- ctx->cc_provider_private = NULL;
- digest->cd_length = 0;
- return (ret);
- }
-
- /*
- * Do a SHA1 final, must be done separately since the digest
- * type can be different than the input data type.
- */
- switch (digest->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Final((unsigned char *)digest->cd_raw.iov_base +
- digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- digest, SHA1_DIGEST_LENGTH, NULL);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- /* all done, free context and return */
-
- if (ret == CRYPTO_SUCCESS) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- } else {
- digest->cd_length = 0;
- }
-
- kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
- ctx->cc_provider_private = NULL;
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * Do the SHA1 update on the specified input data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * We need to just return the length needed to store the output.
- * We should not destroy the context for the following cases.
- */
- if ((digest->cd_length == 0) ||
- (digest->cd_length < SHA1_DIGEST_LENGTH)) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- return (CRYPTO_BUFFER_TOO_SMALL);
- }
-
- /*
- * Do a SHA1 final.
- */
- switch (digest->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Final((unsigned char *)digest->cd_raw.iov_base +
- digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- digest, SHA1_DIGEST_LENGTH, NULL);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- /* all done, free context and return */
-
- if (ret == CRYPTO_SUCCESS) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- } else {
- digest->cd_length = 0;
- }
-
- kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
- ctx->cc_provider_private = NULL;
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- SHA1_CTX sha1_ctx;
-
- if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /*
- * Do the SHA1 init.
- */
- SHA1Init(&sha1_ctx);
-
- /*
- * Do the SHA1 update on the specified input data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&sha1_ctx,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(&sha1_ctx, data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret != CRYPTO_SUCCESS) {
- /* the update failed, bail */
- digest->cd_length = 0;
- return (ret);
- }
-
- /*
- * Do a SHA1 final, must be done separately since the digest
- * type can be different than the input data type.
- */
- switch (digest->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Final((unsigned char *)digest->cd_raw.iov_base +
- digest->cd_offset, &sha1_ctx);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&sha1_ctx, digest,
- SHA1_DIGEST_LENGTH, NULL);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret == CRYPTO_SUCCESS) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- } else {
- digest->cd_length = 0;
- }
-
- return (ret);
-}
-
-/*
- * KCF software provider mac entry points.
- *
- * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text))
- *
- * Init:
- * The initialization routine initializes what we denote
- * as the inner and outer contexts by doing
- * - for inner context: SHA1(key XOR ipad)
- * - for outer context: SHA1(key XOR opad)
- *
- * Update:
- * Each subsequent SHA1 HMAC update will result in an
- * update of the inner context with the specified data.
- *
- * Final:
- * The SHA1 HMAC final will do a SHA1 final operation on the
- * inner context, and the resulting digest will be used
- * as the data for an update on the outer context. Last
- * but not least, a SHA1 final on the outer context will
- * be performed to obtain the SHA1 HMAC digest to return
- * to the user.
- */
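
Flattened, the three phases compute SHA1((K ^ opad) || SHA1((K ^ ipad) || text)).
A sketch in terms of the primitives used in this file, where k_ipad and k_opad
stand for the padded, XORed key blocks that sha1_mac_init_ctx() builds below:

	SHA1_CTX inner, outer;
	uchar_t d[SHA1_DIGEST_LENGTH];

	SHA1Init(&inner);
	SHA1Update(&inner, k_ipad, SHA1_HMAC_BLOCK_SIZE);
	SHA1Update(&inner, text, textlen);
	SHA1Final(d, &inner);			/* inner digest */

	SHA1Init(&outer);
	SHA1Update(&outer, k_opad, SHA1_HMAC_BLOCK_SIZE);
	SHA1Update(&outer, d, SHA1_DIGEST_LENGTH);
	SHA1Final(mac, &outer);			/* the HMAC */
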
-
-/*
- * Initialize a SHA1-HMAC context.
- */
-static void
-sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
-{
- uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK];
- uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK];
- uint_t i;
-
- bzero(ipad, SHA1_HMAC_BLOCK_SIZE);
- bzero(opad, SHA1_HMAC_BLOCK_SIZE);
-
- bcopy(keyval, ipad, length_in_bytes);
- bcopy(keyval, opad, length_in_bytes);
-
- /* XOR key with ipad (0x36) and opad (0x5c) */
- for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) {
- ipad[i] ^= 0x36363636;
- opad[i] ^= 0x5c5c5c5c;
- }
-
- /* perform SHA1 on ipad */
- SHA1Init(&ctx->hc_icontext);
- SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE);
-
- /* perform SHA1 on opad */
- SHA1Init(&ctx->hc_ocontext);
- SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE);
-}
-
-/*
- * Initialize a SHA1-HMAC context for a KCF mac operation.
- */
-static int
-sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
- mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t),
- crypto_kmflag(req));
- if (ctx->cc_provider_private == NULL)
- return (CRYPTO_HOST_MEMORY);
-
- if (ctx_template != NULL) {
- /* reuse context template */
- bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx),
- sizeof (sha1_hmac_ctx_t));
- } else {
- /* no context template, compute context */
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- uchar_t digested_key[SHA1_DIGEST_LENGTH];
- sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;
-
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext,
- key->ck_data, keylen_in_bytes, digested_key);
- sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
- digested_key, SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
- key->ck_data, keylen_in_bytes);
- }
- }
-
- /*
- * Get the mechanism parameters, if applicable.
- */
- PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
- if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
- if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t))
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- PROV_SHA1_GET_DIGEST_LEN(mechanism,
- PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len);
- if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len >
- SHA1_DIGEST_LENGTH)
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- }
-
- if (ret != CRYPTO_SUCCESS) {
- bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- ctx->cc_provider_private = NULL;
- }
-
- return (ret);
-}
-
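A hypothetical caller-side sketch of the cm_param handling validated above: the GENERAL mechanism carries the requested (possibly truncated) digest length as a ulong_t. Mechanism and type names are taken from this file; the surrounding KCF session plumbing is omitted.

/* Hypothetical sketch: request HMAC-SHA1 truncated to 12 bytes. */
ulong_t digest_len = 12;	/* must be <= SHA1_DIGEST_LENGTH (20) */
crypto_mechanism_t mech;

mech.cm_type = SHA1_HMAC_GEN_MECH_INFO_TYPE;
mech.cm_param = (caddr_t)&digest_len;
mech.cm_param_len = sizeof (digest_len);	/* sizeof (ulong_t) */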
-/* ARGSUSED */
-static int
-sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * Do a SHA1 update of the inner context using the specified
- * data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(
- &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uchar_t digest[SHA1_DIGEST_LENGTH];
- uint32_t digest_len = SHA1_DIGEST_LENGTH;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type ==
- SHA1_HMAC_GEN_MECH_INFO_TYPE)
- digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len;
-
-	/*
-	 * If the output buffer is too small, just return the length
-	 * needed to store the output and leave the context intact so
-	 * the caller can retry with a larger buffer.
-	 */
- if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) {
- mac->cd_length = digest_len;
- return (CRYPTO_BUFFER_TOO_SMALL);
- }
-
- /*
- * Do a SHA1 final on the inner context.
- */
- SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext);
-
- /*
- * Do a SHA1 update on the outer context, feeding the inner
- * digest as data.
- */
- SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest,
- SHA1_DIGEST_LENGTH);
-
-	/*
-	 * Do a SHA1 final on the outer context, storing the computed
-	 * digest in the user's buffer.
-	 */
- switch (mac->cd_format) {
- case CRYPTO_DATA_RAW:
- if (digest_len != SHA1_DIGEST_LENGTH) {
- /*
- * The caller requested a short digest. Digest
- * into a scratch buffer and return to
- * the user only what was requested.
- */
- SHA1Final(digest,
- &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
- bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len);
- } else {
- SHA1Final((unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset,
- &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
- }
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(
- &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac,
- digest_len, digest);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret == CRYPTO_SUCCESS) {
- mac->cd_length = digest_len;
- } else {
- mac->cd_length = 0;
- }
-
- bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- ctx->cc_provider_private = NULL;
-
- return (ret);
-}
-
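The CRYPTO_BUFFER_TOO_SMALL handling in sha1_mac_final() above implies a two-call pattern: probe for the required size, then retry. A hypothetical sketch of that pattern follows; a real consumer would reach this code through the KCF crypto_mac_final() entry point rather than calling the provider function directly.

/* Hypothetical sketch: probe for the MAC size, then finalize. */
crypto_data_t mac;
int ret;

bzero(&mac, sizeof (mac));
mac.cd_format = CRYPTO_DATA_RAW;
mac.cd_length = 0;			/* deliberately too small */
if (sha1_mac_final(ctx, &mac, NULL) == CRYPTO_BUFFER_TOO_SMALL) {
	/* mac.cd_length now holds the needed size; ctx is still live. */
	mac.cd_raw.iov_base = kmem_alloc(mac.cd_length, KM_SLEEP);
	mac.cd_raw.iov_len = mac.cd_length;
	ret = sha1_mac_final(ctx, &mac, NULL);	/* destroys ctx */
}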
-#define SHA1_MAC_UPDATE(data, ctx, ret) { \
- switch (data->cd_format) { \
- case CRYPTO_DATA_RAW: \
- SHA1Update(&(ctx).hc_icontext, \
- (uint8_t *)data->cd_raw.iov_base + \
- data->cd_offset, data->cd_length); \
- break; \
- case CRYPTO_DATA_UIO: \
- ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \
- break; \
- default: \
- ret = CRYPTO_ARGUMENTS_BAD; \
- } \
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uchar_t digest[SHA1_DIGEST_LENGTH];
- sha1_hmac_ctx_t sha1_hmac_ctx;
- uint32_t digest_len = SHA1_DIGEST_LENGTH;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
- mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- if (ctx_template != NULL) {
- /* reuse context template */
- bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- } else {
- /* no context template, initialize context */
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
- key->ck_data, keylen_in_bytes, digest);
- sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
- SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
- keylen_in_bytes);
- }
- }
-
- /* get the mechanism parameters, if applicable */
- if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
- if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t)) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
- if (digest_len > SHA1_DIGEST_LENGTH) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- }
-
- /* do a SHA1 update of the inner context using the specified data */
- SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
- if (ret != CRYPTO_SUCCESS)
-		/* the update failed, zeroize the context and bail */
- goto bail;
-
- /*
- * Do a SHA1 final on the inner context.
- */
- SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
-
- /*
-	 * Do a SHA1 update on the outer context, feeding the inner
- * digest as data.
- */
- SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
-
- /*
- * Do a SHA1 final on the outer context, storing the computed
-	 * digest in the user's buffer.
- */
- switch (mac->cd_format) {
- case CRYPTO_DATA_RAW:
- if (digest_len != SHA1_DIGEST_LENGTH) {
- /*
- * The caller requested a short digest. Digest
- * into a scratch buffer and return to
- * the user only what was requested.
- */
- SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
- bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len);
- } else {
- SHA1Final((unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, &sha1_hmac_ctx.hc_ocontext);
- }
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac,
- digest_len, digest);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret == CRYPTO_SUCCESS) {
- mac->cd_length = digest_len;
- } else {
- mac->cd_length = 0;
- }
- /* Extra paranoia: zeroize the context on the stack */
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-
- return (ret);
-bail:
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- mac->cd_length = 0;
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_verify_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uchar_t digest[SHA1_DIGEST_LENGTH];
- sha1_hmac_ctx_t sha1_hmac_ctx;
- uint32_t digest_len = SHA1_DIGEST_LENGTH;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
- mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- if (ctx_template != NULL) {
- /* reuse context template */
- bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- } else {
- /* no context template, initialize context */
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
- key->ck_data, keylen_in_bytes, digest);
- sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
- SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
- keylen_in_bytes);
- }
- }
-
- /* get the mechanism parameters, if applicable */
- if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
- if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t)) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
- if (digest_len > SHA1_DIGEST_LENGTH) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- }
-
- if (mac->cd_length != digest_len) {
- ret = CRYPTO_INVALID_MAC;
- goto bail;
- }
-
- /* do a SHA1 update of the inner context using the specified data */
- SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
- if (ret != CRYPTO_SUCCESS)
-		/* the update failed, zeroize the context and bail */
- goto bail;
-
- /* do a SHA1 final on the inner context */
- SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
-
- /*
-	 * Do a SHA1 update on the outer context, feeding the inner
- * digest as data.
- */
- SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
-
- /*
- * Do a SHA1 final on the outer context, storing the computed
-	 * digest in the user's buffer.
- */
- SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
-
- /*
- * Compare the computed digest against the expected digest passed
- * as argument.
- */
-
- switch (mac->cd_format) {
-
- case CRYPTO_DATA_RAW:
- if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len) != 0)
- ret = CRYPTO_INVALID_MAC;
- break;
-
- case CRYPTO_DATA_UIO: {
- off_t offset = mac->cd_offset;
- uint_t vec_idx = 0;
- off_t scratch_offset = 0;
- size_t length = digest_len;
- size_t cur_len;
-
-		/* we support only kernel buffers */
- if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /* jump to the first iovec containing the expected digest */
- offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
- if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) {
- /*
- * The caller specified an offset that is
- * larger than the total size of the buffers
- * it provided.
- */
- ret = CRYPTO_DATA_LEN_RANGE;
- break;
- }
-
- /* do the comparison of computed digest vs specified one */
- while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) {
- cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) -
- offset, length);
-
- if (bcmp(digest + scratch_offset,
- zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset,
- cur_len) != 0) {
- ret = CRYPTO_INVALID_MAC;
- break;
- }
-
- length -= cur_len;
- vec_idx++;
- scratch_offset += cur_len;
- offset = 0;
- }
- break;
- }
-
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- return (ret);
-bail:
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- mac->cd_length = 0;
- return (ret);
-}
-
-/*
- * KCF software provider context management entry points.
- */
-
-/* ARGSUSED */
-static int
-sha1_create_ctx_template(crypto_provider_handle_t provider,
- crypto_mechanism_t *mechanism, crypto_key_t *key,
- crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
- crypto_req_handle_t req)
-{
- sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) &&
- (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) {
- return (CRYPTO_MECHANISM_INVALID);
- }
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /*
- * Allocate and initialize SHA1 context.
- */
- sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t),
- crypto_kmflag(req));
- if (sha1_hmac_ctx_tmpl == NULL)
- return (CRYPTO_HOST_MEMORY);
-
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- uchar_t digested_key[SHA1_DIGEST_LENGTH];
-
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext,
- key->ck_data, keylen_in_bytes, digested_key);
- sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key,
- SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data,
- keylen_in_bytes);
- }
-
- sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;
- *ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl;
- *ctx_template_size = sizeof (sha1_hmac_ctx_t);
-
- return (CRYPTO_SUCCESS);
-}
-
-static int
-sha1_free_context(crypto_ctx_t *ctx)
-{
- uint_t ctx_len;
- sha1_mech_type_t mech_type;
-
- if (ctx->cc_provider_private == NULL)
- return (CRYPTO_SUCCESS);
-
- /*
- * We have to free either SHA1 or SHA1-HMAC contexts, which
- * have different lengths.
- */
-
- mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type;
- if (mech_type == SHA1_MECH_INFO_TYPE)
- ctx_len = sizeof (sha1_ctx_t);
- else {
- ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE ||
- mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE);
- ctx_len = sizeof (sha1_hmac_ctx_t);
- }
-
- bzero(ctx->cc_provider_private, ctx_len);
- kmem_free(ctx->cc_provider_private, ctx_len);
- ctx->cc_provider_private = NULL;
-
- return (CRYPTO_SUCCESS);
-}
diff --git a/sys/contrib/openzfs/module/icp/io/sha2_mod.c b/sys/contrib/openzfs/module/icp/io/sha2_mod.c
index d690cd0bcb05..f068951b07f5 100644
--- a/sys/contrib/openzfs/module/icp/io/sha2_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/sha2_mod.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -25,33 +25,13 @@
*/
#include <sys/zfs_context.h>
-#include <sys/modctl.h>
#include <sys/crypto/common.h>
#include <sys/crypto/spi.h>
#include <sys/crypto/icp.h>
-#define _SHA2_IMPL
#include <sys/sha2.h>
#include <sha2/sha2_impl.h>
/*
- * The sha2 module is created with two modlinkages:
- * - a modlmisc that allows consumers to directly call the entry points
- * SHA2Init, SHA2Update, and SHA2Final.
- * - a modlcrypto that allows the module to register with the Kernel
- * Cryptographic Framework (KCF) as a software provider for the SHA2
- * mechanisms.
- */
-
-static struct modlcrypto modlcrypto = {
- &mod_cryptoops,
- "SHA2 Kernel SW Provider"
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, {&modlcrypto, NULL}
-};
-
-/*
* Macros to access the SHA2 or SHA2-HMAC contexts from a context passed
* by KCF to one of the entry points.
*/
@@ -65,7 +45,7 @@ static struct modlinkage modlinkage = {
(len) = (uint32_t)*((ulong_t *)(m)->cm_param); \
else { \
ulong_t tmp_ulong; \
- bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \
+ memcpy(&tmp_ulong, (m)->cm_param, sizeof (ulong_t)); \
(len) = (uint32_t)tmp_ulong; \
} \
}
@@ -79,91 +59,61 @@ static struct modlinkage modlinkage = {
/*
* Mechanism info structure passed to KCF during registration.
*/
-static crypto_mech_info_t sha2_mech_info_tab[] = {
+static const crypto_mech_info_t sha2_mech_info_tab[] = {
/* SHA256 */
{SUN_CKM_SHA256, SHA256_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
/* SHA256-HMAC */
{SUN_CKM_SHA256_HMAC, SHA256_HMAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
/* SHA256-HMAC GENERAL */
{SUN_CKM_SHA256_HMAC_GENERAL, SHA256_HMAC_GEN_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
/* SHA384 */
{SUN_CKM_SHA384, SHA384_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
/* SHA384-HMAC */
{SUN_CKM_SHA384_HMAC, SHA384_HMAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
/* SHA384-HMAC GENERAL */
{SUN_CKM_SHA384_HMAC_GENERAL, SHA384_HMAC_GEN_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
/* SHA512 */
{SUN_CKM_SHA512, SHA512_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
/* SHA512-HMAC */
{SUN_CKM_SHA512_HMAC, SHA512_HMAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
/* SHA512-HMAC GENERAL */
{SUN_CKM_SHA512_HMAC_GENERAL, SHA512_HMAC_GEN_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
};
-static void sha2_provider_status(crypto_provider_handle_t, uint_t *);
-
-static crypto_control_ops_t sha2_control_ops = {
- sha2_provider_status
-};
+static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *);
+static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *);
+static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *);
+static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *);
+static int sha2_digest_atomic(crypto_mechanism_t *, crypto_data_t *,
+ crypto_data_t *);
-static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_req_handle_t);
-static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha2_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-
-static crypto_digest_ops_t sha2_digest_ops = {
+static const crypto_digest_ops_t sha2_digest_ops = {
.digest_init = sha2_digest_init,
.digest = sha2_digest,
.digest_update = sha2_digest_update,
- .digest_key = NULL,
.digest_final = sha2_digest_final,
.digest_atomic = sha2_digest_atomic
};
static int sha2_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha2_mac_update(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
-static int sha2_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha2_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-
-static crypto_mac_ops_t sha2_mac_ops = {
+ crypto_spi_ctx_template_t);
+static int sha2_mac_update(crypto_ctx_t *, crypto_data_t *);
+static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *);
+static int sha2_mac_atomic(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
+static int sha2_mac_verify_atomic(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
+
+static const crypto_mac_ops_t sha2_mac_ops = {
.mac_init = sha2_mac_init,
.mac = NULL,
.mac_update = sha2_mac_update,
@@ -172,42 +122,28 @@ static crypto_mac_ops_t sha2_mac_ops = {
.mac_verify_atomic = sha2_mac_verify_atomic
};
-static int sha2_create_ctx_template(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
- size_t *, crypto_req_handle_t);
+static int sha2_create_ctx_template(crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t *, size_t *);
static int sha2_free_context(crypto_ctx_t *);
-static crypto_ctx_ops_t sha2_ctx_ops = {
+static const crypto_ctx_ops_t sha2_ctx_ops = {
.create_ctx_template = sha2_create_ctx_template,
.free_context = sha2_free_context
};
-static crypto_ops_t sha2_crypto_ops = {{{{{
- &sha2_control_ops,
+static const crypto_ops_t sha2_crypto_ops = {
&sha2_digest_ops,
NULL,
&sha2_mac_ops,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- &sha2_ctx_ops
-}}}}};
+ &sha2_ctx_ops,
+};
-static crypto_provider_info_t sha2_prov_info = {{{{
- CRYPTO_SPI_VERSION_1,
+static const crypto_provider_info_t sha2_prov_info = {
"SHA2 Software Provider",
- CRYPTO_SW_PROVIDER,
- NULL,
&sha2_crypto_ops,
- sizeof (sha2_mech_info_tab)/sizeof (crypto_mech_info_t),
+ sizeof (sha2_mech_info_tab) / sizeof (crypto_mech_info_t),
sha2_mech_info_tab
-}}}};
+};
static crypto_kcf_provider_handle_t sha2_prov_handle = 0;
@@ -216,9 +152,6 @@ sha2_mod_init(void)
{
int ret;
- if ((ret = mod_install(&modlinkage)) != 0)
- return (ret);
-
/*
* Register with KCF. If the registration fails, log an
* error but do not uninstall the module, since the functionality
@@ -235,7 +168,7 @@ sha2_mod_init(void)
int
sha2_mod_fini(void)
{
- int ret;
+ int ret = 0;
if (sha2_prov_handle != 0) {
if ((ret = crypto_unregister_provider(sha2_prov_handle)) !=
@@ -248,17 +181,7 @@ sha2_mod_fini(void)
sha2_prov_handle = 0;
}
- return (mod_remove(&modlinkage));
-}
-
-/*
- * KCF software provider control entry points.
- */
-/* ARGSUSED */
-static void
-sha2_provider_status(crypto_provider_handle_t provider, uint_t *status)
-{
- *status = CRYPTO_PROVIDER_READY;
+ return (ret);
}
/*
@@ -266,15 +189,13 @@ sha2_provider_status(crypto_provider_handle_t provider, uint_t *status)
*/
static int
-sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_req_handle_t req)
+sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism)
{
/*
* Allocate and initialize SHA2 context.
*/
- ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t),
- crypto_kmflag(req));
+ ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t), KM_SLEEP);
if (ctx->cc_provider_private == NULL)
return (CRYPTO_HOST_MEMORY);
@@ -387,9 +308,9 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
*/
SHA2Final(digest_scratch, sha2_ctx);
- bcopy(digest_scratch, (uchar_t *)
+ memcpy((uchar_t *)
zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
- digest_len);
+ digest_scratch, digest_len);
} else {
SHA2Final((uchar_t *)zfs_uio_iovbase(digest->
cd_uio, vec_idx) + offset,
@@ -414,8 +335,9 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
cur_len =
MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) -
offset, length);
- bcopy(digest_tmp + scratch_offset,
+ memcpy(
zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
+ digest_tmp + scratch_offset,
cur_len);
length -= cur_len;
@@ -438,10 +360,8 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
return (CRYPTO_SUCCESS);
}
-/* ARGSUSED */
static int
-sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
+sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest)
{
int ret = CRYPTO_SUCCESS;
uint_t sha_digest_len;
@@ -526,10 +446,8 @@ sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
return (ret);
}
-/* ARGSUSED */
static int
-sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
- crypto_req_handle_t req)
+sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data)
{
int ret = CRYPTO_SUCCESS;
@@ -555,10 +473,8 @@ sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
return (ret);
}
-/* ARGSUSED */
static int
-sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
- crypto_req_handle_t req)
+sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest)
{
int ret = CRYPTO_SUCCESS;
uint_t sha_digest_len;
@@ -618,12 +534,9 @@ sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
return (ret);
}
-/* ARGSUSED */
static int
-sha2_digest_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
+sha2_digest_atomic(crypto_mechanism_t *mechanism, crypto_data_t *data,
+ crypto_data_t *digest)
{
int ret = CRYPTO_SUCCESS;
SHA2_CTX sha2_ctx;
@@ -717,8 +630,8 @@ sha2_digest_atomic(crypto_provider_handle_t provider,
static void
sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
{
- uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)];
- uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)];
+ uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)] = {0};
+ uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)] = {0};
int i, block_size, blocks_per_int64;
/* Determine the block size */
@@ -730,10 +643,15 @@ sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
blocks_per_int64 = SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t);
}
- (void) bzero(ipad, block_size);
- (void) bzero(opad, block_size);
- (void) bcopy(keyval, ipad, length_in_bytes);
- (void) bcopy(keyval, opad, length_in_bytes);
+ (void) memset(ipad, 0, block_size);
+ (void) memset(opad, 0, block_size);
+
+ if (keyval != NULL) {
+ (void) memcpy(ipad, keyval, length_in_bytes);
+ (void) memcpy(opad, keyval, length_in_bytes);
+ } else {
+ ASSERT0(length_in_bytes);
+ }
/* XOR key with ipad (0x36) and opad (0x5c) */
for (i = 0; i < blocks_per_int64; i++) {
@@ -748,15 +666,13 @@ sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
/* perform SHA2 on opad */
SHA2Init(ctx->hc_mech_type, &ctx->hc_ocontext);
SHA2Update(&ctx->hc_ocontext, (uint8_t *)opad, block_size);
-
}
/*
* Initialize a SHA2-HMAC context for a KCF MAC operation.
*/
static int
sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
- crypto_req_handle_t req)
+ crypto_key_t *key, crypto_spi_ctx_template_t ctx_template)
{
int ret = CRYPTO_SUCCESS;
uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
@@ -783,18 +699,15 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
return (CRYPTO_MECHANISM_INVALID);
}
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t),
- crypto_kmflag(req));
+ ctx->cc_provider_private =
+ kmem_alloc(sizeof (sha2_hmac_ctx_t), KM_SLEEP);
if (ctx->cc_provider_private == NULL)
return (CRYPTO_HOST_MEMORY);
PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
if (ctx_template != NULL) {
/* reuse context template */
- bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx),
+ memcpy(PROV_SHA2_HMAC_CTX(ctx), ctx_template,
sizeof (sha2_hmac_ctx_t));
} else {
/* no context template, compute context */
@@ -823,16 +736,19 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
*/
if (mechanism->cm_type % 3 == 2) {
if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t))
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- PROV_SHA2_GET_DIGEST_LEN(mechanism,
- PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
- if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len)
+ mechanism->cm_param_len != sizeof (ulong_t)) {
ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ } else {
+ PROV_SHA2_GET_DIGEST_LEN(mechanism,
+ PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
+ if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len >
+ sha_digest_len)
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
}
if (ret != CRYPTO_SUCCESS) {
- bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
+ memset(ctx->cc_provider_private, 0, sizeof (sha2_hmac_ctx_t));
kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
ctx->cc_provider_private = NULL;
}
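The cm_type % 3 == 2 test above leans on the layout of the SHA2 mechanism constants: as sha2_mech_info_tab earlier in this diff shows, each family contributes a (digest, HMAC, HMAC-general) triple, so the general-length HMAC variants all land on index 2 modulo 3. A hypothetical enum sketch of the assumed ordering (illustration only, not the real type):

/* Hypothetical illustration of the ordering the % 3 test relies on. */
typedef enum {
	SHA256_SKETCH = 0,	/* 0 % 3 == 0: plain digest */
	SHA256_HMAC_SKETCH,	/* 1 % 3 == 1: fixed-length HMAC */
	SHA256_HMAC_GEN_SKETCH,	/* 2 % 3 == 2: general-length HMAC */
	SHA384_SKETCH,		/* 3 % 3 == 0 */
	SHA384_HMAC_SKETCH,	/* 4 % 3 == 1 */
	SHA384_HMAC_GEN_SKETCH,	/* 5 % 3 == 2 */
	SHA512_SKETCH,		/* 6 % 3 == 0 */
	SHA512_HMAC_SKETCH,	/* 7 % 3 == 1 */
	SHA512_HMAC_GEN_SKETCH	/* 8 % 3 == 2 */
} sha2_mech_sketch_t;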
@@ -840,10 +756,8 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
return (ret);
}
-/* ARGSUSED */
static int
-sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data,
- crypto_req_handle_t req)
+sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data)
{
int ret = CRYPTO_SUCCESS;
@@ -870,9 +784,8 @@ sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data,
return (ret);
}
-/* ARGSUSED */
static int
-sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
+sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac)
{
int ret = CRYPTO_SUCCESS;
uchar_t digest[SHA512_DIGEST_LENGTH];
@@ -939,8 +852,8 @@ sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
*/
SHA2Final(digest,
&PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext);
- bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len);
+ memcpy((unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest, digest_len);
} else {
SHA2Final((unsigned char *)mac->cd_raw.iov_base +
mac->cd_offset,
@@ -961,7 +874,7 @@ sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
else
mac->cd_length = 0;
- bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
+ memset(ctx->cc_provider_private, 0, sizeof (sha2_hmac_ctx_t));
kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
ctx->cc_provider_private = NULL;
@@ -983,12 +896,10 @@ sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
} \
}
-/* ARGSUSED */
static int
-sha2_mac_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+sha2_mac_atomic(crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+ crypto_spi_ctx_template_t ctx_template)
{
int ret = CRYPTO_SUCCESS;
uchar_t digest[SHA512_DIGEST_LENGTH];
@@ -1017,13 +928,9 @@ sha2_mac_atomic(crypto_provider_handle_t provider,
return (CRYPTO_MECHANISM_INVALID);
}
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
if (ctx_template != NULL) {
/* reuse context template */
- bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ memcpy(&sha2_hmac_ctx, ctx_template, sizeof (sha2_hmac_ctx_t));
} else {
sha2_hmac_ctx.hc_mech_type = mechanism->cm_type;
/* no context template, initialize context */
@@ -1096,8 +1003,8 @@ sha2_mac_atomic(crypto_provider_handle_t provider,
* the user only what was requested.
*/
SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext);
- bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len);
+ memcpy((unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest, digest_len);
} else {
SHA2Final((unsigned char *)mac->cd_raw.iov_base +
mac->cd_offset, &sha2_hmac_ctx.hc_ocontext);
@@ -1116,17 +1023,15 @@ sha2_mac_atomic(crypto_provider_handle_t provider,
return (CRYPTO_SUCCESS);
}
bail:
- bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ memset(&sha2_hmac_ctx, 0, sizeof (sha2_hmac_ctx_t));
mac->cd_length = 0;
return (ret);
}
-/* ARGSUSED */
static int
-sha2_mac_verify_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+sha2_mac_verify_atomic(crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+ crypto_spi_ctx_template_t ctx_template)
{
int ret = CRYPTO_SUCCESS;
uchar_t digest[SHA512_DIGEST_LENGTH];
@@ -1155,13 +1060,9 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider,
return (CRYPTO_MECHANISM_INVALID);
}
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
if (ctx_template != NULL) {
/* reuse context template */
- bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ memcpy(&sha2_hmac_ctx, ctx_template, sizeof (sha2_hmac_ctx_t));
} else {
sha2_hmac_ctx.hc_mech_type = mechanism->cm_type;
/* no context template, initialize context */
@@ -1238,7 +1139,7 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider,
switch (mac->cd_format) {
case CRYPTO_DATA_RAW:
- if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
+ if (memcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
mac->cd_offset, digest_len) != 0)
ret = CRYPTO_INVALID_MAC;
break;
@@ -1271,7 +1172,7 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider,
cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) -
offset, length);
- if (bcmp(digest + scratch_offset,
+ if (memcmp(digest + scratch_offset,
zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset,
cur_len) != 0) {
ret = CRYPTO_INVALID_MAC;
@@ -1292,7 +1193,7 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider,
return (ret);
bail:
- bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ memset(&sha2_hmac_ctx, 0, sizeof (sha2_hmac_ctx_t));
mac->cd_length = 0;
return (ret);
}
@@ -1301,12 +1202,9 @@ bail:
* KCF software provider context management entry points.
*/
-/* ARGSUSED */
static int
-sha2_create_ctx_template(crypto_provider_handle_t provider,
- crypto_mechanism_t *mechanism, crypto_key_t *key,
- crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
- crypto_req_handle_t req)
+sha2_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size)
{
sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl;
uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
@@ -1333,15 +1231,10 @@ sha2_create_ctx_template(crypto_provider_handle_t provider,
return (CRYPTO_MECHANISM_INVALID);
}
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
/*
* Allocate and initialize SHA2 context.
*/
- sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t),
- crypto_kmflag(req));
+ sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t), KM_SLEEP);
if (sha2_hmac_ctx_tmpl == NULL)
return (CRYPTO_HOST_MEMORY);
@@ -1391,7 +1284,7 @@ sha2_free_context(crypto_ctx_t *ctx)
else
ctx_len = sizeof (sha2_hmac_ctx_t);
- bzero(ctx->cc_provider_private, ctx_len);
+ memset(ctx->cc_provider_private, 0, ctx_len);
kmem_free(ctx->cc_provider_private, ctx_len);
ctx->cc_provider_private = NULL;
diff --git a/sys/contrib/openzfs/module/icp/io/skein_mod.c b/sys/contrib/openzfs/module/icp/io/skein_mod.c
index ac7d201eb708..221e1debd45b 100644
--- a/sys/contrib/openzfs/module/icp/io/skein_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/skein_mod.c
@@ -23,7 +23,6 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
*/
-#include <sys/modctl.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/spi.h>
@@ -31,78 +30,42 @@
#define SKEIN_MODULE_IMPL
#include <sys/skein.h>
-/*
- * Like the sha2 module, we create the skein module with two modlinkages:
- * - modlmisc to allow direct calls to Skein_* API functions.
- * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF).
- */
-static struct modlmisc modlmisc = {
- &mod_cryptoops,
- "Skein Message-Digest Algorithm"
-};
-
-static struct modlcrypto modlcrypto = {
- &mod_cryptoops,
- "Skein Kernel SW Provider"
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, {&modlmisc, &modlcrypto, NULL}
-};
-
-static crypto_mech_info_t skein_mech_info_tab[] = {
+static const crypto_mech_info_t skein_mech_info_tab[] = {
{CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
{CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
{CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
{CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
{CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC},
{CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC},
};
-static void skein_provider_status(crypto_provider_handle_t, uint_t *);
+static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *);
+static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *);
+static int skein_update(crypto_ctx_t *, crypto_data_t *);
+static int skein_final(crypto_ctx_t *, crypto_data_t *);
+static int skein_digest_atomic(crypto_mechanism_t *, crypto_data_t *,
+ crypto_data_t *);
-static crypto_control_ops_t skein_control_ops = {
- skein_provider_status
-};
-
-static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_req_handle_t);
-static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
-static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
-static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-
-static crypto_digest_ops_t skein_digest_ops = {
+static const crypto_digest_ops_t skein_digest_ops = {
.digest_init = skein_digest_init,
.digest = skein_digest,
.digest_update = skein_update,
- .digest_key = NULL,
.digest_final = skein_final,
.digest_atomic = skein_digest_atomic
};
static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
+ crypto_spi_ctx_template_t);
+static int skein_mac_atomic(crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t);
-static crypto_mac_ops_t skein_mac_ops = {
+static const crypto_mac_ops_t skein_mac_ops = {
.mac_init = skein_mac_init,
.mac = NULL,
.mac_update = skein_update, /* using regular digest update is OK here */
@@ -111,42 +74,28 @@ static crypto_mac_ops_t skein_mac_ops = {
.mac_verify_atomic = NULL
};
-static int skein_create_ctx_template(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
- size_t *, crypto_req_handle_t);
+static int skein_create_ctx_template(crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t *, size_t *);
static int skein_free_context(crypto_ctx_t *);
-static crypto_ctx_ops_t skein_ctx_ops = {
+static const crypto_ctx_ops_t skein_ctx_ops = {
.create_ctx_template = skein_create_ctx_template,
.free_context = skein_free_context
};
-static crypto_ops_t skein_crypto_ops = {{{{{
- &skein_control_ops,
+static const crypto_ops_t skein_crypto_ops = {
&skein_digest_ops,
NULL,
&skein_mac_ops,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
&skein_ctx_ops,
-}}}}};
+};
-static crypto_provider_info_t skein_prov_info = {{{{
- CRYPTO_SPI_VERSION_1,
+static const crypto_provider_info_t skein_prov_info = {
"Skein Software Provider",
- CRYPTO_SW_PROVIDER,
- NULL,
&skein_crypto_ops,
sizeof (skein_mech_info_tab) / sizeof (crypto_mech_info_t),
skein_mech_info_tab
-}}}};
+};
static crypto_kcf_provider_handle_t skein_prov_handle = 0;
@@ -214,11 +163,6 @@ skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result)
int
skein_mod_init(void)
{
- int error;
-
- if ((error = mod_install(&modlinkage)) != 0)
- return (error);
-
/*
* Try to register with KCF - failure shouldn't unload us, since we
* still may want to continue providing misc/skein functionality.
@@ -231,7 +175,7 @@ skein_mod_init(void)
int
skein_mod_fini(void)
{
- int ret;
+ int ret = 0;
if (skein_prov_handle != 0) {
if ((ret = crypto_unregister_provider(skein_prov_handle)) !=
@@ -244,17 +188,7 @@ skein_mod_fini(void)
skein_prov_handle = 0;
}
- return (mod_remove(&modlinkage));
-}
-
-/*
- * KCF software provider control entry points.
- */
-/* ARGSUSED */
-static void
-skein_provider_status(crypto_provider_handle_t provider, uint_t *status)
-{
- *status = CRYPTO_PROVIDER_READY;
+ return (0);
}
/*
@@ -318,8 +252,7 @@ skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data)
* Performs a Final on a context and writes to a uio digest output.
*/
static int
-skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
- crypto_req_handle_t req)
+skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest)
{
off_t offset = digest->cd_offset;
uint_t vec_idx = 0;
@@ -352,15 +285,15 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
size_t cur_len;
digest_tmp = kmem_alloc(CRYPTO_BITS2BYTES(
- ctx->sc_digest_bitlen), crypto_kmflag(req));
+ ctx->sc_digest_bitlen), KM_SLEEP);
if (digest_tmp == NULL)
return (CRYPTO_HOST_MEMORY);
SKEIN_OP(ctx, Final, digest_tmp);
while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) {
cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset,
length);
- bcopy(digest_tmp + scratch_offset,
- zfs_uio_iovbase(uio, vec_idx) + offset, cur_len);
+ memcpy(zfs_uio_iovbase(uio, vec_idx) + offset,
+ digest_tmp + scratch_offset, cur_len);
length -= cur_len;
vec_idx++;
@@ -396,16 +329,14 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
* for Skein-1024).
*/
static int
-skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_req_handle_t req)
+skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism)
{
int error = CRYPTO_SUCCESS;
if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
return (CRYPTO_MECHANISM_INVALID);
- SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
- crypto_kmflag(req));
+ SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), KM_SLEEP);
if (SKEIN_CTX(ctx) == NULL)
return (CRYPTO_HOST_MEMORY);
@@ -418,7 +349,7 @@ skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
return (CRYPTO_SUCCESS);
errout:
- bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx)));
kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
SKEIN_CTX_LVALUE(ctx) = NULL;
return (error);
@@ -430,8 +361,7 @@ errout:
* see what to pass here.
*/
static int
-skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
+skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest)
{
int error = CRYPTO_SUCCESS;
@@ -444,15 +374,15 @@ skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
return (CRYPTO_BUFFER_TOO_SMALL);
}
- error = skein_update(ctx, data, req);
+ error = skein_update(ctx, data);
if (error != CRYPTO_SUCCESS) {
- bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx)));
kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
SKEIN_CTX_LVALUE(ctx) = NULL;
digest->cd_length = 0;
return (error);
}
- error = skein_final(ctx, digest, req);
+ error = skein_final(ctx, digest);
return (error);
}
@@ -462,9 +392,8 @@ skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
* can push more data). This is used both for digest and MAC operation.
* Supported input data formats are raw, uio and mblk.
*/
-/*ARGSUSED*/
static int
-skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
+skein_update(crypto_ctx_t *ctx, crypto_data_t *data)
{
int error = CRYPTO_SUCCESS;
@@ -491,9 +420,8 @@ skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
* for digest and MAC operation.
* Supported output digest formats are raw, uio and mblk.
*/
-/*ARGSUSED*/
static int
-skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+skein_final_nofree(crypto_ctx_t *ctx, crypto_data_t *digest)
{
int error = CRYPTO_SUCCESS;
@@ -512,7 +440,7 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
(uint8_t *)digest->cd_raw.iov_base + digest->cd_offset);
break;
case CRYPTO_DATA_UIO:
- error = skein_digest_final_uio(SKEIN_CTX(ctx), digest, req);
+ error = skein_digest_final_uio(SKEIN_CTX(ctx), digest);
break;
default:
error = CRYPTO_ARGUMENTS_BAD;
@@ -524,7 +452,18 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
else
digest->cd_length = 0;
- bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ return (error);
+}
+
+static int
+skein_final(crypto_ctx_t *ctx, crypto_data_t *digest)
+{
+ int error = skein_final_nofree(ctx, digest);
+
+ if (error == CRYPTO_BUFFER_TOO_SMALL)
+ return (error);
+
+ memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx)));
kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx))));
SKEIN_CTX_LVALUE(ctx) = NULL;
@@ -537,15 +476,13 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
* `data' and writing the output to `digest'.
* Supported input/output formats are raw, uio and mblk.
*/
-/*ARGSUSED*/
static int
-skein_digest_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req)
+skein_digest_atomic(crypto_mechanism_t *mechanism, crypto_data_t *data,
+ crypto_data_t *digest)
{
- int error;
- skein_ctx_t skein_ctx;
- crypto_ctx_t ctx;
+ int error;
+ skein_ctx_t skein_ctx;
+ crypto_ctx_t ctx;
SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
/* Init */
@@ -557,9 +494,9 @@ skein_digest_atomic(crypto_provider_handle_t provider,
goto out;
SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen);
- if ((error = skein_update(&ctx, data, digest)) != CRYPTO_SUCCESS)
+ if ((error = skein_update(&ctx, data)) != CRYPTO_SUCCESS)
goto out;
- if ((error = skein_final(&ctx, data, digest)) != CRYPTO_SUCCESS)
+ if ((error = skein_final_nofree(&ctx, data)) != CRYPTO_SUCCESS)
goto out;
out:
@@ -568,7 +505,7 @@ out:
CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen);
else
digest->cd_length = 0;
- bzero(&skein_ctx, sizeof (skein_ctx));
+ memset(&skein_ctx, 0, sizeof (skein_ctx));
return (error);
}
@@ -585,8 +522,6 @@ skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism,
if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type))
return (CRYPTO_MECHANISM_INVALID);
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
ctx->sc_mech_type = mechanism->cm_type;
error = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen);
if (error != CRYPTO_SUCCESS)
@@ -610,18 +545,16 @@ skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism,
*/
static int
skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
- crypto_req_handle_t req)
+ crypto_key_t *key, crypto_spi_ctx_template_t ctx_template)
{
int error;
- SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
- crypto_kmflag(req));
+ SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), KM_SLEEP);
if (SKEIN_CTX(ctx) == NULL)
return (CRYPTO_HOST_MEMORY);
if (ctx_template != NULL) {
- bcopy(ctx_template, SKEIN_CTX(ctx),
+ memcpy(SKEIN_CTX(ctx), ctx_template,
sizeof (*SKEIN_CTX(ctx)));
} else {
error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key);
@@ -631,7 +564,7 @@ skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
return (CRYPTO_SUCCESS);
errout:
- bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx)));
kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
return (error);
}
@@ -640,40 +573,38 @@ errout:
* The MAC update and final calls are reused from the regular digest code.
*/
-/*ARGSUSED*/
/*
* Same as skein_digest_atomic, performs an atomic Skein MAC operation in
* one step. All the same properties apply to the arguments of this
* function as to those of the partial operations above.
*/
static int
-skein_mac_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+skein_mac_atomic(crypto_mechanism_t *mechanism,
crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+ crypto_spi_ctx_template_t ctx_template)
{
/* faux crypto context just for skein_digest_{update,final} */
- int error;
- crypto_ctx_t ctx;
- skein_ctx_t skein_ctx;
+ int error;
+ crypto_ctx_t ctx;
+ skein_ctx_t skein_ctx;
SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
if (ctx_template != NULL) {
- bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx));
+ memcpy(&skein_ctx, ctx_template, sizeof (skein_ctx));
} else {
error = skein_mac_ctx_build(&skein_ctx, mechanism, key);
if (error != CRYPTO_SUCCESS)
goto errout;
}
- if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
+ if ((error = skein_update(&ctx, data)) != CRYPTO_SUCCESS)
goto errout;
- if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS)
+ if ((error = skein_final_nofree(&ctx, mac)) != CRYPTO_SUCCESS)
goto errout;
return (CRYPTO_SUCCESS);
errout:
- bzero(&skein_ctx, sizeof (skein_ctx));
+ memset(&skein_ctx, 0, sizeof (skein_ctx));
return (error);
}
@@ -686,17 +617,14 @@ errout:
* properties apply to the arguments of this function as to those of
* skein_mac_init.
*/
-/*ARGSUSED*/
static int
-skein_create_ctx_template(crypto_provider_handle_t provider,
- crypto_mechanism_t *mechanism, crypto_key_t *key,
- crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
- crypto_req_handle_t req)
+skein_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size)
{
- int error;
- skein_ctx_t *ctx_tmpl;
+ int error;
+ skein_ctx_t *ctx_tmpl;
- ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), crypto_kmflag(req));
+ ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), KM_SLEEP);
if (ctx_tmpl == NULL)
return (CRYPTO_HOST_MEMORY);
error = skein_mac_ctx_build(ctx_tmpl, mechanism, key);
@@ -707,7 +635,7 @@ skein_create_ctx_template(crypto_provider_handle_t provider,
return (CRYPTO_SUCCESS);
errout:
- bzero(ctx_tmpl, sizeof (*ctx_tmpl));
+ memset(ctx_tmpl, 0, sizeof (*ctx_tmpl));
kmem_free(ctx_tmpl, sizeof (*ctx_tmpl));
return (error);
}
@@ -719,7 +647,7 @@ static int
skein_free_context(crypto_ctx_t *ctx)
{
if (SKEIN_CTX(ctx) != NULL) {
- bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx)));
kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
SKEIN_CTX_LVALUE(ctx) = NULL;
}
diff --git a/sys/contrib/openzfs/module/icp/os/modconf.c b/sys/contrib/openzfs/module/icp/os/modconf.c
deleted file mode 100644
index 3743416ed951..000000000000
--- a/sys/contrib/openzfs/module/icp/os/modconf.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/modctl.h>
-
-/*
- * Null operations; used for uninitialized and "misc" modules.
- */
-static int mod_null(struct modlmisc *, struct modlinkage *);
-static int mod_infonull(void *, struct modlinkage *, int *);
-
-/*
- * Cryptographic Modules
- */
-struct mod_ops mod_cryptoops = {
- .modm_install = mod_null,
- .modm_remove = mod_null,
- .modm_info = mod_infonull
-};
-
-/*
- * Null operation; return 0.
- */
-static int
-mod_null(struct modlmisc *modl, struct modlinkage *modlp)
-{
- return (0);
-}
-
-/*
- * Status for User modules.
- */
-static int
-mod_infonull(void *modl, struct modlinkage *modlp, int *p0)
-{
- *p0 = -1; /* for modinfo display */
- return (0);
-}
-
-/*
- * Install a module.
- * (This routine is in the Solaris SPARC DDI/DKI)
- */
-int
-mod_install(struct modlinkage *modlp)
-{
- int retval = -1; /* No linkage structures */
- struct modlmisc **linkpp;
- struct modlmisc **linkpp1;
-
- if (modlp->ml_rev != MODREV_1) {
- cmn_err(CE_WARN, "mod_install: "
- "modlinkage structure is not MODREV_1\n");
- return (EINVAL);
- }
- linkpp = (struct modlmisc **)&modlp->ml_linkage[0];
-
- while (*linkpp != NULL) {
- if ((retval = MODL_INSTALL(*linkpp, modlp)) != 0) {
- linkpp1 = (struct modlmisc **)&modlp->ml_linkage[0];
-
- while (linkpp1 != linkpp) {
- MODL_REMOVE(*linkpp1, modlp); /* clean up */
- linkpp1++;
- }
- break;
- }
- linkpp++;
- }
- return (retval);
-}
-
-static char *reins_err =
- "Could not reinstall %s\nReboot to correct the problem";
-
-/*
- * Remove a module. This is called by the module wrapper routine.
- * (This routine is in the Solaris SPARC DDI/DKI)
- */
-int
-mod_remove(struct modlinkage *modlp)
-{
- int retval = 0;
- struct modlmisc **linkpp, *last_linkp;
-
- linkpp = (struct modlmisc **)&modlp->ml_linkage[0];
-
- while (*linkpp != NULL) {
- if ((retval = MODL_REMOVE(*linkpp, modlp)) != 0) {
- last_linkp = *linkpp;
- linkpp = (struct modlmisc **)&modlp->ml_linkage[0];
- while (*linkpp != last_linkp) {
- if (MODL_INSTALL(*linkpp, modlp) != 0) {
- cmn_err(CE_WARN, reins_err,
- (*linkpp)->misc_linkinfo);
- break;
- }
- linkpp++;
- }
- break;
- }
- linkpp++;
- }
- return (retval);
-}
-
-/*
- * Get module status.
- * (This routine is in the Solaris SPARC DDI/DKI)
- */
-int
-mod_info(struct modlinkage *modlp, struct modinfo *modinfop)
-{
- int i;
- int retval = 0;
- struct modspecific_info *msip;
- struct modlmisc **linkpp;
-
- modinfop->mi_rev = modlp->ml_rev;
-
- linkpp = (struct modlmisc **)modlp->ml_linkage;
- msip = &modinfop->mi_msinfo[0];
-
- for (i = 0; i < MODMAXLINK; i++) {
- if (*linkpp == NULL) {
- msip->msi_linkinfo[0] = '\0';
- } else {
- (void) strlcpy(msip->msi_linkinfo,
- (*linkpp)->misc_linkinfo, MODMAXLINKINFOLEN);
- retval = MODL_INFO(*linkpp, modlp, &msip->msi_p0);
- if (retval != 0)
- break;
- linkpp++;
- }
- msip++;
- }
-
- if (modinfop->mi_info == MI_INFO_LINKAGE) {
- /*
- * Slight kludge used to extract the address of the
- * modlinkage structure from the module (just after
- * loading a module for the very first time)
- */
- modinfop->mi_base = (void *)modlp;
- }
-
- if (retval == 0)
- return (1);
- return (0);
-}
diff --git a/sys/contrib/openzfs/module/icp/os/modhash.c b/sys/contrib/openzfs/module/icp/os/modhash.c
deleted file mode 100644
index a897871001ce..000000000000
--- a/sys/contrib/openzfs/module/icp/os/modhash.c
+++ /dev/null
@@ -1,927 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * mod_hash: flexible hash table implementation.
- *
- * This is a reasonably fast, reasonably flexible hash table implementation
- * which features pluggable hash algorithms to support storing arbitrary keys
- * and values. It is designed to handle small (< 100,000 items) amounts of
- * data. The hash uses chaining to resolve collisions, and does not feature a
- * mechanism to grow the hash. Care must be taken to pick nchains to be large
- * enough for the application at hand, or lots of time will be wasted searching
- * hash chains.
- *
- * The client of the hash is required to supply a number of items to support
- * the various hash functions:
- *
- * - Destructor functions for the key and value being hashed.
- * A destructor is responsible for freeing an object when the hash
- * table is no longer storing it. Since keys and values can be of
- * arbitrary type, separate destructors for keys & values are used.
- * These may be mod_hash_null_keydtor and mod_hash_null_valdtor if no
- * destructor is needed for either a key or value.
- *
- *  - A hashing algorithm which returns a uint_t representing a hash index.
- *    The number returned need _not_ be between 0 and nchains; the mod_hash
- *    code will take care of reducing it.  The first argument (before the
- *    key) to the hashing function is a void * that represents
- *    hash_alg_data; this is provided so that the hashing algorithm can
- *    maintain some state across calls, or keep algorithm-specific
- *    constants associated with the hash table.
- *
- * A pointer-hashing and a string-hashing algorithm are supplied in
- * this file.
- *
- * - A key comparator (a la qsort).
- * This is used when searching the hash chain. The key comparator
- * determines if two keys match. It should follow the return value
- * semantics of strcmp.
- *
- *    String and pointer comparators are supplied in this file.
- *
- * mod_hash_create_strhash() and mod_hash_create_ptrhash() provide good
- * examples of how to create a customized hash table.
- *
- * Basic hash operations:
- *
- * mod_hash_create_strhash(name, nchains, dtor):
- *	create a hash using strings as keys.
- *	NOTE: This creates a hash which automatically cleans up the
- *	strings it is given as keys.
- *
- * mod_hash_create_ptrhash(name, nchains, dtor, key_elem_size):
- * create a hash using pointers as keys.
- *
- * mod_hash_create_extended(name, nchains, kdtor, vdtor,
- * hash_alg, hash_alg_data,
- * keycmp, sleep)
- * create a customized hash table.
- *
- * mod_hash_destroy_hash(hash):
- * destroy the given hash table, calling the key and value destructors
- * on each key-value pair stored in the hash.
- *
- * mod_hash_insert(hash, key, val):
- *	place a key, value pair into the given hash.
- *	Duplicate keys are rejected.
- *
- * mod_hash_insert_reserve(hash, key, val, handle):
- *	place a key, value pair into the given hash, using handle to indicate
- *	the reserved storage for the pair.  (No memory allocation is needed
- *	during a mod_hash_insert_reserve.)  Duplicate keys are rejected.
- *
- * mod_hash_reserve(hash, *handle):
- * reserve storage for a key-value pair using the memory allocation
- * policy of 'hash', returning the storage handle in 'handle'.
- *
- * mod_hash_reserve_nosleep(hash, *handle):
- *	reserve storage for a key-value pair, ignoring the memory allocation
- *	policy of 'hash' and never sleeping, returning the storage handle
- *	in 'handle'.
- *
- * mod_hash_remove(hash, key, *val):
- * remove a key-value pair with key 'key' from 'hash', destroying the
- * stored key, and returning the value in val.
- *
- * mod_hash_replace(hash, key, val):
- * atomically remove an existing key-value pair from a hash, and replace
- * the key and value with the ones supplied. The removed key and value
- * (if any) are destroyed.
- *
- * mod_hash_destroy(hash, key):
- * remove a key-value pair with key 'key' from 'hash', destroying both
- * stored key and stored value.
- *
- * mod_hash_find(hash, key, val):
- * find a value in the hash table corresponding to the given key.
- *
- * mod_hash_find_cb(hash, key, val, found_callback):
- * find a value in the hash table corresponding to the given key.
- * If a value is found, call specified callback passing key and val to it.
- * The callback is called with the hash lock held.
- * It is intended to be used in situations where the act of locating the
- * data must also modify it - such as in reference counting schemes.
- *
- * mod_hash_walk(hash, callback(key, elem, arg), arg):
- *	walks all the elements in the hashtable and invokes the callback
- *	function with the key/value pair for each element.  The hashtable
- *	is locked for readers so the callback function should not attempt
- *	to do any updates to the hashtable.  The callback function should
- *	return MH_WALK_CONTINUE to continue walking the hashtable or
- *	MH_WALK_TERMINATE to abort the walk of the hashtable.
- *
- * mod_hash_clear(hash):
- * clears the given hash table of entries, calling the key and value
- * destructors for every element in the hash.
- */
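
As a concrete illustration of the basic operations above, a minimal sketch
of a string-keyed consumer (the names are hypothetical):

    static void
    example_strhash_usage(void)
    {
	    mod_hash_t *h;
	    mod_hash_val_t val;
	    char *key;

	    h = mod_hash_create_strhash("example_hash", 64,
	        mod_hash_null_valdtor);

	    /* strhash frees keys with kmem_free(), so allocate them. */
	    key = kmem_alloc(4, KM_SLEEP);
	    (void) strlcpy(key, "zfs", 4);
	    (void) mod_hash_insert(h, (mod_hash_key_t)key,
	        (mod_hash_val_t)(uintptr_t)1);

	    if (mod_hash_find(h, (mod_hash_key_t)"zfs", &val) == 0) {
		    /* val now holds the stored value */
	    }

	    mod_hash_destroy_strhash(h);	/* dtors run on leftovers */
    }
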
-
-#include <sys/zfs_context.h>
-#include <sys/bitmap.h>
-#include <sys/modhash_impl.h>
-#include <sys/sysmacros.h>
-
-/*
- * MH_KEY_DESTROY()
- * Invoke the key destructor.
- */
-#define MH_KEY_DESTROY(hash, key) ((hash->mh_kdtor)(key))
-
-/*
- * MH_VAL_DESTROY()
- * Invoke the value destructor.
- */
-#define MH_VAL_DESTROY(hash, val) ((hash->mh_vdtor)(val))
-
-/*
- * MH_KEYCMP()
- * Call the key comparator for the given hash keys.
- */
-#define MH_KEYCMP(hash, key1, key2) ((hash->mh_keycmp)(key1, key2))
-
-/*
- * Cache for struct mod_hash_entry
- */
-kmem_cache_t *mh_e_cache = NULL;
-mod_hash_t *mh_head = NULL;
-kmutex_t mh_head_lock;
-
-/*
- * mod_hash_null_keydtor()
- * mod_hash_null_valdtor()
- * no-op key and value destructors.
- */
-/*ARGSUSED*/
-void
-mod_hash_null_keydtor(mod_hash_key_t key)
-{
-}
-
-/*ARGSUSED*/
-void
-mod_hash_null_valdtor(mod_hash_val_t val)
-{
-}
-
-/*
- * mod_hash_bystr()
- * mod_hash_strkey_cmp()
- * mod_hash_strkey_dtor()
- * mod_hash_strval_dtor()
- * Hash and key comparison routines for hashes with string keys.
- *
- * mod_hash_create_strhash()
- * Create a hash using strings as keys
- *
- * The string hashing algorithm is from the "Dragon Book" --
- * "Compilers: Principles, Techniques, and Tools", by Aho, Sethi, Ullman
- */
-
-/*ARGSUSED*/
-uint_t
-mod_hash_bystr(void *hash_data, mod_hash_key_t key)
-{
- uint_t hash = 0;
- uint_t g;
- char *p, *k = (char *)key;
-
- ASSERT(k);
- for (p = k; *p != '\0'; p++) {
- hash = (hash << 4) + *p;
- if ((g = (hash & 0xf0000000)) != 0) {
- hash ^= (g >> 24);
- hash ^= g;
- }
- }
- return (hash);
-}
-
-int
-mod_hash_strkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
-{
- return (strcmp((char *)key1, (char *)key2));
-}
-
-void
-mod_hash_strkey_dtor(mod_hash_key_t key)
-{
- char *c = (char *)key;
- kmem_free(c, strlen(c) + 1);
-}
-
-void
-mod_hash_strval_dtor(mod_hash_val_t val)
-{
- char *c = (char *)val;
- kmem_free(c, strlen(c) + 1);
-}
-
-mod_hash_t *
-mod_hash_create_strhash_nodtr(char *name, size_t nchains,
- void (*val_dtor)(mod_hash_val_t))
-{
- return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor,
- val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
-}
-
-mod_hash_t *
-mod_hash_create_strhash(char *name, size_t nchains,
- void (*val_dtor)(mod_hash_val_t))
-{
- return mod_hash_create_extended(name, nchains, mod_hash_strkey_dtor,
- val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
-}
-
-void
-mod_hash_destroy_strhash(mod_hash_t *strhash)
-{
- ASSERT(strhash);
- mod_hash_destroy_hash(strhash);
-}
-
-
-/*
- * mod_hash_byptr()
- * mod_hash_ptrkey_cmp()
- * Hash and key comparison routines for hashes with pointer keys.
- *
- * mod_hash_create_ptrhash()
- * mod_hash_destroy_ptrhash()
- * Create a hash that uses pointers as keys. This hash algorithm
- * picks an appropriate set of middle bits in the address to hash on
- * based on the size of the hash table and a hint about the size of
- * the items pointed at.
- */
-uint_t
-mod_hash_byptr(void *hash_data, mod_hash_key_t key)
-{
- uintptr_t k = (uintptr_t)key;
- k >>= (int)(uintptr_t)hash_data;
-
- return ((uint_t)k);
-}
-
-int
-mod_hash_ptrkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
-{
- uintptr_t k1 = (uintptr_t)key1;
- uintptr_t k2 = (uintptr_t)key2;
- if (k1 > k2)
- return (-1);
- else if (k1 < k2)
- return (1);
- else
- return (0);
-}
-
-mod_hash_t *
-mod_hash_create_ptrhash(char *name, size_t nchains,
- void (*val_dtor)(mod_hash_val_t), size_t key_elem_size)
-{
- size_t rshift;
-
- /*
- * We want to hash on the bits in the middle of the address word
- * Bits far to the right in the word have little significance, and
- * are likely to all look the same (for example, an array of
- * 256-byte structures will have the bottom 8 bits of address
- * words the same). So we want to right-shift each address to
- * ignore the bottom bits.
- *
- * The high bits, which are also unused, will get taken out when
- * mod_hash takes hashkey % nchains.
- */
- rshift = highbit64(key_elem_size);
-
- return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor,
- val_dtor, mod_hash_byptr, (void *)rshift, mod_hash_ptrkey_cmp,
- KM_SLEEP);
-}
-
-void
-mod_hash_destroy_ptrhash(mod_hash_t *hash)
-{
- ASSERT(hash);
- mod_hash_destroy_hash(hash);
-}
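
For example (a hypothetical consumer), hashing 256-byte objects by address:
highbit64(256) is 9, so mod_hash_byptr() shifts each key address right by
9 bits before the modulus is taken:

    typedef struct buf256 { char b[256]; } buf256_t;

    static mod_hash_t *
    make_buf_hash(void)
    {
	    return (mod_hash_create_ptrhash("buf_hash", 128,
	        mod_hash_null_valdtor, sizeof (buf256_t)));
    }
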
-
-/*
- * mod_hash_byid()
- * mod_hash_idkey_cmp()
- * Hash and key comparison routines for hashes with 32-bit unsigned keys.
- *
- * mod_hash_create_idhash()
- * mod_hash_destroy_idhash()
- * mod_hash_iddata_gen()
- * Create a hash that uses numeric keys.
- *
- * The hash algorithm is documented in "Introduction to Algorithms"
- * (Cormen, Leiserson, Rivest); when the hash table is created, it
- * attempts to find the next largest prime above the number of hash
- * slots. The hash index is then this number times the key modulo
- * the hash size, or (key * prime) % nchains.
- */
-uint_t
-mod_hash_byid(void *hash_data, mod_hash_key_t key)
-{
- uint_t kval = (uint_t)(uintptr_t)hash_data;
- return ((uint_t)(uintptr_t)key * (uint_t)kval);
-}
-
-int
-mod_hash_idkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
-{
- return ((uint_t)(uintptr_t)key1 - (uint_t)(uintptr_t)key2);
-}
-
-/*
- * Generate the first odd prime number greater than nchains; this value
- * is intended to be later passed in to mod_hash_create_extended() as the
- * hash_alg_data.
- */
-uint_t
-mod_hash_iddata_gen(size_t nchains)
-{
- uint_t kval, i, prime;
-
- /*
- * Pick the first (odd) prime greater than nchains. Make sure kval is
- * odd (so start with nchains +1 or +2 as appropriate).
- */
- kval = (nchains % 2 == 0) ? nchains + 1 : nchains + 2;
-
- for (;;) {
- prime = 1;
- for (i = 3; i * i <= kval; i += 2) {
- if (kval % i == 0)
- prime = 0;
- }
- if (prime == 1)
- break;
- kval += 2;
- }
- return (kval);
-}
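
Worked examples: mod_hash_iddata_gen(100) starts at kval = 101, which is
prime, and returns it; mod_hash_iddata_gen(7) starts at 9, rejects it
(9 % 3 == 0), and returns 11.
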
-
-mod_hash_t *
-mod_hash_create_idhash(char *name, size_t nchains,
- void (*val_dtor)(mod_hash_val_t))
-{
- uint_t kval = mod_hash_iddata_gen(nchains);
-
- return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor,
- val_dtor, mod_hash_byid, (void *)(uintptr_t)kval,
- mod_hash_idkey_cmp, KM_SLEEP));
-}
-
-void
-mod_hash_destroy_idhash(mod_hash_t *hash)
-{
- ASSERT(hash);
- mod_hash_destroy_hash(hash);
-}
-
-void
-mod_hash_fini(void)
-{
- mutex_destroy(&mh_head_lock);
-
- if (mh_e_cache) {
- kmem_cache_destroy(mh_e_cache);
- mh_e_cache = NULL;
- }
-}
-
-/*
- * mod_hash_init()
- * sets up globals, etc for mod_hash_*
- */
-void
-mod_hash_init(void)
-{
- ASSERT(mh_e_cache == NULL);
- mh_e_cache = kmem_cache_create("mod_hash_entries",
- sizeof (struct mod_hash_entry), 0, NULL, NULL, NULL, NULL,
- NULL, 0);
-
- mutex_init(&mh_head_lock, NULL, MUTEX_DEFAULT, NULL);
-}
-
-/*
- * mod_hash_create_extended()
- * The full-blown hash creation function.
- *
- * notes:
- * nchains - how many hash slots to create. More hash slots will
- * result in shorter hash chains, but will consume
- * slightly more memory up front.
- * sleep - should be KM_SLEEP or KM_NOSLEEP, to indicate whether
- * to sleep for memory, or fail in low-memory conditions.
- *
- * Fails only if KM_NOSLEEP was specified, and no memory was available.
- */
-mod_hash_t *
-mod_hash_create_extended(
- char *hname, /* descriptive name for hash */
- size_t nchains, /* number of hash slots */
- void (*kdtor)(mod_hash_key_t), /* key destructor */
- void (*vdtor)(mod_hash_val_t), /* value destructor */
- uint_t (*hash_alg)(void *, mod_hash_key_t), /* hash algorithm */
- void *hash_alg_data, /* pass-thru arg for hash_alg */
- int (*keycmp)(mod_hash_key_t, mod_hash_key_t), /* key comparator */
- int sleep) /* whether to sleep for mem */
-{
- mod_hash_t *mod_hash;
- size_t size;
- ASSERT(hname && keycmp && hash_alg && vdtor && kdtor);
-
- if ((mod_hash = kmem_zalloc(MH_SIZE(nchains), sleep)) == NULL)
- return (NULL);
-
- size = strlen(hname) + 1;
- mod_hash->mh_name = kmem_alloc(size, sleep);
- if (mod_hash->mh_name == NULL) {
- kmem_free(mod_hash, MH_SIZE(nchains));
- return (NULL);
- }
- (void) strlcpy(mod_hash->mh_name, hname, size);
-
- rw_init(&mod_hash->mh_contents, NULL, RW_DEFAULT, NULL);
- mod_hash->mh_sleep = sleep;
- mod_hash->mh_nchains = nchains;
- mod_hash->mh_kdtor = kdtor;
- mod_hash->mh_vdtor = vdtor;
- mod_hash->mh_hashalg = hash_alg;
- mod_hash->mh_hashalg_data = hash_alg_data;
- mod_hash->mh_keycmp = keycmp;
-
- /*
- * Link the hash up on the list of hashes
- */
- mutex_enter(&mh_head_lock);
- mod_hash->mh_next = mh_head;
- mh_head = mod_hash;
- mutex_exit(&mh_head_lock);
-
- return (mod_hash);
-}
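
A sketch of a fully customized table built on mod_hash_create_extended(),
keyed by 64-bit request IDs cast to pointers (all names hypothetical):

    static uint_t
    req_hash(void *data, mod_hash_key_t key)
    {
	    (void) data;
	    return ((uint_t)((uintptr_t)key >> 3));
    }

    static int
    req_cmp(mod_hash_key_t k1, mod_hash_key_t k2)
    {
	    return (k1 != k2);	/* 0 iff equal; only equality is tested */
    }

    static mod_hash_t *
    make_req_hash(void)
    {
	    return (mod_hash_create_extended("req_hash", 64,
	        mod_hash_null_keydtor, mod_hash_null_valdtor,
	        req_hash, NULL, req_cmp, KM_SLEEP));
    }
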
-
-/*
- * mod_hash_destroy_hash()
- * destroy a hash table, destroying all of its stored keys and values
- * as well.
- */
-void
-mod_hash_destroy_hash(mod_hash_t *hash)
-{
- mod_hash_t *mhp, *mhpp;
-
- mutex_enter(&mh_head_lock);
- /*
- * Remove the hash from the hash list
- */
- if (hash == mh_head) { /* removing 1st list elem */
- mh_head = mh_head->mh_next;
- } else {
- /*
- * mhpp can start out NULL since we know the 1st elem isn't the
- * droid we're looking for.
- */
- mhpp = NULL;
- for (mhp = mh_head; mhp != NULL; mhp = mhp->mh_next) {
- if (mhp == hash) {
- mhpp->mh_next = mhp->mh_next;
- break;
- }
- mhpp = mhp;
- }
- }
- mutex_exit(&mh_head_lock);
-
- /*
- * Clean out keys and values.
- */
- mod_hash_clear(hash);
-
- rw_destroy(&hash->mh_contents);
- kmem_free(hash->mh_name, strlen(hash->mh_name) + 1);
- kmem_free(hash, MH_SIZE(hash->mh_nchains));
-}
-
-/*
- * i_mod_hash()
- * Call the hashing algorithm for this hash table, with the given key.
- */
-uint_t
-i_mod_hash(mod_hash_t *hash, mod_hash_key_t key)
-{
- uint_t h;
- /*
- * Prevent div by 0 problems;
- * Also a nice shortcut when using a hash as a list
- */
- if (hash->mh_nchains == 1)
- return (0);
-
- h = (hash->mh_hashalg)(hash->mh_hashalg_data, key);
- return (h % (hash->mh_nchains - 1));
-}
-
-/*
- * i_mod_hash_insert_nosync()
- * mod_hash_insert()
- * mod_hash_insert_reserve()
- * insert 'val' into the hash table, using 'key' as its key. If 'key' is
- * already a key in the hash, an error will be returned, and the key-val
- * pair will not be inserted. i_mod_hash_insert_nosync() supports a simple
- * handle abstraction, allowing hash entry allocation to be separated from
- * the hash insertion. this abstraction allows simple use of the mod_hash
- * structure in situations where mod_hash_insert() with a KM_SLEEP
- * allocation policy would otherwise be unsafe.
- */
-int
-i_mod_hash_insert_nosync(mod_hash_t *hash, mod_hash_key_t key,
- mod_hash_val_t val, mod_hash_hndl_t handle)
-{
- uint_t hashidx;
- struct mod_hash_entry *entry;
-
- ASSERT(hash);
-
- /*
- * If we've not been given reserved storage, allocate storage directly,
- * using the hash's allocation policy.
- */
- if (handle == (mod_hash_hndl_t)0) {
- entry = kmem_cache_alloc(mh_e_cache, hash->mh_sleep);
- if (entry == NULL) {
- hash->mh_stat.mhs_nomem++;
- return (MH_ERR_NOMEM);
- }
- } else {
- entry = (struct mod_hash_entry *)handle;
- }
-
- hashidx = i_mod_hash(hash, key);
- entry->mhe_key = key;
- entry->mhe_val = val;
- entry->mhe_next = hash->mh_entries[hashidx];
-
- hash->mh_entries[hashidx] = entry;
- hash->mh_stat.mhs_nelems++;
-
- return (0);
-}
-
-int
-mod_hash_insert(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val)
-{
- int res;
- mod_hash_val_t v;
-
- rw_enter(&hash->mh_contents, RW_WRITER);
-
- /*
- * Disallow duplicate keys in the hash
- */
- if (i_mod_hash_find_nosync(hash, key, &v) == 0) {
- rw_exit(&hash->mh_contents);
- hash->mh_stat.mhs_coll++;
- return (MH_ERR_DUPLICATE);
- }
-
- res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0);
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
-
-int
-mod_hash_insert_reserve(mod_hash_t *hash, mod_hash_key_t key,
- mod_hash_val_t val, mod_hash_hndl_t handle)
-{
- int res;
- mod_hash_val_t v;
-
- rw_enter(&hash->mh_contents, RW_WRITER);
-
- /*
- * Disallow duplicate keys in the hash
- */
- if (i_mod_hash_find_nosync(hash, key, &v) == 0) {
- rw_exit(&hash->mh_contents);
- hash->mh_stat.mhs_coll++;
- return (MH_ERR_DUPLICATE);
- }
- res = i_mod_hash_insert_nosync(hash, key, val, handle);
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
-
-/*
- * mod_hash_reserve()
- * mod_hash_reserve_nosleep()
- * mod_hash_cancel()
- * Make or cancel a mod_hash_entry_t reservation. Reservations are used in
- * mod_hash_insert_reserve() above.
- */
-int
-mod_hash_reserve(mod_hash_t *hash, mod_hash_hndl_t *handlep)
-{
- *handlep = kmem_cache_alloc(mh_e_cache, hash->mh_sleep);
- if (*handlep == NULL) {
- hash->mh_stat.mhs_nomem++;
- return (MH_ERR_NOMEM);
- }
-
- return (0);
-}
-
-int
-mod_hash_reserve_nosleep(mod_hash_t *hash, mod_hash_hndl_t *handlep)
-	if (pl == NULL) {
- *handlep = kmem_cache_alloc(mh_e_cache, KM_NOSLEEP);
- if (*handlep == NULL) {
- hash->mh_stat.mhs_nomem++;
- return (MH_ERR_NOMEM);
- }
-
-	return (0);
-}
-
-/*ARGSUSED*/
-void
-mod_hash_cancel(mod_hash_t *hash, mod_hash_hndl_t *handlep)
-{
- kmem_cache_free(mh_e_cache, *handlep);
- *handlep = (mod_hash_hndl_t)0;
-}
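
The reserve calls compose with mod_hash_insert_reserve() as in this sketch
(h, key, and val are hypothetical):

    static int
    example_preallocated_insert(mod_hash_t *h, mod_hash_key_t key,
        mod_hash_val_t val)
    {
	    mod_hash_hndl_t hndl;

	    if (mod_hash_reserve(h, &hndl) != 0)	/* may sleep */
		    return (MH_ERR_NOMEM);

	    /* Later, perhaps where allocation would be unsafe: */
	    if (mod_hash_insert_reserve(h, key, val, hndl) != 0) {
		    mod_hash_cancel(h, &hndl);	/* e.g. duplicate key */
		    return (MH_ERR_DUPLICATE);
	    }
	    return (0);
    }
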
-
-/*
- * i_mod_hash_remove_nosync()
- * mod_hash_remove()
- * Remove an element from the hash table.
- */
-int
-i_mod_hash_remove_nosync(mod_hash_t *hash, mod_hash_key_t key,
- mod_hash_val_t *val)
-{
- int hashidx;
- struct mod_hash_entry *e, *ep;
-
- hashidx = i_mod_hash(hash, key);
- ep = NULL; /* e's parent */
-
- for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) {
- if (MH_KEYCMP(hash, e->mhe_key, key) == 0)
- break;
- ep = e;
- }
-
- if (e == NULL) { /* not found */
- return (MH_ERR_NOTFOUND);
- }
-
- if (ep == NULL) /* special case 1st element in bucket */
- hash->mh_entries[hashidx] = e->mhe_next;
- else
- ep->mhe_next = e->mhe_next;
-
- /*
- * Clean up resources used by the node's key.
- */
- MH_KEY_DESTROY(hash, e->mhe_key);
-
- *val = e->mhe_val;
- kmem_cache_free(mh_e_cache, e);
- hash->mh_stat.mhs_nelems--;
-
- return (0);
-}
-
-int
-mod_hash_remove(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val)
-{
- int res;
-
- rw_enter(&hash->mh_contents, RW_WRITER);
- res = i_mod_hash_remove_nosync(hash, key, val);
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
-
-/*
- * mod_hash_replace()
- * atomically remove an existing key-value pair from a hash, and replace
- * the key and value with the ones supplied. The removed key and value
- * (if any) are destroyed.
- */
-int
-mod_hash_replace(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val)
-{
- int res;
- mod_hash_val_t v;
-
- rw_enter(&hash->mh_contents, RW_WRITER);
-
- if (i_mod_hash_remove_nosync(hash, key, &v) == 0) {
- /*
- * mod_hash_remove() takes care of freeing up the key resources.
- */
- MH_VAL_DESTROY(hash, v);
- }
- res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0);
-
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
-
-/*
- * mod_hash_destroy()
- * Remove an element from the hash table matching 'key', and destroy it.
- */
-int
-mod_hash_destroy(mod_hash_t *hash, mod_hash_key_t key)
-{
- mod_hash_val_t val;
- int rv;
-
- rw_enter(&hash->mh_contents, RW_WRITER);
-
- if ((rv = i_mod_hash_remove_nosync(hash, key, &val)) == 0) {
- /*
- * mod_hash_remove() takes care of freeing up the key resources.
- */
- MH_VAL_DESTROY(hash, val);
- }
-
- rw_exit(&hash->mh_contents);
- return (rv);
-}
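
The difference from mod_hash_remove() is ownership of the value; a sketch
(my_val_free() is hypothetical):

    static void
    example_remove_vs_destroy(mod_hash_t *h, mod_hash_key_t k1,
        mod_hash_key_t k2)
    {
	    mod_hash_val_t v;

	    if (mod_hash_remove(h, k1, &v) == 0)
		    my_val_free(v);		/* caller now owns v */

	    (void) mod_hash_destroy(h, k2);	/* value dtor runs in hash */
    }
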
-
-/*
- * i_mod_hash_find_nosync()
- * mod_hash_find()
- * Find a value in the hash table corresponding to the given key.
- */
-int
-i_mod_hash_find_nosync(mod_hash_t *hash, mod_hash_key_t key,
- mod_hash_val_t *val)
-{
- uint_t hashidx;
- struct mod_hash_entry *e;
-
- hashidx = i_mod_hash(hash, key);
-
- for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) {
- if (MH_KEYCMP(hash, e->mhe_key, key) == 0) {
- *val = e->mhe_val;
- hash->mh_stat.mhs_hit++;
- return (0);
- }
- }
- hash->mh_stat.mhs_miss++;
- return (MH_ERR_NOTFOUND);
-}
-
-int
-mod_hash_find(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val)
-{
- int res;
-
- rw_enter(&hash->mh_contents, RW_READER);
- res = i_mod_hash_find_nosync(hash, key, val);
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
-
-int
-mod_hash_find_cb(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val,
- void (*find_cb)(mod_hash_key_t, mod_hash_val_t))
-{
- int res;
-
- rw_enter(&hash->mh_contents, RW_READER);
- res = i_mod_hash_find_nosync(hash, key, val);
- if (res == 0) {
- find_cb(key, *val);
- }
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
-
-int
-mod_hash_find_cb_rval(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val,
- int (*find_cb)(mod_hash_key_t, mod_hash_val_t), int *cb_rval)
-{
- int res;
-
- rw_enter(&hash->mh_contents, RW_READER);
- res = i_mod_hash_find_nosync(hash, key, val);
- if (res == 0) {
- *cb_rval = find_cb(key, *val);
- }
- rw_exit(&hash->mh_contents);
-
- return (res);
-}
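
A sketch of the reference-counting use mentioned above (my_obj_t is
hypothetical); the hold is taken while mh_contents is still held, closing
the lookup-then-hold race:

    typedef struct my_obj { uint64_t mo_refcnt; } my_obj_t;

    static void
    hold_cb(mod_hash_key_t key, mod_hash_val_t val)
    {
	    (void) key;
	    atomic_inc_64(&((my_obj_t *)val)->mo_refcnt);
    }

    static my_obj_t *
    example_hold(mod_hash_t *h, mod_hash_key_t key)
    {
	    my_obj_t *obj;

	    if (mod_hash_find_cb(h, key, (mod_hash_val_t *)&obj,
	        hold_cb) != 0)
		    return (NULL);
	    return (obj);	/* returned with a hold */
    }
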
-
-void
-i_mod_hash_walk_nosync(mod_hash_t *hash,
- uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg)
-{
- struct mod_hash_entry *e;
- uint_t hashidx;
- int res = MH_WALK_CONTINUE;
-
-	for (hashidx = 0;
-	    (hashidx < hash->mh_nchains) && (res == MH_WALK_CONTINUE);
- hashidx++) {
- e = hash->mh_entries[hashidx];
- while ((e != NULL) && (res == MH_WALK_CONTINUE)) {
- res = callback(e->mhe_key, e->mhe_val, arg);
- e = e->mhe_next;
- }
- }
-}
-
-/*
- * mod_hash_walk()
- * Walks all the elements in the hashtable and invokes the callback
- * function with the key/value pair for each element. The hashtable
- * is locked for readers so the callback function should not attempt
- *	to do any updates to the hashtable. The callback function should
- * return MH_WALK_CONTINUE to continue walking the hashtable or
- * MH_WALK_TERMINATE to abort the walk of the hashtable.
- */
-void
-mod_hash_walk(mod_hash_t *hash,
- uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg)
-{
- rw_enter(&hash->mh_contents, RW_READER);
- i_mod_hash_walk_nosync(hash, callback, arg);
- rw_exit(&hash->mh_contents);
-}
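
A sketch of a read-only walk that counts entries (names hypothetical):

    static uint_t
    count_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
    {
	    (void) key;
	    (void) val;
	    (*(size_t *)arg)++;
	    return (MH_WALK_CONTINUE);
    }

    static size_t
    example_count(mod_hash_t *h)
    {
	    size_t n = 0;

	    mod_hash_walk(h, count_cb, &n);
	    return (n);
    }
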
-
-
-/*
- * i_mod_hash_clear_nosync()
- * mod_hash_clear()
- * Clears the given hash table by calling the destructor of every hash
- * element and freeing up all mod_hash_entry's.
- */
-void
-i_mod_hash_clear_nosync(mod_hash_t *hash)
-{
- int i;
- struct mod_hash_entry *e, *old_e;
-
- for (i = 0; i < hash->mh_nchains; i++) {
- e = hash->mh_entries[i];
- while (e != NULL) {
- MH_KEY_DESTROY(hash, e->mhe_key);
- MH_VAL_DESTROY(hash, e->mhe_val);
- old_e = e;
- e = e->mhe_next;
- kmem_cache_free(mh_e_cache, old_e);
- }
- hash->mh_entries[i] = NULL;
- }
- hash->mh_stat.mhs_nelems = 0;
-}
-
-void
-mod_hash_clear(mod_hash_t *hash)
-{
- ASSERT(hash);
- rw_enter(&hash->mh_contents, RW_WRITER);
- i_mod_hash_clear_nosync(hash);
- rw_exit(&hash->mh_contents);
-}
diff --git a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c
index 34b36b81c0ab..b0af101990ed 100644
--- a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c
+++ b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -36,146 +36,35 @@
#include <sys/crypto/sched_impl.h>
#include <sys/crypto/spi.h>
-/*
- * minalloc and maxalloc values to be used for taskq_create().
- */
-int crypto_taskq_threads = CRYPTO_TASKQ_THREADS;
-int crypto_taskq_minalloc = CRYPTO_TASKQ_MIN;
-int crypto_taskq_maxalloc = CRYPTO_TASKQ_MAX;
-
-static void remove_provider(kcf_provider_desc_t *);
-static void process_logical_providers(crypto_provider_info_t *,
+static int init_prov_mechs(const crypto_provider_info_t *,
kcf_provider_desc_t *);
-static int init_prov_mechs(crypto_provider_info_t *, kcf_provider_desc_t *);
-static int kcf_prov_kstat_update(kstat_t *, int);
-static void delete_kstat(kcf_provider_desc_t *);
-
-static kcf_prov_stats_t kcf_stats_ks_data_template = {
- { "kcf_ops_total", KSTAT_DATA_UINT64 },
- { "kcf_ops_passed", KSTAT_DATA_UINT64 },
- { "kcf_ops_failed", KSTAT_DATA_UINT64 },
- { "kcf_ops_returned_busy", KSTAT_DATA_UINT64 }
-};
-
-#define KCF_SPI_COPY_OPS(src, dst, ops) if ((src)->ops != NULL) \
- *((dst)->ops) = *((src)->ops);
-
-/*
- * Copy an ops vector from src to dst. Used during provider registration
- * to copy the ops vector from the provider info structure to the
- * provider descriptor maintained by KCF.
- * Copying the ops vector specified by the provider is needed since the
- * framework does not require the provider info structure to be
- * persistent.
- */
-static void
-copy_ops_vector_v1(crypto_ops_t *src_ops, crypto_ops_t *dst_ops)
-{
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_control_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_digest_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_cipher_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_mac_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_sign_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_verify_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_dual_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_dual_cipher_mac_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_random_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_session_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_object_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_key_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_provider_ops);
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_ctx_ops);
-}
-
-static void
-copy_ops_vector_v2(crypto_ops_t *src_ops, crypto_ops_t *dst_ops)
-{
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_mech_ops);
-}
-
-static void
-copy_ops_vector_v3(crypto_ops_t *src_ops, crypto_ops_t *dst_ops)
-{
- KCF_SPI_COPY_OPS(src_ops, dst_ops, co_nostore_key_ops);
-}
/*
* This routine is used to add cryptographic providers to the KEF framework.
* Providers pass a crypto_provider_info structure to crypto_register_provider()
* and get back a handle. The crypto_provider_info structure contains a
* list of mechanisms supported by the provider and an ops vector containing
- * provider entry points. Hardware providers call this routine in their attach
- * routines. Software providers call this routine in their _init() routine.
+ * provider entry points. Providers call this routine in their _init() routine.
*/
int
-crypto_register_provider(crypto_provider_info_t *info,
+crypto_register_provider(const crypto_provider_info_t *info,
crypto_kcf_provider_handle_t *handle)
{
- char *ks_name;
-
kcf_provider_desc_t *prov_desc = NULL;
int ret = CRYPTO_ARGUMENTS_BAD;
- if (info->pi_interface_version > CRYPTO_SPI_VERSION_3)
- return (CRYPTO_VERSION_MISMATCH);
-
- /*
- * Check provider type, must be software, hardware, or logical.
- */
- if (info->pi_provider_type != CRYPTO_HW_PROVIDER &&
- info->pi_provider_type != CRYPTO_SW_PROVIDER &&
- info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER)
- return (CRYPTO_ARGUMENTS_BAD);
-
/*
* Allocate and initialize a new provider descriptor. We also
* hold it and release it when done.
*/
- prov_desc = kcf_alloc_provider_desc(info);
+ prov_desc = kcf_alloc_provider_desc();
KCF_PROV_REFHOLD(prov_desc);
- prov_desc->pd_prov_type = info->pi_provider_type;
-
- /* provider-private handle, opaque to KCF */
- prov_desc->pd_prov_handle = info->pi_provider_handle;
-
/* copy provider description string */
- if (info->pi_provider_description != NULL) {
- /*
-		 * pi_provider_description is a string that can contain
- * up to CRYPTO_PROVIDER_DESCR_MAX_LEN + 1 characters
- * INCLUDING the terminating null character. A bcopy()
- * is necessary here as pd_description should not have
- * a null character. See comments in kcf_alloc_provider_desc()
- * for details on pd_description field.
- */
- bcopy(info->pi_provider_description, prov_desc->pd_description,
- MIN(strlen(info->pi_provider_description),
- (size_t)CRYPTO_PROVIDER_DESCR_MAX_LEN));
- }
+ prov_desc->pd_description = info->pi_provider_description;
- if (info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) {
- if (info->pi_ops_vector == NULL) {
- goto bail;
- }
- copy_ops_vector_v1(info->pi_ops_vector,
- prov_desc->pd_ops_vector);
- if (info->pi_interface_version >= CRYPTO_SPI_VERSION_2) {
- copy_ops_vector_v2(info->pi_ops_vector,
- prov_desc->pd_ops_vector);
- prov_desc->pd_flags = info->pi_flags;
- }
- if (info->pi_interface_version == CRYPTO_SPI_VERSION_3) {
- copy_ops_vector_v3(info->pi_ops_vector,
- prov_desc->pd_ops_vector);
- }
- }
-
- /* object_ops and nostore_key_ops are mutually exclusive */
- if (prov_desc->pd_ops_vector->co_object_ops &&
- prov_desc->pd_ops_vector->co_nostore_key_ops) {
- goto bail;
- }
+ /* Change from Illumos: the ops vector is persistent. */
+ prov_desc->pd_ops_vector = info->pi_ops_vector;
/* process the mechanisms supported by the provider */
if ((ret = init_prov_mechs(info, prov_desc)) != CRYPTO_SUCCESS)
@@ -191,86 +80,15 @@ crypto_register_provider(crypto_provider_info_t *info,
}
/*
- * We create a taskq only for a hardware provider. The global
- * software queue is used for software providers. We handle ordering
+ * The global queue is used for providers. We handle ordering
* of multi-part requests in the taskq routine. So, it is safe to
* have multiple threads for the taskq. We pass TASKQ_PREPOPULATE flag
* to keep some entries cached to improve performance.
*/
- if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
- prov_desc->pd_sched_info.ks_taskq = taskq_create("kcf_taskq",
- crypto_taskq_threads, minclsyspri,
- crypto_taskq_minalloc, crypto_taskq_maxalloc,
- TASKQ_PREPOPULATE);
- else
- prov_desc->pd_sched_info.ks_taskq = NULL;
-
- /* no kernel session to logical providers */
- if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
- /*
- * Open a session for session-oriented providers. This session
- * is used for all kernel consumers. This is fine as a provider
- * is required to support multiple thread access to a session.
- * We can do this only after the taskq has been created as we
- * do a kcf_submit_request() to open the session.
- */
- if (KCF_PROV_SESSION_OPS(prov_desc) != NULL) {
- kcf_req_params_t params;
-
- KCF_WRAP_SESSION_OPS_PARAMS(&params,
- KCF_OP_SESSION_OPEN, &prov_desc->pd_sid, 0,
- CRYPTO_USER, NULL, 0, prov_desc);
- ret = kcf_submit_request(prov_desc, NULL, NULL, &params,
- B_FALSE);
-
- if (ret != CRYPTO_SUCCESS) {
- undo_register_provider(prov_desc, B_TRUE);
- ret = CRYPTO_FAILED;
- goto bail;
- }
- }
- }
-
- if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
- /*
- * Create the kstat for this provider. There is a kstat
- * installed for each successfully registered provider.
- * This kstat is deleted, when the provider unregisters.
- */
- if (prov_desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
- ks_name = kmem_asprintf("%s_%s",
- "NONAME", "provider_stats");
- } else {
- ks_name = kmem_asprintf("%s_%d_%u_%s",
- "NONAME", 0, prov_desc->pd_prov_id,
- "provider_stats");
- }
-
- prov_desc->pd_kstat = kstat_create("kcf", 0, ks_name, "crypto",
- KSTAT_TYPE_NAMED, sizeof (kcf_prov_stats_t) /
- sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
-
- if (prov_desc->pd_kstat != NULL) {
- bcopy(&kcf_stats_ks_data_template,
- &prov_desc->pd_ks_data,
- sizeof (kcf_stats_ks_data_template));
- prov_desc->pd_kstat->ks_data = &prov_desc->pd_ks_data;
- KCF_PROV_REFHOLD(prov_desc);
- KCF_PROV_IREFHOLD(prov_desc);
- prov_desc->pd_kstat->ks_private = prov_desc;
- prov_desc->pd_kstat->ks_update = kcf_prov_kstat_update;
- kstat_install(prov_desc->pd_kstat);
- }
- kmem_strfree(ks_name);
- }
-
- if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
- process_logical_providers(info, prov_desc);
mutex_enter(&prov_desc->pd_lock);
prov_desc->pd_state = KCF_PROV_READY;
mutex_exit(&prov_desc->pd_lock);
- kcf_do_notify(prov_desc, B_TRUE);
*handle = prov_desc->pd_kcf_prov_handle;
ret = CRYPTO_SUCCESS;
@@ -282,8 +100,7 @@ bail:
/*
* This routine is used to notify the framework when a provider is being
- * removed. Hardware providers call this routine in their detach routines.
- * Software providers call this routine in their _fini() routine.
+ * removed. Providers call this routine in their _fini() routine.
*/
int
crypto_unregister_provider(crypto_kcf_provider_handle_t handle)
@@ -311,46 +128,30 @@ crypto_unregister_provider(crypto_kcf_provider_handle_t handle)
saved_state = desc->pd_state;
desc->pd_state = KCF_PROV_REMOVED;
- if (saved_state == KCF_PROV_BUSY) {
- /*
- * The per-provider taskq threads may be waiting. We
- * signal them so that they can start failing requests.
- */
- cv_broadcast(&desc->pd_resume_cv);
- }
-
- if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
+ /*
+ * Check if this provider is currently being used.
+ * pd_irefcnt is the number of holds from the internal
+ * structures. We add one to account for the above lookup.
+ */
+ if (desc->pd_refcnt > desc->pd_irefcnt + 1) {
+ desc->pd_state = saved_state;
+ mutex_exit(&desc->pd_lock);
+ /* Release reference held by kcf_prov_tab_lookup(). */
+ KCF_PROV_REFRELE(desc);
/*
- * Check if this provider is currently being used.
- * pd_irefcnt is the number of holds from the internal
- * structures. We add one to account for the above lookup.
+ * The administrator will presumably stop the clients,
+ * thus removing the holds, when they get the busy
+ * return value. Any retry will succeed then.
*/
- if (desc->pd_refcnt > desc->pd_irefcnt + 1) {
- desc->pd_state = saved_state;
- mutex_exit(&desc->pd_lock);
- /* Release reference held by kcf_prov_tab_lookup(). */
- KCF_PROV_REFRELE(desc);
- /*
- * The administrator presumably will stop the clients
- * thus removing the holds, when they get the busy
- * return value. Any retry will succeed then.
- */
- return (CRYPTO_BUSY);
- }
+ return (CRYPTO_BUSY);
}
mutex_exit(&desc->pd_lock);
- if (desc->pd_prov_type != CRYPTO_SW_PROVIDER) {
- remove_provider(desc);
- }
-
- if (desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
- /* remove the provider from the mechanisms tables */
- for (mech_idx = 0; mech_idx < desc->pd_mech_list_count;
- mech_idx++) {
- kcf_remove_mech_provider(
- desc->pd_mechanisms[mech_idx].cm_mech_name, desc);
- }
+ /* remove the provider from the mechanisms tables */
+ for (mech_idx = 0; mech_idx < desc->pd_mech_list_count;
+ mech_idx++) {
+ kcf_remove_mech_provider(
+ desc->pd_mechanisms[mech_idx].cm_mech_name, desc);
}
/* remove provider from providers table */
@@ -361,228 +162,46 @@ crypto_unregister_provider(crypto_kcf_provider_handle_t handle)
return (CRYPTO_UNKNOWN_PROVIDER);
}
- delete_kstat(desc);
+ /* Release reference held by kcf_prov_tab_lookup(). */
+ KCF_PROV_REFRELE(desc);
- if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
- /* Release reference held by kcf_prov_tab_lookup(). */
- KCF_PROV_REFRELE(desc);
-
- /*
- * Wait till the existing requests complete.
- */
- mutex_enter(&desc->pd_lock);
- while (desc->pd_state != KCF_PROV_FREED)
- cv_wait(&desc->pd_remove_cv, &desc->pd_lock);
- mutex_exit(&desc->pd_lock);
- } else {
- /*
- * Wait until requests that have been sent to the provider
- * complete.
- */
- mutex_enter(&desc->pd_lock);
- while (desc->pd_irefcnt > 0)
- cv_wait(&desc->pd_remove_cv, &desc->pd_lock);
- mutex_exit(&desc->pd_lock);
- }
-
- kcf_do_notify(desc, B_FALSE);
+ /*
+ * Wait till the existing requests complete.
+ */
+ mutex_enter(&desc->pd_lock);
+ while (desc->pd_state != KCF_PROV_FREED)
+ cv_wait(&desc->pd_remove_cv, &desc->pd_lock);
+ mutex_exit(&desc->pd_lock);
- if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
- /*
- * This is the only place where kcf_free_provider_desc()
- * is called directly. KCF_PROV_REFRELE() should free the
- * structure in all other places.
- */
- ASSERT(desc->pd_state == KCF_PROV_FREED &&
- desc->pd_refcnt == 0);
- kcf_free_provider_desc(desc);
- } else {
- KCF_PROV_REFRELE(desc);
- }
+ /*
+ * This is the only place where kcf_free_provider_desc()
+ * is called directly. KCF_PROV_REFRELE() should free the
+ * structure in all other places.
+ */
+ ASSERT(desc->pd_state == KCF_PROV_FREED &&
+ desc->pd_refcnt == 0);
+ kcf_free_provider_desc(desc);
return (CRYPTO_SUCCESS);
}
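
Under the simplified registration path above, a provider skeleton might look
like the sketch below.  Because KCF now keeps pointers to the ops vector and
mechanism list rather than copying them, both must be static data that
outlives registration; every example_* identifier is hypothetical:

    static crypto_ops_t example_crypto_ops;		/* filled in elsewhere */
    static crypto_mech_info_t example_mech_info_tab[1];	/* likewise */
    static crypto_kcf_provider_handle_t example_prov_handle;

    int
    example_provider_init(void)
    {
	    static const crypto_provider_info_t info = {
		    .pi_provider_description = "example provider",
		    .pi_ops_vector = &example_crypto_ops,
		    .pi_mechanisms = example_mech_info_tab,
		    .pi_mech_list_count = 1,
	    };

	    if (crypto_register_provider(&info, &example_prov_handle) !=
	        CRYPTO_SUCCESS)
		    return (EIO);
	    return (0);
    }

    int
    example_provider_fini(void)
    {
	    int ret = crypto_unregister_provider(example_prov_handle);

	    return (ret == CRYPTO_SUCCESS ? 0 :
	        (ret == CRYPTO_BUSY ? EBUSY : EIO));
    }
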
/*
- * This routine is used to notify the framework that the state of
- * a cryptographic provider has changed. Valid state codes are:
- *
- * CRYPTO_PROVIDER_READY
- * The provider indicates that it can process more requests. A provider
- * will notify with this event if it previously has notified us with a
- * CRYPTO_PROVIDER_BUSY.
- *
- * CRYPTO_PROVIDER_BUSY
- * The provider can not take more requests.
- *
- * CRYPTO_PROVIDER_FAILED
- * The provider encountered an internal error. The framework will not
- * be sending any more requests to the provider. The provider may notify
- * with a CRYPTO_PROVIDER_READY, if it is able to recover from the error.
- *
- * This routine can be called from user or interrupt context.
- */
-void
-crypto_provider_notification(crypto_kcf_provider_handle_t handle, uint_t state)
-{
- kcf_provider_desc_t *pd;
-
- /* lookup the provider from the given handle */
- if ((pd = kcf_prov_tab_lookup((crypto_provider_id_t)handle)) == NULL)
- return;
-
- mutex_enter(&pd->pd_lock);
-
- if (pd->pd_state <= KCF_PROV_VERIFICATION_FAILED)
- goto out;
-
- if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- cmn_err(CE_WARN, "crypto_provider_notification: "
- "logical provider (%x) ignored\n", handle);
- goto out;
- }
- switch (state) {
- case CRYPTO_PROVIDER_READY:
- switch (pd->pd_state) {
- case KCF_PROV_BUSY:
- pd->pd_state = KCF_PROV_READY;
- /*
- * Signal the per-provider taskq threads that they
- * can start submitting requests.
- */
- cv_broadcast(&pd->pd_resume_cv);
- break;
-
- case KCF_PROV_FAILED:
- /*
- * The provider recovered from the error. Let us
- * use it now.
- */
- pd->pd_state = KCF_PROV_READY;
- break;
- default:
- break;
- }
- break;
-
- case CRYPTO_PROVIDER_BUSY:
- switch (pd->pd_state) {
- case KCF_PROV_READY:
- pd->pd_state = KCF_PROV_BUSY;
- break;
- default:
- break;
- }
- break;
-
- case CRYPTO_PROVIDER_FAILED:
- /*
- * We note the failure and return. The per-provider taskq
- * threads check this flag and start failing the
- * requests, if it is set. See process_req_hwp() for details.
- */
- switch (pd->pd_state) {
- case KCF_PROV_READY:
- pd->pd_state = KCF_PROV_FAILED;
- break;
-
- case KCF_PROV_BUSY:
- pd->pd_state = KCF_PROV_FAILED;
- /*
- * The per-provider taskq threads may be waiting. We
- * signal them so that they can start failing requests.
- */
- cv_broadcast(&pd->pd_resume_cv);
- break;
- default:
- break;
- }
- break;
- default:
- break;
- }
-out:
- mutex_exit(&pd->pd_lock);
- KCF_PROV_REFRELE(pd);
-}
-
-/*
- * This routine is used to notify the framework the result of
- * an asynchronous request handled by a provider. Valid error
- * codes are the same as the CRYPTO_* errors defined in common.h.
- *
- * This routine can be called from user or interrupt context.
- */
-void
-crypto_op_notification(crypto_req_handle_t handle, int error)
-{
- kcf_call_type_t ctype;
-
- if (handle == NULL)
- return;
-
- if ((ctype = GET_REQ_TYPE(handle)) == CRYPTO_SYNCH) {
- kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)handle;
-
- if (error != CRYPTO_SUCCESS)
- sreq->sn_provider->pd_sched_info.ks_nfails++;
- KCF_PROV_IREFRELE(sreq->sn_provider);
- kcf_sop_done(sreq, error);
- } else {
- kcf_areq_node_t *areq = (kcf_areq_node_t *)handle;
-
- ASSERT(ctype == CRYPTO_ASYNCH);
- if (error != CRYPTO_SUCCESS)
- areq->an_provider->pd_sched_info.ks_nfails++;
- KCF_PROV_IREFRELE(areq->an_provider);
- kcf_aop_done(areq, error);
- }
-}
-
-/*
- * This routine is used by software providers to determine
- * whether to use KM_SLEEP or KM_NOSLEEP during memory allocation.
- * Note that hardware providers can always use KM_SLEEP. So,
- * they do not need to call this routine.
- *
- * This routine can be called from user or interrupt context.
- */
-int
-crypto_kmflag(crypto_req_handle_t handle)
-{
- return (REQHNDL2_KMFLAG(handle));
-}
-
-/*
* Process the mechanism info structures specified by the provider
* during registration. A NULL crypto_provider_info_t indicates
* an already initialized provider descriptor.
*
- * Mechanisms are not added to the kernel's mechanism table if the
- * provider is a logical provider.
- *
 * Returns CRYPTO_SUCCESS on success, CRYPTO_ARGUMENTS_BAD if one
* of the specified mechanisms was malformed, or CRYPTO_HOST_MEMORY
* if the table of mechanisms is full.
*/
static int
-init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc)
+init_prov_mechs(const crypto_provider_info_t *info, kcf_provider_desc_t *desc)
{
uint_t mech_idx;
uint_t cleanup_idx;
int err = CRYPTO_SUCCESS;
kcf_prov_mech_desc_t *pmd;
int desc_use_count = 0;
- int mcount = desc->pd_mech_list_count;
-
- if (desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
- if (info != NULL) {
- ASSERT(info->pi_mechanisms != NULL);
- bcopy(info->pi_mechanisms, desc->pd_mechanisms,
- sizeof (crypto_mech_info_t) * mcount);
- }
- return (CRYPTO_SUCCESS);
- }
/*
* Copy the mechanism list from the provider info to the provider
@@ -591,29 +210,9 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc)
* mechanism, SUN_RANDOM, in this case.
*/
if (info != NULL) {
- if (info->pi_ops_vector->co_random_ops != NULL) {
- crypto_mech_info_t *rand_mi;
-
- /*
- * Need the following check as it is possible to have
- * a provider that implements just random_ops and has
- * pi_mechanisms == NULL.
- */
- if (info->pi_mechanisms != NULL) {
- bcopy(info->pi_mechanisms, desc->pd_mechanisms,
- sizeof (crypto_mech_info_t) * (mcount - 1));
- }
- rand_mi = &desc->pd_mechanisms[mcount - 1];
-
- bzero(rand_mi, sizeof (crypto_mech_info_t));
- (void) strncpy(rand_mi->cm_mech_name, SUN_RANDOM,
- CRYPTO_MAX_MECH_NAME);
- rand_mi->cm_func_group_mask = CRYPTO_FG_RANDOM;
- } else {
- ASSERT(info->pi_mechanisms != NULL);
- bcopy(info->pi_mechanisms, desc->pd_mechanisms,
- sizeof (crypto_mech_info_t) * mcount);
- }
+ ASSERT(info->pi_mechanisms != NULL);
+ desc->pd_mech_list_count = info->pi_mech_list_count;
+ desc->pd_mechanisms = info->pi_mechanisms;
}
/*
@@ -621,32 +220,6 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc)
* to the corresponding KCF mechanism mech_entry chain.
*/
for (mech_idx = 0; mech_idx < desc->pd_mech_list_count; mech_idx++) {
- crypto_mech_info_t *mi = &desc->pd_mechanisms[mech_idx];
-
- if ((mi->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BITS) &&
- (mi->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES)) {
- err = CRYPTO_ARGUMENTS_BAD;
- break;
- }
-
- if (desc->pd_flags & CRYPTO_HASH_NO_UPDATE &&
- mi->cm_func_group_mask & CRYPTO_FG_DIGEST) {
- /*
- * We ask the provider to specify the limit
- * per hash mechanism. But, in practice, a
- * hardware limitation means all hash mechanisms
- * will have the same maximum size allowed for
- * input data. So, we make it a per provider
- * limit to keep it simple.
- */
- if (mi->cm_max_input_length == 0) {
- err = CRYPTO_ARGUMENTS_BAD;
- break;
- } else {
- desc->pd_hash_limit = mi->cm_max_input_length;
- }
- }
-
if ((err = kcf_add_mech_provider(mech_idx, desc, &pmd)) !=
KCF_SUCCESS)
break;
@@ -659,12 +232,12 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc)
}
/*
- * Don't allow multiple software providers with disabled mechanisms
+ * Don't allow multiple providers with disabled mechanisms
* to register. Subsequent enabling of mechanisms will result in
- * an unsupported configuration, i.e. multiple software providers
+ * an unsupported configuration, i.e. multiple providers
* per mechanism.
*/
- if (desc_use_count == 0 && desc->pd_prov_type == CRYPTO_SW_PROVIDER)
+ if (desc_use_count == 0)
return (CRYPTO_ARGUMENTS_BAD);
if (err == KCF_SUCCESS)
@@ -686,35 +259,6 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc)
}
/*
- * Update routine for kstat. Only privileged users are allowed to
- * access this information, since this information is sensitive.
- * There are some cryptographic attacks (e.g. traffic analysis)
- * which can use this information.
- */
-static int
-kcf_prov_kstat_update(kstat_t *ksp, int rw)
-{
- kcf_prov_stats_t *ks_data;
- kcf_provider_desc_t *pd = (kcf_provider_desc_t *)ksp->ks_private;
-
- if (rw == KSTAT_WRITE)
- return (EACCES);
-
- ks_data = ksp->ks_data;
-
- ks_data->ps_ops_total.value.ui64 = pd->pd_sched_info.ks_ndispatches;
- ks_data->ps_ops_failed.value.ui64 = pd->pd_sched_info.ks_nfails;
- ks_data->ps_ops_busy_rval.value.ui64 = pd->pd_sched_info.ks_nbusy_rval;
- ks_data->ps_ops_passed.value.ui64 =
- pd->pd_sched_info.ks_ndispatches -
- pd->pd_sched_info.ks_nfails -
- pd->pd_sched_info.ks_nbusy_rval;
-
- return (0);
-}
-
-
-/*
* Utility routine called from failure paths in crypto_register_provider()
* and from crypto_load_soft_disabled().
*/
@@ -734,192 +278,3 @@ undo_register_provider(kcf_provider_desc_t *desc, boolean_t remove_prov)
if (remove_prov)
(void) kcf_prov_tab_rem_provider(desc->pd_prov_id);
}
-
-/*
- * Utility routine called from crypto_load_soft_disabled(). Callers
- * should have done a prior undo_register_provider().
- */
-void
-redo_register_provider(kcf_provider_desc_t *pd)
-{
- /* process the mechanisms supported by the provider */
- (void) init_prov_mechs(NULL, pd);
-
- /*
- * Hold provider in providers table. We should not call
- * kcf_prov_tab_add_provider() here as the provider descriptor
- * is still valid which means it has an entry in the provider
- * table.
- */
- KCF_PROV_REFHOLD(pd);
- KCF_PROV_IREFHOLD(pd);
-}
-
-/*
- * Add provider (p1) to another provider's array of providers (p2).
- * Hardware and logical providers use this array to cross-reference
- * each other.
- */
-static void
-add_provider_to_array(kcf_provider_desc_t *p1, kcf_provider_desc_t *p2)
-{
- kcf_provider_list_t *new;
-
- new = kmem_alloc(sizeof (kcf_provider_list_t), KM_SLEEP);
- mutex_enter(&p2->pd_lock);
- new->pl_next = p2->pd_provider_list;
- p2->pd_provider_list = new;
- KCF_PROV_IREFHOLD(p1);
- new->pl_provider = p1;
- mutex_exit(&p2->pd_lock);
-}
-
-/*
- * Remove provider (p1) from another provider's array of providers (p2).
- * Hardware and logical providers use this array to cross-reference
- * each other.
- */
-static void
-remove_provider_from_array(kcf_provider_desc_t *p1, kcf_provider_desc_t *p2)
-{
-
- kcf_provider_list_t *pl = NULL, **prev;
-
- mutex_enter(&p2->pd_lock);
- for (pl = p2->pd_provider_list, prev = &p2->pd_provider_list;
- pl != NULL; prev = &pl->pl_next, pl = pl->pl_next) {
- if (pl->pl_provider == p1) {
- break;
- }
- }
-
- if (p1 == NULL) {
- mutex_exit(&p2->pd_lock);
- return;
- }
-
- /* detach and free kcf_provider_list structure */
- KCF_PROV_IREFRELE(p1);
- *prev = pl->pl_next;
- kmem_free(pl, sizeof (*pl));
- mutex_exit(&p2->pd_lock);
-}
-
-/*
- * Convert an array of logical provider handles (crypto_provider_id)
- * stored in a crypto_provider_info structure into an array of provider
- * descriptors (kcf_provider_desc_t) attached to a logical provider.
- */
-static void
-process_logical_providers(crypto_provider_info_t *info, kcf_provider_desc_t *hp)
-{
- kcf_provider_desc_t *lp;
- crypto_provider_id_t handle;
- int count = info->pi_logical_provider_count;
- int i;
-
- /* add hardware provider to each logical provider */
- for (i = 0; i < count; i++) {
- handle = info->pi_logical_providers[i];
- lp = kcf_prov_tab_lookup((crypto_provider_id_t)handle);
- if (lp == NULL) {
- continue;
- }
- add_provider_to_array(hp, lp);
- hp->pd_flags |= KCF_LPROV_MEMBER;
-
- /*
- * A hardware provider has to have the provider descriptor of
- * every logical provider it belongs to, so it can be removed
- * from the logical provider if the hardware provider
- * unregisters from the framework.
- */
- add_provider_to_array(lp, hp);
- KCF_PROV_REFRELE(lp);
- }
-}
-
-/*
- * This routine removes a provider from all of the logical or
- * hardware providers it belongs to, and frees the provider's
- * array of pointers to providers.
- */
-static void
-remove_provider(kcf_provider_desc_t *pp)
-{
- kcf_provider_desc_t *p;
- kcf_provider_list_t *e, *next;
-
- mutex_enter(&pp->pd_lock);
- for (e = pp->pd_provider_list; e != NULL; e = next) {
- p = e->pl_provider;
- remove_provider_from_array(pp, p);
- if (p->pd_prov_type == CRYPTO_HW_PROVIDER &&
- p->pd_provider_list == NULL)
- p->pd_flags &= ~KCF_LPROV_MEMBER;
- KCF_PROV_IREFRELE(p);
- next = e->pl_next;
- kmem_free(e, sizeof (*e));
- }
- pp->pd_provider_list = NULL;
- mutex_exit(&pp->pd_lock);
-}
-
-/*
- * Dispatch events as needed for a provider. is_added flag tells
- * whether the provider is registering or unregistering.
- */
-void
-kcf_do_notify(kcf_provider_desc_t *prov_desc, boolean_t is_added)
-{
- int i;
- crypto_notify_event_change_t ec;
-
- ASSERT(prov_desc->pd_state > KCF_PROV_VERIFICATION_FAILED);
-
- /*
- * Inform interested clients of the mechanisms becoming
- * available/unavailable. We skip this for logical providers
- * as they do not affect mechanisms.
- */
- if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
- ec.ec_provider_type = prov_desc->pd_prov_type;
- ec.ec_change = is_added ? CRYPTO_MECH_ADDED :
- CRYPTO_MECH_REMOVED;
- for (i = 0; i < prov_desc->pd_mech_list_count; i++) {
- (void) strlcpy(ec.ec_mech_name,
- prov_desc->pd_mechanisms[i].cm_mech_name,
- CRYPTO_MAX_MECH_NAME);
- kcf_walk_ntfylist(CRYPTO_EVENT_MECHS_CHANGED, &ec);
- }
-
- }
-
- /*
- * Inform interested clients about the new or departing provider.
- * In case of a logical provider, we need to notify the event only
- * for the logical provider and not for the underlying
- * providers which are known by the KCF_LPROV_MEMBER bit.
- */
- if (prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER ||
- (prov_desc->pd_flags & KCF_LPROV_MEMBER) == 0) {
- kcf_walk_ntfylist(is_added ? CRYPTO_EVENT_PROVIDER_REGISTERED :
- CRYPTO_EVENT_PROVIDER_UNREGISTERED, prov_desc);
- }
-}
-
-static void
-delete_kstat(kcf_provider_desc_t *desc)
-{
- /* destroy the kstat created for this provider */
- if (desc->pd_kstat != NULL) {
- kcf_provider_desc_t *kspd = desc->pd_kstat->ks_private;
-
- /* release reference held by desc->pd_kstat->ks_private */
- ASSERT(desc == kspd);
- kstat_delete(kspd->pd_kstat);
- desc->pd_kstat = NULL;
- KCF_PROV_REFRELE(kspd);
- KCF_PROV_IREFRELE(kspd);
- }
-}